From scoder at codespeak.net Wed Mar 1 09:48:19 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 1 09:48:22 2006 Subject: [Lxml-checkins] r23821 - in lxml/trunk/src/lxml: . tests Message-ID: <20060301084819.E491810077@code0.codespeak.net> Author: scoder Date: Wed Mar 1 09:48:17 2006 New Revision: 23821 Added: lxml/trunk/src/lxml/tests/test_parser.py Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/tests/common_imports.py lxml/trunk/src/lxml/tests/test_io.py Log: new parser API: uses XMLParser class constructed with keyword arguments to setup libxml2 parse options Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Mar 1 09:48:17 2006 @@ -82,23 +82,20 @@ cdef _Document _parseDocument(source, parser): cdef xmlDoc* c_doc - - # XXX ignore parser !! - # XXX simplistic (c)StringIO support if hasattr(source, 'getvalue'): - c_doc = theParser.parseDoc(source.getvalue()) + c_doc = theParser.parseDoc(source.getvalue(), parser) else: filename = _getFilenameForFile(source) # Support for unamed file-like object (eg urlgrabber.urlopen) if not filename and hasattr(source, 'read'): - c_doc = theParser.parseDoc(source.read()) + c_doc = theParser.parseDoc(source.read(), parser) # Otherwise parse the file directly from the filesystem else: if filename is None: filename = source # open filename - c_doc = theParser.parseDocFromFile(filename) + c_doc = theParser.parseDocFromFile(filename, parser) if c_doc is NULL: return None else: @@ -995,7 +992,7 @@ else: # XXX read XML into memory not the fastest way to do this data = file.read() - doc = _documentFactory( theParser.parseDoc(data) ) + doc = _documentFactory( theParser.parseDoc(data, None) ) else: doc = _documentFactory( theParser.newDoc() ) @@ -1014,7 +1011,7 @@ cdef xmlDoc* c_doc if isinstance(text, unicode): text = 
_stripDeclaration(text.encode('UTF-8')) - c_doc = theParser.parseDoc(text) + c_doc = theParser.parseDoc(text, None) return _documentFactory(c_doc).getroot() fromstring = XML Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Wed Mar 1 09:48:17 2006 @@ -6,9 +6,67 @@ class XMLSyntaxError(LxmlSyntaxError): pass -cdef int _getParseOptions(): - return (xmlparser.XML_PARSE_NOENT | xmlparser.XML_PARSE_NOCDATA | - xmlparser.XML_PARSE_NOWARNING | xmlparser.XML_PARSE_NOERROR) +cdef int _DEFAULT_PARSE_OPTIONS +_DEFAULT_PARSE_OPTIONS = ( + xmlparser.XML_PARSE_NOENT | + xmlparser.XML_PARSE_NOCDATA | + xmlparser.XML_PARSE_NOWARNING | + xmlparser.XML_PARSE_NOERROR + ) + +cdef int _ORIG_DEFAULT_PARSE_OPTIONS +_ORIG_DEFAULT_PARSE_OPTIONS = _DEFAULT_PARSE_OPTIONS + + +cdef class XMLParser: + """The XML parser. Parsers can be supplied as additional argument to + various parse functions of the lxml API. A default parser is always + available and can be replaced by a call to the global function + 'set_default_parser'. New parsers can be created at any time without a + major run-time overhead. + + The keyword arguments in the constructor are mainly based on the libxml2 + parser configuration. The 'from_parser' keyword additionally allows to + provide a parser whose configurations is copied before applying the + additional arguments. Note that DTD validation obviously implies loading + the DTD. 
+ """ + cdef int _parse_options + def __init__(self, load_dtd=False, validate_dtd=False, no_network=False, + ns_clean=False, from_parser=None): + cdef int parse_options + if from_parser is not None: + parse_options = from_parser._parse_options + else: + parse_options = _ORIG_DEFAULT_PARSE_OPTIONS + + if validate_dtd: + parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD | \ + xmlparser.XML_PARSE_DTDVALID + if load_dtd: + parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD | \ + xmlparser.XML_PARSE_DTDATTR + if no_blanks: + parse_options = parse_options | xmlparser.XML_PARSE_NOBLANKS + if no_network: + parse_options = parse_options | xmlparser.XML_PARSE_NONET + if ns_clean: + parse_options = parse_options | xmlparser.XML_PARSE_NSCLEAN + + self._parse_options = parse_options + + +def set_default_parser(parser=None): + """Set a default XMLParser. This parser is used globally whenever no + parser is supplied to the various parse functions of the lxml API. If + this function is called without a parser (or if it is None), the default + parser is reset to the original configuration. + """ + if parser is not None: + _DEFAULT_PARSE_OPTIONS = (parser)._parse_options + else: + _DEFAULT_PARSE_OPTIONS = _ORIG_DEFAULT_PARSE_OPTIONS + cdef class Parser: @@ -25,12 +83,18 @@ #print "freeing dictionary (cleanup parser)" xmlparser.xmlDictFree(self._c_dict) - cdef xmlDoc* parseDoc(self, text) except NULL: + cdef xmlDoc* parseDoc(self, text, parser) except NULL: """Parse document, share dictionary if possible. 
""" cdef xmlDoc* result cdef xmlParserCtxt* pctxt cdef int parse_error + + if parser is not None: + parse_options = (parser)._parse_options + else: + parse_options = _DEFAULT_PARSE_OPTIONS + self._initParse() pctxt = xmlparser.xmlCreateDocParserCtxt(text) if pctxt is NULL: @@ -39,7 +103,7 @@ self._prepareParse(pctxt) xmlparser.xmlCtxtUseOptions( pctxt, - _getParseOptions()) + parse_options) parse_error = xmlparser.xmlParseDocument(pctxt) # in case of errors, clean up context plus any document if parse_error != 0 or not pctxt.wellFormed: @@ -53,17 +117,23 @@ xmlparser.xmlFreeParserCtxt(pctxt) return result - cdef xmlDoc* parseDocFromFile(self, char* filename) except NULL: + cdef xmlDoc* parseDocFromFile(self, char* filename, parser) except NULL: + cdef int parse_options cdef xmlDoc* result cdef xmlParserCtxt* pctxt + if parser is not None: + parse_options = (parser)._parse_options + else: + parse_options = _DEFAULT_PARSE_OPTIONS + self._initParse() pctxt = xmlparser.xmlNewParserCtxt() self._prepareParse(pctxt) # XXX set options twice? 
needed to shut up libxml2 - xmlparser.xmlCtxtUseOptions(pctxt, _getParseOptions()) + xmlparser.xmlCtxtUseOptions(pctxt, parse_options) result = xmlparser.xmlCtxtReadFile(pctxt, filename, - NULL, _getParseOptions()) + NULL, parse_options) if result is NULL: if pctxt.lastError.domain == xmlerror.XML_FROM_IO: raise IOError, "Could not open file %s" % filename Modified: lxml/trunk/src/lxml/tests/common_imports.py ============================================================================== --- lxml/trunk/src/lxml/tests/common_imports.py (original) +++ lxml/trunk/src/lxml/tests/common_imports.py Wed Mar 1 09:48:17 2006 @@ -17,6 +17,17 @@ def _rootstring(self, tree): return etree.tostring(tree.getroot()).replace(' ', '').replace('\n', '') +class SillyFileLike: + def __init__(self, xml_data=''): + self.xml_data = xml_data + self.done = False + + def read(self, amount=None): + if not self.done: + self.done = True + return self.xml_data + return '' + def fileInTestDir(name): _testdir = os.path.split(__file__)[0] return os.path.join(_testdir, name) Modified: lxml/trunk/src/lxml/tests/test_io.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_io.py (original) +++ lxml/trunk/src/lxml/tests/test_io.py Wed Mar 1 09:48:17 2006 @@ -7,7 +7,7 @@ import unittest import tempfile, gzip -from common_imports import etree, ElementTree, fileInTestDir +from common_imports import etree, ElementTree, fileInTestDir, SillyFileLike class IOTestCaseBase(unittest.TestCase): """(c)ElementTree compatibility for IO functions/methods @@ -84,15 +84,6 @@ root = self.etree.ElementTree().parse(f) self.assert_(root.tag.endswith('foo')) -class SillyFileLike: - def __init__(self): - self.done = False - - def read(self, amount=None): - if not self.done: - self.done = True - return '' - return '' class ETreeIOTestCase(IOTestCaseBase): etree = etree Added: lxml/trunk/src/lxml/tests/test_parser.py 
============================================================================== --- (empty file) +++ lxml/trunk/src/lxml/tests/test_parser.py Wed Mar 1 09:48:17 2006 @@ -0,0 +1,41 @@ +# -*- coding: UTF-8 -*- + +""" +Tests specific to the parser API +""" + + +import unittest, doctest + +from StringIO import StringIO +import os, shutil, tempfile, copy +import gzip +import urllib2 + +from common_imports import etree, HelperTestCase, canonicalize, SillyFileLike + +class ETreeParserTestCase(HelperTestCase): + def test_parse_options(self): + xml = '' + strip_xml = '' + + f = SillyFileLike(xml) + parser = etree.XMLParser(ns_clean=False) + root = etree.ElementTree().parse(f, parser) + self.assertEqual(etree.tostring(root), xml) + + f = SillyFileLike(xml) + parser = etree.XMLParser(ns_clean=True) + root = etree.ElementTree().parse(f, parser) + self.assertEqual(etree.tostring(root), strip_xml) + + +def test_suite(): + suite = unittest.TestSuite() + suite.addTests([unittest.makeSuite(ETreeParserTestCase)]) +# suite.addTests( +# [doctest.DocFileSuite('../../../doc/parser.txt')]) + return suite + +if __name__ == '__main__': + unittest.main() From scoder at codespeak.net Wed Mar 1 09:50:50 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 1 09:50:51 2006 Subject: [Lxml-checkins] r23822 - lxml/trunk/src/lxml Message-ID: <20060301085050.BAC0610077@code0.codespeak.net> Author: scoder Date: Wed Mar 1 09:50:49 2006 New Revision: 23822 Modified: lxml/trunk/src/lxml/parser.pxi Log: removed no_blanks parser option - not considered useful Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Wed Mar 1 09:50:49 2006 @@ -46,8 +46,6 @@ if load_dtd: parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD | \ xmlparser.XML_PARSE_DTDATTR - if no_blanks: - parse_options = parse_options | xmlparser.XML_PARSE_NOBLANKS if 
no_network: parse_options = parse_options | xmlparser.XML_PARSE_NONET if ns_clean: From scoder at codespeak.net Wed Mar 1 09:56:41 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 1 09:56:42 2006 Subject: [Lxml-checkins] r23823 - in lxml/branch/scoder2/src/lxml: . tests Message-ID: <20060301085641.9344710074@code0.codespeak.net> Author: scoder Date: Wed Mar 1 09:56:39 2006 New Revision: 23823 Added: lxml/branch/scoder2/src/lxml/tests/test_parser.py - copied unchanged from r23822, lxml/trunk/src/lxml/tests/test_parser.py Modified: lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/parser.pxi lxml/branch/scoder2/src/lxml/tests/common_imports.py lxml/branch/scoder2/src/lxml/tests/test_io.py Log: merged in new parser API from trunk Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Wed Mar 1 09:56:39 2006 @@ -82,23 +82,20 @@ cdef _Document _parseDocument(source, parser): cdef xmlDoc* c_doc - - # XXX ignore parser !! 
- # XXX simplistic (c)StringIO support if hasattr(source, 'getvalue'): - c_doc = theParser.parseDoc(source.getvalue()) + c_doc = theParser.parseDoc(source.getvalue(), parser) else: filename = _getFilenameForFile(source) # Support for unamed file-like object (eg urlgrabber.urlopen) if not filename and hasattr(source, 'read'): - c_doc = theParser.parseDoc(source.read()) + c_doc = theParser.parseDoc(source.read(), parser) # Otherwise parse the file directly from the filesystem else: if filename is None: filename = source # open filename - c_doc = theParser.parseDocFromFile(filename) + c_doc = theParser.parseDocFromFile(filename, parser) if c_doc is NULL: return None else: @@ -995,7 +992,7 @@ else: # XXX read XML into memory not the fastest way to do this data = file.read() - doc = _documentFactory( theParser.parseDoc(data) ) + doc = _documentFactory( theParser.parseDoc(data, None) ) else: doc = _documentFactory( theParser.newDoc() ) @@ -1014,7 +1011,7 @@ cdef xmlDoc* c_doc if isinstance(text, unicode): text = _stripDeclaration(text.encode('UTF-8')) - c_doc = theParser.parseDoc(text) + c_doc = theParser.parseDoc(text, None) return _documentFactory(c_doc).getroot() fromstring = XML Modified: lxml/branch/scoder2/src/lxml/parser.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/parser.pxi (original) +++ lxml/branch/scoder2/src/lxml/parser.pxi Wed Mar 1 09:56:39 2006 @@ -6,9 +6,65 @@ class XMLSyntaxError(LxmlSyntaxError): pass -cdef int _getParseOptions(): - return (xmlparser.XML_PARSE_NOENT | xmlparser.XML_PARSE_NOCDATA | - xmlparser.XML_PARSE_NOWARNING | xmlparser.XML_PARSE_NOERROR) +cdef int _DEFAULT_PARSE_OPTIONS +_DEFAULT_PARSE_OPTIONS = ( + xmlparser.XML_PARSE_NOENT | + xmlparser.XML_PARSE_NOCDATA | + xmlparser.XML_PARSE_NOWARNING | + xmlparser.XML_PARSE_NOERROR + ) + +cdef int _ORIG_DEFAULT_PARSE_OPTIONS +_ORIG_DEFAULT_PARSE_OPTIONS = _DEFAULT_PARSE_OPTIONS + + +cdef class XMLParser: + """The XML 
parser. Parsers can be supplied as additional argument to + various parse functions of the lxml API. A default parser is always + available and can be replaced by a call to the global function + 'set_default_parser'. New parsers can be created at any time without a + major run-time overhead. + + The keyword arguments in the constructor are mainly based on the libxml2 + parser configuration. The 'from_parser' keyword additionally allows to + provide a parser whose configurations is copied before applying the + additional arguments. Note that DTD validation obviously implies loading + the DTD. + """ + cdef int _parse_options + def __init__(self, load_dtd=False, validate_dtd=False, no_network=False, + ns_clean=False, from_parser=None): + cdef int parse_options + if from_parser is not None: + parse_options = from_parser._parse_options + else: + parse_options = _ORIG_DEFAULT_PARSE_OPTIONS + + if validate_dtd: + parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD | \ + xmlparser.XML_PARSE_DTDVALID + if load_dtd: + parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD | \ + xmlparser.XML_PARSE_DTDATTR + if no_network: + parse_options = parse_options | xmlparser.XML_PARSE_NONET + if ns_clean: + parse_options = parse_options | xmlparser.XML_PARSE_NSCLEAN + + self._parse_options = parse_options + + +def set_default_parser(parser=None): + """Set a default XMLParser. This parser is used globally whenever no + parser is supplied to the various parse functions of the lxml API. If + this function is called without a parser (or if it is None), the default + parser is reset to the original configuration. 
+ """ + if parser is not None: + _DEFAULT_PARSE_OPTIONS = (parser)._parse_options + else: + _DEFAULT_PARSE_OPTIONS = _ORIG_DEFAULT_PARSE_OPTIONS + cdef class Parser: @@ -25,12 +81,18 @@ #print "freeing dictionary (cleanup parser)" xmlparser.xmlDictFree(self._c_dict) - cdef xmlDoc* parseDoc(self, text) except NULL: + cdef xmlDoc* parseDoc(self, text, parser) except NULL: """Parse document, share dictionary if possible. """ cdef xmlDoc* result cdef xmlParserCtxt* pctxt cdef int parse_error + + if parser is not None: + parse_options = (parser)._parse_options + else: + parse_options = _DEFAULT_PARSE_OPTIONS + self._initParse() pctxt = xmlparser.xmlCreateDocParserCtxt(text) if pctxt is NULL: @@ -39,7 +101,7 @@ self._prepareParse(pctxt) xmlparser.xmlCtxtUseOptions( pctxt, - _getParseOptions()) + parse_options) parse_error = xmlparser.xmlParseDocument(pctxt) # in case of errors, clean up context plus any document if parse_error != 0 or not pctxt.wellFormed: @@ -53,17 +115,23 @@ xmlparser.xmlFreeParserCtxt(pctxt) return result - cdef xmlDoc* parseDocFromFile(self, char* filename) except NULL: + cdef xmlDoc* parseDocFromFile(self, char* filename, parser) except NULL: + cdef int parse_options cdef xmlDoc* result cdef xmlParserCtxt* pctxt + if parser is not None: + parse_options = (parser)._parse_options + else: + parse_options = _DEFAULT_PARSE_OPTIONS + self._initParse() pctxt = xmlparser.xmlNewParserCtxt() self._prepareParse(pctxt) # XXX set options twice? 
needed to shut up libxml2 - xmlparser.xmlCtxtUseOptions(pctxt, _getParseOptions()) + xmlparser.xmlCtxtUseOptions(pctxt, parse_options) result = xmlparser.xmlCtxtReadFile(pctxt, filename, - NULL, _getParseOptions()) + NULL, parse_options) if result is NULL: if pctxt.lastError.domain == xmlerror.XML_FROM_IO: raise IOError, "Could not open file %s" % filename Modified: lxml/branch/scoder2/src/lxml/tests/common_imports.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/common_imports.py (original) +++ lxml/branch/scoder2/src/lxml/tests/common_imports.py Wed Mar 1 09:56:39 2006 @@ -17,6 +17,17 @@ def _rootstring(self, tree): return etree.tostring(tree.getroot()).replace(' ', '').replace('\n', '') +class SillyFileLike: + def __init__(self, xml_data=''): + self.xml_data = xml_data + self.done = False + + def read(self, amount=None): + if not self.done: + self.done = True + return self.xml_data + return '' + def fileInTestDir(name): _testdir = os.path.split(__file__)[0] return os.path.join(_testdir, name) Modified: lxml/branch/scoder2/src/lxml/tests/test_io.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_io.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_io.py Wed Mar 1 09:56:39 2006 @@ -7,7 +7,7 @@ import unittest import tempfile, gzip -from common_imports import etree, ElementTree, fileInTestDir +from common_imports import etree, ElementTree, fileInTestDir, SillyFileLike class IOTestCaseBase(unittest.TestCase): """(c)ElementTree compatibility for IO functions/methods @@ -84,15 +84,6 @@ root = self.etree.ElementTree().parse(f) self.assert_(root.tag.endswith('foo')) -class SillyFileLike: - def __init__(self): - self.done = False - - def read(self, amount=None): - if not self.done: - self.done = True - return '' - return '' class ETreeIOTestCase(IOTestCaseBase): etree = etree From scoder at codespeak.net Wed Mar 1 
17:14:53 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 1 17:14:54 2006 Subject: [Lxml-checkins] r23838 - in lxml/branch/scoder2: doc src/lxml src/lxml/tests Message-ID: <20060301161453.199D71007C@code0.codespeak.net> Author: scoder Date: Wed Mar 1 17:14:45 2006 New Revision: 23838 Added: lxml/branch/scoder2/doc/extensions.txt Modified: lxml/branch/scoder2/src/lxml/nsclasses.pxi lxml/branch/scoder2/src/lxml/tests/test_xslt.py lxml/branch/scoder2/src/lxml/xslt.pxi Log: large clean up, fix some UTF-8 conversion bugs, new FunctionNamespace API, new doctests for using XPath extensions through FunctionNamespace Added: lxml/branch/scoder2/doc/extensions.txt ============================================================================== --- (empty file) +++ lxml/branch/scoder2/doc/extensions.txt Wed Mar 1 17:14:45 2006 @@ -0,0 +1,182 @@ +Extension functions for XPath and XSLT +====================================== + +This document describes how to use Python extension functions in XPath and +XSLT. They allow you to do things like this:: + + + +Here is what such a function looks like. As the first argument it always +receives the XPathContext object that is associated with the current +evaluation of the XPath expression. The other arguments are provided by the +respective call. + + >>> def hello(context, a): + ... return "Hello %s" % a + +Now we need to register it as an extension. In order to use it in XPath/XSLT, +it needs to have a (namespaced) name by which it can be called during +evaluation. This is done using the FunctionNamespace class. For simplicity, +we choose the empty namespace (None):: + + >>> from lxml import etree + >>> ns = etree.FunctionNamespace(None) + >>> ns['say-hello'] = hello + +This registers the function 'hello' with the name 'say-hello' in the default +namespace.
Now we're going to create a document that we can run XPath +expressions against:: + + >>> from lxml import etree + >>> from StringIO import StringIO + >>> f = StringIO('') + >>> doc = etree.parse(f) + >>> root = doc.getroot() + +Done. Now we can have XPath call our new function:: + + >>> print root.xpath("say-hello('world')") + Hello world + >>> print root.xpath('say-hello(local-name(*))') + Hello b + +Note how we call both a Python function (say-hello) and an XPath function +(local-name) in exactly the same way. Normally, however, you would want to +separate the two in different namespaces. The FunctionNamespace class allows +you to do this:: + + >>> ns = etree.FunctionNamespace('http://mydomain.org/myfunctions') + >>> ns['say-hello'] = hello + >>> print root.xpath('f:say-hello(local-name(*))', {'f' : 'http://mydomain.org/myfunctions'}) + Hello b + +In this case, however, you must specify a prefix for the function namespace. +If you always use the same prefix for the functions, you can also register it +with the namespace:: + + >>> ns.prefix = 'f' + >>> print root.xpath('f:say-hello(local-name(*))') + Hello b + +This only works with functions and FunctionNamespace objects, not with the +general Namespace object that registers element classes. If you assign the +same prefix to more than one namespace, the resulting behaviour is undefined. + + +--- UPDATE BELOW --- + + >>> FAILS FROM HERE ! + +The XPathEvaluator takes the document, an optional dictionary of namespace +prefix to namespace URI mappings, and an optional list of extensions. We'll +just pass in extensions for now:: + + >>> e = etree.XPathEvaluator(doc, extensions=[extension]) + +Now we can use the evaluator to make XPath queries against the document:: + + >>> r = e.evaluate('/a') + >>> r[0].tag + 'a' + +This is not using the extension function. We'll try a very simple +XPath query that does now. 
It doesn't really use the document at all:: + + >>> e.evaluate("foo('world')") + 'Hello world' + +Let's create a slightly more complicated extension now, one that uses +a namespaced function. We'll just reuse the function foo, but register +it under a different name, and a namespace:: + + >>> extension2 = { ('http://codespeak.net/ns/test', 'different-name') : foo } + +Now let's set up an evaluator to use it. We'll also register our +original extension. As we want to use a namespace function, we first +need to register a namespace prefix we can use in the XPath +expression, so that we can access the namespace. This is just like when +you'd want to access a namespaced XML element or attribute:: + + >>> e = etree.XPathEvaluator(doc, + ... namespaces={'test': 'http://codespeak.net/ns/test'}, + ... extensions=[extension, extension2]) + +Since we registered the original extension too for this evaluator, our +`foo` extension function still works:: + + >>> e.evaluate("foo('world')") + 'Hello world' + +But now, we also have access to our namespaced `different-name` +extension function:: + + >>> e.evaluate("test:different-name('there')") + 'Hello there' + +Besides strings, it is possible to return a number of different objects +from extension functions, such as numbers (floats) and booleans:: + + >>> def returnsFloat(evaluator): + ... return 1.7 + >>> def returnsBool(evaluator): + ... return True + >>> extension3 = { (None, 'returnsFloat') : returnsFloat, + ...
(None, 'returnsBool') : returnsBool } + >>> e = etree.XPathEvaluator(doc, None, extensions=[extension3]) + >>> e.evaluate("returnsFloat()") + 1.7 + >>> e.evaluate("returnsBool()") + True + +It's also possible to register namespaces with an evaluator later on:: + + >>> f = StringIO('') + >>> ns_doc = etree.parse(f) + >>> e = etree.XPathEvaluator(ns_doc) + >>> e.registerNamespace('foo', 'http://codespeak.net/ns/test') + >>> e.evaluate('/foo:a')[0].tag + '{http://codespeak.net/ns/test}a' + +Note: the following is rather shaky and likely won't work yet in the real world. + +It is also possible to return lists of nodes, and this way it is possible +to return XML structures:: + + >>> def returnsNodeSet(evaluator): + ... results = etree.Element('results') + ... result = etree.SubElement(results, 'result') + ... result.text = "Alpha" + ... result2 = etree.SubElement(results, 'result') + ... result2.text = "Beta" + ... result3 = etree.SubElement(results, 'result') + ... result3.text = "Gamma" + ...
return [results] + >>> extension4 = { (None, 'returnsNodeSet') : returnsNodeSet } + >>> e = etree.XPathEvaluator(doc, None, extensions=[extension4]) + >>> r = e.evaluate("returnsNodeSet()") + >>> len(r) + 1 + >>> t = r[0] + >>> t.tag + 'results' + >>> len(t) + 3 + >>> t[0].tag + 'result' + >>> t[0].text + 'Alpha' + >>> t[1].text + 'Beta' + +It's even possible to filter that result set with another XPath +expression:: + + >>> r = e.evaluate("returnsNodeSet()/result") + >>> len(r) + 3 + >>> r[0].tag + 'result' + >>> r[1].tag + 'result' + >>> r[0].text + 'Alpha' Modified: lxml/branch/scoder2/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/nsclasses.pxi (original) +++ lxml/branch/scoder2/src/lxml/nsclasses.pxi Wed Mar 1 17:14:45 2006 @@ -17,6 +17,9 @@ cdef object __NAMESPACE_REGISTRIES __NAMESPACE_REGISTRIES = {} +cdef object __FUNCTION_NAMESPACE_REGISTRIES +__FUNCTION_NAMESPACE_REGISTRIES = {} + def Namespace(ns_uri): """Retrieve the namespace object associated with the given URI. Creates a new one if it does not yet exist.""" @@ -27,18 +30,25 @@ try: return __NAMESPACE_REGISTRIES[ns_utf] except KeyError: - registry = __NAMESPACE_REGISTRIES[ns_utf] = _NamespaceRegistry(ns_uri) + registry = __NAMESPACE_REGISTRIES[ns_utf] = \ + _NamespaceRegistry(ns_uri) + return registry + +def FunctionNamespace(ns_uri): + """Retrieve the function namespace object associated with the given + URI. Creates a new one if it does not yet exist. 
A function namespace can + only be used to register extension functions.""" + if ns_uri: + ns_utf = ns_uri.encode('UTF-8') + else: + ns_utf = None + try: + return __FUNCTION_NAMESPACE_REGISTRIES[ns_utf] + except KeyError: + registry = __FUNCTION_NAMESPACE_REGISTRIES[ns_utf] = \ + _FunctionNamespaceRegistry(ns_uri) return registry -## just an idea for a different API: -## -## def FunctionNamespace(ns_uri): -## """Retrieve the function namespace object associated with the given -## URI. Creates a new one if it does not yet exist. A function namespace can -## only be used to register extension functions.""" -## # This is a dummy for now. It only defines the correct API. -## return Namespace(ns_uri) -## cdef class _NamespaceRegistry: "Dictionary-like registry for namespace implementations" @@ -94,12 +104,50 @@ self._classes.clear() self._extensions.clear() -cdef object _find_all_namespaces(): - "Hack to register all extension functions in XSLT" - ns_uris = [] - for s in __NAMESPACE_REGISTRIES.keys(): - ns_uris.append(unicode(s, 'UTF-8')) - return ns_uris +cdef class _FunctionNamespaceRegistry(_NamespaceRegistry): + cdef object _prefix + cdef object _prefix_utf + property prefix: + "Namespace prefix for extension functions." + def __del__(self): + self._prefix = None # no prefix configured + def __get__(self): + return self._prefix + def __set__(self, prefix): + if prefix is None: + prefix = '' # empty prefix + self._prefix_utf = prefix.encode('UTF-8') + self._prefix = prefix + + def __setitem__(self, name, item): + if not callable(item): + raise NamespaceRegistryError, "Registered function must be callable." + if name is None: + name_utf = None + else: + name_utf = name.encode('UTF-8') + self._extensions[name_utf] = item + + def __getitem__(self, name): + return self._extensions[name] + +cdef object _find_all_extensions(): + "Internal lookup function to find all extension functions for XSLT/XPath." 
+ cdef _NamespaceRegistry registry + ns_extensions = {} + for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.items(): + if registry._extensions: + ns_extensions[ns_utf] = registry._extensions + return ns_extensions + +cdef object _find_all_extension_prefixes(): + "Internal lookup function to find all function prefixes for XSLT/XPath." + cdef _FunctionNamespaceRegistry registry + ns_prefixes = {} + for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.items(): + if registry._prefix_utf is not None: + ns_prefixes[registry._prefix_utf] = ns_utf + return ns_prefixes cdef _NamespaceRegistry _find_namespace_registry(object ns_uri): if ns_uri: @@ -109,14 +157,22 @@ return __NAMESPACE_REGISTRIES[ns_utf] cdef _find_extensions(namespaces): + """Returns a dictionary that maps each namespace in the provided list to a + dictionary of name-function mappings defined under that namespace.""" + cdef _NamespaceRegistry registry extension_dict = {} for ns_uri in namespaces: + if ns_uri: + ns_utf = ns_uri.encode('UTF-8') + else: + ns_utf = None try: - extensions = _find_namespace_registry(ns_uri)._extensions + registry = __FUNCTION_NAMESPACE_REGISTRIES[ns_uri] except KeyError: continue + extensions = registry._extensions if extensions: - extension_dict[ns_uri] = extensions + extension_dict[ns_utf] = extensions return extension_dict cdef object _find_element_class(char* c_namespace_utf, Modified: lxml/branch/scoder2/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_xslt.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_xslt.py Wed Mar 1 17:14:45 2006 @@ -269,7 +269,7 @@ def mytext(ctxt, values): return 'X' * len(values) - namespace = etree.Namespace('testns') + namespace = etree.FunctionNamespace('testns') namespace['mytext'] = mytext result = tree.xslt(style) Modified: lxml/branch/scoder2/src/lxml/xslt.pxi 
============================================================================== --- lxml/branch/scoder2/src/lxml/xslt.pxi (original) +++ lxml/branch/scoder2/src/lxml/xslt.pxi Wed Mar 1 17:14:45 2006 @@ -35,7 +35,7 @@ ################################################################################ # support for extension functions in XPath/XSLT -cdef class ContextBuilder: +cdef class NamespaceContext: cdef _Document _doc cdef object _extensions cdef object _namespaces @@ -49,15 +49,19 @@ cdef object _exc_info def __init__(self, namespaces, extensions): + self._utf_refs = {} + # fix old format extensions if isinstance(extensions, (list, tuple)): new_extensions = {} for extension in extensions: for (ns_uri, name), function in extension.items(): + ns_utf = self._to_utf(ns_uri) + name_utf = self._to_utf(name) try: - new_extensions[ns_uri][name] = function + new_extensions[ns_utf][name_utf] = function except KeyError: - new_extensions[ns_uri] = {name : function} + new_extensions[ns_utf] = {name_utf : function} extensions = new_extensions or None self._doc = None @@ -67,7 +71,6 @@ self._registered_namespaces = [] self._registered_extensions = [] self._extension_functions = {} - self._utf_refs = {} def _to_utf(self, s): "Convert to UTF-8 and keep a reference to the encoded string" @@ -83,14 +86,16 @@ self._doc = doc self._exc_info = None namespaces = self._namespaces + ns_prefixes = _find_all_extension_prefixes() + if ns_prefixes: + self.registerNamespaces(ns_prefixes) if namespaces is not None: self.registerNamespaces(namespaces) - namespace_uris = namespaces.values() + extensions = _find_extensions(namespaces.values()) else: - namespace_uris = _find_all_namespaces() - extensions = _find_extensions(namespace_uris) + extensions = _find_all_extensions() if extensions: - self.registerExtensionFunctions(extensions) + self._registerExtensionFunctions(extensions) if self._extensions is not None: self.registerExtensionFunctions(self._extensions) @@ -105,6 +110,8 @@ 
self._utf_refs.clear() self._doc = None + # namespaces (internal UTF-8 versions with leading '_') + def addNamespace(self, prefix, uri): if self._namespaces is None: self._namespaces = {prefix : uri} @@ -115,13 +122,17 @@ for prefix, uri in namespaces.items(): self.registerNamespace(prefix, uri) + def registerNamespace(self, prefix, ns_uri): + prefix_utf = self._to_utf(prefix) + ns_uri_utf = self._to_utf(ns_uri) + self._contextRegisterNamespace(prefix_utf, ns_uri_utf) + self._registered_namespaces.append(prefix_utf) + cdef _unregisterNamespaces(self): - for prefix in self._registered_namespaces: - self._unregisterNamespace(self._to_utf(prefix)) + for prefix_utf in self._registered_namespaces: + self._contextUnregisterNamespace(prefix_utf) - def registerNamespace(self, prefix, uri): - self._registerNamespace(self._to_utf(prefix), self._to_utf(uri)) - self._registered_namespaces.append(prefix) + # extension functions (internal UTF-8 versions with leading '_') def registerExtensionFunctions(self, extensions): for ns_uri, extension in extensions.items(): @@ -129,16 +140,22 @@ self.registerExtensionFunction(ns_uri, name, function) def registerExtensionFunction(self, ns_uri, name, function): - ns_uri_utf = self._to_utf(ns_uri) - name_utf = self._to_utf(name) - self._registerExtensionFunction(ns_uri_utf, name_utf) + self._registerExtensionFunction( + self._to_utf(ns_uri), self._to_utf(name), function) + + cdef _registerExtensionFunctions(self, extensions_utf): + for ns_uri_utf, extension in extensions_utf.items(): + for name_utf, function in extension.items(): + self._registerExtensionFunction(ns_uri_utf, name_utf, function) + + cdef _registerExtensionFunction(self, ns_uri_utf, name_utf, function): + self._contextRegisterExtensionFunction(ns_uri_utf, name_utf) self._extension_functions[(ns_uri_utf, name_utf)] = function - self._registered_extensions.append((ns_uri, name)) + self._registered_extensions.append((ns_uri_utf, name_utf)) cdef 
_unregisterExtensionFunctions(self): - for ns_uri, name in self._registered_extensions: - self._unregisterExtensionFunction(self._to_utf(ns_uri), - self._to_utf(name)) + for ns_uri_utf, name_utf in self._registered_extensions: + self._contextUnregisterExtensionFunction(ns_uri_utf, name_utf) def find_extension(self, ns_uri_utf, name_utf): return self._extension_functions[(ns_uri_utf, name_utf)] @@ -173,11 +190,11 @@ ################################################################################ # XSLT -cdef class XSLTContextBuilder(ContextBuilder): +cdef class XSLTContext(NamespaceContext): cdef xslt.xsltTransformContext* _xsltCtxt def __init__(self, namespaces, extensions): self._xsltCtxt = NULL - ContextBuilder.__init__(self, namespaces, extensions) + NamespaceContext.__init__(self, namespaces, extensions) cdef register_context(self, xslt.xsltTransformContext* xsltCtxt, _Document doc): self._xsltCtxt = xsltCtxt @@ -202,21 +219,21 @@ self._xsltCtxt = NULL xslt.xsltFreeTransformContext(xsltCtxt) - def _registerNamespace(self, prefix_utf, uri_utf): - # ZZZ: don't know if this is the right thing to do + def _contextRegisterNamespace(self, prefix_utf, uri_utf): + # ZZZ: don't know if this is the right thing to do for XSLT, but works xpath.xmlXPathRegisterNs(self._xsltCtxt.xpathCtxt, prefix_utf, uri_utf) - def _unregisterNamespace(self, prefix_utf): - # ZZZ: don't know if this is the right thing to do + def _contextUnregisterNamespace(self, prefix_utf): + # ZZZ: don't know if this is the right thing to do for XSLT, but works xpath.xmlXPathRegisterNs(self._xsltCtxt.xpathCtxt, prefix_utf, NULL) - def _registerExtensionFunction(self, ns_uri_utf, name_utf): + def _contextRegisterExtensionFunction(self, ns_uri_utf, name_utf): if ns_uri_utf is None: raise XSLTExtensionError, "extensions must have non-empty namespaces" xslt.xsltRegisterExtFunction(self._xsltCtxt, name_utf, ns_uri_utf, _xpathCallback) - def _unregisterExtensionFunction(self, ns_uri_utf, name_utf): + def 
_contextUnregisterExtensionFunction(self, ns_uri_utf, name_utf): if ns_uri_utf is not None: xslt.xsltRegisterExtFunction(self._xsltCtxt, name_utf, ns_uri_utf, _xpathCallback) @@ -225,7 +242,7 @@ cdef class XSLT: """Turn a document into an XSLT object. """ - cdef XSLTContextBuilder _context_builder + cdef XSLTContext _context cdef xslt.xsltStylesheet* _c_style def __init__(self, xslt_input, extensions=None): @@ -246,7 +263,7 @@ raise XSLTParseError, "Cannot parse style sheet" self._c_style = c_style - self._context_builder = XSLTContextBuilder(None, extensions) + self._context = XSLTContext(None, extensions) # XXX is it worthwile to use xsltPrecomputeStylesheet here? def __dealloc__(self): @@ -294,7 +311,7 @@ else: params = NULL - self._context_builder.register_context(transform_ctxt, input_doc) + self._context.register_context(transform_ctxt, input_doc) c_result = xslt.xsltApplyStylesheetUser(self._c_style, c_doc, params, NULL, NULL, transform_ctxt) @@ -303,7 +320,7 @@ # deallocate space for parameters cstd.free(params) - self._context_builder.free_context() + self._context.free_context() _destroyFakeDoc(input_doc._c_doc, c_doc) if c_result is NULL: @@ -346,13 +363,13 @@ ################################################################################ # XPath -cdef class XPathContextBuilder(ContextBuilder): +cdef class XPathContext(NamespaceContext): cdef xpath.xmlXPathContext* _xpathCtxt cdef object _variables cdef object _registered_variables def __init__(self, namespaces, extensions, variables): self._xpathCtxt = NULL - ContextBuilder.__init__(self, namespaces, extensions) + NamespaceContext.__init__(self, namespaces, extensions) self._variables = variables self._registered_variables = [] @@ -409,13 +426,13 @@ xpath.xmlXPathRegisterVariable(xpathCtxt, name_utf, NULL) xpath.xmlXPathFreeObject(xpathVarValue) - def _registerNamespace(self, prefix_utf, uri_utf): + def _contextRegisterNamespace(self, prefix_utf, uri_utf): xpath.xmlXPathRegisterNs(self._xpathCtxt, 
prefix_utf, uri_utf) - def _unregisterNamespace(self, prefix_utf): + def _contextUnregisterNamespace(self, prefix_utf): xpath.xmlXPathRegisterNs(self._xpathCtxt, prefix_utf, NULL) - def _registerExtensionFunction(self, ns_uri_utf, name_utf): + def _contextRegisterExtensionFunction(self, ns_uri_utf, name_utf): if ns_uri_utf is not None: xpath.xmlXPathRegisterFuncNS(self._xpathCtxt, name_utf, ns_uri_utf, _xpathCallback) @@ -423,7 +440,7 @@ xpath.xmlXPathRegisterFunc(self._xpathCtxt, name_utf, _xpathCallback) - def _unregisterExtensionFunction(self, ns_uri_utf, name_utf): + def _contextUnregisterExtensionFunction(self, ns_uri_utf, name_utf): if ns_uri_utf is not None: xpath.xmlXPathRegisterFuncNS(self._xpathCtxt, name_utf, ns_uri_utf, NULL) @@ -432,10 +449,10 @@ cdef class XPathEvaluatorBase: - cdef XPathContextBuilder _context_builder + cdef XPathContext _context def __init__(self, namespaces, extensions, variables=None): - self._context_builder = XPathContextBuilder(namespaces, extensions, variables) + self._context = XPathContext(namespaces, extensions, variables) cdef _nsextract_path(self, path_utf): namespaces = {} @@ -455,7 +472,7 @@ return path_utf, namespaces cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, _Document doc): - _exc_info = self._context_builder._exc_info + _exc_info = self._context._exc_info if _exc_info is not None: type, value, traceback = _exc_info raise type, value, traceback @@ -502,7 +519,7 @@ def registerNamespace(self, prefix, uri): """Register a namespace with the XPath context. """ - self._context_builder.addNamespace(prefix, uri) + self._context.addNamespace(prefix, uri) def registerNamespaces(self, namespaces): """Register a prefix -> uri dict. 
@@ -525,17 +542,17 @@ # if element context is requested; unfortunately need to modify ctxt xpathCtxt.node = c_ctxt_node - self._context_builder.register_context(xpathCtxt, self._doc) - self._context_builder.registerVariables(variable_dict) + self._context.register_context(xpathCtxt, self._doc) + self._context.registerVariables(variable_dict) path = path.encode('UTF-8') - self._context_builder._release() + self._context._release() xpathObj = xpath.xmlXPathEvalExpression(path, xpathCtxt) - self._context_builder.unregister_context() + self._context.unregister_context() return self._handle_result(xpathObj, self._doc) - + #def clone(self): # # XXX pretty expensive so calling this from callback is probably # # not desirable @@ -545,12 +562,12 @@ """Create an XPath evaluator for an element. """ cdef _Element _element - + def __init__(self, _Element element, namespaces=None, extensions=None): XPathDocumentEvaluator.__init__( self, element._doc, namespaces, extensions) self._element = element - + def evaluate(self, _path, **variables): """Evaluate an XPath expression on the element. Variables may be given as keyword arguments. 
Note that namespaces are @@ -562,7 +579,7 @@ return XPathDocumentEvaluator(etree_or_element, namespaces, extensions) else: return XPathElementEvaluator(etree_or_element, namespaces, extensions) - + def Extension(module, function_mapping, ns_uri=None): functions = [] for function_name, xpath_name in function_mapping.items(): @@ -602,12 +619,12 @@ xpathCtxt = xpath.xmlXPathNewContext(document._c_doc) xpathCtxt.node = element._c_node - self._context_builder.register_context(xpathCtxt, document) - self._context_builder.registerVariables(variables) + self._context.register_context(xpathCtxt, document) + self._context.registerVariables(variables) xpathObj = xpath.xmlXPathCompiledEval(self._xpath, xpathCtxt) - self._context_builder.unregister_context() + self._context.unregister_context() xpath.xmlXPathFreeContext(xpathCtxt) @@ -709,7 +726,7 @@ cdef xpath.xmlXPathContext* rctxt cdef _Document doc cdef xpath.xmlXPathObject* obj - cdef ContextBuilder extensions + cdef NamespaceContext extensions rctxt = ctxt.context @@ -721,7 +738,7 @@ uri = None # get our evaluator - extensions = <ContextBuilder>(rctxt.userData) + extensions = <NamespaceContext>(rctxt.userData) # lookup up the extension function in the evaluator f = extensions.find_extension(uri, name) From scoder at codespeak.net Thu Mar 2 08:43:15 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 2 08:43:17 2006 Subject: [Lxml-checkins] r23896 - lxml/branch/scoder2/src/lxml Message-ID: <20060302074315.C1989100A0@code0.codespeak.net> Author: scoder Date: Thu Mar 2 08:43:09 2006 New Revision: 23896 Modified: lxml/branch/scoder2/src/lxml/xslt.pxi Log: more clean ups, always free temporary references from extension function evaluation at next call Modified: lxml/branch/scoder2/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/xslt.pxi (original) +++ lxml/branch/scoder2/src/lxml/xslt.pxi Thu Mar 2 08:43:09 2006 @@ -35,7 +35,7 @@
################################################################################ # support for extension functions in XPath/XSLT -cdef class NamespaceContext: +cdef class BaseContext: cdef _Document _doc cdef object _extensions cdef object _namespaces @@ -43,7 +43,7 @@ cdef object _registered_extensions cdef object _extension_functions cdef object _utf_refs - # for exception handling: + # for exception handling and temporary reference keeping: cdef object _temp_elements cdef object _temp_docs cdef object _exc_info @@ -71,6 +71,8 @@ self._registered_namespaces = [] self._registered_extensions = [] self._extension_functions = {} + self._temp_elements = {} + self._temp_docs = {} def _to_utf(self, s): "Convert to UTF-8 and keep a reference to the encoded string" @@ -160,18 +162,19 @@ def find_extension(self, ns_uri_utf, name_utf): return self._extension_functions[(ns_uri_utf, name_utf)] - # exception handling + # Python reference keeping during XPath function evaluation - cdef _release(self): - self._temp_elements = {} - self._temp_docs = {} + cdef _release_temp_refs(self): + "Free temporarily referenced objects from this context." + self._temp_elements.clear() + self._temp_docs.clear() cdef _hold(self, obj): """A way to temporarily hold references to nodes in the evaluator. This is needed because otherwise nodes created in XPath extension - functions would be reference counted too soon, during the - XPath evaluation. + functions would be reference counted too soon, during the XPath + evaluation. This is most important in the case of exceptions. 
""" cdef _NodeBase element if isinstance(obj, _NodeBase): @@ -190,11 +193,11 @@ ################################################################################ # XSLT -cdef class XSLTContext(NamespaceContext): +cdef class XSLTContext(BaseContext): cdef xslt.xsltTransformContext* _xsltCtxt def __init__(self, namespaces, extensions): self._xsltCtxt = NULL - NamespaceContext.__init__(self, namespaces, extensions) + BaseContext.__init__(self, namespaces, extensions) cdef register_context(self, xslt.xsltTransformContext* xsltCtxt, _Document doc): self._xsltCtxt = xsltCtxt @@ -311,6 +314,7 @@ else: params = NULL + self._context._release_temp_refs() self._context.register_context(transform_ctxt, input_doc) c_result = xslt.xsltApplyStylesheetUser(self._c_style, c_doc, params, @@ -363,13 +367,13 @@ ################################################################################ # XPath -cdef class XPathContext(NamespaceContext): +cdef class XPathContext(BaseContext): cdef xpath.xmlXPathContext* _xpathCtxt cdef object _variables cdef object _registered_variables def __init__(self, namespaces, extensions, variables): self._xpathCtxt = NULL - NamespaceContext.__init__(self, namespaces, extensions) + BaseContext.__init__(self, namespaces, extensions) self._variables = variables self._registered_variables = [] @@ -542,13 +546,12 @@ # if element context is requested; unfortunately need to modify ctxt xpathCtxt.node = c_ctxt_node + self._context._release_temp_refs() self._context.register_context(xpathCtxt, self._doc) self._context.registerVariables(variable_dict) path = path.encode('UTF-8') - self._context._release() xpathObj = xpath.xmlXPathEvalExpression(path, xpathCtxt) - self._context.unregister_context() return self._handle_result(xpathObj, self._doc) @@ -619,6 +622,7 @@ xpathCtxt = xpath.xmlXPathNewContext(document._c_doc) xpathCtxt.node = element._c_node + self._context._release_temp_refs() self._context.register_context(xpathCtxt, document) 
self._context.registerVariables(variables) @@ -641,11 +645,10 @@ cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL: cdef xpath.xmlNodeSet* resultSet cdef _NodeBase node - if isinstance(obj, str): - # XXX use the Wrap variant? Or leak... - return xpath.xmlXPathNewCString(obj) if isinstance(obj, unicode): obj = obj.encode("utf-8") + if isinstance(obj, str): + # XXX use the Wrap variant? Or leak... return xpath.xmlXPathNewCString(obj) if isinstance(obj, types.BooleanType): return xpath.xmlXPathNewBoolean(obj) @@ -726,7 +729,7 @@ cdef xpath.xmlXPathContext* rctxt cdef _Document doc cdef xpath.xmlXPathObject* obj - cdef NamespaceContext extensions + cdef BaseContext extensions rctxt = ctxt.context @@ -738,9 +741,9 @@ uri = None # get our evaluator - extensions = <NamespaceContext>(rctxt.userData) + extensions = <BaseContext>(rctxt.userData) - # lookup up the extension function in the evaluator + # lookup up the extension function in the context f = extensions.find_extension(uri, name) args = [] From scoder at codespeak.net Thu Mar 2 12:09:40 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 2 12:09:42 2006 Subject: [Lxml-checkins] r23897 - lxml/branch/scoder2/src/lxml Message-ID: <20060302110940.CA8BD100A0@code0.codespeak.net> Author: scoder Date: Thu Mar 2 12:09:39 2006 New Revision: 23897 Modified: lxml/branch/scoder2/src/lxml/xslt.pxi Log: bug fixes for XSLT extension code: it does not support the None namespace and *must* *not* register any prefixes Modified: lxml/branch/scoder2/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/xslt.pxi (original) +++ lxml/branch/scoder2/src/lxml/xslt.pxi Thu Mar 2 12:09:39 2006 @@ -12,6 +12,9 @@ class XSLTSaveError(XSLTError): pass +class XSLTExtensionError(XSLTError): + pass + class XPathError(LxmlError): pass @@ -84,19 +87,21 @@ utf = self._utf_refs[s] = s.encode('UTF8') return utf - cdef _register_context(self, _Document doc): + cdef
_register_context(self, _Document doc, int allow_none_namespace): self._doc = doc self._exc_info = None namespaces = self._namespaces - ns_prefixes = _find_all_extension_prefixes() - if ns_prefixes: - self.registerNamespaces(ns_prefixes) if namespaces is not None: self.registerNamespaces(namespaces) extensions = _find_extensions(namespaces.values()) else: extensions = _find_all_extensions() if extensions: + if not allow_none_namespace: + try: + del extensions[None] + except KeyError: + pass self._registerExtensionFunctions(extensions) if self._extensions is not None: self.registerExtensionFunctions(self._extensions) @@ -201,7 +206,7 @@ cdef register_context(self, xslt.xsltTransformContext* xsltCtxt, _Document doc): self._xsltCtxt = xsltCtxt - self._register_context(doc) + self._register_context(doc, 0) xsltCtxt.xpathCtxt.userData = self cdef unregister_context(self): @@ -379,7 +384,10 @@ cdef register_context(self, xpath.xmlXPathContext* xpathCtxt, _Document doc): self._xpathCtxt = xpathCtxt - self._register_context(doc) + ns_prefixes = _find_all_extension_prefixes() + if ns_prefixes: + self.registerNamespaces(ns_prefixes) + self._register_context(doc, 1) if self._variables is not None: self.registerVariables(self._variables) xpathCtxt.userData = self From scoder at codespeak.net Thu Mar 2 12:10:53 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 2 12:10:54 2006 Subject: [Lxml-checkins] r23898 - lxml/branch/scoder2/doc Message-ID: <20060302111053.B4D5A100A4@code0.codespeak.net> Author: scoder Date: Thu Mar 2 12:10:47 2006 New Revision: 23898 Modified: lxml/branch/scoder2/doc/extensions.txt Log: partial rewrite of extension doc file, more doc tests, also for XSLT Modified: lxml/branch/scoder2/doc/extensions.txt ============================================================================== --- lxml/branch/scoder2/doc/extensions.txt (original) +++ lxml/branch/scoder2/doc/extensions.txt Thu Mar 2 12:10:47 2006 @@ -6,22 +6,28 @@ -Here is how such 
a function looks like. As the first argument it always -receives the XPathContext object that is associated with the current +Here is how such a function looks like. As the first argument, it always +receives the XPath context object that is associated with the current evaluation of the XPath expression. The other arguments are provided by the respective call. >>> def hello(context, a): ... return "Hello %s" % a + >>> def ola(context, a): + ... return "Ola %s" % a -Now we need to register it as an extension. In order to use it in XPath/XSLT, -it needs to have a (namespaced) name by which it can be called during -evaluation. This is done using the FunctionNamespace class. For simplicity, -we choose the empty namespace (None):: + +The FunctionNamespace +--------------------- + +In order to use a function in XPath/XSLT, it needs to have a (namespaced) name +by which it can be called during evaluation. This is done using the +FunctionNamespace class. For simplicity, we choose the empty namespace +(None):: >>> from lxml import etree >>> ns = etree.FunctionNamespace(None) - >>> ns['say-hello'] = hello + >>> ns['hello'] = hello This registers the function 'foo' with the name 'myfunction' in the default namespace. Now we're going to create a document that we can run XPath @@ -29,118 +35,135 @@ >>> from lxml import etree >>> from StringIO import StringIO - >>> f = StringIO('') + >>> f = StringIO('Haegar') >>> doc = etree.parse(f) >>> root = doc.getroot() -Done. Now we can have XPath call our new function:: +Done. 
Now we can have XPath expressions call our new function:: - >>> print root.xpath("say-hello('world')") + >>> print root.xpath("hello('world')") Hello world - >>> print root.xpath('say-hello(local-name(*))') + >>> print root.xpath('hello(local-name(*))') Hello b + >>> print root.xpath('hello(string(b))') + Hello Haegar -Note how we call both a Python function (say-hello) and an XPath function +Note how we call both a Python function (hello) and an XPath function (local-name) in exactly the same way. Normally, however, you would want to separate the two in different namespaces. The FunctionNamespace class allows you to do this:: >>> ns = etree.FunctionNamespace('http://mydomain.org/myfunctions') - >>> ns['say-hello'] = hello - >>> print root.xpath('f:say-hello(local-name(*))', {'f' : 'http://mydomain.org/myfunctions'}) - Hello b - -In this case, however, you must specify a prefix for the function namespace. -If you always use the same prefix for the functions, you can also register it -with the namespace:: - - >>> ns.prefix = 'f' - >>> print root.xpath('f:say-hello(local-name(*))') + >>> ns['hello'] = hello + >>> print root.xpath('f:hello(local-name(*))', {'f' : 'http://mydomain.org/myfunctions'}) Hello b -This only works with functions and FunctionNamespace objects, not with the -general Namespace object that registers element classes. If you assign the -same prefix to more than one namespace, the resulting behaviour is undefined. - - ---- UPDATE BELOW --- - - >>> FAILS FROM HERE ! - -The XPathEvaluator takes the document, an optional dictionary of namespace -prefix to namespace URI mappings, and an optional list of extensions. We'll -just pass in extensions for now:: - - >>> e = etree.XPathEvaluator(doc, extensions=[extension]) - -Now we can use the evaluator to make XPath queries against the document:: - - >>> r = e.evaluate('/a') - >>> r[0].tag - 'a' - -This is not using the extension function. We'll try a very simple -XPath query that does now. 
It doesn't really use the document at all:: - - >>> e.evaluate("foo('world')") - 'Hello world' - -Let's create a slightly more complicated extension now, one that uses -a namespaced function. We'll just reuse the function foo, but register -it under a different name, and a namespace:: - >>> extension2 = { ('http://codespeak.net/ns/test', 'different-name') : foo } - -Now let's set up an evaluator to use it. We'll also register our -original extension. As we want to use a namespace function, we first -need to register a namespace prefix we can use in the XPath -expression, so that we can access the namespace. This just like when -you'd want to access a namespaced XML element or attribute:: +Global prefix assignment +------------------------ - >>> e = etree.XPathEvaluator(doc, - ... namespaces={'test': 'http://codespeak.net/ns/test'}, - ... extensions=[extension, extension2]) - -Since we registered the original extension too for this evaluator, our -`foo` extension function still works:: - - >>> e.evaluate("foo('world')") - 'Hello world' - -But now, we also have access to our namespaced `different-name` -extension function:: - - >>> e.evaluate("test:different-name('there')") - 'Hello there' - -Besides strings is possible to return a number of different objects -from extension functions, such as numbers (floats) and booleans:: +In the last example, you had to specify a prefix for the function namespace. +If you always use the same prefix for a function namespace, you can also +register it with the namespace:: + + >>> ns = etree.FunctionNamespace('http://mydomain.org/myother/functions') + >>> ns.prefix = 'es' + >>> ns['hello'] = ola + >>> print root.xpath('es:hello(local-name(*))') + Ola b + +This is a global assignment, so take care not to assign the same prefix to +more than one namespace. The resulting behaviour in that case is completely +undefined. 
It is always a good idea to consistently use the same meaningful +prefix for each namespace throughout your application. + +The prefix assignment only works with functions and FunctionNamespace objects, +not with the general Namespace object that registers element classes. The +reasoning is that elements in lxml do not care about prefixes anyway, so it +would rather complicate things than be of any help. + + +What to return from a function +------------------------------ + +Extension functions can return any data type for which there is an XPath +equivalent. This includes numbers, boolean values, elements and lists of +elements. >>> def returnsFloat(evaluator): ... return 1.7 >>> def returnsBool(evaluator): ... return True - >>> extension3 = { (None, 'returnsFloat') : returnsFloat, - ... (None, 'returnsBool') : returnsBool } - >>> e = etree.XPathEvaluator(doc, None, extensions=[extension3]) - >>> e.evaluate("returnsFloat()") + >>> def returnFirstNode(evaluator, nodes): + ... return nodes[0] + + >>> ns = etree.FunctionNamespace(None) + >>> ns['float'] = returnsFloat + >>> ns['bool'] = returnsBool + >>> ns['first'] = returnFirstNode + + >>> e = etree.XPathEvaluator(doc) + >>> e.evaluate("float()") 1.7 - >>> e.evaluate("returnsBool()") + >>> e.evaluate("bool()") True + >>> e.evaluate("count(first(//b))") + 1.0 -It's also possible to register namespaces with a evaluator later on:: + +Evaluators and XSLT +------------------- + +Extension functions work for all ways of evaluating XPath expressions and for +XSLT execution:: + + >>> e = etree.XPathEvaluator(doc) + >>> print e.evaluate('es:hello(local-name(/a))') + Ola a + + >>> e = etree.XPathEvaluator(doc, namespaces={'f' : 'http://mydomain.org/myfunctions'}) + >>> print e.evaluate('f:hello(local-name(/a))') + Hello a + + >>> xslt = etree.XSLT(etree.ElementTree(etree.XML(''' + ... + ... + ... + ... + ... '''))) + >>> print xslt(doc) + Ola Haegar + +It is also possible to register namespaces with an evaluator later on. 
While +the following example involves no functions, the idea should still be clear:: - >>> f = StringIO('') + >>> f = StringIO('') >>> ns_doc = etree.parse(f) >>> e = etree.XPathEvaluator(ns_doc) - >>> e.registerNamespace('foo', 'http://codespeak.net/ns/test') + >>> e.evaluate('/a') + [] + +This obviously returns nothing, but when we register the namespace with the +evaluator, we can access it via a prefix. Note that this prefix mapping is +only known to this evaluator, as opposed to the global mapping of the +FunctionNamespace objects:: + + >>> e.registerNamespace('foo', 'http://mydomain.org/myfunctions') >>> e.evaluate('/foo:a')[0].tag - '{http://codespeak.net/ns/test}a' + '{http://mydomain.org/myfunctions}a' + + +BETA Features +------------- -Note: the following is rather shaky and like won't work yet in the real world. +Note: the following features are still in beta state. They may not work as +expected. -It is also possible to return lists of nodes, and this way it is possible -to return XML structures:: +It is possible to return lists of newly created nodes as XML structures:: >>> def returnsNodeSet(evaluator): ... 
results = etree.Element('results') @@ -168,8 +191,7 @@ >>> t[1].text 'Beta' -It's even possible to filter that result set with another XPath -expression:: +It's even possible to filter that result set with another XPath expression:: >>> r = e.evaluate("returnsNodeSet()/result") >>> len(r) From scoder at codespeak.net Thu Mar 2 12:11:56 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 2 12:11:56 2006 Subject: [Lxml-checkins] r23899 - lxml/branch/scoder2/doc Message-ID: <20060302111156.7D15E100A4@code0.codespeak.net> Author: scoder Date: Thu Mar 2 12:11:55 2006 New Revision: 23899 Modified: lxml/branch/scoder2/doc/xpath.txt Log: call old XPath extensions 'depricated' in doc Modified: lxml/branch/scoder2/doc/xpath.txt ============================================================================== --- lxml/branch/scoder2/doc/xpath.txt (original) +++ lxml/branch/scoder2/doc/xpath.txt Thu Mar 2 12:11:55 2006 @@ -1,13 +1,16 @@ XPath extension functions ========================= -This document describes how to deal with XPath extension -functions. This documentation is preliminary as the API is still in -flux. - -An extension function is defined in Python. In order to use it in -XPath, it needs to have a name by which it can be called in XPath, and -an optional namespace URI. +This document describes the OLD DEPRICATED way of dealing with XPath extension +functions. For updated documentation, please see the new Namespace API +described in the documentations of `nsclasses`_ and `extensions`_. + +`nsclasses`_: nsclasses.html +`extensions`_: extensions.html + +Extension functions are defined in Python. In order to use such a function, it +must have a name by which it can be called in XPath, and an optional namespace +URI. 
As the first argument a function will always receive the XPathEvaluator object that is currently in the process of evaluating From scoder at codespeak.net Thu Mar 2 12:37:25 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 2 12:37:26 2006 Subject: [Lxml-checkins] r23900 - lxml/branch/scoder2/src/lxml Message-ID: <20060302113725.522B81008F@code0.codespeak.net> Author: scoder Date: Thu Mar 2 12:37:24 2006 New Revision: 23900 Modified: lxml/branch/scoder2/src/lxml/xslt.pxi Log: removed unused exception XPathNamespaceError Modified: lxml/branch/scoder2/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/xslt.pxi (original) +++ lxml/branch/scoder2/src/lxml/xslt.pxi Thu Mar 2 12:37:24 2006 @@ -21,9 +21,6 @@ class XPathContextError(XPathError): pass -class XPathNamespaceError(XPathError): - pass - class XPathResultError(XPathError): pass @@ -740,7 +737,7 @@ cdef BaseContext extensions rctxt = ctxt.context - + # get information on what function is called name = rctxt.function if rctxt.functionURI is not NULL: @@ -753,7 +750,7 @@ # lookup up the extension function in the context f = extensions.find_extension(uri, name) - + args = [] doc = extensions._doc for i from 0 <= i < nargs: From scoder at codespeak.net Thu Mar 2 12:37:59 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 2 12:38:01 2006 Subject: [Lxml-checkins] r23901 - lxml/branch/scoder2/doc Message-ID: <20060302113759.F0CEE1008F@code0.codespeak.net> Author: scoder Date: Thu Mar 2 12:37:58 2006 New Revision: 23901 Modified: lxml/branch/scoder2/doc/extensions.txt Log: small clarifications in doc/extensions.txt Modified: lxml/branch/scoder2/doc/extensions.txt ============================================================================== --- lxml/branch/scoder2/doc/extensions.txt (original) +++ lxml/branch/scoder2/doc/extensions.txt Thu Mar 2 12:37:58 2006 @@ -48,7 +48,7 @@ >>> print 
root.xpath('hello(string(b))') Hello Haegar -Note how we call both a Python function (hello) and an XPath function +Note how we call both a Python function (hello) and an XPath built-in function (local-name) in exactly the same way. Normally, however, you would want to separate the two in different namespaces. The FunctionNamespace class allows you to do this:: @@ -138,8 +138,8 @@ >>> print xslt(doc) Ola Haegar -It is also possible to register namespaces with an evaluator later on. While -the following example involves no functions, the idea should still be clear:: +It is also possible to register namespaces with a single evaluator. While the +following example involves no functions, the idea should still be clear:: >>> f = StringIO('') >>> ns_doc = etree.parse(f) From scoder at codespeak.net Thu Mar 2 12:43:00 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 2 12:43:07 2006 Subject: [Lxml-checkins] r23902 - lxml/branch/scoder2/src/lxml Message-ID: <20060302114300.DA0D9100A4@code0.codespeak.net> Author: scoder Date: Thu Mar 2 12:42:49 2006 New Revision: 23902 Modified: lxml/branch/scoder2/src/lxml/etree.pyx Log: be consistent with argument names (**_variables) Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Thu Mar 2 12:42:49 2006 @@ -654,8 +654,8 @@ def findall(self, path): return _elementpath.findall(self, path) - def xpath(self, _path, _namespaces=None, **variables): - return XPathElementEvaluator(self, _namespaces).evaluate(_path, **variables) + def xpath(self, _path, _namespaces=None, **_variables): + return XPathElementEvaluator(self, _namespaces).evaluate(_path, **_variables) cdef _Element _elementFactory(_Document doc, xmlNode* c_node): cdef _Element result From scoder at codespeak.net Thu Mar 2 12:43:40 2006 From: scoder at codespeak.net (scoder@codespeak.net) 
Date: Thu Mar 2 12:43:41 2006 Subject: [Lxml-checkins] r23903 - lxml/branch/scoder2/src/lxml/tests Message-ID: <20060302114340.D0402100A4@code0.codespeak.net> Author: scoder Date: Thu Mar 2 12:43:39 2006 New Revision: 23903 Modified: lxml/branch/scoder2/src/lxml/tests/test_xslt.py Log: include doc/extensions.txt in unit tests (test_xslt.py) Modified: lxml/branch/scoder2/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_xslt.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_xslt.py Thu Mar 2 12:43:39 2006 @@ -281,6 +281,8 @@ suite.addTests([unittest.makeSuite(ETreeXSLTTestCase)]) suite.addTests( [doctest.DocFileSuite('../../../doc/xpath.txt')]) + suite.addTests( + [doctest.DocFileSuite('../../../doc/extensions.txt')]) return suite if __name__ == '__main__': From scoder at codespeak.net Thu Mar 2 13:54:39 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 2 13:54:41 2006 Subject: [Lxml-checkins] r23904 - in lxml/branch/scoder2: doc src/lxml Message-ID: <20060302125439.B7761100A4@code0.codespeak.net> Author: scoder Date: Thu Mar 2 13:54:37 2006 New Revision: 23904 Modified: lxml/branch/scoder2/doc/extensions.txt lxml/branch/scoder2/src/lxml/xslt.pxi Log: pass None as first argument to extension functions Modified: lxml/branch/scoder2/doc/extensions.txt ============================================================================== --- lxml/branch/scoder2/doc/extensions.txt (original) +++ lxml/branch/scoder2/doc/extensions.txt Thu Mar 2 13:54:37 2006 @@ -7,13 +7,14 @@ Here is how such a function looks like. As the first argument, it always -receives the XPath context object that is associated with the current -evaluation of the XPath expression. The other arguments are provided by the -respective call. +receives a dummy object. It is currently None, but do not rely on this as it +may become meaningful in later versions of lxml. 
The other arguments are +provided by the respective call in the XPath expression. Any number of +arguments is allowed. - >>> def hello(context, a): + >>> def hello(dummy, a): ... return "Hello %s" % a - >>> def ola(context, a): + >>> def ola(dummy, a): ... return "Ola %s" % a @@ -90,11 +91,11 @@ equivalent. This includes numbers, boolean values, elements and lists of elements. - >>> def returnsFloat(evaluator): + >>> def returnsFloat(_): ... return 1.7 - >>> def returnsBool(evaluator): + >>> def returnsBool(_): ... return True - >>> def returnFirstNode(evaluator, nodes): + >>> def returnFirstNode(_, nodes): ... return nodes[0] >>> ns = etree.FunctionNamespace(None) Modified: lxml/branch/scoder2/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/xslt.pxi (original) +++ lxml/branch/scoder2/src/lxml/xslt.pxi Thu Mar 2 13:54:37 2006 @@ -759,7 +759,7 @@ try: # call the function - res = f(extensions, *args) + res = f(None, *args) # hold python objects temporarily so that they won't get deallocated # during processing extensions._hold(res) From scoder at codespeak.net Thu Mar 2 14:16:57 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 2 14:16:59 2006 Subject: [Lxml-checkins] r23906 - in lxml/branch/scoder2: doc src/lxml/tests Message-ID: <20060302131657.D4225100A7@code0.codespeak.net> Author: scoder Date: Thu Mar 2 14:16:56 2006 New Revision: 23906 Removed: lxml/branch/scoder2/src/lxml/tests/test_parser.py Modified: lxml/branch/scoder2/doc/api.txt Log: removed parser test file and move the test into a doctest in api.txt to show parser usage ; also show new use pattern for calling XSLT objects instead of their apply() method Modified: lxml/branch/scoder2/doc/api.txt ============================================================================== --- lxml/branch/scoder2/doc/api.txt (original) +++ lxml/branch/scoder2/doc/api.txt Thu Mar 2 14:16:56 2006 @@ -13,6 +13,32 @@ 
however some incompatibilities (see compatibility.txt). There are also some extensions. +The following examples usually assume this to be executed first:: + + >>> import lxml.etree + >>> from StringIO import StringIO + + +XMLParser +--------- + +One of the differences is the parser. It is based on libxml2 and therefore +only supports options that are backed by the library. Parsers take a number +of keyword arguments. The following is an example for namespace cleanup +during parsing, first with the default parser, then with a parametrized one:: + + >>> xml = '' + + >>> et = lxml.etree.parse(StringIO(xml)) + >>> print lxml.etree.tostring(et.getroot()) + + + >>> parser = lxml.etree.XMLParser(ns_clean=True) + >>> et = lxml.etree.parse(StringIO(xml), parser) + >>> print lxml.etree.tostring(et.getroot()) + + + xpath method on ElementTree, Element ------------------------------------ @@ -45,9 +71,6 @@ Example:: - >>> import lxml.etree - >>> from StringIO import StringIO - >>> f = StringIO('') >>> doc = lxml.etree.parse(f) >>> r = doc.xpath('/foo/bar') @@ -91,26 +114,26 @@ >>> xslt_doc = lxml.etree.parse(f) >>> style = lxml.etree.XSLT(xslt_doc) -You can then apply the style against some ElementTree document, and this -results in another ElementTree object:: +You can then apply the style against some ElementTree document by simply +calling it, and this results in another ElementTree object:: >>> f = StringIO('Text') >>> doc = lxml.etree.parse(f) - >>> result = style.apply(doc) + >>> result = style(doc) The result object can accessed like a normal ElementTree document:: >>> result.getroot().text 'Text' -but can also be turned into an (XML or text) string using the style's -``tostring`` method:: +but, as opposed to normal ElementTree objects, can also be turned into an (XML +or text) string by applying the str() function:: - >>> style.tostring(result) + >>> str(result) '\nText\n' -It is also possible to pass parameters, in the form of XPath expressions, -to the XSLT 
template:: +It is possible to pass parameters, in the form of XPath expressions, to the +XSLT template:: >>> f = StringIO('''\ ... Author: scoder Date: Thu Mar 2 14:22:25 2006 New Revision: 23908 Removed: lxml/trunk/src/lxml/tests/test_parser.py Modified: lxml/trunk/doc/api.txt Log: updated documentation on XMLParser and XSLT, replaced test_parser.py by doctest (merged from scoder2 branch) Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Thu Mar 2 14:22:25 2006 @@ -13,6 +13,32 @@ however some incompatibilities (see compatibility.txt). There are also some extensions. +The following examples usually assume this to be executed first:: + + >>> import lxml.etree + >>> from StringIO import StringIO + + +XMLParser +--------- + +One of the differences is the parser. It is based on libxml2 and therefore +only supports options that are backed by the library. Parsers take a number +of keyword arguments. 
The following is an example for namespace cleanup +during parsing, first with the default parser, then with a parametrized one:: + + >>> xml = '' + + >>> et = lxml.etree.parse(StringIO(xml)) + >>> print lxml.etree.tostring(et.getroot()) + + + >>> parser = lxml.etree.XMLParser(ns_clean=True) + >>> et = lxml.etree.parse(StringIO(xml), parser) + >>> print lxml.etree.tostring(et.getroot()) + + + xpath method on ElementTree, Element ------------------------------------ @@ -45,9 +71,6 @@ Example:: - >>> import lxml.etree - >>> from StringIO import StringIO - >>> f = StringIO('') >>> doc = lxml.etree.parse(f) >>> r = doc.xpath('/foo/bar') @@ -91,26 +114,26 @@ >>> xslt_doc = lxml.etree.parse(f) >>> style = lxml.etree.XSLT(xslt_doc) -You can then apply the style against some ElementTree document, and this -results in another ElementTree object:: +You can then apply the style against some ElementTree document by simply +calling it, and this results in another ElementTree object:: >>> f = StringIO('Text') >>> doc = lxml.etree.parse(f) - >>> result = style.apply(doc) + >>> result = style(doc) The result object can accessed like a normal ElementTree document:: >>> result.getroot().text 'Text' -but can also be turned into an (XML or text) string using the style's -``tostring`` method:: +but, as opposed to normal ElementTree objects, can also be turned into an (XML +or text) string by applying the str() function:: - >>> style.tostring(result) + >>> str(result) '\nText\n' -It is also possible to pass parameters, in the form of XPath expressions, -to the XSLT template:: +It is possible to pass parameters, in the form of XPath expressions, to the +XSLT template:: >>> f = StringIO('''\ ... 
Author: scoder Date: Thu Mar 2 14:53:42 2006 New Revision: 23909 Modified: lxml/branch/scoder2/doc/xpath.txt Log: provide better reference to other doc files in xpath.txt Modified: lxml/branch/scoder2/doc/xpath.txt ============================================================================== --- lxml/branch/scoder2/doc/xpath.txt (original) +++ lxml/branch/scoder2/doc/xpath.txt Thu Mar 2 14:53:42 2006 @@ -3,10 +3,7 @@ This document describes the OLD DEPRECATED way of dealing with XPath extension functions. For updated documentation, please see the new Namespace API -described in the documentations of `nsclasses`_ and `extensions`_. - -`nsclasses`_: nsclasses.html -`extensions`_: extensions.html +described in nsclasses.txt and extensions.txt. Extension functions are defined in Python. In order to use such a function, it must have a name by which it can be called in XPath, and an optional namespace From scoder at codespeak.net Fri Mar 3 15:31:51 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 3 15:31:53 2006 Subject: [Lxml-checkins] r23946 - lxml/branch/resolver Message-ID: <20060303143151.98BD210036@code0.codespeak.net> Author: scoder Date: Fri Mar 3 15:31:50 2006 New Revision: 23946 Added: lxml/branch/resolver/ - copied from r23945, lxml/branch/scoder2/ Log: new branch to test resolver setup From scoder at codespeak.net Fri Mar 3 17:42:43 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 3 17:42:44 2006 Subject: [Lxml-checkins] r23949 - lxml/trunk/src/lxml Message-ID: <20060303164243.58AB6100BA@code0.codespeak.net> Author: scoder Date: Fri Mar 3 17:42:41 2006 New Revision: 23949 Modified: lxml/trunk/src/lxml/etree.pyx Log: use libxml2 file parser from within ElementTree(file=...) 
to store file URL correctly - this is especially needed by XSLT Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Fri Mar 3 17:42:41 2006 @@ -975,7 +975,7 @@ element.tag = tag return element -def ElementTree(_Element element=None, file=None): +def ElementTree(_Element element=None, file=None, parser=None): cdef xmlNode* c_next cdef xmlNode* c_node cdef xmlNode* c_node_copy @@ -985,14 +985,13 @@ if element is not None: doc = element._doc elif file is not None: - if isinstance(file, str) or isinstance(file, unicode): - f = open(file, 'r') - data = f.read() - f.close() + if isinstance(file, (str, unicode)): + filename = file.encode('UTF-8') + doc = _documentFactory( theParser.parseDocFromFile(filename, parser) ) else: # XXX read XML into memory not the fastest way to do this data = file.read() - doc = _documentFactory( theParser.parseDoc(data, None) ) + doc = _documentFactory( theParser.parseDoc(data, parser) ) else: doc = _documentFactory( theParser.newDoc() ) From scoder at codespeak.net Fri Mar 3 17:51:39 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 3 17:51:41 2006 Subject: [Lxml-checkins] r23950 - lxml/trunk/src/lxml Message-ID: <20060303165139.3924B100BA@code0.codespeak.net> Author: scoder Date: Fri Mar 3 17:51:32 2006 New Revision: 23950 Modified: lxml/trunk/src/lxml/etree.pyx Log: refactoring of ElementTree(file=) case: simply call _parseDocument function that already handles everything Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Fri Mar 3 17:51:32 2006 @@ -985,13 +985,7 @@ if element is not None: doc = element._doc elif file is not None: - if isinstance(file, (str, unicode)): - filename = file.encode('UTF-8') - doc = _documentFactory( 
theParser.parseDocFromFile(filename, parser) ) - else: - # XXX read XML into memory not the fastest way to do this - data = file.read() - doc = _documentFactory( theParser.parseDoc(data, parser) ) + doc = _parseDocument(file, parser) else: doc = _documentFactory( theParser.newDoc() ) From scoder at codespeak.net Fri Mar 3 17:56:24 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 3 17:56:26 2006 Subject: [Lxml-checkins] r23951 - lxml/branch/scoder2/doc Message-ID: <20060303165624.87BA9100BB@code0.codespeak.net> Author: scoder Date: Fri Mar 3 17:56:18 2006 New Revision: 23951 Modified: lxml/branch/scoder2/doc/namespace_extensions.txt Log: clean up namespace_extensions.txt after introducing extension_functions.txt Modified: lxml/branch/scoder2/doc/namespace_extensions.txt ============================================================================== --- lxml/branch/scoder2/doc/namespace_extensions.txt (original) +++ lxml/branch/scoder2/doc/namespace_extensions.txt Fri Mar 3 17:56:18 2006 @@ -2,6 +2,8 @@ Implementing namespaces with the Namespace class ================================================ +(Also see extension_functions.txt) + Imagine, you have a namespace called 'http://hui.de/honk' and have to treat all of its elements in a specific way, say, to find out if they are really honking. You could provide a function called 'is_honking' @@ -114,7 +116,7 @@ True >>> print honk_element[0].honking Traceback (most recent call last): - File "/var/tmp/python-2.4.2-root/usr/lib/python2.4/doctest.py", line 1243, in __run + File "/usr/lib/python2.4/doctest.py", line 1243, in __run compileflags, 1) in test.globs File "", line 1, in ? 
print honk_element[0].honking @@ -154,28 +156,8 @@ HONK >>> print honk_element[0].honking Traceback (most recent call last): - File "/var/tmp/python-2.4.2-root/usr/lib/python2.4/doctest.py", line 1243, in __run + File "/usr/lib/python2.4/doctest.py", line 1243, in __run compileflags, 1) in test.globs File "", line 1, in ? print honk_element[0].honking AttributeError: 'HonkNSElement' object has no attribute 'honking' - - -XPath extension functions -========================= - -The same API is used for extension functions in XPath. If you associate a -name in the namespace with a callable object (that is not a subclass of -ElementBase), it will be used as extension function in XPath evaluations. - ->>> from lxml.etree import Namespace ->>> def tag_of(context, elem): -... return elem[0].tag ->>> namespace = Namespace('myfunctions') ->>> namespace['tagname'] = tag_of - -You can then use your new function in XPath expressions: - ->>> element = XML('') ->>> element.xpath('f:tagname(//honk)', {'f' : 'myfunctions'}) -'honk' From scoder at codespeak.net Fri Mar 3 17:57:49 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 3 17:57:51 2006 Subject: [Lxml-checkins] r23952 - lxml/branch/scoder2/src/lxml Message-ID: <20060303165749.93076100BB@code0.codespeak.net> Author: scoder Date: Fri Mar 3 17:57:42 2006 New Revision: 23952 Modified: lxml/branch/scoder2/src/lxml/etree.pyx Log: refactoring of ElementTree(file=) case, as in trunk Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Fri Mar 3 17:57:42 2006 @@ -975,7 +975,7 @@ element.tag = tag return element -def ElementTree(_Element element=None, file=None): +def ElementTree(_Element element=None, file=None, parser=None): cdef xmlNode* c_next cdef xmlNode* c_node cdef xmlNode* c_node_copy @@ -985,14 +985,7 @@ if element is not None: doc = 
element._doc elif file is not None: - if isinstance(file, str) or isinstance(file, unicode): - f = open(file, 'r') - data = f.read() - f.close() - else: - # XXX read XML into memory not the fastest way to do this - data = file.read() - doc = _documentFactory( theParser.parseDoc(data, None) ) + doc = _parseDocument(file, parser) else: doc = _documentFactory( theParser.newDoc() ) From scoder at codespeak.net Fri Mar 3 19:57:59 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 3 19:58:00 2006 Subject: [Lxml-checkins] r23956 - lxml/trunk/src/lxml/tests Message-ID: <20060303185759.AE680100BC@code0.codespeak.net> Author: scoder Date: Fri Mar 3 19:57:58 2006 New Revision: 23956 Modified: lxml/trunk/src/lxml/tests/test_xslt.py Log: test cases to check that document('') works in stylesheets loaded from a file Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Fri Mar 3 19:57:58 2006 @@ -224,6 +224,26 @@ self.assertEquals(self._rootstring(result), 'C') + def test_xslt_document_parse(self): + # make sure document('') works from loaded files + xslt = etree.XSLT(etree.parse(fileInTestDir("test-document.xslt"))) + result = xslt(etree.XML('')) + root = result.getroot() + self.assertEquals(root.tag, + 'test') + self.assertEquals(root[0].tag, + '{http://www.w3.org/1999/XSL/Transform}stylesheet') + + def test_xslt_document_elementtree(self): + # make sure document('') works from loaded files + xslt = etree.XSLT(etree.ElementTree(file=fileInTestDir("test-document.xslt"))) + result = xslt(etree.XML('')) + root = result.getroot() + self.assertEquals(root.tag, + 'test') + self.assertEquals(root[0].tag, + '{http://www.w3.org/1999/XSL/Transform}stylesheet') + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeXSLTTestCase)]) From scoder at codespeak.net Fri Mar 
3 20:04:45 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 3 20:04:46 2006 Subject: [Lxml-checkins] r23957 - lxml/trunk/src/lxml/tests Message-ID: <20060303190445.D8BF4100BC@code0.codespeak.net> Author: scoder Date: Fri Mar 3 20:04:44 2006 New Revision: 23957 Added: lxml/trunk/src/lxml/tests/test-document.xslt Log: [forgot to add new file] test cases to check that document('') works in stylesheets loaded from a file Added: lxml/trunk/src/lxml/tests/test-document.xslt ============================================================================== --- (empty file) +++ lxml/trunk/src/lxml/tests/test-document.xslt Fri Mar 3 20:04:44 2006 @@ -0,0 +1,10 @@ + + + + + + + + + From scoder at codespeak.net Fri Mar 3 20:06:39 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 3 20:06:41 2006 Subject: [Lxml-checkins] r23958 - lxml/branch/scoder2/src/lxml/tests Message-ID: <20060303190639.A8F98100BC@code0.codespeak.net> Author: scoder Date: Fri Mar 3 20:06:37 2006 New Revision: 23958 Added: lxml/branch/scoder2/src/lxml/tests/test-document.xslt - copied unchanged from r23957, lxml/trunk/src/lxml/tests/test-document.xslt Log: test case file copied from trunk From scoder at codespeak.net Fri Mar 3 20:08:11 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 3 20:08:12 2006 Subject: [Lxml-checkins] r23959 - lxml/branch/scoder2/src/lxml/tests Message-ID: <20060303190811.1BADB100BC@code0.codespeak.net> Author: scoder Date: Fri Mar 3 20:08:10 2006 New Revision: 23959 Modified: lxml/branch/scoder2/src/lxml/tests/test_xslt.py Log: merge from trunk: test cases to check that document('') works in stylesheets loaded from a file Modified: lxml/branch/scoder2/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_xslt.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_xslt.py Fri Mar 3 20:08:10 2006 @@ -276,6 +276,26 @@ 
self.assertEquals(self._rootstring(result), 'X') + def test_xslt_document_parse(self): + # make sure document('') works from loaded files + xslt = etree.XSLT(etree.parse(fileInTestDir("test-document.xslt"))) + result = xslt(etree.XML('')) + root = result.getroot() + self.assertEquals(root.tag, + 'test') + self.assertEquals(root[0].tag, + '{http://www.w3.org/1999/XSL/Transform}stylesheet') + + def test_xslt_document_elementtree(self): + # make sure document('') works from loaded files + xslt = etree.XSLT(etree.ElementTree(file=fileInTestDir("test-document.xslt"))) + result = xslt(etree.XML('')) + root = result.getroot() + self.assertEquals(root.tag, + 'test') + self.assertEquals(root[0].tag, + '{http://www.w3.org/1999/XSL/Transform}stylesheet') + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeXSLTTestCase)]) From scoder at codespeak.net Fri Mar 3 20:21:20 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 3 20:21:21 2006 Subject: [Lxml-checkins] r23960 - lxml/branch/scoder2/src/lxml Message-ID: <20060303192120.DE550100BC@code0.codespeak.net> Author: scoder Date: Fri Mar 3 20:21:14 2006 New Revision: 23960 Modified: lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/xslt.pxi Log: move more argument names out of the way of keyword arguments by prefixing them with '_', remove the prefix in some places where it makes sense to use arguments as keyword arguments Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Fri Mar 3 20:21:14 2006 @@ -213,7 +213,7 @@ return root.findall(path) # extensions to ElementTree API - def xpath(self, _path, _namespaces=None, **_variables): + def xpath(self, _path, namespaces=None, **_variables): """XPath evaluate in context of document. 
namespaces is an optional dictionary with prefix to namespace URI @@ -228,9 +228,9 @@ against the same document, it is more efficient to use XPathEvaluator directly. """ - return XPathDocumentEvaluator(self._doc, _namespaces).evaluate(_path, **_variables) + return XPathDocumentEvaluator(self._doc, namespaces).evaluate(_path, **_variables) - def xslt(self, _xslt, _extensions=None, **_kw): + def xslt(self, _xslt, extensions=None, **_kw): """Transform this document using other document. xslt is a tree that should be XSLT @@ -242,7 +242,7 @@ multiple documents, it is more efficient to use the XSLT class directly. """ - style = XSLT(_xslt, _extensions) + style = XSLT(_xslt, extensions) return style.apply(self, **_kw) def relaxng(self, relaxng): @@ -654,8 +654,8 @@ def findall(self, path): return _elementpath.findall(self, path) - def xpath(self, _path, _namespaces=None, **_variables): - return XPathElementEvaluator(self, _namespaces).evaluate(_path, **_variables) + def xpath(self, _path, namespaces=None, **_variables): + return XPathElementEvaluator(self, namespaces).evaluate(_path, **_variables) cdef _Element _elementFactory(_Document doc, xmlNode* c_node): cdef _Element result @@ -936,13 +936,13 @@ # module-level API for ElementTree -def Element(tag, attrib=None, nsmap=None, **extra): +def Element(tag, attrib=None, nsmap=None, **_extra): cdef _Document doc cdef _Element result cdef xmlNode* c_node cdef xmlDoc* c_doc c_doc = theParser.newDoc() - c_node = _createElement(c_doc, tag, attrib, extra) + c_node = _createElement(c_doc, tag, attrib, _extra) tree.xmlDocSetRootElement(c_doc, c_node) # add namespaces to node if necessary _addNamespaces(c_doc, c_node, nsmap) @@ -963,16 +963,16 @@ tree.xmlAddChild(doc._c_doc, c_node) return _commentFactory(doc, c_node) -def SubElement(_Element parent, tag, attrib=None, nsmap=None, **extra): +def SubElement(_Element _parent, _tag, attrib=None, nsmap=None, **_extra): cdef xmlNode* c_node cdef _Element element - c_node = 
_createElement(parent._doc._c_doc, tag, attrib, extra) - element = _elementFactory(parent._doc, c_node) - parent.append(element) + c_node = _createElement(_parent._doc._c_doc, _tag, attrib, _extra) + element = _elementFactory(_parent._doc, c_node) + _parent.append(element) # add namespaces to node if necessary - _addNamespaces(parent._doc._c_doc, c_node, nsmap) + _addNamespaces(_parent._doc._c_doc, c_node, nsmap) # XXX hack for namespaces - element.tag = tag + element.tag = _tag return element def ElementTree(_Element element=None, file=None, parser=None): Modified: lxml/branch/scoder2/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/xslt.pxi (original) +++ lxml/branch/scoder2/src/lxml/xslt.pxi Fri Mar 3 20:21:14 2006 @@ -536,11 +536,11 @@ for prefix, uri in namespaces.items(): self.registerNamespace(prefix, uri) - def evaluate(self, _path, **variables): + def evaluate(self, _path, **_variables): """Evaluate an XPath expression on the document. Variables may be given as keyword arguments. Note that namespaces are currently not supported for variables.""" - return self._evaluate(_path, NULL, variables) + return self._evaluate(_path, NULL, _variables) cdef object _evaluate(self, path, xmlNode* c_ctxt_node, variable_dict): cdef xpath.xmlXPathContext* xpathCtxt @@ -576,11 +576,11 @@ self, element._doc, namespaces, extensions) self._element = element - def evaluate(self, _path, **variables): + def evaluate(self, _path, **_variables): """Evaluate an XPath expression on the element. Variables may be given as keyword arguments. 
Note that namespaces are currently not supported for variables.""" - return self._evaluate(_path, self._element._c_node, variables) + return self._evaluate(_path, self._element._c_node, _variables) def XPathEvaluator(etree_or_element, namespaces=None, extensions=None): if isinstance(etree_or_element, _ElementTree): @@ -609,7 +609,7 @@ if self._xpath is NULL: raise XPathSyntaxError, "Error in xpath expression." - def evaluate(self, _etree_or_element, **variables): + def evaluate(self, _etree_or_element, **_variables): cdef xpath.xmlXPathContext* xpathCtxt cdef xpath.xmlXPathObject* xpathObj cdef _Document document @@ -629,7 +629,7 @@ self._context._release_temp_refs() self._context.register_context(xpathCtxt, document) - self._context.registerVariables(variables) + self._context.registerVariables(_variables) xpathObj = xpath.xmlXPathCompiledEval(self._xpath, xpathCtxt) From scoder at codespeak.net Fri Mar 3 20:23:47 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 3 20:23:49 2006 Subject: [Lxml-checkins] r23961 - lxml/branch/scoder2/src/lxml Message-ID: <20060303192347.0A931100BC@code0.codespeak.net> Author: scoder Date: Fri Mar 3 20:23:41 2006 New Revision: 23961 Modified: lxml/branch/scoder2/src/lxml/etree.pyx Log: forgot one argument prefix Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Fri Mar 3 20:23:41 2006 @@ -936,20 +936,20 @@ # module-level API for ElementTree -def Element(tag, attrib=None, nsmap=None, **_extra): +def Element(_tag, attrib=None, nsmap=None, **_extra): cdef _Document doc cdef _Element result cdef xmlNode* c_node cdef xmlDoc* c_doc c_doc = theParser.newDoc() - c_node = _createElement(c_doc, tag, attrib, _extra) + c_node = _createElement(c_doc, _tag, attrib, _extra) tree.xmlDocSetRootElement(c_doc, c_node) # add namespaces to node if necessary 
_addNamespaces(c_doc, c_node, nsmap) # XXX hack for namespaces doc = _documentFactory(c_doc) result = _elementFactory(doc, c_node) - result.tag = tag + result.tag = _tag return result def Comment(text=None): From scoder at codespeak.net Fri Mar 3 20:30:30 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 3 20:30:36 2006 Subject: [Lxml-checkins] r23962 - lxml/branch/scoder2/src/lxml Message-ID: <20060303193030.57429100BC@code0.codespeak.net> Author: scoder Date: Fri Mar 3 20:30:29 2006 New Revision: 23962 Modified: lxml/branch/scoder2/src/lxml/xslt.pxd Log: commented out unused field in xsltTransformContext Modified: lxml/branch/scoder2/src/lxml/xslt.pxd ============================================================================== --- lxml/branch/scoder2/src/lxml/xslt.pxd (original) +++ lxml/branch/scoder2/src/lxml/xslt.pxd Fri Mar 3 20:30:29 2006 @@ -7,7 +7,7 @@ ctypedef struct xsltTransformContext: xmlXPathContext* xpathCtxt - void* _private +# void* _private cdef xsltStylesheet* xsltParseStylesheetDoc(xmlDoc* doc) cdef void xsltFreeStylesheet(xsltStylesheet* sheet) From scoder at codespeak.net Fri Mar 3 21:03:33 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 3 21:03:35 2006 Subject: [Lxml-checkins] r23963 - in lxml/trunk: doc src/lxml Message-ID: <20060303200333.A8091100BC@code0.codespeak.net> Author: scoder Date: Fri Mar 3 21:03:26 2006 New Revision: 23963 Modified: lxml/trunk/doc/api.txt lxml/trunk/src/lxml/etree.pyx Log: only describe new callable API for XSLT usage, remove hints about previous limitations Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Fri Mar 3 21:03:26 2006 @@ -148,31 +148,26 @@ >>> f = StringIO('Text') >>> doc = lxml.etree.parse(f) -The parameters are passed as keyword parameters to ``apply``. 
First +The parameters are passed as keyword parameters to the transform call. First let's try passing in a simple string expression:: - >>> result = style.apply(doc, a="'A'") - >>> style.tostring(result) + >>> result = style(doc, a="'A'") + >>> str(result) '\nA\n' Let's try a non-string XPath expression now:: - >>> result = style.apply(doc, a="/a/b/text()") - >>> style.tostring(result) + >>> result = style(doc, a="/a/b/text()") + >>> str(result) '\nText\n' -There's also a convenience method on the tree object for doing XSLT +There's also a convenience method on the tree object for doing XSL transformations. This is less efficient if you want to apply the same -XSLT transformation to multiple documents, but is shorter to write:: +XSL transformation to multiple documents, but is shorter to write:: >>> result = doc.xslt(xslt_doc, a="'A'") - >>> lxml.etree.tostring(result.getroot()) - 'A' - -A limitation is that the style object cannot be used to serialize to -XML. The etree.tostring function can be used, but it does not take -style rules into account, which matters in case of non-XML output -among other things. + >>> str(result) + '\nA\n' RelaxNG ------- Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Fri Mar 3 21:03:26 2006 @@ -243,7 +243,7 @@ class directly. """ style = XSLT(xslt) - return style.apply(self, **kw) + return style(self, **kw) def relaxng(self, relaxng): """Validate this document using other document. 
From scoder at codespeak.net Fri Mar 3 21:06:42 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 3 21:06:44 2006 Subject: [Lxml-checkins] r23964 - lxml/branch/scoder2/src/lxml Message-ID: <20060303200642.62752100BC@code0.codespeak.net> Author: scoder Date: Fri Mar 3 21:06:36 2006 New Revision: 23964 Modified: lxml/branch/scoder2/src/lxml/etree.pyx Log: doc updates merged from trunk Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Fri Mar 3 21:06:36 2006 @@ -243,7 +243,7 @@ class directly. """ style = XSLT(_xslt, extensions) - return style.apply(self, **_kw) + return style(self, **_kw) def relaxng(self, relaxng): """Validate this document using other document. From scoder at codespeak.net Fri Mar 3 21:06:51 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 3 21:06:53 2006 Subject: [Lxml-checkins] r23965 - lxml/branch/scoder2/doc Message-ID: <20060303200651.91F39100BD@code0.codespeak.net> Author: scoder Date: Fri Mar 3 21:06:50 2006 New Revision: 23965 Modified: lxml/branch/scoder2/doc/api.txt Log: doc updates merged from trunk Modified: lxml/branch/scoder2/doc/api.txt ============================================================================== --- lxml/branch/scoder2/doc/api.txt (original) +++ lxml/branch/scoder2/doc/api.txt Fri Mar 3 21:06:50 2006 @@ -148,31 +148,26 @@ >>> f = StringIO('Text') >>> doc = lxml.etree.parse(f) -The parameters are passed as keyword parameters to ``apply``. First +The parameters are passed as keyword parameters to the transform call. 
First let's try passing in a simple string expression:: - >>> result = style.apply(doc, a="'A'") - >>> style.tostring(result) + >>> result = style(doc, a="'A'") + >>> str(result) '\nA\n' Let's try a non-string XPath expression now:: - >>> result = style.apply(doc, a="/a/b/text()") - >>> style.tostring(result) + >>> result = style(doc, a="/a/b/text()") + >>> str(result) '\nText\n' -There's also a convenience method on the tree object for doing XSLT +There's also a convenience method on the tree object for doing XSL transformations. This is less efficient if you want to apply the same -XSLT transformation to multiple documents, but is shorter to write:: +XSL transformation to multiple documents, but is shorter to write:: >>> result = doc.xslt(xslt_doc, a="'A'") - >>> lxml.etree.tostring(result.getroot()) - 'A' - -A limitation is that the style object cannot be used to serialize to -XML. The etree.tostring function can be used, but it does not take -style rules into account, which matters in case of non-XML output -among other things. 
+ >>> str(result) + '\nA\n' RelaxNG ------- From scoder at codespeak.net Fri Mar 3 21:11:10 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 3 21:11:21 2006 Subject: [Lxml-checkins] r23966 - lxml/branch/scoder2 Message-ID: <20060303201110.7AEC0100BC@code0.codespeak.net> Author: scoder Date: Fri Mar 3 21:10:59 2006 New Revision: 23966 Modified: lxml/branch/scoder2/MANIFEST.in Log: added .xslt .rng .xml files in tests directory to MANIFEST.in Modified: lxml/branch/scoder2/MANIFEST.in ============================================================================== --- lxml/branch/scoder2/MANIFEST.in (original) +++ lxml/branch/scoder2/MANIFEST.in Fri Mar 3 21:10:59 2006 @@ -1,3 +1,4 @@ -include setup.py MANIFEST.in *.txt lxmldistutils.py +include setup.py MANIFEST.in *.txt recursive-include src *.pyx *.pxd *.pxi *.py etree.c +recursive-include src/lxml/tests *.rng *.xslt *.xml recursive-include doc *.txt From scoder at codespeak.net Fri Mar 3 21:13:06 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 3 21:13:07 2006 Subject: [Lxml-checkins] r23967 - lxml/trunk Message-ID: <20060303201306.68CA6100C2@code0.codespeak.net> Author: scoder Date: Fri Mar 3 21:12:55 2006 New Revision: 23967 Modified: lxml/trunk/MANIFEST.in Log: added .xslt .rng .xml files in tests directory to MANIFEST.in Modified: lxml/trunk/MANIFEST.in ============================================================================== --- lxml/trunk/MANIFEST.in (original) +++ lxml/trunk/MANIFEST.in Fri Mar 3 21:12:55 2006 @@ -1,3 +1,4 @@ -include setup.py MANIFEST.in *.txt lxmldistutils.py +include setup.py MANIFEST.in *.txt recursive-include src *.pyx *.pxd *.pxi *.py etree.c +recursive-include src/lxml/tests *.rng *.xslt *.xml recursive-include doc *.txt From scoder at codespeak.net Fri Mar 3 21:15:41 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 3 21:15:43 2006 Subject: [Lxml-checkins] r23968 - lxml/branch/scoder2 Message-ID: 
<20060303201541.E2173100BC@code0.codespeak.net> Author: scoder Date: Fri Mar 3 21:15:40 2006 New Revision: 23968 Removed: lxml/branch/scoder2/lxmldistutils.py Log: removed legacy file Deleted: /lxml/branch/scoder2/lxmldistutils.py ============================================================================== --- /lxml/branch/scoder2/lxmldistutils.py Fri Mar 3 21:15:40 2006 +++ (empty file) @@ -1,59 +0,0 @@ -# Based on the version from Pyrex, cleaned up -# Added feature to allow include path option to support pxd files better - -import distutils.command.build_ext -import Pyrex.Compiler.Main -from Pyrex.Compiler.Errors import PyrexError -from distutils.dep_util import newer -import os -import sys - -def replace_suffix(path, new_suffix): - return os.path.splitext(path)[0] + new_suffix - -class build_ext(distutils.command.build_ext.build_ext): - - description = ("compile Pyrex scripts, then build C/C++ extensions " - "(compile/link to build directory)") - - def finalize_options(self): - distutils.command.build_ext.build_ext.finalize_options(self) - - def swig_sources(self, sources, dummy=None): - if not self.extensions: - return - - pyx_sources = [source for source in sources - if source.endswith('.pyx')] - other_sources = [source for source in sources - if not source.endswith('.pyx')] - c_sources = [] - - for pyx in pyx_sources: - # should I raise an exception if it doesn't exist? 
- if os.path.exists(pyx): - source = pyx - target = replace_suffix(source, '.c') - c_sources.append(target) - if newer(source, target) or self.force: - self.pyrex_compile(source) - return c_sources + other_sources - - def pyrex_compile(self, source): - options = Pyrex.Compiler.Main.CompilationOptions( - show_version=0, - use_listing_file=0, - errors_to_stderr=1, - include_path=self.get_pxd_include_paths(), - c_only=1, - obj_only=1, - output_file=None) - - result = Pyrex.Compiler.Main.compile(source, options) - if result.num_errors <> 0: - sys.exit(1) - - def get_pxd_include_paths(self): - """Override this to return a list of include paths for pyrex. - """ - return [] From scoder at codespeak.net Fri Mar 3 21:15:53 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 3 21:15:55 2006 Subject: [Lxml-checkins] r23969 - lxml/trunk Message-ID: <20060303201553.04FE0100C0@code0.codespeak.net> Author: scoder Date: Fri Mar 3 21:15:47 2006 New Revision: 23969 Removed: lxml/trunk/lxmldistutils.py Log: removed legacy file Deleted: /lxml/trunk/lxmldistutils.py ============================================================================== --- /lxml/trunk/lxmldistutils.py Fri Mar 3 21:15:47 2006 +++ (empty file) @@ -1,59 +0,0 @@ -# Based on the version from Pyrex, cleaned up -# Added feature to allow include path option to support pxd files better - -import distutils.command.build_ext -import Pyrex.Compiler.Main -from Pyrex.Compiler.Errors import PyrexError -from distutils.dep_util import newer -import os -import sys - -def replace_suffix(path, new_suffix): - return os.path.splitext(path)[0] + new_suffix - -class build_ext(distutils.command.build_ext.build_ext): - - description = ("compile Pyrex scripts, then build C/C++ extensions " - "(compile/link to build directory)") - - def finalize_options(self): - distutils.command.build_ext.build_ext.finalize_options(self) - - def swig_sources(self, sources, dummy=None): - if not self.extensions: - return - - 
pyx_sources = [source for source in sources - if source.endswith('.pyx')] - other_sources = [source for source in sources - if not source.endswith('.pyx')] - c_sources = [] - - for pyx in pyx_sources: - # should I raise an exception if it doesn't exist? - if os.path.exists(pyx): - source = pyx - target = replace_suffix(source, '.c') - c_sources.append(target) - if newer(source, target) or self.force: - self.pyrex_compile(source) - return c_sources + other_sources - - def pyrex_compile(self, source): - options = Pyrex.Compiler.Main.CompilationOptions( - show_version=0, - use_listing_file=0, - errors_to_stderr=1, - include_path=self.get_pxd_include_paths(), - c_only=1, - obj_only=1, - output_file=None) - - result = Pyrex.Compiler.Main.compile(source, options) - if result.num_errors <> 0: - sys.exit(1) - - def get_pxd_include_paths(self): - """Override this to return a list of include paths for pyrex. - """ - return [] From scoder at codespeak.net Sun Mar 5 10:29:26 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 5 10:29:28 2006 Subject: [Lxml-checkins] r23976 - lxml/trunk/src/lxml Message-ID: <20060305092926.79664100A8@code0.codespeak.net> Author: scoder Date: Sun Mar 5 10:29:24 2006 New Revision: 23976 Modified: lxml/trunk/src/lxml/etree.pyx Log: fixed typo Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Mar 5 10:29:24 2006 @@ -303,7 +303,7 @@ _destroyFakeDoc(c_base_doc, c_doc) if bytes < 0: - raise C14NError, "C18N failed" + raise C14NError, "C14N failed" if not hasattr(file, 'write'): file = open(file, 'wb') file.write(data) From scoder at codespeak.net Sun Mar 5 10:57:29 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 5 10:57:31 2006 Subject: [Lxml-checkins] r23977 - lxml/trunk/src/lxml/tests Message-ID: <20060305095729.B3DD5100A8@code0.codespeak.net> Author: 
scoder Date: Sun Mar 5 10:57:28 2006 New Revision: 23977 Modified: lxml/trunk/src/lxml/tests/test_xslt.py Log: new test case for repeated transformation, does not fail, but is worth testing Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Sun Mar 5 10:57:28 2006 @@ -154,6 +154,24 @@ etree.tostring(result.getroot()) + def test_xslt_repeat_transform(self): + xml = '' + xslt = '''\ + + + Some text + + +''' + source = self.parse(xml) + styledoc = self.parse(xslt) + transform = etree.XSLT(styledoc) + result = transform(source) + result = transform(source) + etree.tostring(result.getroot()) + result = transform(source) + etree.tostring(result.getroot()) + def test_xslt_empty(self): # could segfault if result contains "empty document" xml = '' From scoder at codespeak.net Sun Mar 5 11:09:25 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 5 11:09:26 2006 Subject: [Lxml-checkins] r23978 - lxml/trunk/doc Message-ID: <20060305100925.34F7F100A8@code0.codespeak.net> Author: scoder Date: Sun Mar 5 11:09:18 2006 New Revision: 23978 Modified: lxml/trunk/doc/api.txt Log: make doc tests more readable by changing variable names Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Sun Mar 5 11:09:18 2006 @@ -112,14 +112,14 @@ ... ... 
''') >>> xslt_doc = lxml.etree.parse(f) - >>> style = lxml.etree.XSLT(xslt_doc) + >>> transform = lxml.etree.XSLT(xslt_doc) -You can then apply the style against some ElementTree document by simply +You can then run the transformation on an ElementTree document by simply calling it, and this results in another ElementTree object:: >>> f = StringIO('Text') >>> doc = lxml.etree.parse(f) - >>> result = style(doc) + >>> result = transform(doc) The result object can accessed like a normal ElementTree document:: @@ -144,26 +144,27 @@ ... ... ''') >>> xslt_doc = lxml.etree.parse(f) - >>> style = lxml.etree.XSLT(xslt_doc) + >>> transform = lxml.etree.XSLT(xslt_doc) >>> f = StringIO('Text') >>> doc = lxml.etree.parse(f) The parameters are passed as keyword parameters to the transform call. First let's try passing in a simple string expression:: - >>> result = style(doc, a="'A'") + >>> result = transform(doc, a="'A'") >>> str(result) '\nA\n' Let's try a non-string XPath expression now:: - >>> result = style(doc, a="/a/b/text()") + >>> result = transform(doc, a="/a/b/text()") >>> str(result) '\nText\n' There's also a convenience method on the tree object for doing XSL -transformations. This is less efficient if you want to apply the same -XSL transformation to multiple documents, but is shorter to write:: +transformations. 
This is less efficient if you want to apply the same XSL +transformation to multiple documents, but is shorter to write, as you do not +have to instantiate a stylesheet yourself:: >>> result = doc.xslt(xslt_doc, a="'A'") >>> str(result) From scoder at codespeak.net Sun Mar 5 12:02:52 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 5 12:02:54 2006 Subject: [Lxml-checkins] r23980 - lxml/trunk/src/lxml/tests Message-ID: <20060305110252.BD383100A6@code0.codespeak.net> Author: scoder Date: Sun Mar 5 12:02:51 2006 New Revision: 23980 Modified: lxml/trunk/src/lxml/tests/test_xslt.py Log: updated test case to cover more use patterns Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Sun Mar 5 12:02:51 2006 @@ -166,11 +166,17 @@ source = self.parse(xml) styledoc = self.parse(xslt) transform = etree.XSLT(styledoc) + result = transform.apply(source) + result = transform.apply(source) + etree.tostring(result.getroot()) + result = transform.apply(source) + etree.tostring(result.getroot()) + result = transform(source) result = transform(source) - etree.tostring(result.getroot()) + str(result) result = transform(source) - etree.tostring(result.getroot()) + str(result) def test_xslt_empty(self): # could segfault if result contains "empty document" From scoder at codespeak.net Sun Mar 5 12:45:18 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 5 12:45:20 2006 Subject: [Lxml-checkins] r23981 - lxml/branch/scoder2/src/lxml/tests Message-ID: <20060305114518.BD0A71009B@code0.codespeak.net> Author: scoder Date: Sun Mar 5 12:45:17 2006 New Revision: 23981 Modified: lxml/branch/scoder2/src/lxml/tests/test_xslt.py Log: copied new test case from trunk Modified: lxml/branch/scoder2/src/lxml/tests/test_xslt.py 
============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_xslt.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_xslt.py Sun Mar 5 12:45:17 2006 @@ -169,6 +169,30 @@ etree.tostring(result.getroot()) + def test_xslt_repeat_transform(self): + xml = '' + xslt = '''\ + + + Some text + + +''' + source = self.parse(xml) + styledoc = self.parse(xslt) + transform = etree.XSLT(styledoc) + result = transform.apply(source) + result = transform.apply(source) + etree.tostring(result.getroot()) + result = transform.apply(source) + etree.tostring(result.getroot()) + + result = transform(source) + result = transform(source) + str(result) + result = transform(source) + str(result) + def test_xslt_empty(self): # could segfault if result contains "empty document" xml = '' From scoder at codespeak.net Sun Mar 5 12:47:15 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 5 12:47:16 2006 Subject: [Lxml-checkins] r23982 - lxml/branch/scoder2/src/lxml Message-ID: <20060305114715.8124F1009B@code0.codespeak.net> Author: scoder Date: Sun Mar 5 12:47:14 2006 New Revision: 23982 Modified: lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/tree.pxd Log: use xmlDOMWrapReconcileNamespaces when copying nodes between documents Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Sun Mar 5 12:47:14 2006 @@ -1369,7 +1369,8 @@ """ changeDocumentBelowHelper(node._c_node, doc) tree.xmlReconciliateNs(doc._c_doc, node._c_node) - + tree.xmlDOMWrapReconcileNamespaces(NULL, node._c_node, 1) + cdef void changeDocumentBelowHelper(xmlNode* c_node, _Document doc): cdef ProxyRef* ref cdef xmlNode* c_current Modified: lxml/branch/scoder2/src/lxml/tree.pxd ============================================================================== --- 
lxml/branch/scoder2/src/lxml/tree.pxd (original) +++ lxml/branch/scoder2/src/lxml/tree.pxd Sun Mar 5 12:47:14 2006 @@ -154,6 +154,7 @@ cdef xmlDoc* xmlCopyDoc(xmlDoc* doc, int recursive) cdef xmlNode* xmlCopyNode(xmlNode* node, int extended) cdef int xmlReconciliateNs(xmlDoc* doc, xmlNode* tree) + cdef int xmlDOMWrapReconcileNamespaces(void* ctxt, xmlNode* tree, int options) cdef xmlBuffer* xmlBufferCreate() cdef char* xmlBufferContent(xmlBuffer* buf) From scoder at codespeak.net Sun Mar 5 13:00:40 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 5 13:00:41 2006 Subject: [Lxml-checkins] r23983 - lxml/branch/scoder2/src/lxml Message-ID: <20060305120040.D4BEF1009C@code0.codespeak.net> Author: scoder Date: Sun Mar 5 13:00:39 2006 New Revision: 23983 Modified: lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/tree.pxd Log: clean up of last patch Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Sun Mar 5 13:00:39 2006 @@ -1369,7 +1369,7 @@ """ changeDocumentBelowHelper(node._c_node, doc) tree.xmlReconciliateNs(doc._c_doc, node._c_node) - tree.xmlDOMWrapReconcileNamespaces(NULL, node._c_node, 1) + tree.xmlDOMWrapReconcileNamespaces(NULL, node._c_node, 1) cdef void changeDocumentBelowHelper(xmlNode* c_node, _Document doc): cdef ProxyRef* ref Modified: lxml/branch/scoder2/src/lxml/tree.pxd ============================================================================== --- lxml/branch/scoder2/src/lxml/tree.pxd (original) +++ lxml/branch/scoder2/src/lxml/tree.pxd Sun Mar 5 13:00:39 2006 @@ -154,7 +154,8 @@ cdef xmlDoc* xmlCopyDoc(xmlDoc* doc, int recursive) cdef xmlNode* xmlCopyNode(xmlNode* node, int extended) cdef int xmlReconciliateNs(xmlDoc* doc, xmlNode* tree) - cdef int xmlDOMWrapReconcileNamespaces(void* ctxt, xmlNode* tree, int options) + cdef int 
xmlDOMWrapReconcileNamespaces(void* ctxt, xmlNode* tree, + int options) cdef xmlBuffer* xmlBufferCreate() cdef char* xmlBufferContent(xmlBuffer* buf) From scoder at codespeak.net Sun Mar 5 13:03:24 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 5 13:03:26 2006 Subject: [Lxml-checkins] r23984 - lxml/branch/scoder2/src/lxml Message-ID: <20060305120324.A88611009B@code0.codespeak.net> Author: scoder Date: Sun Mar 5 13:03:23 2006 New Revision: 23984 Modified: lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/tree.pxd Log: reverted last two revisions to undo patch (will be moved into a branch) Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Sun Mar 5 13:03:23 2006 @@ -1369,8 +1369,7 @@ """ changeDocumentBelowHelper(node._c_node, doc) tree.xmlReconciliateNs(doc._c_doc, node._c_node) - tree.xmlDOMWrapReconcileNamespaces(NULL, node._c_node, 1) - + cdef void changeDocumentBelowHelper(xmlNode* c_node, _Document doc): cdef ProxyRef* ref cdef xmlNode* c_current Modified: lxml/branch/scoder2/src/lxml/tree.pxd ============================================================================== --- lxml/branch/scoder2/src/lxml/tree.pxd (original) +++ lxml/branch/scoder2/src/lxml/tree.pxd Sun Mar 5 13:03:23 2006 @@ -154,8 +154,6 @@ cdef xmlDoc* xmlCopyDoc(xmlDoc* doc, int recursive) cdef xmlNode* xmlCopyNode(xmlNode* node, int extended) cdef int xmlReconciliateNs(xmlDoc* doc, xmlNode* tree) - cdef int xmlDOMWrapReconcileNamespaces(void* ctxt, xmlNode* tree, - int options) cdef xmlBuffer* xmlBufferCreate() cdef char* xmlBufferContent(xmlBuffer* buf) From scoder at codespeak.net Sun Mar 5 13:05:05 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 5 13:05:07 2006 Subject: [Lxml-checkins] r23985 - lxml/branch/namespace_reconsiliation Message-ID: 
<20060305120505.509B71009B@code0.codespeak.net> Author: scoder Date: Sun Mar 5 13:05:04 2006 New Revision: 23985 Added: lxml/branch/namespace_reconsiliation/ - copied from r23984, lxml/trunk/ Log: new branch to incorporate libxml2 CVS support for better namespace cleanup From scoder at codespeak.net Sun Mar 5 13:12:00 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 5 13:12:01 2006 Subject: [Lxml-checkins] r23986 - lxml/branch/namespace_reconsiliation/src/lxml Message-ID: <20060305121200.3FA8C100A2@code0.codespeak.net> Author: scoder Date: Sun Mar 5 13:11:58 2006 New Revision: 23986 Modified: lxml/branch/namespace_reconsiliation/src/lxml/etree.pyx lxml/branch/namespace_reconsiliation/src/lxml/tree.pxd Log: call tree.xmlDOMWrapReconcileNamespaces after copying nodes Modified: lxml/branch/namespace_reconsiliation/src/lxml/etree.pyx ============================================================================== --- lxml/branch/namespace_reconsiliation/src/lxml/etree.pyx (original) +++ lxml/branch/namespace_reconsiliation/src/lxml/etree.pyx Sun Mar 5 13:11:58 2006 @@ -1369,7 +1369,8 @@ """ changeDocumentBelowHelper(node._c_node, doc) tree.xmlReconciliateNs(doc._c_doc, node._c_node) - + tree.xmlDOMWrapReconcileNamespaces(NULL, node._c_node, 1) + cdef void changeDocumentBelowHelper(xmlNode* c_node, _Document doc): cdef ProxyRef* ref cdef xmlNode* c_current Modified: lxml/branch/namespace_reconsiliation/src/lxml/tree.pxd ============================================================================== --- lxml/branch/namespace_reconsiliation/src/lxml/tree.pxd (original) +++ lxml/branch/namespace_reconsiliation/src/lxml/tree.pxd Sun Mar 5 13:11:58 2006 @@ -154,6 +154,8 @@ cdef xmlDoc* xmlCopyDoc(xmlDoc* doc, int recursive) cdef xmlNode* xmlCopyNode(xmlNode* node, int extended) cdef int xmlReconciliateNs(xmlDoc* doc, xmlNode* tree) + cdef int xmlDOMWrapReconcileNamespaces(void* ctxt, xmlNode* tree, + int options) cdef xmlBuffer* xmlBufferCreate() cdef 
char* xmlBufferContent(xmlBuffer* buf) From scoder at codespeak.net Sun Mar 5 13:18:55 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 5 13:18:56 2006 Subject: [Lxml-checkins] r23987 - lxml/branch/scoder2/src/lxml Message-ID: <20060305121855.E670F100A2@code0.codespeak.net> Author: scoder Date: Sun Mar 5 13:18:54 2006 New Revision: 23987 Modified: lxml/branch/scoder2/src/lxml/etree.pyx Log: removed typo as in trunk Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Sun Mar 5 13:18:54 2006 @@ -303,7 +303,7 @@ _destroyFakeDoc(c_base_doc, c_doc) if bytes < 0: - raise C14NError, "C18N failed" + raise C14NError, "C14N failed" if not hasattr(file, 'write'): file = open(file, 'wb') file.write(data) From scoder at codespeak.net Sun Mar 5 13:47:33 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 5 13:47:35 2006 Subject: [Lxml-checkins] r23988 - lxml/branch/scoder2/src/lxml/tests Message-ID: <20060305124733.DE8C0100A2@code0.codespeak.net> Author: scoder Date: Sun Mar 5 13:47:32 2006 New Revision: 23988 Modified: lxml/branch/scoder2/src/lxml/tests/test_xslt.py Log: new test case that tries to instantiate an XSLT from a subtree of a document (i.e. 
a non-root stylesheet element) Modified: lxml/branch/scoder2/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_xslt.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_xslt.py Sun Mar 5 13:47:32 2006 @@ -44,6 +44,21 @@ st = etree.XSLT(style.getroot()) self.assertRaises(TypeError, etree.XSLT, None) + def test_xslt_input_partial_doc(self): + tree = self.parse('BC') + style = self.parse('''\ + + + + + + + +''') + + st = etree.XSLT(style.getroot()[0]) + def test_xslt_broken(self): tree = self.parse('') style = self.parse('''\ From scoder at codespeak.net Sun Mar 5 13:49:20 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 5 13:49:22 2006 Subject: [Lxml-checkins] r23989 - lxml/branch/scoder2/src/lxml Message-ID: <20060305124920.BF12B100A2@code0.codespeak.net> Author: scoder Date: Sun Mar 5 13:49:19 2006 New Revision: 23989 Modified: lxml/branch/scoder2/src/lxml/xslt.pxi Log: use _fakeRootDoc for XSLT instantiation to assure using the correct root node (fixes test case from last revision) Modified: lxml/branch/scoder2/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/xslt.pxi (original) +++ lxml/branch/scoder2/src/lxml/xslt.pxi Sun Mar 5 13:49:19 2006 @@ -255,9 +255,17 @@ # doesn't change cdef xslt.xsltStylesheet* c_style cdef xmlDoc* c_doc + cdef xmlDoc* fake_c_doc cdef _Document doc + cdef _NodeBase root_node + doc = _documentOrRaise(xslt_input) - c_doc = tree.xmlCopyDoc(doc._c_doc, 1) + root_node = _rootNodeOf(xslt_input) + + fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) + c_doc = tree.xmlCopyDoc(fake_c_doc, 1) + _destroyFakeDoc(doc._c_doc, fake_c_doc) + # XXX work around bug in xmlCopyDoc (fix is upcoming in new release # of libxml2) if doc._c_doc.URL is not NULL: From scoder at codespeak.net Sun Mar 5 14:59:33 2006 From: scoder at codespeak.net 
(scoder@codespeak.net) Date: Sun Mar 5 14:59:35 2006 Subject: [Lxml-checkins] r23990 - lxml/branch/scoder2/src/lxml/tests Message-ID: <20060305135933.F36511009C@code0.codespeak.net> Author: scoder Date: Sun Mar 5 14:59:32 2006 New Revision: 23990 Modified: lxml/branch/scoder2/src/lxml/tests/test_xslt.py Log: extended test case to also test for parse exception Modified: lxml/branch/scoder2/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_xslt.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_xslt.py Sun Mar 5 14:59:32 2006 @@ -57,7 +57,10 @@ ''') - st = etree.XSLT(style.getroot()[0]) + self.assertRaises(etree.XSLTParseError, etree.XSLT, style) + root_node = style.getroot() + self.assertRaises(etree.XSLTParseError, etree.XSLT, root_node) + st = etree.XSLT(root_node[0]) def test_xslt_broken(self): tree = self.parse('') From scoder at codespeak.net Mon Mar 6 13:09:27 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 6 13:09:28 2006 Subject: [Lxml-checkins] r24008 - lxml/trunk/doc Message-ID: <20060306120927.328A31009F@code0.codespeak.net> Author: scoder Date: Mon Mar 6 13:09:25 2006 New Revision: 24008 Modified: lxml/trunk/doc/compatibility.txt Log: mention that ElementTree is in stdlib in Py2.5 Modified: lxml/trunk/doc/compatibility.txt ============================================================================== --- lxml/trunk/doc/compatibility.txt (original) +++ lxml/trunk/doc/compatibility.txt Mon Mar 6 13:09:25 2006 @@ -14,7 +14,10 @@ # ElementTree from elementtree.ElementTree import Element - When switching over code from ElementTree to etree, and you're using + # ElementTree in the Python 2.5 standard library + from xml.etree.ElementTree import Element + + When switching over code from ElementTree to lxml.etree, and you're using the package name prefix 'ElementTree', you can do the following:: # instead of From scoder at 
codespeak.net Mon Mar 6 13:21:40 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 6 13:21:40 2006 Subject: [Lxml-checkins] r24009 - lxml/trunk/doc Message-ID: <20060306122140.4F5EC1009F@code0.codespeak.net> Author: scoder Date: Mon Mar 6 13:21:39 2006 New Revision: 24009 Modified: lxml/trunk/doc/main.txt Log: clean up main page in docs Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Mon Mar 6 13:21:39 2006 @@ -84,6 +84,8 @@ Download -------- +* `lxml 0.8`_ (2005-11-03) + * `lxml 0.7`_ (2005-06-15) * `lxml 0.6`_ (2005-05-14) @@ -92,12 +94,6 @@ * `lxml 0.5`_ (2005-04-08) -.. _`lxml 0.6`: lxml-0.6.tgz - -.. _`lxml 0.5.1`: lxml-0.5.1.tgz - -.. _`lxml 0.5`: lxml-0.5.tgz - See also the `installation instructions`_. .. _`installation instructions`: installation.html From scoder at codespeak.net Mon Mar 6 13:22:57 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 6 13:22:57 2006 Subject: [Lxml-checkins] r24010 - lxml/branch/scoder2/doc Message-ID: <20060306122257.279601009F@code0.codespeak.net> Author: scoder Date: Mon Mar 6 13:22:56 2006 New Revision: 24010 Modified: lxml/branch/scoder2/doc/main.txt Log: mention namespace API, reference new XPath extension docs Modified: lxml/branch/scoder2/doc/main.txt ============================================================================== --- lxml/branch/scoder2/doc/main.txt (original) +++ lxml/branch/scoder2/doc/main.txt Mon Mar 6 13:22:56 2006 @@ -53,8 +53,12 @@ lxml also `extends this API`_ to expose libxml2 and libxslt specific functionality, such as XPath_, `Relax NG`_, `XML Schema`_, `XSLT`_, and -`c14n`_. There is also more `detailed information`_ about what's -possible with XPath. +`c14n`_. Python code can be called from XPath expressions and XSLT stylesheets +through the use of `extension functions`_. 
+ +In addition to the ElementTree API, lxml also features an API for +`implementing namespaces`_ using tag specific element classes. This is a +simple way to write arbitrary XML driven APIs on top of lxml. .. _`ElementTree API`: http://effbot.org/zone/element-index.htm @@ -62,7 +66,7 @@ .. _`extends this API`: api.html -.. _`detailed information`: xpath.html +.. _`extension functions`_: extensions.html .. _XPath: http://www.w3.org/TR/xpath @@ -74,6 +78,8 @@ .. _`c14n`: http://www.w3.org/TR/2001/REC-xml-c14n-20010315 +.. _`implementing namespaces`: namespace_extensions.html + Mailing list ------------ From scoder at codespeak.net Mon Mar 6 13:30:36 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 6 13:30:38 2006 Subject: [Lxml-checkins] r24011 - lxml/branch/scoder2/src/lxml Message-ID: <20060306123036.565941009F@code0.codespeak.net> Author: scoder Date: Mon Mar 6 13:30:34 2006 New Revision: 24011 Modified: lxml/branch/scoder2/src/lxml/relaxng.pxi Log: clean up after fixing XSLT parsing of Elements (not only ElementTrees) Modified: lxml/branch/scoder2/src/lxml/relaxng.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/relaxng.pxi (original) +++ lxml/branch/scoder2/src/lxml/relaxng.pxi Mon Mar 6 13:30:34 2006 @@ -93,7 +93,7 @@ cdef object _build_relaxng_relocation_stylesheet(): - return XSLT(ElementTree(XML('''\ + return XSLT(XML('''\ -'''))) +''')) From scoder at codespeak.net Mon Mar 6 13:32:10 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 6 13:32:12 2006 Subject: [Lxml-checkins] r24012 - lxml/branch/scoder2/doc Message-ID: <20060306123210.E5FAC1009F@code0.codespeak.net> Author: scoder Date: Mon Mar 6 13:32:09 2006 New Revision: 24012 Modified: lxml/branch/scoder2/doc/main.txt Log: merged in doc clean up from trunk Modified: lxml/branch/scoder2/doc/main.txt ============================================================================== --- 
lxml/branch/scoder2/doc/main.txt (original) +++ lxml/branch/scoder2/doc/main.txt Mon Mar 6 13:32:09 2006 @@ -90,6 +90,8 @@ Download -------- +* `lxml 0.8`_ (2005-11-03) + * `lxml 0.7`_ (2005-06-15) * `lxml 0.6`_ (2005-05-14) @@ -98,12 +100,6 @@ * `lxml 0.5`_ (2005-04-08) -.. _`lxml 0.6`: lxml-0.6.tgz - -.. _`lxml 0.5.1`: lxml-0.5.1.tgz - -.. _`lxml 0.5`: lxml-0.5.tgz - See also the `installation instructions`_. .. _`installation instructions`: installation.html From scoder at codespeak.net Mon Mar 6 13:44:07 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 6 13:44:08 2006 Subject: [Lxml-checkins] r24013 - lxml/branch/scoder2/doc Message-ID: <20060306124407.512121009F@code0.codespeak.net> Author: scoder Date: Mon Mar 6 13:44:06 2006 New Revision: 24013 Modified: lxml/branch/scoder2/doc/compatibility.txt Log: merged in doc changes from trunk Modified: lxml/branch/scoder2/doc/compatibility.txt ============================================================================== --- lxml/branch/scoder2/doc/compatibility.txt (original) +++ lxml/branch/scoder2/doc/compatibility.txt Mon Mar 6 13:44:06 2006 @@ -14,7 +14,10 @@ # ElementTree from elementtree.ElementTree import Element - When switching over code from ElementTree to etree, and you're using + # ElementTree in the Python 2.5 standard library + from xml.etree.ElementTree import Element + + When switching over code from ElementTree to lxml.etree, and you're using the package name prefix 'ElementTree', you can do the following:: # instead of From scoder at codespeak.net Mon Mar 6 13:47:44 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 6 13:47:45 2006 Subject: [Lxml-checkins] r24014 - lxml/branch/scoder2/doc Message-ID: <20060306124744.1FC6C1009F@code0.codespeak.net> Author: scoder Date: Mon Mar 6 13:47:43 2006 New Revision: 24014 Modified: lxml/branch/scoder2/doc/api.txt Log: merged in doc changes from trunk Modified: lxml/branch/scoder2/doc/api.txt 
============================================================================== --- lxml/branch/scoder2/doc/api.txt (original) +++ lxml/branch/scoder2/doc/api.txt Mon Mar 6 13:47:43 2006 @@ -112,14 +112,14 @@ ... ... ''') >>> xslt_doc = lxml.etree.parse(f) - >>> style = lxml.etree.XSLT(xslt_doc) + >>> transform = lxml.etree.XSLT(xslt_doc) -You can then apply the style against some ElementTree document by simply +You can then run the transformation on an ElementTree document by simply calling it, and this results in another ElementTree object:: >>> f = StringIO('Text') >>> doc = lxml.etree.parse(f) - >>> result = style(doc) + >>> result = transform(doc) The result object can accessed like a normal ElementTree document:: @@ -144,26 +144,27 @@ ... ... ''') >>> xslt_doc = lxml.etree.parse(f) - >>> style = lxml.etree.XSLT(xslt_doc) + >>> transform = lxml.etree.XSLT(xslt_doc) >>> f = StringIO('Text') >>> doc = lxml.etree.parse(f) The parameters are passed as keyword parameters to the transform call. First let's try passing in a simple string expression:: - >>> result = style(doc, a="'A'") + >>> result = transform(doc, a="'A'") >>> str(result) '\nA\n' Let's try a non-string XPath expression now:: - >>> result = style(doc, a="/a/b/text()") + >>> result = transform(doc, a="/a/b/text()") >>> str(result) '\nText\n' There's also a convenience method on the tree object for doing XSL -transformations. This is less efficient if you want to apply the same -XSL transformation to multiple documents, but is shorter to write:: +transformations. 
This is less efficient if you want to apply the same XSL +transformation to multiple documents, but is shorter to write, as you do not +have to instantiate a stylesheet yourself:: >>> result = doc.xslt(xslt_doc, a="'A'") >>> str(result) From scoder at codespeak.net Mon Mar 6 14:04:35 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 6 14:04:35 2006 Subject: [Lxml-checkins] r24015 - lxml/branch/htmlparser Message-ID: <20060306130435.843401009F@code0.codespeak.net> Author: scoder Date: Mon Mar 6 14:04:34 2006 New Revision: 24015 Added: lxml/branch/htmlparser/ - copied from r24014, lxml/trunk/ Log: new branch to implement HTML parser From scoder at codespeak.net Mon Mar 6 15:33:04 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 6 15:33:05 2006 Subject: [Lxml-checkins] r24018 - lxml/trunk/src/lxml Message-ID: <20060306143304.8414B100C1@code0.codespeak.net> Author: scoder Date: Mon Mar 6 15:33:03 2006 New Revision: 24018 Modified: lxml/trunk/src/lxml/parser.pxi Log: more intuitive keyword arguments, remove from_parser keyword which complicated things too much, possible copy(**args) method to retrieve modified parser copy (currently commented out) Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Mon Mar 6 15:33:03 2006 @@ -26,24 +26,19 @@ major run-time overhead. The keyword arguments in the constructor are mainly based on the libxml2 - parser configuration. The 'from_parser' keyword additionally allows to - provide a parser whose configurations is copied before applying the - additional arguments. Note that DTD validation obviously implies loading - the DTD. + parser configuration. A DTD will only be loaded if validation or + attribute default values are requested. 
""" cdef int _parse_options - def __init__(self, load_dtd=False, validate_dtd=False, no_network=False, - ns_clean=False, from_parser=None): + def __init__(self, attribute_defaults=False, dtd_validation=False, + no_network=False, ns_clean=False): cdef int parse_options - if from_parser is not None: - parse_options = from_parser._parse_options - else: - parse_options = _ORIG_DEFAULT_PARSE_OPTIONS + parse_options = _ORIG_DEFAULT_PARSE_OPTIONS - if validate_dtd: + if dtd_validation: parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD | \ xmlparser.XML_PARSE_DTDVALID - if load_dtd: + if attribute_defaults: parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD | \ xmlparser.XML_PARSE_DTDATTR if no_network: @@ -53,6 +48,23 @@ self._parse_options = parse_options +## def copy(self, attribute_defaults=None, dtd_validation=None, +## no_network=None, ns_clean=None): +## cdef int parse_options +## parse_options = self._parse_options +## if attribute_defaults is None: +## attribute_defaults = parse_options & xmlparser.XML_PARSE_DTDATTR +## if dtd_validation is None: +## dtd_validation = parse_options & xmlparser.XML_PARSE_DTDVALID +## if no_network is None: +## no_network = parse_options & xmlparser.XML_PARSE_NONET +## if ns_clean is None: +## ns_clean = parse_options & xmlparser.XML_PARSE_NSCLEAN + +## return self.__class__(attribute_defaults=attribute_defaults, +## dtd_validation=dtd_validation, +## no_network=no_network, ns_clean=ns_clean) + def set_default_parser(parser=None): """Set a default XMLParser. 
This parser is used globally whenever no From scoder at codespeak.net Mon Mar 6 15:37:11 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 6 15:37:13 2006 Subject: [Lxml-checkins] r24019 - lxml/branch/scoder2/src/lxml Message-ID: <20060306143711.DBE66100C1@code0.codespeak.net> Author: scoder Date: Mon Mar 6 15:37:05 2006 New Revision: 24019 Modified: lxml/branch/scoder2/src/lxml/parser.pxi Log: merged in parser API changes from trunk Modified: lxml/branch/scoder2/src/lxml/parser.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/parser.pxi (original) +++ lxml/branch/scoder2/src/lxml/parser.pxi Mon Mar 6 15:37:05 2006 @@ -26,24 +26,19 @@ major run-time overhead. The keyword arguments in the constructor are mainly based on the libxml2 - parser configuration. The 'from_parser' keyword additionally allows to - provide a parser whose configurations is copied before applying the - additional arguments. Note that DTD validation obviously implies loading - the DTD. + parser configuration. A DTD will only be loaded if validation or + attribute default values are requested. 
""" cdef int _parse_options - def __init__(self, load_dtd=False, validate_dtd=False, no_network=False, - ns_clean=False, from_parser=None): + def __init__(self, attribute_defaults=False, dtd_validation=False, + no_network=False, ns_clean=False): cdef int parse_options - if from_parser is not None: - parse_options = from_parser._parse_options - else: - parse_options = _ORIG_DEFAULT_PARSE_OPTIONS + parse_options = _ORIG_DEFAULT_PARSE_OPTIONS - if validate_dtd: + if dtd_validation: parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD | \ xmlparser.XML_PARSE_DTDVALID - if load_dtd: + if attribute_defaults: parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD | \ xmlparser.XML_PARSE_DTDATTR if no_network: @@ -53,6 +48,23 @@ self._parse_options = parse_options +## def copy(self, attribute_defaults=None, dtd_validation=None, +## no_network=None, ns_clean=None): +## cdef int parse_options +## parse_options = self._parse_options +## if attribute_defaults is None: +## attribute_defaults = parse_options & xmlparser.XML_PARSE_DTDATTR +## if dtd_validation is None: +## dtd_validation = parse_options & xmlparser.XML_PARSE_DTDVALID +## if no_network is None: +## no_network = parse_options & xmlparser.XML_PARSE_NONET +## if ns_clean is None: +## ns_clean = parse_options & xmlparser.XML_PARSE_NSCLEAN + +## return self.__class__(attribute_defaults=attribute_defaults, +## dtd_validation=dtd_validation, +## no_network=no_network, ns_clean=ns_clean) + def set_default_parser(parser=None): """Set a default XMLParser. 
This parser is used globally whenever no From scoder at codespeak.net Tue Mar 7 11:08:51 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 7 11:08:53 2006 Subject: [Lxml-checkins] r24041 - lxml/trunk Message-ID: <20060307100851.CB9C0100A3@code0.codespeak.net> Author: scoder Date: Tue Mar 7 11:08:50 2006 New Revision: 24041 Added: lxml/trunk/bench.py Log: new benchmark script Added: lxml/trunk/bench.py ============================================================================== --- (empty file) +++ lxml/trunk/bench.py Tue Mar 7 11:08:50 2006 @@ -0,0 +1,98 @@ +import sys, timeit +from itertools import * + +from lxml import etree + +def atoz(): + return iter('abcdefghijklmnopqrstuvwxyz') + +class BenchMark(object): + ALL_TREES = (1,2) + def setup(self, trees=ALL_TREES): + if 1 in trees: + root = etree.Element('{a}root') + for ch1 in atoz(): + el = etree.SubElement(root, "{b}"+ch1) + for ch2 in atoz(): + for i in range(100): + etree.SubElement(el, "{c}%s%03d" % (ch2, i)) + + self.root1 = root + self.tree1 = etree.ElementTree(root) + + if 2 in trees: + root = etree.Element('{x}root') + for ch1 in atoz(): + for i in range(100): + el = etree.SubElement(root, "{y}%s%03d" % (ch1, i)) + for ch2 in atoz(): + etree.SubElement(el, "{z}"+ch2) + + self.root2 = root + self.tree2 = etree.ElementTree(root) + + def benchmarks(self): + """Returns a list of all benchmarks. + + A benchmark is a tuple containing a method name and a list of tree + numbers. Trees are prepared by the setup function. 
+ """ + benchmarks = [] + for name in dir(self): + if not name.startswith('bench_'): + continue + method = getattr(self, name) + tree_sets = method.__doc__.split() + if tree_sets: + for tree_set in tree_sets: + benchmarks.append((name, sorted(imap(int, tree_set.split(','))))) + else: + for tree in bench.ALL_TREES: + benchmarks.append((name, [tree])) + return benchmarks + + +class LxmlBenchMark(BenchMark): + def bench_append_from_document(self, tree1, root1, tree2, root2): + "1,2" # needs trees 1 and 2 + for el in root2: + root1.append(root2[0]) + + def bench_rotate_children(self, tree, root): + "1 2" # runs on tree 1 or 2 independently + for i in range(100): + root[-1] = root[0] + + def bench_reorder(self, tree, root): + "1 2" + for i in range(len(root)/2): + root[-i] = root[0] + + +if __name__ == '__main__': + bench = LxmlBenchMark() + benchmarks = bench.benchmarks() + + if len(sys.argv) > 1: + selected = [ "bench_%s" % name for name in sys.argv[1:] ] + benchmarks = [ b for b in benchmarks if b[0] in selected ] + + benchmarks.sort() # by name + + for bench_name, tree_set in benchmarks: + bench_args = ', '.join("bench.tree%d, bench.root%d" % (tree, tree) + for tree in tree_set) + + timer = timeit.Timer( + "bench.%s(%s)" % (bench_name, bench_args), + "from __main__ import bench ; bench.setup(%s)" % str(tuple(tree_set)) + ) + + print "%-25s (T%-6s)" % (bench_name[6:], ',T'.join(imap(str, tree_set))[:6]), + sys.stdout.flush() + + result = timer.repeat(4, 1000)[1:] # run benchmark, but ignore first run + + for t in result: + print "%8.4f" % t, + print "msec/pass, avg: %8.4f" % (sum(result) / 3) From scoder at codespeak.net Tue Mar 7 11:09:23 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 7 11:09:25 2006 Subject: [Lxml-checkins] r24042 - lxml/branch/scoder2 Message-ID: <20060307100923.ED937100A3@code0.codespeak.net> Author: scoder Date: Tue Mar 7 11:09:22 2006 New Revision: 24042 Added: lxml/branch/scoder2/bench.py - copied unchanged from 
r24041, lxml/trunk/bench.py Log: copied benchmark script from trunk From scoder at codespeak.net Tue Mar 7 11:19:43 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 7 11:19:45 2006 Subject: [Lxml-checkins] r24043 - lxml/trunk/src/lxml Message-ID: <20060307101943.9A06A100C8@code0.codespeak.net> Author: scoder Date: Tue Mar 7 11:19:42 2006 New Revision: 24043 Modified: lxml/trunk/src/lxml/etree.pyx Log: patch that avoids recursion in changeDocumentBelow if the new node was moved inside its own document Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Tue Mar 7 11:19:42 2006 @@ -329,14 +329,16 @@ def __setitem__(self, index, _NodeBase element): cdef xmlNode* c_node cdef xmlNode* c_next + cdef int foreign c_node = _findChild(self._c_node, index) if c_node is NULL: raise IndexError + foreign = self._doc is not element._doc c_next = element._c_node.next _removeText(c_node.next) tree.xmlReplaceNode(c_node, element._c_node) _moveTail(c_next, element._c_node) - changeDocumentBelow(element, self._doc) + changeDocumentBelow(element, self._doc, foreign) def __delitem__(self, index): cdef xmlNode* c_node @@ -355,6 +357,7 @@ cdef xmlNode* c_node cdef xmlNode* c_next cdef _Element mynode + cdef int foreign # first, find start of slice c_node = _findChild(self._c_node, start) # now delete the slice @@ -369,6 +372,7 @@ # if the next element is in the list, insert before it for node in value: mynode = node + foreign = self._doc is not mynode._doc # store possible text tail c_next = mynode._c_node.next # now move node previous to insertion point @@ -377,7 +381,7 @@ # and move tail just behind his node _moveTail(c_next, mynode._c_node) # move it into a new document - changeDocumentBelow(mynode, self._doc) + changeDocumentBelow(mynode, self._doc, foreign) def __deepcopy__(self, memo): return self.__copy__() @@ -397,6 
+401,8 @@ def append(self, _Element element): cdef xmlNode* c_next cdef xmlNode* c_next2 + cdef int foreign + foreign = self._doc is not element._doc # store possible text node c_next = element._c_node.next # XXX what if element is coming from a different document? @@ -406,7 +412,7 @@ _moveTail(c_next, element._c_node) # uh oh, elements may be pointing to different doc when # parent element has moved; change them too.. - changeDocumentBelow(element, self._doc) + changeDocumentBelow(element, self._doc, foreign) def clear(self): cdef xmlAttr* c_attr @@ -434,14 +440,16 @@ def insert(self, index, _Element element): cdef xmlNode* c_node cdef xmlNode* c_next + cdef int foreign c_node = _findChild(self._c_node, index) if c_node is NULL: self.append(element) return + foreign = self._doc is not element._doc c_next = element._c_node.next tree.xmlAddPrevSibling(c_node, element._c_node) _moveTail(c_next, element._c_node) - changeDocumentBelow(element, self._doc) + changeDocumentBelow(element, self._doc, foreign) def remove(self, _Element element): cdef xmlNode* c_node @@ -1359,7 +1367,7 @@ return source.filename return None -cdef void changeDocumentBelow(_NodeBase node, _Document doc): +cdef void changeDocumentBelow(_NodeBase node, _Document doc, int recursive): """For a node and all nodes below, change document. A node can change document in certain operations as an XML @@ -1367,7 +1375,8 @@ tree below (including the current node). It also reconciliates namespaces so they're correct inside the new environment. 
""" - changeDocumentBelowHelper(node._c_node, doc) + if recursive: + changeDocumentBelowHelper(node._c_node, doc) tree.xmlReconciliateNs(doc._c_doc, node._c_node) cdef void changeDocumentBelowHelper(xmlNode* c_node, _Document doc): From scoder at codespeak.net Tue Mar 7 11:21:21 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 7 11:21:23 2006 Subject: [Lxml-checkins] r24044 - lxml/branch/scoder2/src/lxml Message-ID: <20060307102121.E5370100C8@code0.codespeak.net> Author: scoder Date: Tue Mar 7 11:21:15 2006 New Revision: 24044 Modified: lxml/branch/scoder2/src/lxml/etree.pyx Log: merge from trunk: patch that avoids recursion in changeDocumentBelow if the new node was moved inside its own document Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Tue Mar 7 11:21:15 2006 @@ -329,14 +329,16 @@ def __setitem__(self, index, _NodeBase element): cdef xmlNode* c_node cdef xmlNode* c_next + cdef int foreign c_node = _findChild(self._c_node, index) if c_node is NULL: raise IndexError + foreign = self._doc is not element._doc c_next = element._c_node.next _removeText(c_node.next) tree.xmlReplaceNode(c_node, element._c_node) _moveTail(c_next, element._c_node) - changeDocumentBelow(element, self._doc) + changeDocumentBelow(element, self._doc, foreign) def __delitem__(self, index): cdef xmlNode* c_node @@ -355,6 +357,7 @@ cdef xmlNode* c_node cdef xmlNode* c_next cdef _Element mynode + cdef int foreign # first, find start of slice c_node = _findChild(self._c_node, start) # now delete the slice @@ -369,6 +372,7 @@ # if the next element is in the list, insert before it for node in value: mynode = node + foreign = self._doc is not mynode._doc # store possible text tail c_next = mynode._c_node.next # now move node previous to insertion point @@ -377,7 +381,7 @@ # and move tail just behind 
his node _moveTail(c_next, mynode._c_node) # move it into a new document - changeDocumentBelow(mynode, self._doc) + changeDocumentBelow(mynode, self._doc, foreign) def __deepcopy__(self, memo): return self.__copy__() @@ -397,6 +401,8 @@ def append(self, _Element element): cdef xmlNode* c_next cdef xmlNode* c_next2 + cdef int foreign + foreign = self._doc is not element._doc # store possible text node c_next = element._c_node.next # XXX what if element is coming from a different document? @@ -406,7 +412,7 @@ _moveTail(c_next, element._c_node) # uh oh, elements may be pointing to different doc when # parent element has moved; change them too.. - changeDocumentBelow(element, self._doc) + changeDocumentBelow(element, self._doc, foreign) def clear(self): cdef xmlAttr* c_attr @@ -434,14 +440,16 @@ def insert(self, index, _Element element): cdef xmlNode* c_node cdef xmlNode* c_next + cdef int foreign c_node = _findChild(self._c_node, index) if c_node is NULL: self.append(element) return + foreign = self._doc is not element._doc c_next = element._c_node.next tree.xmlAddPrevSibling(c_node, element._c_node) _moveTail(c_next, element._c_node) - changeDocumentBelow(element, self._doc) + changeDocumentBelow(element, self._doc, foreign) def remove(self, _Element element): cdef xmlNode* c_node @@ -1359,7 +1367,7 @@ return source.filename return None -cdef void changeDocumentBelow(_NodeBase node, _Document doc): +cdef void changeDocumentBelow(_NodeBase node, _Document doc, int recursive): """For a node and all nodes below, change document. A node can change document in certain operations as an XML @@ -1367,7 +1375,8 @@ tree below (including the current node). It also reconciliates namespaces so they're correct inside the new environment. 
""" - changeDocumentBelowHelper(node._c_node, doc) + if recursive: + changeDocumentBelowHelper(node._c_node, doc) tree.xmlReconciliateNs(doc._c_doc, node._c_node) cdef void changeDocumentBelowHelper(xmlNode* c_node, _Document doc): From scoder at codespeak.net Tue Mar 7 12:19:55 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 7 12:19:58 2006 Subject: [Lxml-checkins] r24050 - lxml/trunk/src/lxml Message-ID: <20060307111955.08111100CC@code0.codespeak.net> Author: scoder Date: Tue Mar 7 12:19:44 2006 New Revision: 24050 Modified: lxml/trunk/src/lxml/etree.pyx Log: prevent segfaults by catching None arguments in API functions Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Tue Mar 7 12:19:44 2006 @@ -402,6 +402,7 @@ cdef xmlNode* c_next cdef xmlNode* c_next2 cdef int foreign + _raiseIfNone(element) foreign = self._doc is not element._doc # store possible text node c_next = element._c_node.next @@ -441,6 +442,7 @@ cdef xmlNode* c_node cdef xmlNode* c_next cdef int foreign + _raiseIfNone(element) c_node = _findChild(self._c_node, index) if c_node is NULL: self.append(element) @@ -453,6 +455,7 @@ def remove(self, _Element element): cdef xmlNode* c_node + _raiseIfNone(element) c_node = self._c_node.children while c_node is not NULL: if c_node is element._c_node: @@ -572,6 +575,7 @@ def index(self, _Element x, start=None, stop=None): cdef int k cdef xmlNode* c_child + _raiseIfNone(x) k = 0 c_child = self._c_node.children @@ -974,6 +978,7 @@ def SubElement(_Element parent, tag, attrib=None, nsmap=None, **extra): cdef xmlNode* c_node cdef _Element element + _raiseIfNone(parent) c_node = _createElement(parent._doc._c_doc, tag, attrib, extra) element = _elementFactory(parent._doc, c_node) parent.append(element) @@ -1031,6 +1036,7 @@ cdef char* enc assert element is not None + # better, but not ET compatible 
: _raiseIfNone(element) #if encoding is None: # encoding = 'UTF-8' @@ -1089,6 +1095,10 @@ # Private helper functions +cdef void _raiseIfNone(el): + if el is None: + raise TypeError, "Argument must not be None." + cdef _Document _documentOrRaise(object input): cdef _Document doc doc = _documentOf(input) From scoder at codespeak.net Tue Mar 7 12:21:21 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 7 12:21:22 2006 Subject: [Lxml-checkins] r24051 - lxml/trunk Message-ID: <20060307112121.16C40100CC@code0.codespeak.net> Author: scoder Date: Tue Mar 7 12:21:10 2006 New Revision: 24051 Modified: lxml/trunk/bench.py Log: added third tree (deep), fix benchmarks to have them test what they actually should Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Tue Mar 7 12:21:10 2006 @@ -1,19 +1,22 @@ -import sys, timeit +import sys, string, timeit from itertools import * from lxml import etree -def atoz(): - return iter('abcdefghijklmnopqrstuvwxyz') - class BenchMark(object): ALL_TREES = (1,2) def setup(self, trees=ALL_TREES): + atoz = string.ascii_lowercase + def tag(ns='y'): + for i in count(): + yield "{%s}z%d" % (ns,i) + if 1 in trees: + # tree with some 2nd level and loads of 3rd level children root = etree.Element('{a}root') - for ch1 in atoz(): + for ch1 in atoz: el = etree.SubElement(root, "{b}"+ch1) - for ch2 in atoz(): + for ch2 in atoz: for i in range(100): etree.SubElement(el, "{c}%s%03d" % (ch2, i)) @@ -21,16 +24,27 @@ self.tree1 = etree.ElementTree(root) if 2 in trees: + # tree with loads of 2nd level and fewer 3rd level children root = etree.Element('{x}root') - for ch1 in atoz(): + for ch1 in atoz: for i in range(100): el = etree.SubElement(root, "{y}%s%03d" % (ch1, i)) - for ch2 in atoz(): + for ch2 in atoz: etree.SubElement(el, "{z}"+ch2) self.root2 = root self.tree2 = etree.ElementTree(root) + if 3 in trees: + # deep tree 
with constant number of children + root = etree.Element('{x}root') + children = [root] + for i in range(10): + children = list(imap(etree.SubElement, children*3, tag())) + + self.root3 = root + self.tree3 = etree.ElementTree(root) + def benchmarks(self): """Returns a list of all benchmarks. @@ -54,19 +68,18 @@ class LxmlBenchMark(BenchMark): def bench_append_from_document(self, tree1, root1, tree2, root2): - "1,2" # needs trees 1 and 2 + "1,2 2,3" # needs trees 1 and 2 or trees 2 and 3 for el in root2: root1.append(root2[0]) def bench_rotate_children(self, tree, root): - "1 2" # runs on tree 1 or 2 independently + #"1 2 3" # runs on any single tree independently for i in range(100): - root[-1] = root[0] + root.append(root[0]) def bench_reorder(self, tree, root): - "1 2" - for i in range(len(root)/2): - root[-i] = root[0] + for i in range(1,len(root)/2): + root[-i:-i] = root[0] if __name__ == '__main__': @@ -91,8 +104,8 @@ print "%-25s (T%-6s)" % (bench_name[6:], ',T'.join(imap(str, tree_set))[:6]), sys.stdout.flush() - result = timer.repeat(4, 1000)[1:] # run benchmark, but ignore first run + result = timer.repeat(3, 100) for t in result: print "%8.4f" % t, - print "msec/pass, avg: %8.4f" % (sum(result) / 3) + print "msec/pass, avg: %8.4f" % (sum(result) / len(result)) From scoder at codespeak.net Tue Mar 7 12:27:18 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 7 12:27:19 2006 Subject: [Lxml-checkins] r24052 - lxml/trunk Message-ID: <20060307112718.6E696100CC@code0.codespeak.net> Author: scoder Date: Tue Mar 7 12:27:17 2006 New Revision: 24052 Modified: lxml/trunk/bench.py Log: catch empty docstring in bench cases Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Tue Mar 7 12:27:17 2006 @@ -56,7 +56,10 @@ if not name.startswith('bench_'): continue method = getattr(self, name) - tree_sets = method.__doc__.split() + if 
method.__doc__: + tree_sets = method.__doc__.split() + else: + tree_sets = () if tree_sets: for tree_set in tree_sets: benchmarks.append((name, sorted(imap(int, tree_set.split(','))))) From scoder at codespeak.net Tue Mar 7 12:43:36 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 7 12:43:37 2006 Subject: [Lxml-checkins] r24054 - lxml/trunk/src/lxml Message-ID: <20060307114336.2D157100A3@code0.codespeak.net> Author: scoder Date: Tue Mar 7 12:43:35 2006 New Revision: 24054 Modified: lxml/trunk/src/lxml/etree.pyx Log: avoid deleting child slice if it is empty Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Tue Mar 7 12:43:35 2006 @@ -361,9 +361,10 @@ # first, find start of slice c_node = _findChild(self._c_node, start) # now delete the slice - _deleteSlice(c_node, start, stop) - # now find start of slice again, for insertion (just before it) - c_node = _findChild(self._c_node, start) + if start != stop: + _deleteSlice(c_node, start, stop) + # now find start of slice again, for insertion (just before it) + c_node = _findChild(self._c_node, start) # if the insertion point is at the end, append there if c_node is NULL: for node in value: @@ -371,6 +372,7 @@ return # if the next element is in the list, insert before it for node in value: + _raiseIfNone(node) mynode = node foreign = self._doc is not mynode._doc # store possible text tail From scoder at codespeak.net Tue Mar 7 12:46:29 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 7 12:46:30 2006 Subject: [Lxml-checkins] r24055 - lxml/trunk Message-ID: <20060307114629.A6797100A3@code0.codespeak.net> Author: scoder Date: Tue Mar 7 12:46:23 2006 New Revision: 24055 Modified: lxml/trunk/bench.py Log: clean up after test run to save memory Modified: lxml/trunk/bench.py 
============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Tue Mar 7 12:46:23 2006 @@ -4,7 +4,7 @@ from lxml import etree class BenchMark(object): - ALL_TREES = (1,2) + ALL_TREES = (1,2,3) def setup(self, trees=ALL_TREES): atoz = string.ascii_lowercase def tag(ns='y'): @@ -20,8 +20,8 @@ for i in range(100): etree.SubElement(el, "{c}%s%03d" % (ch2, i)) - self.root1 = root - self.tree1 = etree.ElementTree(root) + self._root1 = root + self._tree1 = etree.ElementTree(root) if 2 in trees: # tree with loads of 2nd level and fewer 3rd level children @@ -32,8 +32,8 @@ for ch2 in atoz: etree.SubElement(el, "{z}"+ch2) - self.root2 = root - self.tree2 = etree.ElementTree(root) + self._root2 = root + self._tree2 = etree.ElementTree(root) if 3 in trees: # deep tree with constant number of children @@ -42,8 +42,13 @@ for i in range(10): children = list(imap(etree.SubElement, children*3, tag())) - self.root3 = root - self.tree3 = etree.ElementTree(root) + self._root3 = root + self._tree3 = etree.ElementTree(root) + + def cleanup(self): + for name in dir(self): + if name.startswith('_root') or name.startswith('_tree'): + delattr(self, name) def benchmarks(self): """Returns a list of all benchmarks. 
@@ -96,7 +101,7 @@ benchmarks.sort() # by name for bench_name, tree_set in benchmarks: - bench_args = ', '.join("bench.tree%d, bench.root%d" % (tree, tree) + bench_args = ', '.join("bench._tree%d, bench._root%d" % (tree, tree) for tree in tree_set) timer = timeit.Timer( @@ -109,6 +114,8 @@ result = timer.repeat(3, 100) + bench.cleanup() + for t in result: print "%8.4f" % t, print "msec/pass, avg: %8.4f" % (sum(result) / len(result)) From scoder at codespeak.net Tue Mar 7 12:52:55 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 7 12:52:57 2006 Subject: [Lxml-checkins] r24056 - lxml/trunk Message-ID: <20060307115255.E1206100A3@code0.codespeak.net> Author: scoder Date: Tue Mar 7 12:52:54 2006 New Revision: 24056 Modified: lxml/trunk/bench.py Log: small fix to allow arbitrary tree combinations in benchmarks Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Tue Mar 7 12:52:54 2006 @@ -67,7 +67,7 @@ tree_sets = () if tree_sets: for tree_set in tree_sets: - benchmarks.append((name, sorted(imap(int, tree_set.split(','))))) + benchmarks.append((name, map(int, tree_set.split(',')))) else: for tree in bench.ALL_TREES: benchmarks.append((name, [tree])) @@ -76,7 +76,7 @@ class LxmlBenchMark(BenchMark): def bench_append_from_document(self, tree1, root1, tree2, root2): - "1,2 2,3" # needs trees 1 and 2 or trees 2 and 3 + "1,2 2,3 1,3" # needs trees 1 and 2, or 2 and 3, or 1 and 3 for el in root2: root1.append(root2[0]) @@ -98,7 +98,7 @@ selected = [ "bench_%s" % name for name in sys.argv[1:] ] benchmarks = [ b for b in benchmarks if b[0] in selected ] - benchmarks.sort() # by name + benchmarks.sort() # by name and tree tuple for bench_name, tree_set in benchmarks: bench_args = ', '.join("bench._tree%d, bench._root%d" % (tree, tree) From scoder at codespeak.net Tue Mar 7 13:09:00 2006 From: scoder at codespeak.net (scoder@codespeak.net) 
Date: Tue Mar 7 13:09:02 2006 Subject: [Lxml-checkins] r24059 - in lxml/branch/scoder2: . src/lxml Message-ID: <20060307120900.D2A5710095@code0.codespeak.net> Author: scoder Date: Tue Mar 7 13:08:59 2006 New Revision: 24059 Modified: lxml/branch/scoder2/bench.py lxml/branch/scoder2/src/lxml/etree.pyx Log: merged in various updates from trunk regarding segfaults and benchmarking Modified: lxml/branch/scoder2/bench.py ============================================================================== --- lxml/branch/scoder2/bench.py (original) +++ lxml/branch/scoder2/bench.py Tue Mar 7 13:08:59 2006 @@ -1,35 +1,54 @@ -import sys, timeit +import sys, string, timeit from itertools import * from lxml import etree -def atoz(): - return iter('abcdefghijklmnopqrstuvwxyz') - class BenchMark(object): - ALL_TREES = (1,2) + ALL_TREES = (1,2,3) def setup(self, trees=ALL_TREES): + atoz = string.ascii_lowercase + def tag(ns='y'): + for i in count(): + yield "{%s}z%d" % (ns,i) + if 1 in trees: + # tree with some 2nd level and loads of 3rd level children root = etree.Element('{a}root') - for ch1 in atoz(): + for ch1 in atoz: el = etree.SubElement(root, "{b}"+ch1) - for ch2 in atoz(): + for ch2 in atoz: for i in range(100): etree.SubElement(el, "{c}%s%03d" % (ch2, i)) - self.root1 = root - self.tree1 = etree.ElementTree(root) + self._root1 = root + self._tree1 = etree.ElementTree(root) if 2 in trees: + # tree with loads of 2nd level and fewer 3rd level children root = etree.Element('{x}root') - for ch1 in atoz(): + for ch1 in atoz: for i in range(100): el = etree.SubElement(root, "{y}%s%03d" % (ch1, i)) - for ch2 in atoz(): + for ch2 in atoz: etree.SubElement(el, "{z}"+ch2) - self.root2 = root - self.tree2 = etree.ElementTree(root) + self._root2 = root + self._tree2 = etree.ElementTree(root) + + if 3 in trees: + # deep tree with constant number of children + root = etree.Element('{x}root') + children = [root] + for i in range(10): + children = list(imap(etree.SubElement, children*3, 
tag())) + + self._root3 = root + self._tree3 = etree.ElementTree(root) + + def cleanup(self): + for name in dir(self): + if name.startswith('_root') or name.startswith('_tree'): + delattr(self, name) def benchmarks(self): """Returns a list of all benchmarks. @@ -42,10 +61,13 @@ if not name.startswith('bench_'): continue method = getattr(self, name) - tree_sets = method.__doc__.split() + if method.__doc__: + tree_sets = method.__doc__.split() + else: + tree_sets = () if tree_sets: for tree_set in tree_sets: - benchmarks.append((name, sorted(imap(int, tree_set.split(','))))) + benchmarks.append((name, map(int, tree_set.split(',')))) else: for tree in bench.ALL_TREES: benchmarks.append((name, [tree])) @@ -54,19 +76,18 @@ class LxmlBenchMark(BenchMark): def bench_append_from_document(self, tree1, root1, tree2, root2): - "1,2" # needs trees 1 and 2 + "1,2 2,3 1,3" # needs trees 1 and 2, or 2 and 3, or 1 and 3 for el in root2: root1.append(root2[0]) def bench_rotate_children(self, tree, root): - "1 2" # runs on tree 1 or 2 independently + #"1 2 3" # runs on any single tree independently for i in range(100): - root[-1] = root[0] + root.append(root[0]) def bench_reorder(self, tree, root): - "1 2" - for i in range(len(root)/2): - root[-i] = root[0] + for i in range(1,len(root)/2): + root[-i:-i] = root[0] if __name__ == '__main__': @@ -77,10 +98,10 @@ selected = [ "bench_%s" % name for name in sys.argv[1:] ] benchmarks = [ b for b in benchmarks if b[0] in selected ] - benchmarks.sort() # by name + benchmarks.sort() # by name and tree tuple for bench_name, tree_set in benchmarks: - bench_args = ', '.join("bench.tree%d, bench.root%d" % (tree, tree) + bench_args = ', '.join("bench._tree%d, bench._root%d" % (tree, tree) for tree in tree_set) timer = timeit.Timer( @@ -91,8 +112,10 @@ print "%-25s (T%-6s)" % (bench_name[6:], ',T'.join(imap(str, tree_set))[:6]), sys.stdout.flush() - result = timer.repeat(4, 1000)[1:] # run benchmark, but ignore first run + result = timer.repeat(3, 
100) + + bench.cleanup() for t in result: print "%8.4f" % t, - print "msec/pass, avg: %8.4f" % (sum(result) / 3) + print "msec/pass, avg: %8.4f" % (sum(result) / len(result)) Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Tue Mar 7 13:08:59 2006 @@ -361,9 +361,10 @@ # first, find start of slice c_node = _findChild(self._c_node, start) # now delete the slice - _deleteSlice(c_node, start, stop) - # now find start of slice again, for insertion (just before it) - c_node = _findChild(self._c_node, start) + if start != stop: + _deleteSlice(c_node, start, stop) + # now find start of slice again, for insertion (just before it) + c_node = _findChild(self._c_node, start) # if the insertion point is at the end, append there if c_node is NULL: for node in value: @@ -371,6 +372,7 @@ return # if the next element is in the list, insert before it for node in value: + _raiseIfNone(node) mynode = node foreign = self._doc is not mynode._doc # store possible text tail @@ -402,6 +404,7 @@ cdef xmlNode* c_next cdef xmlNode* c_next2 cdef int foreign + _raiseIfNone(element) foreign = self._doc is not element._doc # store possible text node c_next = element._c_node.next @@ -441,6 +444,7 @@ cdef xmlNode* c_node cdef xmlNode* c_next cdef int foreign + _raiseIfNone(element) c_node = _findChild(self._c_node, index) if c_node is NULL: self.append(element) @@ -453,6 +457,7 @@ def remove(self, _Element element): cdef xmlNode* c_node + _raiseIfNone(element) c_node = self._c_node.children while c_node is not NULL: if c_node is element._c_node: @@ -572,6 +577,7 @@ def index(self, _Element x, start=None, stop=None): cdef int k cdef xmlNode* c_child + _raiseIfNone(x) k = 0 c_child = self._c_node.children @@ -974,6 +980,7 @@ def SubElement(_Element _parent, _tag, attrib=None, nsmap=None, **_extra): cdef xmlNode* c_node cdef 
_Element element + _raiseIfNone(_parent) c_node = _createElement(_parent._doc._c_doc, _tag, attrib, _extra) element = _elementFactory(_parent._doc, c_node) _parent.append(element) @@ -1031,6 +1038,7 @@ cdef char* enc assert element is not None + # better, but not ET compatible : _raiseIfNone(element) #if encoding is None: # encoding = 'UTF-8' @@ -1089,6 +1097,10 @@ # Private helper functions +cdef void _raiseIfNone(el): + if el is None: + raise TypeError, "Argument must not be None." + cdef _Document _documentOrRaise(object input): cdef _Document doc doc = _documentOf(input) From scoder at codespeak.net Tue Mar 7 15:49:39 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 7 15:49:40 2006 Subject: [Lxml-checkins] r24071 - lxml/trunk Message-ID: <20060307144939.389ED1006F@code0.codespeak.net> Author: scoder Date: Tue Mar 7 15:49:37 2006 New Revision: 24071 Modified: lxml/trunk/Makefile lxml/trunk/bench.py Log: refactoring of benchmark setup, add 'bench' target to Makefile Modified: lxml/trunk/Makefile ============================================================================== --- lxml/trunk/Makefile (original) +++ lxml/trunk/Makefile Tue Mar 7 15:49:37 2006 @@ -18,6 +18,9 @@ test_inplace: inplace $(PYTHON) test.py $(TESTFLAGS) $(TESTOPTS) +bench_inplace: inplace + PYTHONPATH="src:$$PYTHONPATH" $(PYTHON) bench.py + ftest_build: build $(PYTHON) test.py -f $(TESTFLAGS) $(TESTOPTS) @@ -27,6 +30,8 @@ # XXX What should the default be? 
test: test_inplace +bench: bench_inplace + ftest: ftest_inplace clean: Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Tue Mar 7 15:49:37 2006 @@ -4,46 +4,61 @@ from lxml import etree class BenchMark(object): - ALL_TREES = (1,2,3) - def setup(self, trees=ALL_TREES): - atoz = string.ascii_lowercase - def tag(ns='y'): - for i in count(): - yield "{%s}z%d" % (ns,i) - - if 1 in trees: - # tree with some 2nd level and loads of 3rd level children - root = etree.Element('{a}root') - for ch1 in atoz: - el = etree.SubElement(root, "{b}"+ch1) - for ch2 in atoz: - for i in range(100): - etree.SubElement(el, "{c}%s%03d" % (ch2, i)) + atoz = string.ascii_lowercase - self._root1 = root - self._tree1 = etree.ElementTree(root) + def setup(self, trees=()): + if not trees: + trees = self._all_trees() + + for tree in trees: + setup = getattr(self, '_setup_tree%d' % tree) + root = setup() + setattr(self, '_root%d' % tree, root) + setattr(self, '_tree%d' % tree, etree.ElementTree(root)) - if 2 in trees: - # tree with loads of 2nd level and fewer 3rd level children - root = etree.Element('{x}root') - for ch1 in atoz: + def _all_trees(self): + all_trees = [] + for name in dir(self): + if name.startswith('_setup_tree'): + all_trees.append(int(name[11:])) + return all_trees + + def _setup_tree1(self): + "tree with some 2nd level and loads of 3rd level children" + root = etree.Element('{a}root') + atoz = self.atoz + for ch1 in atoz: + el = etree.SubElement(root, "{b}"+ch1) + for ch2 in atoz: for i in range(100): - el = etree.SubElement(root, "{y}%s%03d" % (ch1, i)) - for ch2 in atoz: - etree.SubElement(el, "{z}"+ch2) - - self._root2 = root - self._tree2 = etree.ElementTree(root) - - if 3 in trees: - # deep tree with constant number of children - root = etree.Element('{x}root') - children = [root] - for i in range(10): - children = list(imap(etree.SubElement, children*3, 
tag())) + etree.SubElement(el, "{c}%s%03d" % (ch2, i)) + return root + + def _setup_tree2(self): + "tree with loads of 2nd level and fewer 3rd level children" + root = etree.Element('{x}root') + atoz = self.atoz + SubElement = etree.SubElement + for ch1 in atoz: + for i in range(100): + el = SubElement(root, "{y}%s%03d" % (ch1, i)) + for ch2 in atoz: + SubElement(el, "{z}"+ch2) + return root - self._root3 = root - self._tree3 = etree.ElementTree(root) + def _setup_tree3(self): + "deep tree with constant number of children per node" + root = etree.Element('{x}root') + SubElement = etree.SubElement + tags = self._tags + children = [root] + for i in range(10): + children = list(imap(SubElement, children*3, tags())) + return root + + def _tags(ns='y'): + for i in count(): + yield "{%s}z%d" % (ns,i) def cleanup(self): for name in dir(self): @@ -56,6 +71,7 @@ A benchmark is a tuple containing a method name and a list of tree numbers. Trees are prepared by the setup function. """ + all_trees = self._all_trees() benchmarks = [] for name in dir(self): if not name.startswith('bench_'): @@ -69,19 +85,47 @@ for tree_set in tree_sets: benchmarks.append((name, map(int, tree_set.split(',')))) else: - for tree in bench.ALL_TREES: - benchmarks.append((name, [tree])) + try: + function = getattr(method, 'im_func', method) + arg_count = method.func_code.co_argcount / 2 + except AttributeError: + arg_count = 1 + for trees in self._permutations(all_trees, arg_count): + benchmarks.append((name, trees)) return benchmarks + def _permutations(self, seq, count): + def _permutations(prefix, remainder, count): + if count == 0: + return [ prefix[:] ] + count -= 1 + perms = [] + prefix.append(None) + for pos, el in enumerate(remainder): + new_remainder = remainder[:pos] + remainder[pos+1:] + prefix[-1] = el + perms.extend( _permutations(prefix, new_remainder, count) ) + prefix.pop() + return perms + return _permutations([], seq, count) + + 
+############################################################ +# Benchmarks: +############################################################ class LxmlBenchMark(BenchMark): def bench_append_from_document(self, tree1, root1, tree2, root2): - "1,2 2,3 1,3" # needs trees 1 and 2, or 2 and 3, or 1 and 3 + # == "1,2 2,3 1,3 3,1 3,2 2,1" # trees 1 and 2, or 2 and 3, or ... for el in root2: root1.append(root2[0]) + def bench_insert_from_document(self, tree1, root1, tree2, root2): + for el in root2: + root1.insert(len(root1)/2, root2[0]) + def bench_rotate_children(self, tree, root): - #"1 2 3" # runs on any single tree independently + # == "1 2 3" # runs on any single tree independently for i in range(100): root.append(root[0]) @@ -89,13 +133,20 @@ for i in range(1,len(root)/2): root[-i:-i] = root[0] + def bench_clear(self, tree, root): + root.clear() + if __name__ == '__main__': bench = LxmlBenchMark() benchmarks = bench.benchmarks() if len(sys.argv) > 1: - selected = [ "bench_%s" % name for name in sys.argv[1:] ] + selected = [] + for name in sys.argv[1:]: + if not name.startswith('bench_'): + name = 'bench_' + name + selected.append(name) benchmarks = [ b for b in benchmarks if b[0] in selected ] benchmarks.sort() # by name and tree tuple @@ -106,16 +157,16 @@ timer = timeit.Timer( "bench.%s(%s)" % (bench_name, bench_args), - "from __main__ import bench ; bench.setup(%s)" % str(tuple(tree_set)) + "from __main__ import bench ; bench.setup(%s) ; gc.enable()" % str(tuple(tree_set)) ) print "%-25s (T%-6s)" % (bench_name[6:], ',T'.join(imap(str, tree_set))[:6]), sys.stdout.flush() - result = timer.repeat(3, 100) + result = timer.repeat(3, 50) bench.cleanup() for t in result: print "%8.4f" % t, - print "msec/pass, avg: %8.4f" % (sum(result) / len(result)) + print "msec/pass, best: %8.4f" % min(result) From scoder at codespeak.net Tue Mar 7 15:50:23 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 7 15:50:25 2006 Subject: [Lxml-checkins] r24072 - 
lxml/branch/scoder2 Message-ID: <20060307145023.B2DD210097@code0.codespeak.net> Author: scoder Date: Tue Mar 7 15:50:21 2006 New Revision: 24072 Modified: lxml/branch/scoder2/Makefile lxml/branch/scoder2/bench.py Log: merged in various benchmarking updates from trunk Modified: lxml/branch/scoder2/Makefile ============================================================================== --- lxml/branch/scoder2/Makefile (original) +++ lxml/branch/scoder2/Makefile Tue Mar 7 15:50:21 2006 @@ -18,6 +18,9 @@ test_inplace: inplace $(PYTHON) test.py $(TESTFLAGS) $(TESTOPTS) +bench_inplace: inplace + PYTHONPATH="src:$$PYTHONPATH" $(PYTHON) bench.py + ftest_build: build $(PYTHON) test.py -f $(TESTFLAGS) $(TESTOPTS) @@ -27,6 +30,8 @@ # XXX What should the default be? test: test_inplace +bench: bench_inplace + ftest: ftest_inplace clean: Modified: lxml/branch/scoder2/bench.py ============================================================================== --- lxml/branch/scoder2/bench.py (original) +++ lxml/branch/scoder2/bench.py Tue Mar 7 15:50:21 2006 @@ -4,46 +4,61 @@ from lxml import etree class BenchMark(object): - ALL_TREES = (1,2,3) - def setup(self, trees=ALL_TREES): - atoz = string.ascii_lowercase - def tag(ns='y'): - for i in count(): - yield "{%s}z%d" % (ns,i) - - if 1 in trees: - # tree with some 2nd level and loads of 3rd level children - root = etree.Element('{a}root') - for ch1 in atoz: - el = etree.SubElement(root, "{b}"+ch1) - for ch2 in atoz: - for i in range(100): - etree.SubElement(el, "{c}%s%03d" % (ch2, i)) + atoz = string.ascii_lowercase - self._root1 = root - self._tree1 = etree.ElementTree(root) + def setup(self, trees=()): + if not trees: + trees = self._all_trees() + + for tree in trees: + setup = getattr(self, '_setup_tree%d' % tree) + root = setup() + setattr(self, '_root%d' % tree, root) + setattr(self, '_tree%d' % tree, etree.ElementTree(root)) - if 2 in trees: - # tree with loads of 2nd level and fewer 3rd level children - root = 
etree.Element('{x}root') - for ch1 in atoz: + def _all_trees(self): + all_trees = [] + for name in dir(self): + if name.startswith('_setup_tree'): + all_trees.append(int(name[11:])) + return all_trees + + def _setup_tree1(self): + "tree with some 2nd level and loads of 3rd level children" + root = etree.Element('{a}root') + atoz = self.atoz + for ch1 in atoz: + el = etree.SubElement(root, "{b}"+ch1) + for ch2 in atoz: for i in range(100): - el = etree.SubElement(root, "{y}%s%03d" % (ch1, i)) - for ch2 in atoz: - etree.SubElement(el, "{z}"+ch2) - - self._root2 = root - self._tree2 = etree.ElementTree(root) - - if 3 in trees: - # deep tree with constant number of children - root = etree.Element('{x}root') - children = [root] - for i in range(10): - children = list(imap(etree.SubElement, children*3, tag())) + etree.SubElement(el, "{c}%s%03d" % (ch2, i)) + return root + + def _setup_tree2(self): + "tree with loads of 2nd level and fewer 3rd level children" + root = etree.Element('{x}root') + atoz = self.atoz + SubElement = etree.SubElement + for ch1 in atoz: + for i in range(100): + el = SubElement(root, "{y}%s%03d" % (ch1, i)) + for ch2 in atoz: + SubElement(el, "{z}"+ch2) + return root - self._root3 = root - self._tree3 = etree.ElementTree(root) + def _setup_tree3(self): + "deep tree with constant number of children per node" + root = etree.Element('{x}root') + SubElement = etree.SubElement + tags = self._tags + children = [root] + for i in range(10): + children = list(imap(SubElement, children*3, tags())) + return root + + def _tags(ns='y'): + for i in count(): + yield "{%s}z%d" % (ns,i) def cleanup(self): for name in dir(self): @@ -56,6 +71,7 @@ A benchmark is a tuple containing a method name and a list of tree numbers. Trees are prepared by the setup function. 
""" + all_trees = self._all_trees() benchmarks = [] for name in dir(self): if not name.startswith('bench_'): @@ -69,19 +85,47 @@ for tree_set in tree_sets: benchmarks.append((name, map(int, tree_set.split(',')))) else: - for tree in bench.ALL_TREES: - benchmarks.append((name, [tree])) + try: + function = getattr(method, 'im_func', method) + arg_count = method.func_code.co_argcount / 2 + except AttributeError: + arg_count = 1 + for trees in self._permutations(all_trees, arg_count): + benchmarks.append((name, trees)) return benchmarks + def _permutations(self, seq, count): + def _permutations(prefix, remainder, count): + if count == 0: + return [ prefix[:] ] + count -= 1 + perms = [] + prefix.append(None) + for pos, el in enumerate(remainder): + new_remainder = remainder[:pos] + remainder[pos+1:] + prefix[-1] = el + perms.extend( _permutations(prefix, new_remainder, count) ) + prefix.pop() + return perms + return _permutations([], seq, count) + + +############################################################ +# Benchmarks: +############################################################ class LxmlBenchMark(BenchMark): def bench_append_from_document(self, tree1, root1, tree2, root2): - "1,2 2,3 1,3" # needs trees 1 and 2, or 2 and 3, or 1 and 3 + # == "1,2 2,3 1,3 3,1 3,2 2,1" # trees 1 and 2, or 2 and 3, or ... 
for el in root2: root1.append(root2[0]) + def bench_insert_from_document(self, tree1, root1, tree2, root2): + for el in root2: + root1.insert(len(root1)/2, root2[0]) + def bench_rotate_children(self, tree, root): - #"1 2 3" # runs on any single tree independently + # == "1 2 3" # runs on any single tree independently for i in range(100): root.append(root[0]) @@ -89,13 +133,20 @@ for i in range(1,len(root)/2): root[-i:-i] = root[0] + def bench_clear(self, tree, root): + root.clear() + if __name__ == '__main__': bench = LxmlBenchMark() benchmarks = bench.benchmarks() if len(sys.argv) > 1: - selected = [ "bench_%s" % name for name in sys.argv[1:] ] + selected = [] + for name in sys.argv[1:]: + if not name.startswith('bench_'): + name = 'bench_' + name + selected.append(name) benchmarks = [ b for b in benchmarks if b[0] in selected ] benchmarks.sort() # by name and tree tuple @@ -106,16 +157,16 @@ timer = timeit.Timer( "bench.%s(%s)" % (bench_name, bench_args), - "from __main__ import bench ; bench.setup(%s)" % str(tuple(tree_set)) + "from __main__ import bench ; bench.setup(%s) ; gc.enable()" % str(tuple(tree_set)) ) print "%-25s (T%-6s)" % (bench_name[6:], ',T'.join(imap(str, tree_set))[:6]), sys.stdout.flush() - result = timer.repeat(3, 100) + result = timer.repeat(3, 50) bench.cleanup() for t in result: print "%8.4f" % t, - print "msec/pass, avg: %8.4f" % (sum(result) / len(result)) + print "msec/pass, best: %8.4f" % min(result) From scoder at codespeak.net Wed Mar 8 08:06:11 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 08:06:13 2006 Subject: [Lxml-checkins] r24086 - lxml/trunk Message-ID: <20060308070611.E70B210095@code0.codespeak.net> Author: scoder Date: Wed Mar 8 08:06:09 2006 New Revision: 24086 Modified: lxml/trunk/bench.py Log: support for benchmarking lxml, ElementTree and cElementTree Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) 
+++ lxml/trunk/bench.py Wed Mar 8 08:06:09 2006 @@ -3,9 +3,30 @@ from lxml import etree -class BenchMark(object): +_etrees = [etree] + +### cannot test these in all cases anyway (different semantics) +## +## try: +## from elementtree import ElementTree as ET +## _etrees.append(ET) +## except: +## ET = None + +## try: +## import cElementTree as cET +## _etrees.append(cET) +## except: +## cET = None + + +class BenchMarkBase(object): atoz = string.ascii_lowercase + def __init__(self, etree): + self.etree = etree + self.lib_name = etree.__name__.split('.')[-1] + def setup(self, trees=()): if not trees: trees = self._all_trees() @@ -14,7 +35,7 @@ setup = getattr(self, '_setup_tree%d' % tree) root = setup() setattr(self, '_root%d' % tree, root) - setattr(self, '_tree%d' % tree, etree.ElementTree(root)) + setattr(self, '_tree%d' % tree, self.etree.ElementTree(root)) def _all_trees(self): all_trees = [] @@ -25,20 +46,21 @@ def _setup_tree1(self): "tree with some 2nd level and loads of 3rd level children" - root = etree.Element('{a}root') + root = self.etree.Element('{a}root') atoz = self.atoz + SubElement = self.etree.SubElement for ch1 in atoz: - el = etree.SubElement(root, "{b}"+ch1) + el = SubElement(root, "{b}"+ch1) for ch2 in atoz: for i in range(100): - etree.SubElement(el, "{c}%s%03d" % (ch2, i)) + SubElement(el, "{c}%s%03d" % (ch2, i)) return root def _setup_tree2(self): "tree with loads of 2nd level and fewer 3rd level children" - root = etree.Element('{x}root') + root = self.etree.Element('{x}root') atoz = self.atoz - SubElement = etree.SubElement + SubElement = self.etree.SubElement for ch1 in atoz: for i in range(100): el = SubElement(root, "{y}%s%03d" % (ch1, i)) @@ -48,8 +70,8 @@ def _setup_tree3(self): "deep tree with constant number of children per node" - root = etree.Element('{x}root') - SubElement = etree.SubElement + root = self.etree.Element('{x}root') + SubElement = self.etree.SubElement tags = self._tags children = [root] for i in range(10): @@ 
-114,7 +136,7 @@ # Benchmarks: ############################################################ -class LxmlBenchMark(BenchMark): +class BenchMark(BenchMarkBase): def bench_append_from_document(self, tree1, root1, tree2, root2): # == "1,2 2,3 1,3 3,1 3,2 2,1" # trees 1 and 2, or 2 and 3, or ... for el in root2: @@ -138,8 +160,10 @@ if __name__ == '__main__': - bench = LxmlBenchMark() - benchmarks = bench.benchmarks() + benchmark_suites = map(BenchMark, _etrees) + + # sorted by name and tree tuple + benchmarks = [ sorted(b.benchmarks()) for b in benchmark_suites ] if len(sys.argv) > 1: selected = [] @@ -147,26 +171,33 @@ if not name.startswith('bench_'): name = 'bench_' + name selected.append(name) - benchmarks = [ b for b in benchmarks if b[0] in selected ] + benchmarks = [ [ b for b in bs if b[0] in selected ] + for bs in benchmarks ] + + for bench_calls in izip(*benchmarks): + for lib, config in enumerate(izip(bench_calls, benchmark_suites)): + (bench_name, tree_set), bench = config - benchmarks.sort() # by name and tree tuple + bench_args = ', '.join("bench._tree%d, bench._root%d" % (tree, tree) + for tree in tree_set) - for bench_name, tree_set in benchmarks: - bench_args = ', '.join("bench._tree%d, bench._root%d" % (tree, tree) - for tree in tree_set) + timer = timeit.Timer( + "bench.%s(%s)" % (bench_name, bench_args), + "from __main__ import bench ; bench.setup(%s) ; gc.enable()" % \ + str(tuple(tree_set)) + ) - timer = timeit.Timer( - "bench.%s(%s)" % (bench_name, bench_args), - "from __main__ import bench ; bench.setup(%s) ; gc.enable()" % str(tuple(tree_set)) - ) + print "%-12s %-25s (T%-6s)" % (bench.lib_name, bench_name[6:], + ',T'.join(imap(str, tree_set))[:6]), + sys.stdout.flush() - print "%-25s (T%-6s)" % (bench_name[6:], ',T'.join(imap(str, tree_set))[:6]), - sys.stdout.flush() + result = timer.repeat(3, 50) - result = timer.repeat(3, 50) + bench.cleanup() - bench.cleanup() + for t in result: + print "%8.4f" % t, + print "msec/pass, best: %8.4f" % 
min(result) - for t in result: - print "%8.4f" % t, - print "msec/pass, best: %8.4f" % min(result) + if len(benchmark_suites) > 1: + print # empty line between different benchmarks From scoder at codespeak.net Wed Mar 8 08:33:10 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 08:33:11 2006 Subject: [Lxml-checkins] r24087 - lxml/trunk/src/lxml/tests Message-ID: <20060308073310.6EF8F10095@code0.codespeak.net> Author: scoder Date: Wed Mar 8 08:33:08 2006 New Revision: 24087 Modified: lxml/trunk/src/lxml/tests/test_elementtree.py Log: test cases for deleting children and different ways of inserting them afterwards Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Wed Mar 8 08:33:08 2006 @@ -742,6 +742,75 @@ '', other) + def test_del_insert(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + bs = SubElement(b, 'bs') + c = SubElement(a, 'c') + cs = SubElement(c, 'cs') + + el = a[0] + self.assertXML( + '', + a) + self.assertXML('', b) + self.assertXML('', c) + + del a[0] + self.assertXML( + '', + a) + self.assertXML('', b) + self.assertXML('', c) + + a.insert(0, el) + self.assertXML( + '', + a) + self.assertXML('', b) + self.assertXML('', c) + + def test_del_setitem(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + bs = SubElement(b, 'bs') + c = SubElement(a, 'c') + cs = SubElement(c, 'cs') + + el = a[0] + del a[0] + a[0] = el + self.assertXML( + '', + a) + self.assertXML('', b) + self.assertXML('', c) + + def test_del_setslice(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + bs = SubElement(b, 'bs') + c = SubElement(a, 'c') + cs = 
SubElement(c, 'cs') + + el = a[0] + del a[0] + a[0:0] = [el] + self.assertXML( + '', + a) + self.assertXML('', b) + self.assertXML('', c) + def test_delitem_tail(self): ElementTree = self.etree.ElementTree f = StringIO('B2C2') @@ -795,7 +864,7 @@ self.assertXML( '', a) - + def test_insert(self): Element = self.etree.Element SubElement = self.etree.SubElement From scoder at codespeak.net Wed Mar 8 08:33:30 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 08:33:31 2006 Subject: [Lxml-checkins] r24088 - lxml/trunk Message-ID: <20060308073330.B79F210095@code0.codespeak.net> Author: scoder Date: Wed Mar 8 08:33:29 2006 New Revision: 24088 Modified: lxml/trunk/bench.py Log: bug in reorder benchmark Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Wed Mar 8 08:33:29 2006 @@ -153,7 +153,7 @@ def bench_reorder(self, tree, root): for i in range(1,len(root)/2): - root[-i:-i] = root[0] + root[-i:-i] = [ root[0] ] def bench_clear(self, tree, root): root.clear() From scoder at codespeak.net Wed Mar 8 08:34:22 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 08:34:23 2006 Subject: [Lxml-checkins] r24089 - in lxml/branch/scoder2: . 
src/lxml/tests Message-ID: <20060308073422.60CCA100A7@code0.codespeak.net> Author: scoder Date: Wed Mar 8 08:34:20 2006 New Revision: 24089 Modified: lxml/branch/scoder2/bench.py lxml/branch/scoder2/src/lxml/tests/test_elementtree.py Log: more merges from trunk Modified: lxml/branch/scoder2/bench.py ============================================================================== --- lxml/branch/scoder2/bench.py (original) +++ lxml/branch/scoder2/bench.py Wed Mar 8 08:34:20 2006 @@ -3,9 +3,30 @@ from lxml import etree -class BenchMark(object): +_etrees = [etree] + +### cannot test these in all cases anyway (different semantics) +## +## try: +## from elementtree import ElementTree as ET +## _etrees.append(ET) +## except: +## ET = None + +## try: +## import cElementTree as cET +## _etrees.append(cET) +## except: +## cET = None + + +class BenchMarkBase(object): atoz = string.ascii_lowercase + def __init__(self, etree): + self.etree = etree + self.lib_name = etree.__name__.split('.')[-1] + def setup(self, trees=()): if not trees: trees = self._all_trees() @@ -14,7 +35,7 @@ setup = getattr(self, '_setup_tree%d' % tree) root = setup() setattr(self, '_root%d' % tree, root) - setattr(self, '_tree%d' % tree, etree.ElementTree(root)) + setattr(self, '_tree%d' % tree, self.etree.ElementTree(root)) def _all_trees(self): all_trees = [] @@ -25,20 +46,21 @@ def _setup_tree1(self): "tree with some 2nd level and loads of 3rd level children" - root = etree.Element('{a}root') + root = self.etree.Element('{a}root') atoz = self.atoz + SubElement = self.etree.SubElement for ch1 in atoz: - el = etree.SubElement(root, "{b}"+ch1) + el = SubElement(root, "{b}"+ch1) for ch2 in atoz: for i in range(100): - etree.SubElement(el, "{c}%s%03d" % (ch2, i)) + SubElement(el, "{c}%s%03d" % (ch2, i)) return root def _setup_tree2(self): "tree with loads of 2nd level and fewer 3rd level children" - root = etree.Element('{x}root') + root = self.etree.Element('{x}root') atoz = self.atoz - SubElement = 
etree.SubElement + SubElement = self.etree.SubElement for ch1 in atoz: for i in range(100): el = SubElement(root, "{y}%s%03d" % (ch1, i)) @@ -48,8 +70,8 @@ def _setup_tree3(self): "deep tree with constant number of children per node" - root = etree.Element('{x}root') - SubElement = etree.SubElement + root = self.etree.Element('{x}root') + SubElement = self.etree.SubElement tags = self._tags children = [root] for i in range(10): @@ -114,7 +136,7 @@ # Benchmarks: ############################################################ -class LxmlBenchMark(BenchMark): +class BenchMark(BenchMarkBase): def bench_append_from_document(self, tree1, root1, tree2, root2): # == "1,2 2,3 1,3 3,1 3,2 2,1" # trees 1 and 2, or 2 and 3, or ... for el in root2: @@ -131,15 +153,17 @@ def bench_reorder(self, tree, root): for i in range(1,len(root)/2): - root[-i:-i] = root[0] + root[-i:-i] = [ root[0] ] def bench_clear(self, tree, root): root.clear() if __name__ == '__main__': - bench = LxmlBenchMark() - benchmarks = bench.benchmarks() + benchmark_suites = map(BenchMark, _etrees) + + # sorted by name and tree tuple + benchmarks = [ sorted(b.benchmarks()) for b in benchmark_suites ] if len(sys.argv) > 1: selected = [] @@ -147,26 +171,33 @@ if not name.startswith('bench_'): name = 'bench_' + name selected.append(name) - benchmarks = [ b for b in benchmarks if b[0] in selected ] + benchmarks = [ [ b for b in bs if b[0] in selected ] + for bs in benchmarks ] + + for bench_calls in izip(*benchmarks): + for lib, config in enumerate(izip(bench_calls, benchmark_suites)): + (bench_name, tree_set), bench = config - benchmarks.sort() # by name and tree tuple + bench_args = ', '.join("bench._tree%d, bench._root%d" % (tree, tree) + for tree in tree_set) - for bench_name, tree_set in benchmarks: - bench_args = ', '.join("bench._tree%d, bench._root%d" % (tree, tree) - for tree in tree_set) + timer = timeit.Timer( + "bench.%s(%s)" % (bench_name, bench_args), + "from __main__ import bench ; bench.setup(%s) ; 
gc.enable()" % \ + str(tuple(tree_set)) + ) - timer = timeit.Timer( - "bench.%s(%s)" % (bench_name, bench_args), - "from __main__ import bench ; bench.setup(%s) ; gc.enable()" % str(tuple(tree_set)) - ) + print "%-12s %-25s (T%-6s)" % (bench.lib_name, bench_name[6:], + ',T'.join(imap(str, tree_set))[:6]), + sys.stdout.flush() - print "%-25s (T%-6s)" % (bench_name[6:], ',T'.join(imap(str, tree_set))[:6]), - sys.stdout.flush() + result = timer.repeat(3, 50) - result = timer.repeat(3, 50) + bench.cleanup() - bench.cleanup() + for t in result: + print "%8.4f" % t, + print "msec/pass, best: %8.4f" % min(result) - for t in result: - print "%8.4f" % t, - print "msec/pass, best: %8.4f" % min(result) + if len(benchmark_suites) > 1: + print # empty line between different benchmarks Modified: lxml/branch/scoder2/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_elementtree.py Wed Mar 8 08:34:20 2006 @@ -742,6 +742,75 @@ '', other) + def test_del_insert(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + bs = SubElement(b, 'bs') + c = SubElement(a, 'c') + cs = SubElement(c, 'cs') + + el = a[0] + self.assertXML( + '', + a) + self.assertXML('', b) + self.assertXML('', c) + + del a[0] + self.assertXML( + '', + a) + self.assertXML('', b) + self.assertXML('', c) + + a.insert(0, el) + self.assertXML( + '', + a) + self.assertXML('', b) + self.assertXML('', c) + + def test_del_setitem(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + bs = SubElement(b, 'bs') + c = SubElement(a, 'c') + cs = SubElement(c, 'cs') + + el = a[0] + del a[0] + a[0] = el + self.assertXML( + '', + a) + self.assertXML('', b) + self.assertXML('', c) + + def test_del_setslice(self): + Element = 
self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + bs = SubElement(b, 'bs') + c = SubElement(a, 'c') + cs = SubElement(c, 'cs') + + el = a[0] + del a[0] + a[0:0] = [el] + self.assertXML( + '', + a) + self.assertXML('', b) + self.assertXML('', c) + def test_delitem_tail(self): ElementTree = self.etree.ElementTree f = StringIO('B2C2') @@ -795,7 +864,7 @@ self.assertXML( '', a) - + def test_insert(self): Element = self.etree.Element SubElement = self.etree.SubElement From scoder at codespeak.net Wed Mar 8 08:52:55 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 08:53:02 2006 Subject: [Lxml-checkins] r24090 - lxml/trunk Message-ID: <20060308075255.98177100A3@code0.codespeak.net> Author: scoder Date: Wed Mar 8 08:52:49 2006 New Revision: 24090 Modified: lxml/trunk/bench.py Log: support -a option for running on all three libraries, -i for inplace run, new benchmark reorder_slice Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Wed Mar 8 08:52:49 2006 @@ -1,25 +1,6 @@ import sys, string, timeit from itertools import * -from lxml import etree - -_etrees = [etree] - -### cannot test these in all cases anyway (different semantics) -## -## try: -## from elementtree import ElementTree as ET -## _etrees.append(ET) -## except: -## ET = None - -## try: -## import cElementTree as cET -## _etrees.append(cET) -## except: -## cET = None - - class BenchMarkBase(object): atoz = string.ascii_lowercase @@ -155,11 +136,43 @@ for i in range(1,len(root)/2): root[-i:-i] = [ root[0] ] + def bench_reorder_slice(self, tree, root): + for i in range(1,len(root)/2): + root[-i:-i] = root[0:1] + def bench_clear(self, tree, root): root.clear() if __name__ == '__main__': + if len(sys.argv) > 1: + try: + sys.argv.remove('-i') + sys.path.insert(0, 'src') + except ValueError: + pass + + from lxml import etree 
+ _etrees = [etree] + + if len(sys.argv) > 1: + try: + sys.argv.remove('-a') + except ValueError: + pass + else: + try: + from elementtree import ElementTree as ET + _etrees.append(ET) + except ImportError: + pass + + try: + import cElementTree as cET + _etrees.append(cET) + except ImportError: + pass + benchmark_suites = map(BenchMark, _etrees) # sorted by name and tree tuple @@ -174,6 +187,11 @@ benchmarks = [ [ b for b in bs if b[0] in selected ] for bs in benchmarks ] + + print "Running benchmark on", ', '.join(b.lib_name + for b in benchmark_suites) + print + for bench_calls in izip(*benchmarks): for lib, config in enumerate(izip(bench_calls, benchmark_suites)): (bench_name, tree_set), bench = config From scoder at codespeak.net Wed Mar 8 09:07:59 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 09:08:06 2006 Subject: [Lxml-checkins] r24091 - lxml/branch/scoder2 Message-ID: <20060308080759.E2FDD100A3@code0.codespeak.net> Author: scoder Date: Wed Mar 8 09:07:48 2006 New Revision: 24091 Modified: lxml/branch/scoder2/bench.py Log: more merges from trunk Modified: lxml/branch/scoder2/bench.py ============================================================================== --- lxml/branch/scoder2/bench.py (original) +++ lxml/branch/scoder2/bench.py Wed Mar 8 09:07:48 2006 @@ -1,25 +1,6 @@ import sys, string, timeit from itertools import * -from lxml import etree - -_etrees = [etree] - -### cannot test these in all cases anyway (different semantics) -## -## try: -## from elementtree import ElementTree as ET -## _etrees.append(ET) -## except: -## ET = None - -## try: -## import cElementTree as cET -## _etrees.append(cET) -## except: -## cET = None - - class BenchMarkBase(object): atoz = string.ascii_lowercase @@ -155,11 +136,43 @@ for i in range(1,len(root)/2): root[-i:-i] = [ root[0] ] + def bench_reorder_slice(self, tree, root): + for i in range(1,len(root)/2): + root[-i:-i] = root[0:1] + def bench_clear(self, tree, root): root.clear() if 
__name__ == '__main__': + if len(sys.argv) > 1: + try: + sys.argv.remove('-i') + sys.path.insert(0, 'src') + except ValueError: + pass + + from lxml import etree + _etrees = [etree] + + if len(sys.argv) > 1: + try: + sys.argv.remove('-a') + except ValueError: + pass + else: + try: + from elementtree import ElementTree as ET + _etrees.append(ET) + except ImportError: + pass + + try: + import cElementTree as cET + _etrees.append(cET) + except ImportError: + pass + benchmark_suites = map(BenchMark, _etrees) # sorted by name and tree tuple @@ -174,6 +187,11 @@ benchmarks = [ [ b for b in bs if b[0] in selected ] for bs in benchmarks ] + + print "Running benchmark on", ', '.join(b.lib_name + for b in benchmark_suites) + print + for bench_calls in izip(*benchmarks): for lib, config in enumerate(izip(bench_calls, benchmark_suites)): (bench_name, tree_set), bench = config From scoder at codespeak.net Wed Mar 8 11:18:26 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 11:18:27 2006 Subject: [Lxml-checkins] r24092 - lxml/trunk/src/lxml/tests Message-ID: <20060308101826.3615810091@code0.codespeak.net> Author: scoder Date: Wed Mar 8 11:18:24 2006 New Revision: 24092 Modified: lxml/trunk/src/lxml/tests/test_elementtree.py Log: added some test cases on negative slicing (just in case) Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Wed Mar 8 11:18:24 2006 @@ -1221,6 +1221,36 @@ [b, e], list(a)) + def test_delslice_negative1(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + e = SubElement(a, 'e') + + del a[1:-1] + self.assertEquals( + [b, e], + list(a)) + + def test_delslice_negative2(self): + Element = self.etree.Element + SubElement = 
self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + e = SubElement(a, 'e') + + del a[-3:-1] + self.assertEquals( + [b, e], + list(a)) + def test_delslice_tail(self): ElementTree = self.etree.ElementTree f = StringIO('B2C2D2E2') @@ -1278,6 +1308,23 @@ 'B2X2Y2Z2E2', a) + def test_setslice_negative(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + + x = Element('x') + y = Element('y') + + a[1:-1] = [x, y] + self.assertEquals( + [b, x, y, d], + list(a)) + def test_setslice_end(self): Element = self.etree.Element SubElement = self.etree.SubElement From scoder at codespeak.net Wed Mar 8 11:19:51 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 11:19:52 2006 Subject: [Lxml-checkins] r24093 - lxml/trunk/src/lxml Message-ID: <20060308101951.6329310091@code0.codespeak.net> Author: scoder Date: Wed Mar 8 11:19:50 2006 New Revision: 24093 Modified: lxml/trunk/src/lxml/etree.pyx Log: opt: avoid some internal traversals of element children Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Mar 8 11:19:50 2006 @@ -362,9 +362,9 @@ c_node = _findChild(self._c_node, start) # now delete the slice if start != stop: - _deleteSlice(c_node, start, stop) + c_node = _deleteSlice(c_node, start, stop) # now find start of slice again, for insertion (just before it) - c_node = _findChild(self._c_node, start) + #c_node = _findChild(self._c_node, start) # if the insertion point is at the end, append there if c_node is NULL: for node in value: @@ -575,17 +575,22 @@ return ElementChildIterator(self) def index(self, _Element x, start=None, stop=None): - cdef int k + cdef int k + cdef int l cdef xmlNode* c_child _raiseIfNone(x) k = 0 
c_child = self._c_node.children # account for negative start and stop by turning them into positive + l = -1 if start is not None and start < 0: - start = len(self) + start + l = self.__len__() + start = l + start if stop is not None and stop < 0: - stop = len(self) + stop + if l < 0: + l = self.__len__() + stop = l + stop while c_child is not NULL: if _isElement(c_child): @@ -1314,13 +1319,13 @@ return (c_node.type == tree.XML_ELEMENT_NODE or c_node.type == tree.XML_COMMENT_NODE) -cdef void _deleteSlice(xmlNode* c_node, int start, int stop): +cdef xmlNode* _deleteSlice(xmlNode* c_node, int start, int stop): """Delete slice, starting with c_node, start counting at start, end at stop. """ cdef xmlNode* c_next cdef int c if c_node is NULL: - return + return NULL # now start deleting nodes c = start while c_node is not NULL and c < stop: @@ -1331,6 +1336,7 @@ _removeNode(c_node) c = c + 1 c_node = c_next + return c_node def _getNsTag(tag): """Given a tag, find namespace URI and tag name. From scoder at codespeak.net Wed Mar 8 11:20:17 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 11:20:18 2006 Subject: [Lxml-checkins] r24094 - in lxml/branch/scoder2/src/lxml: . 
tests Message-ID: <20060308102017.92FBD10091@code0.codespeak.net> Author: scoder Date: Wed Mar 8 11:20:15 2006 New Revision: 24094 Modified: lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/tests/test_elementtree.py Log: more merges from trunk Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Wed Mar 8 11:20:15 2006 @@ -362,9 +362,9 @@ c_node = _findChild(self._c_node, start) # now delete the slice if start != stop: - _deleteSlice(c_node, start, stop) + c_node = _deleteSlice(c_node, start, stop) # now find start of slice again, for insertion (just before it) - c_node = _findChild(self._c_node, start) + #c_node = _findChild(self._c_node, start) # if the insertion point is at the end, append there if c_node is NULL: for node in value: @@ -575,17 +575,22 @@ return ElementChildIterator(self) def index(self, _Element x, start=None, stop=None): - cdef int k + cdef int k + cdef int l cdef xmlNode* c_child _raiseIfNone(x) k = 0 c_child = self._c_node.children # account for negative start and stop by turning them into positive + l = -1 if start is not None and start < 0: - start = len(self) + start + l = self.__len__() + start = l + start if stop is not None and stop < 0: - stop = len(self) + stop + if l < 0: + l = self.__len__() + stop = l + stop while c_child is not NULL: if _isElement(c_child): @@ -1314,13 +1319,13 @@ return (c_node.type == tree.XML_ELEMENT_NODE or c_node.type == tree.XML_COMMENT_NODE) -cdef void _deleteSlice(xmlNode* c_node, int start, int stop): +cdef xmlNode* _deleteSlice(xmlNode* c_node, int start, int stop): """Delete slice, starting with c_node, start counting at start, end at stop. 
""" cdef xmlNode* c_next cdef int c if c_node is NULL: - return + return NULL # now start deleting nodes c = start while c_node is not NULL and c < stop: @@ -1331,6 +1336,7 @@ _removeNode(c_node) c = c + 1 c_node = c_next + return c_node def _getNsTag(tag): """Given a tag, find namespace URI and tag name. Modified: lxml/branch/scoder2/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_elementtree.py Wed Mar 8 11:20:15 2006 @@ -1221,6 +1221,36 @@ [b, e], list(a)) + def test_delslice_negative1(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + e = SubElement(a, 'e') + + del a[1:-1] + self.assertEquals( + [b, e], + list(a)) + + def test_delslice_negative2(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + e = SubElement(a, 'e') + + del a[-3:-1] + self.assertEquals( + [b, e], + list(a)) + def test_delslice_tail(self): ElementTree = self.etree.ElementTree f = StringIO('B2C2D2E2') @@ -1278,6 +1308,23 @@ 'B2X2Y2Z2E2', a) + def test_setslice_negative(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + + x = Element('x') + y = Element('y') + + a[1:-1] = [x, y] + self.assertEquals( + [b, x, y, d], + list(a)) + def test_setslice_end(self): Element = self.etree.Element SubElement = self.etree.SubElement From scoder at codespeak.net Wed Mar 8 11:23:50 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 11:23:52 2006 Subject: [Lxml-checkins] r24095 - lxml/trunk/src/lxml/tests Message-ID: 
<20060308102350.EFBDC10091@code0.codespeak.net> Author: scoder Date: Wed Mar 8 11:23:49 2006 New Revision: 24095 Modified: lxml/trunk/src/lxml/tests/test_etree.py Log: one more little test on index() Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Wed Mar 8 11:23:49 2006 @@ -191,6 +191,8 @@ ValueError, e.index, e[3], 0, 2) self.assertRaises( ValueError, e.index, e[8], 0, -3) + self.assertRaises( + ValueError, e.index, e[8], -5, -3) self.assertEquals( 8, e.index(e[8], 0, -1)) self.assertEquals( From scoder at codespeak.net Wed Mar 8 11:53:27 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 11:53:28 2006 Subject: [Lxml-checkins] r24096 - lxml/trunk/src/lxml Message-ID: <20060308105327.B210D10091@code0.codespeak.net> Author: scoder Date: Wed Mar 8 11:53:26 2006 New Revision: 24096 Modified: lxml/trunk/src/lxml/etree.pyx Log: substantial speed up of element.clear() Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Mar 8 11:53:26 2006 @@ -422,16 +422,18 @@ cdef xmlAttr* c_attr_next cdef xmlNode* c_node cdef xmlNode* c_node_next - self.text = None - self.tail = None + c_node = self._c_node + # remove self.text and self.tail + _removeText(c_node.children) + _removeText(c_node.next) # remove all attributes - c_attr = self._c_node.properties + c_attr = c_node.properties while c_attr is not NULL: c_attr_next = c_attr.next tree.xmlRemoveProp(c_attr) c_attr = c_attr_next # remove all subelements - c_node = self._c_node.children + c_node = c_node.children while c_node is not NULL: c_node_next = c_node.next if _isElement(c_node): From scoder at codespeak.net Wed Mar 8 11:56:21 2006 From: scoder at codespeak.net 
(scoder@codespeak.net) Date: Wed Mar 8 11:56:22 2006 Subject: [Lxml-checkins] r24097 - lxml/trunk Message-ID: <20060308105621.46E5C10091@code0.codespeak.net> Author: scoder Date: Wed Mar 8 11:56:15 2006 New Revision: 24097 Modified: lxml/trunk/Makefile Log: use bench.py cmd line options Modified: lxml/trunk/Makefile ============================================================================== --- lxml/trunk/Makefile (original) +++ lxml/trunk/Makefile Wed Mar 8 11:56:15 2006 @@ -19,7 +19,7 @@ $(PYTHON) test.py $(TESTFLAGS) $(TESTOPTS) bench_inplace: inplace - PYTHONPATH="src:$$PYTHONPATH" $(PYTHON) bench.py + $(PYTHON) bench.py -i ftest_build: build $(PYTHON) test.py -f $(TESTFLAGS) $(TESTOPTS) From scoder at codespeak.net Wed Mar 8 11:57:28 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 11:57:30 2006 Subject: [Lxml-checkins] r24098 - in lxml/branch/scoder2: . src/lxml src/lxml/tests Message-ID: <20060308105728.66D7F100A3@code0.codespeak.net> Author: scoder Date: Wed Mar 8 11:57:26 2006 New Revision: 24098 Modified: lxml/branch/scoder2/Makefile lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/tests/test_etree.py Log: more merges from trunk Modified: lxml/branch/scoder2/Makefile ============================================================================== --- lxml/branch/scoder2/Makefile (original) +++ lxml/branch/scoder2/Makefile Wed Mar 8 11:57:26 2006 @@ -19,7 +19,7 @@ $(PYTHON) test.py $(TESTFLAGS) $(TESTOPTS) bench_inplace: inplace - PYTHONPATH="src:$$PYTHONPATH" $(PYTHON) bench.py + $(PYTHON) bench.py -i ftest_build: build $(PYTHON) test.py -f $(TESTFLAGS) $(TESTOPTS) Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Wed Mar 8 11:57:26 2006 @@ -422,16 +422,18 @@ cdef xmlAttr* c_attr_next cdef xmlNode* c_node cdef xmlNode* c_node_next - 
self.text = None - self.tail = None + c_node = self._c_node + # remove self.text and self.tail + _removeText(c_node.children) + _removeText(c_node.next) # remove all attributes - c_attr = self._c_node.properties + c_attr = c_node.properties while c_attr is not NULL: c_attr_next = c_attr.next tree.xmlRemoveProp(c_attr) c_attr = c_attr_next # remove all subelements - c_node = self._c_node.children + c_node = c_node.children while c_node is not NULL: c_node_next = c_node.next if _isElement(c_node): Modified: lxml/branch/scoder2/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_etree.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_etree.py Wed Mar 8 11:57:26 2006 @@ -191,6 +191,8 @@ ValueError, e.index, e[3], 0, 2) self.assertRaises( ValueError, e.index, e[8], 0, -3) + self.assertRaises( + ValueError, e.index, e[8], -5, -3) self.assertEquals( 8, e.index(e[8], 0, -1)) self.assertEquals( From scoder at codespeak.net Wed Mar 8 14:36:32 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 14:36:33 2006 Subject: [Lxml-checkins] r24104 - lxml/trunk Message-ID: <20060308133632.3CD64100A9@code0.codespeak.net> Author: scoder Date: Wed Mar 8 14:36:30 2006 New Revision: 24104 Modified: lxml/trunk/bench.py Log: refactoring to speed up benchmarking: prebuild trees and use copy.deepcopy() Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Wed Mar 8 14:36:30 2006 @@ -1,4 +1,4 @@ -import sys, string, timeit +import sys, string, time, copy, gc from itertools import * class BenchMarkBase(object): @@ -8,13 +8,19 @@ self.etree = etree self.lib_name = etree.__name__.split('.')[-1] + self.setup_times = times = [] + for tree in self._all_trees(): + setup = getattr(self, '_setup_tree%d' % tree) + root, t = setup() + times.append(t) + setattr(self, 
'__root%d' % tree, root) + def setup(self, trees=()): if not trees: trees = self._all_trees() for tree in trees: - setup = getattr(self, '_setup_tree%d' % tree) - root = setup() + root = copy.deepcopy( getattr(self, '__root%d' % tree) ) setattr(self, '_root%d' % tree, root) setattr(self, '_tree%d' % tree, self.etree.ElementTree(root)) @@ -26,42 +32,63 @@ return all_trees def _setup_tree1(self): - "tree with some 2nd level and loads of 3rd level children" - root = self.etree.Element('{a}root') + "tree with 26 2nd level and 520 3rd level children" atoz = self.atoz SubElement = self.etree.SubElement + current_time = time.time + t = current_time() + root = self.etree.Element('{a}root') for ch1 in atoz: el = SubElement(root, "{b}"+ch1) for ch2 in atoz: - for i in range(100): + for i in range(20): SubElement(el, "{c}%s%03d" % (ch2, i)) - return root + t = current_time() - t + return (root, t) def _setup_tree2(self): - "tree with loads of 2nd level and fewer 3rd level children" - root = self.etree.Element('{x}root') + "tree with 520 2nd level and 26 3rd level children" atoz = self.atoz SubElement = self.etree.SubElement + current_time = time.time + t = current_time() + root = self.etree.Element('{x}root') for ch1 in atoz: - for i in range(100): + for i in range(20): el = SubElement(root, "{y}%s%03d" % (ch1, i)) for ch2 in atoz: SubElement(el, "{z}"+ch2) - return root + t = current_time() - t + return (root, t) def _setup_tree3(self): - "deep tree with constant number of children per node" + "tree of depth 8 with 3 children per node" + SubElement = self.etree.SubElement + current_time = time.time + t = current_time() root = self.etree.Element('{x}root') + children = [root] + for i in range(7): + tag_no = count().next + children = [ SubElement(c, "{y}z%d" % i) + for i,c in enumerate(chain(children, children, children)) ] + t = current_time() - t + return (root, t) + + def _setup_tree4(self): + "small tree with 26 2nd level and 2 3rd level children" + atoz = self.atoz 
SubElement = self.etree.SubElement - tags = self._tags + current_time = time.time + t = current_time() + root = self.etree.Element('{x}root') children = [root] - for i in range(10): - children = list(imap(SubElement, children*3, tags())) - return root - - def _tags(ns='y'): - for i in count(): - yield "{%s}z%d" % (ns,i) + for ch1 in atoz: + el = SubElement(root, "{b}"+ch1) + SubElement(el, "{c}a") + SubElement(el, "{c}b") + t = current_time() - t + return (root, t) def cleanup(self): for name in dir(self): @@ -114,35 +141,61 @@ ############################################################ -# Benchmarks: +# Benchmarks ############################################################ class BenchMark(BenchMarkBase): def bench_append_from_document(self, tree1, root1, tree2, root2): # == "1,2 2,3 1,3 3,1 3,2 2,1" # trees 1 and 2, or 2 and 3, or ... for el in root2: - root1.append(root2[0]) + root1.append(el) def bench_insert_from_document(self, tree1, root1, tree2, root2): for el in root2: - root1.insert(len(root1)/2, root2[0]) + root1.insert(len(root1)/2, el) def bench_rotate_children(self, tree, root): # == "1 2 3" # runs on any single tree independently for i in range(100): - root.append(root[0]) + el = root[0] + del root[0] + root.append(el) def bench_reorder(self, tree, root): for i in range(1,len(root)/2): - root[-i:-i] = [ root[0] ] + el = root[0] + del root[0] + root[-i:-i] = [ el ] def bench_reorder_slice(self, tree, root): for i in range(1,len(root)/2): - root[-i:-i] = root[0:1] + els = root[0:1] + del root[0] + root[-i:-i] = els def bench_clear(self, tree, root): root.clear() + def bench_create_subelements(self, tree, root): + SubElement = self.etree.SubElement + for child in root: + SubElement(child, '{test}test') + + def bench_append_elements(self, tree, root): + Element = self.etree.Element + for child in root: + el = Element('{test}test') + child.append(el) + + def bench_replace_children(self, tree, root): + Element = self.etree.Element + for child in root: + 
el = Element('{test}test') + child[:] = [el] + +############################################################ +# Main program +############################################################ if __name__ == '__main__': if len(sys.argv) > 1: @@ -173,6 +226,8 @@ except ImportError: pass + print "Preparing test suites and trees ..." + benchmark_suites = map(BenchMark, _etrees) # sorted by name and tree tuple @@ -187,35 +242,63 @@ benchmarks = [ [ b for b in bs if b[0] in selected ] for bs in benchmarks ] + import time + def run_bench(suite, method_name, tree_set): + current_time = time.time + call_repeat = range(50) + + suite.setup(tree_set) + + call = getattr(suite, method_name) + args = list(chain(*[ (getattr(suite, '_tree%d' % tree), + getattr(suite, '_root%d' % tree)) + for tree in tree_set ])) + def calibrate(*void): + pass + t = current_time() + for i in call_repeat: + calibrate(*args) + call_overhead = current_time() - t + + times = [] + for i in range(3): + gc.collect() + gc.disable() + t = current_time() + for i in call_repeat: + call(*args) + t = max(0, current_time() - t - call_overhead) + t = 1000.0 * t / len(call_repeat) + times.append(t) + gc.enable() + + suite.cleanup() + return times + print "Running benchmark on", ', '.join(b.lib_name for b in benchmark_suites) print - for bench_calls in izip(*benchmarks): - for lib, config in enumerate(izip(bench_calls, benchmark_suites)): - (bench_name, tree_set), bench = config - - bench_args = ', '.join("bench._tree%d, bench._root%d" % (tree, tree) - for tree in tree_set) + print "Setup times for trees in seconds:" + for b in benchmark_suites: + print "%-12s : " % b.lib_name, ', '.join("%9.4f" % t + for t in b.setup_times) + print - timer = timeit.Timer( - "bench.%s(%s)" % (bench_name, bench_args), - "from __main__ import bench ; bench.setup(%s) ; gc.enable()" % \ - str(tuple(tree_set)) - ) + for bench_calls in izip(*benchmarks): + for lib, config in enumerate(izip(benchmark_suites, bench_calls)): + bench, (bench_name, 
tree_set) = config print "%-12s %-25s (T%-6s)" % (bench.lib_name, bench_name[6:], ',T'.join(imap(str, tree_set))[:6]), sys.stdout.flush() - result = timer.repeat(3, 50) - - bench.cleanup() + result = run_bench(bench, bench_name, tree_set) for t in result: - print "%8.4f" % t, - print "msec/pass, best: %8.4f" % min(result) + print "%9.4f" % t, + print "msec/pass, best: %9.4f" % min(result) if len(benchmark_suites) > 1: print # empty line between different benchmarks From scoder at codespeak.net Wed Mar 8 14:37:26 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 14:37:28 2006 Subject: [Lxml-checkins] r24105 - lxml/branch/scoder2 Message-ID: <20060308133726.B0244100A3@code0.codespeak.net> Author: scoder Date: Wed Mar 8 14:37:20 2006 New Revision: 24105 Modified: lxml/branch/scoder2/bench.py Log: more merges from trunk Modified: lxml/branch/scoder2/bench.py ============================================================================== --- lxml/branch/scoder2/bench.py (original) +++ lxml/branch/scoder2/bench.py Wed Mar 8 14:37:20 2006 @@ -1,4 +1,4 @@ -import sys, string, timeit +import sys, string, time, copy, gc from itertools import * class BenchMarkBase(object): @@ -8,13 +8,19 @@ self.etree = etree self.lib_name = etree.__name__.split('.')[-1] + self.setup_times = times = [] + for tree in self._all_trees(): + setup = getattr(self, '_setup_tree%d' % tree) + root, t = setup() + times.append(t) + setattr(self, '__root%d' % tree, root) + def setup(self, trees=()): if not trees: trees = self._all_trees() for tree in trees: - setup = getattr(self, '_setup_tree%d' % tree) - root = setup() + root = copy.deepcopy( getattr(self, '__root%d' % tree) ) setattr(self, '_root%d' % tree, root) setattr(self, '_tree%d' % tree, self.etree.ElementTree(root)) @@ -26,42 +32,63 @@ return all_trees def _setup_tree1(self): - "tree with some 2nd level and loads of 3rd level children" - root = self.etree.Element('{a}root') + "tree with 26 2nd level and 520 3rd level 
children" atoz = self.atoz SubElement = self.etree.SubElement + current_time = time.time + t = current_time() + root = self.etree.Element('{a}root') for ch1 in atoz: el = SubElement(root, "{b}"+ch1) for ch2 in atoz: - for i in range(100): + for i in range(20): SubElement(el, "{c}%s%03d" % (ch2, i)) - return root + t = current_time() - t + return (root, t) def _setup_tree2(self): - "tree with loads of 2nd level and fewer 3rd level children" - root = self.etree.Element('{x}root') + "tree with 520 2nd level and 26 3rd level children" atoz = self.atoz SubElement = self.etree.SubElement + current_time = time.time + t = current_time() + root = self.etree.Element('{x}root') for ch1 in atoz: - for i in range(100): + for i in range(20): el = SubElement(root, "{y}%s%03d" % (ch1, i)) for ch2 in atoz: SubElement(el, "{z}"+ch2) - return root + t = current_time() - t + return (root, t) def _setup_tree3(self): - "deep tree with constant number of children per node" + "tree of depth 8 with 3 children per node" + SubElement = self.etree.SubElement + current_time = time.time + t = current_time() root = self.etree.Element('{x}root') + children = [root] + for i in range(7): + tag_no = count().next + children = [ SubElement(c, "{y}z%d" % i) + for i,c in enumerate(chain(children, children, children)) ] + t = current_time() - t + return (root, t) + + def _setup_tree4(self): + "small tree with 26 2nd level and 2 3rd level children" + atoz = self.atoz SubElement = self.etree.SubElement - tags = self._tags + current_time = time.time + t = current_time() + root = self.etree.Element('{x}root') children = [root] - for i in range(10): - children = list(imap(SubElement, children*3, tags())) - return root - - def _tags(ns='y'): - for i in count(): - yield "{%s}z%d" % (ns,i) + for ch1 in atoz: + el = SubElement(root, "{b}"+ch1) + SubElement(el, "{c}a") + SubElement(el, "{c}b") + t = current_time() - t + return (root, t) def cleanup(self): for name in dir(self): @@ -114,35 +141,61 @@ 
############################################################ -# Benchmarks: +# Benchmarks ############################################################ class BenchMark(BenchMarkBase): def bench_append_from_document(self, tree1, root1, tree2, root2): # == "1,2 2,3 1,3 3,1 3,2 2,1" # trees 1 and 2, or 2 and 3, or ... for el in root2: - root1.append(root2[0]) + root1.append(el) def bench_insert_from_document(self, tree1, root1, tree2, root2): for el in root2: - root1.insert(len(root1)/2, root2[0]) + root1.insert(len(root1)/2, el) def bench_rotate_children(self, tree, root): # == "1 2 3" # runs on any single tree independently for i in range(100): - root.append(root[0]) + el = root[0] + del root[0] + root.append(el) def bench_reorder(self, tree, root): for i in range(1,len(root)/2): - root[-i:-i] = [ root[0] ] + el = root[0] + del root[0] + root[-i:-i] = [ el ] def bench_reorder_slice(self, tree, root): for i in range(1,len(root)/2): - root[-i:-i] = root[0:1] + els = root[0:1] + del root[0] + root[-i:-i] = els def bench_clear(self, tree, root): root.clear() + def bench_create_subelements(self, tree, root): + SubElement = self.etree.SubElement + for child in root: + SubElement(child, '{test}test') + + def bench_append_elements(self, tree, root): + Element = self.etree.Element + for child in root: + el = Element('{test}test') + child.append(el) + + def bench_replace_children(self, tree, root): + Element = self.etree.Element + for child in root: + el = Element('{test}test') + child[:] = [el] + +############################################################ +# Main program +############################################################ if __name__ == '__main__': if len(sys.argv) > 1: @@ -173,6 +226,8 @@ except ImportError: pass + print "Preparing test suites and trees ..." 
+ benchmark_suites = map(BenchMark, _etrees) # sorted by name and tree tuple @@ -187,35 +242,63 @@ benchmarks = [ [ b for b in bs if b[0] in selected ] for bs in benchmarks ] + import time + def run_bench(suite, method_name, tree_set): + current_time = time.time + call_repeat = range(50) + + suite.setup(tree_set) + + call = getattr(suite, method_name) + args = list(chain(*[ (getattr(suite, '_tree%d' % tree), + getattr(suite, '_root%d' % tree)) + for tree in tree_set ])) + def calibrate(*void): + pass + t = current_time() + for i in call_repeat: + calibrate(*args) + call_overhead = current_time() - t + + times = [] + for i in range(3): + gc.collect() + gc.disable() + t = current_time() + for i in call_repeat: + call(*args) + t = max(0, current_time() - t - call_overhead) + t = 1000.0 * t / len(call_repeat) + times.append(t) + gc.enable() + + suite.cleanup() + return times + print "Running benchmark on", ', '.join(b.lib_name for b in benchmark_suites) print - for bench_calls in izip(*benchmarks): - for lib, config in enumerate(izip(bench_calls, benchmark_suites)): - (bench_name, tree_set), bench = config - - bench_args = ', '.join("bench._tree%d, bench._root%d" % (tree, tree) - for tree in tree_set) + print "Setup times for trees in seconds:" + for b in benchmark_suites: + print "%-12s : " % b.lib_name, ', '.join("%9.4f" % t + for t in b.setup_times) + print - timer = timeit.Timer( - "bench.%s(%s)" % (bench_name, bench_args), - "from __main__ import bench ; bench.setup(%s) ; gc.enable()" % \ - str(tuple(tree_set)) - ) + for bench_calls in izip(*benchmarks): + for lib, config in enumerate(izip(benchmark_suites, bench_calls)): + bench, (bench_name, tree_set) = config print "%-12s %-25s (T%-6s)" % (bench.lib_name, bench_name[6:], ',T'.join(imap(str, tree_set))[:6]), sys.stdout.flush() - result = timer.repeat(3, 50) - - bench.cleanup() + result = run_bench(bench, bench_name, tree_set) for t in result: - print "%8.4f" % t, - print "msec/pass, best: %8.4f" % min(result) + 
print "%9.4f" % t, + print "msec/pass, best: %9.4f" % min(result) if len(benchmark_suites) > 1: print # empty line between different benchmarks From scoder at codespeak.net Wed Mar 8 15:40:05 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 15:40:07 2006 Subject: [Lxml-checkins] r24107 - lxml/trunk Message-ID: <20060308144005.C90B8100A9@code0.codespeak.net> Author: scoder Date: Wed Mar 8 15:40:04 2006 New Revision: 24107 Modified: lxml/trunk/bench.py Log: another refactoring of the benchmark script to fix the tree setup: make it independent for each iteration, not only each benchmark Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Wed Mar 8 15:40:04 2006 @@ -14,15 +14,17 @@ root, t = setup() times.append(t) setattr(self, '__root%d' % tree, root) + def set_property(root): + setattr(self, '_root%d' % tree, + lambda : copy.deepcopy(root)) + set_property(root) def setup(self, trees=()): if not trees: trees = self._all_trees() for tree in trees: - root = copy.deepcopy( getattr(self, '__root%d' % tree) ) - setattr(self, '_root%d' % tree, root) - setattr(self, '_tree%d' % tree, self.etree.ElementTree(root)) + set_property( getattr(self, '__root%d' % tree) ) def _all_trees(self): all_trees = [] @@ -90,11 +92,6 @@ t = current_time() - t return (root, t) - def cleanup(self): - for name in dir(self): - if name.startswith('_root') or name.startswith('_tree'): - delattr(self, name) - def benchmarks(self): """Returns a list of all benchmarks. 
@@ -117,7 +114,7 @@ else: try: function = getattr(method, 'im_func', method) - arg_count = method.func_code.co_argcount / 2 + arg_count = method.func_code.co_argcount - 1 except AttributeError: arg_count = 1 for trees in self._permutations(all_trees, arg_count): @@ -145,49 +142,49 @@ ############################################################ class BenchMark(BenchMarkBase): - def bench_append_from_document(self, tree1, root1, tree2, root2): + def bench_append_from_document(self, root1, root2): # == "1,2 2,3 1,3 3,1 3,2 2,1" # trees 1 and 2, or 2 and 3, or ... for el in root2: root1.append(el) - def bench_insert_from_document(self, tree1, root1, tree2, root2): + def bench_insert_from_document(self, root1, root2): for el in root2: root1.insert(len(root1)/2, el) - def bench_rotate_children(self, tree, root): + def bench_rotate_children(self, root): # == "1 2 3" # runs on any single tree independently for i in range(100): el = root[0] del root[0] root.append(el) - def bench_reorder(self, tree, root): + def bench_reorder(self, root): for i in range(1,len(root)/2): el = root[0] del root[0] root[-i:-i] = [ el ] - def bench_reorder_slice(self, tree, root): + def bench_reorder_slice(self, root): for i in range(1,len(root)/2): els = root[0:1] del root[0] root[-i:-i] = els - def bench_clear(self, tree, root): + def bench_clear(self, root): root.clear() - def bench_create_subelements(self, tree, root): + def bench_create_subelements(self, root): SubElement = self.etree.SubElement for child in root: SubElement(child, '{test}test') - def bench_append_elements(self, tree, root): + def bench_append_elements(self, root): Element = self.etree.Element for child in root: el = Element('{test}test') child.append(el) - def bench_replace_children(self, tree, root): + def bench_replace_children(self, root): Element = self.etree.Element for child in root: el = Element('{test}test') @@ -245,34 +242,25 @@ import time def run_bench(suite, method_name, tree_set): current_time = time.time - 
call_repeat = range(50) - - suite.setup(tree_set) + call_repeat = range(10) call = getattr(suite, method_name) - args = list(chain(*[ (getattr(suite, '_tree%d' % tree), - getattr(suite, '_root%d' % tree)) - for tree in tree_set ])) - def calibrate(*void): - pass - t = current_time() - for i in call_repeat: - calibrate(*args) - call_overhead = current_time() - t + tree_builders = [ getattr(suite, '_root%d' % tree) + for tree in tree_set ] times = [] for i in range(3): gc.collect() gc.disable() - t = current_time() + t = 0 for i in call_repeat: + args = [ build() for build in tree_builders ] + t_one_call = current_time() call(*args) - t = max(0, current_time() - t - call_overhead) + t += current_time() - t_one_call t = 1000.0 * t / len(call_repeat) times.append(t) gc.enable() - - suite.cleanup() return times From scoder at codespeak.net Wed Mar 8 15:46:22 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 15:46:23 2006 Subject: [Lxml-checkins] r24109 - lxml/trunk/src/lxml/tests Message-ID: <20060308144622.E20531008E@code0.codespeak.net> Author: scoder Date: Wed Mar 8 15:46:16 2006 New Revision: 24109 Modified: lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_etree.py Log: clean ups Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Wed Mar 8 15:46:16 2006 @@ -12,8 +12,6 @@ from StringIO import StringIO import os, shutil, tempfile, copy -import gzip -import urllib2 from common_imports import etree, ElementTree, HelperTestCase, fileInTestDir, canonicalize Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Wed Mar 8 15:46:16 2006 @@ -11,9 +11,6 @@ import 
unittest, doctest from StringIO import StringIO -import os, shutil, tempfile, copy -import gzip -import urllib2 from common_imports import etree, HelperTestCase, fileInTestDir, canonicalize From scoder at codespeak.net Wed Mar 8 15:46:42 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 15:46:43 2006 Subject: [Lxml-checkins] r24110 - in lxml/branch/scoder2: . src/lxml/tests Message-ID: <20060308144642.6238A1008E@code0.codespeak.net> Author: scoder Date: Wed Mar 8 15:46:40 2006 New Revision: 24110 Modified: lxml/branch/scoder2/bench.py lxml/branch/scoder2/src/lxml/tests/test_elementtree.py lxml/branch/scoder2/src/lxml/tests/test_etree.py Log: even more merges from trunk :) Modified: lxml/branch/scoder2/bench.py ============================================================================== --- lxml/branch/scoder2/bench.py (original) +++ lxml/branch/scoder2/bench.py Wed Mar 8 15:46:40 2006 @@ -14,15 +14,17 @@ root, t = setup() times.append(t) setattr(self, '__root%d' % tree, root) + def set_property(root): + setattr(self, '_root%d' % tree, + lambda : copy.deepcopy(root)) + set_property(root) def setup(self, trees=()): if not trees: trees = self._all_trees() for tree in trees: - root = copy.deepcopy( getattr(self, '__root%d' % tree) ) - setattr(self, '_root%d' % tree, root) - setattr(self, '_tree%d' % tree, self.etree.ElementTree(root)) + set_property( getattr(self, '__root%d' % tree) ) def _all_trees(self): all_trees = [] @@ -90,11 +92,6 @@ t = current_time() - t return (root, t) - def cleanup(self): - for name in dir(self): - if name.startswith('_root') or name.startswith('_tree'): - delattr(self, name) - def benchmarks(self): """Returns a list of all benchmarks. 
@@ -117,7 +114,7 @@ else: try: function = getattr(method, 'im_func', method) - arg_count = method.func_code.co_argcount / 2 + arg_count = method.func_code.co_argcount - 1 except AttributeError: arg_count = 1 for trees in self._permutations(all_trees, arg_count): @@ -145,49 +142,49 @@ ############################################################ class BenchMark(BenchMarkBase): - def bench_append_from_document(self, tree1, root1, tree2, root2): + def bench_append_from_document(self, root1, root2): # == "1,2 2,3 1,3 3,1 3,2 2,1" # trees 1 and 2, or 2 and 3, or ... for el in root2: root1.append(el) - def bench_insert_from_document(self, tree1, root1, tree2, root2): + def bench_insert_from_document(self, root1, root2): for el in root2: root1.insert(len(root1)/2, el) - def bench_rotate_children(self, tree, root): + def bench_rotate_children(self, root): # == "1 2 3" # runs on any single tree independently for i in range(100): el = root[0] del root[0] root.append(el) - def bench_reorder(self, tree, root): + def bench_reorder(self, root): for i in range(1,len(root)/2): el = root[0] del root[0] root[-i:-i] = [ el ] - def bench_reorder_slice(self, tree, root): + def bench_reorder_slice(self, root): for i in range(1,len(root)/2): els = root[0:1] del root[0] root[-i:-i] = els - def bench_clear(self, tree, root): + def bench_clear(self, root): root.clear() - def bench_create_subelements(self, tree, root): + def bench_create_subelements(self, root): SubElement = self.etree.SubElement for child in root: SubElement(child, '{test}test') - def bench_append_elements(self, tree, root): + def bench_append_elements(self, root): Element = self.etree.Element for child in root: el = Element('{test}test') child.append(el) - def bench_replace_children(self, tree, root): + def bench_replace_children(self, root): Element = self.etree.Element for child in root: el = Element('{test}test') @@ -245,34 +242,25 @@ import time def run_bench(suite, method_name, tree_set): current_time = time.time - 
call_repeat = range(50) - - suite.setup(tree_set) + call_repeat = range(10) call = getattr(suite, method_name) - args = list(chain(*[ (getattr(suite, '_tree%d' % tree), - getattr(suite, '_root%d' % tree)) - for tree in tree_set ])) - def calibrate(*void): - pass - t = current_time() - for i in call_repeat: - calibrate(*args) - call_overhead = current_time() - t + tree_builders = [ getattr(suite, '_root%d' % tree) + for tree in tree_set ] times = [] for i in range(3): gc.collect() gc.disable() - t = current_time() + t = 0 for i in call_repeat: + args = [ build() for build in tree_builders ] + t_one_call = current_time() call(*args) - t = max(0, current_time() - t - call_overhead) + t += current_time() - t_one_call t = 1000.0 * t / len(call_repeat) times.append(t) gc.enable() - - suite.cleanup() return times Modified: lxml/branch/scoder2/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_elementtree.py Wed Mar 8 15:46:40 2006 @@ -12,8 +12,6 @@ from StringIO import StringIO import os, shutil, tempfile, copy -import gzip -import urllib2 from common_imports import etree, ElementTree, HelperTestCase, fileInTestDir, canonicalize Modified: lxml/branch/scoder2/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_etree.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_etree.py Wed Mar 8 15:46:40 2006 @@ -11,9 +11,6 @@ import unittest, doctest from StringIO import StringIO -import os, shutil, tempfile, copy -import gzip -import urllib2 from common_imports import etree, HelperTestCase, fileInTestDir, canonicalize From scoder at codespeak.net Wed Mar 8 16:27:06 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 16:27:07 2006 Subject: [Lxml-checkins] r24112 - 
lxml/trunk/src/lxml/tests Message-ID: <20060308152706.DBD1B10080@code0.codespeak.net> Author: scoder Date: Wed Mar 8 16:27:05 2006 New Revision: 24112 Modified: lxml/trunk/src/lxml/tests/test_elementtree.py Log: little extension to test case Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Wed Mar 8 16:27:05 2006 @@ -568,6 +568,8 @@ b.tail = 'hoi' self.assertEquals('hoi', b.tail) + self.assertEquals('dag', + a.tail) def test_tail_append(self): Element = self.etree.Element From scoder at codespeak.net Wed Mar 8 17:43:11 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 17:43:12 2006 Subject: [Lxml-checkins] r24115 - lxml/trunk/src/lxml Message-ID: <20060308164311.869DA10089@code0.codespeak.net> Author: scoder Date: Wed Mar 8 17:43:10 2006 New Revision: 24115 Modified: lxml/trunk/src/lxml/etree.pyx Log: refactoring of element namespace setting * removed some potential bugs related to UTF-8 coding and tag-name splitting - in both Element() and SubElement() - the tag name was used directly as node name, without namespace splitting - was not hazardous as name and namespace were both reset later on * removed some indirections and duplication of name/namespace setting * merged nsmap and element namespace handling into one function: _setNamespaces * renamed _addNamespaces into _setNamespaces to reflect new purpose Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Mar 8 17:43:10 2006 @@ -144,10 +144,10 @@ """ cdef xmlDoc* c_doc cdef xmlNode* c_node + cdef xmlNs* c_ns c_doc = self._doc._c_doc c_node = self._c_node - cdef xmlNs* c_ns # look for existing ns c_ns = tree.xmlSearchNsByHref(c_doc, c_node, href) if 
c_ns is not NULL: @@ -158,6 +158,9 @@ c_ns = tree.xmlNewNs(c_node, href, prefix) return c_ns + cdef void _setNs(self, char* href): + tree.xmlSetNs(self._c_node, self._getNs(href)) + cdef class _ElementTree: cdef _Document _doc cdef _NodeBase _context_node @@ -935,18 +938,17 @@ self._node = _elementFactory(base_node._doc, c_node) return current_node -cdef xmlNode* _createElement(xmlDoc* c_doc, object tag, +cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf, object attrib, object extra) except NULL: cdef xmlNode* c_node - tag_utf = tag.encode('UTF-8') if attrib is None: attrib = {} attrib.update(extra) - c_node = tree.xmlNewDocNode(c_doc, NULL, tag_utf, NULL) + c_node = tree.xmlNewDocNode(c_doc, NULL, name_utf, NULL) for name, value in attrib.items(): - name_utf = name.encode('UTF-8') + attr_name_utf = name.encode('UTF-8') value_utf = value.encode('UTF-8') - tree.xmlNewProp(c_node, name_utf, value_utf) + tree.xmlNewProp(c_node, attr_name_utf, value_utf) return c_node cdef xmlNode* _createComment(xmlDoc* c_doc, char* text): @@ -962,15 +964,14 @@ cdef _Element result cdef xmlNode* c_node cdef xmlDoc* c_doc + ns_utf, name_utf = _getNsTag(tag) c_doc = theParser.newDoc() - c_node = _createElement(c_doc, tag, attrib, extra) + c_node = _createElement(c_doc, name_utf, attrib, extra) tree.xmlDocSetRootElement(c_doc, c_node) - # add namespaces to node if necessary - _addNamespaces(c_doc, c_node, nsmap) - # XXX hack for namespaces doc = _documentFactory(c_doc) result = _elementFactory(doc, c_node) - result.tag = tag + # add namespaces to node if necessary + _setNamespaces(result, ns_utf, nsmap) return result def Comment(text=None): @@ -988,13 +989,12 @@ cdef xmlNode* c_node cdef _Element element _raiseIfNone(parent) - c_node = _createElement(parent._doc._c_doc, tag, attrib, extra) + ns_utf, name_utf = _getNsTag(tag) + c_node = _createElement(parent._doc._c_doc, name_utf, attrib, extra) element = _elementFactory(parent._doc, c_node) - parent.append(element) + 
tree.xmlAddChild(parent._c_node, c_node) # add namespaces to node if necessary - _addNamespaces(parent._doc._c_doc, c_node, nsmap) - # XXX hack for namespaces - element.tag = tag + _setNamespaces(element, ns_utf, nsmap) return element def ElementTree(_Element element=None, file=None, parser=None): @@ -1075,18 +1075,39 @@ doc = _parseDocument(source, parser) return ElementTree(doc.getroot()) -cdef _addNamespaces(xmlDoc* c_doc, xmlNode* c_node, object nsmap): - cdef xmlNs* c_ns - if nsmap is None: +cdef void _setNamespaces(_NodeBase element, object node_ns_utf, object nsmap): + "Set namespace of node and register ns-prefix mappings." + cdef xmlNs* c_ns + cdef xmlNode* c_node + cdef xmlDoc* c_doc + cdef char* c_prefix + cdef char* c_href + + if not nsmap: + if node_ns_utf is not None: + element._setNs(node_ns_utf) return + + c_node = element._c_node + c_doc = element._doc._c_doc for prefix, href in nsmap.items(): + href_utf = href.encode('UTF-8') + c_href = href_utf + if prefix is not None: + prefix_utf = prefix.encode('UTF-8') + c_prefix = prefix_utf + else: + c_prefix = NULL # add namespace with prefix if ns is not already known - c_ns = tree.xmlSearchNsByHref(c_doc, c_node, href) + c_ns = tree.xmlSearchNsByHref(c_doc, c_node, c_href) if c_ns is NULL: - if prefix is not None: - tree.xmlNewNs(c_node, href, prefix) - else: - tree.xmlNewNs(c_node, href, NULL) + c_ns = tree.xmlNewNs(c_node, c_href, c_prefix) + if href_utf == node_ns_utf: + tree.xmlSetNs(c_node, c_ns) + node_ns_utf = None + + if node_ns_utf is not None: + element._setNs(node_ns_utf) # include submodules From scoder at codespeak.net Wed Mar 8 17:52:00 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 17:52:01 2006 Subject: [Lxml-checkins] r24117 - in lxml/branch/scoder2/src/lxml: . 
tests Message-ID: <20060308165200.A613D1008A@code0.codespeak.net> Author: scoder Date: Wed Mar 8 17:51:59 2006 New Revision: 24117 Modified: lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/tests/test_elementtree.py Log: merged in refactoring of namespace handling and other things from trunk Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Wed Mar 8 17:51:59 2006 @@ -144,10 +144,10 @@ """ cdef xmlDoc* c_doc cdef xmlNode* c_node + cdef xmlNs* c_ns c_doc = self._doc._c_doc c_node = self._c_node - cdef xmlNs* c_ns # look for existing ns c_ns = tree.xmlSearchNsByHref(c_doc, c_node, href) if c_ns is not NULL: @@ -158,6 +158,9 @@ c_ns = tree.xmlNewNs(c_node, href, prefix) return c_ns + cdef void _setNs(self, char* href): + tree.xmlSetNs(self._c_node, self._getNs(href)) + cdef class _ElementTree: cdef _Document _doc cdef _NodeBase _context_node @@ -935,18 +938,17 @@ self._node = _elementFactory(base_node._doc, c_node) return current_node -cdef xmlNode* _createElement(xmlDoc* c_doc, object tag, +cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf, object attrib, object extra) except NULL: cdef xmlNode* c_node - tag_utf = tag.encode('UTF-8') if attrib is None: attrib = {} attrib.update(extra) - c_node = tree.xmlNewDocNode(c_doc, NULL, tag_utf, NULL) + c_node = tree.xmlNewDocNode(c_doc, NULL, name_utf, NULL) for name, value in attrib.items(): - name_utf = name.encode('UTF-8') + attr_name_utf = name.encode('UTF-8') value_utf = value.encode('UTF-8') - tree.xmlNewProp(c_node, name_utf, value_utf) + tree.xmlNewProp(c_node, attr_name_utf, value_utf) return c_node cdef xmlNode* _createComment(xmlDoc* c_doc, char* text): @@ -962,15 +964,14 @@ cdef _Element result cdef xmlNode* c_node cdef xmlDoc* c_doc + ns_utf, name_utf = _getNsTag(_tag) c_doc = theParser.newDoc() - c_node = 
_createElement(c_doc, _tag, attrib, _extra) + c_node = _createElement(c_doc, name_utf, attrib, _extra) tree.xmlDocSetRootElement(c_doc, c_node) - # add namespaces to node if necessary - _addNamespaces(c_doc, c_node, nsmap) - # XXX hack for namespaces doc = _documentFactory(c_doc) result = _elementFactory(doc, c_node) - result.tag = _tag + # add namespaces to node if necessary + _setNamespaces(result, ns_utf, nsmap) return result def Comment(text=None): @@ -988,13 +989,12 @@ cdef xmlNode* c_node cdef _Element element _raiseIfNone(_parent) - c_node = _createElement(_parent._doc._c_doc, _tag, attrib, _extra) + ns_utf, name_utf = _getNsTag(_tag) + c_node = _createElement(_parent._doc._c_doc, name_utf, attrib, _extra) element = _elementFactory(_parent._doc, c_node) - _parent.append(element) + tree.xmlAddChild(_parent._c_node, c_node) # add namespaces to node if necessary - _addNamespaces(_parent._doc._c_doc, c_node, nsmap) - # XXX hack for namespaces - element.tag = _tag + _setNamespaces(element, ns_utf, nsmap) return element def ElementTree(_Element element=None, file=None, parser=None): @@ -1075,18 +1075,39 @@ doc = _parseDocument(source, parser) return ElementTree(doc.getroot()) -cdef _addNamespaces(xmlDoc* c_doc, xmlNode* c_node, object nsmap): - cdef xmlNs* c_ns - if nsmap is None: +cdef void _setNamespaces(_NodeBase element, object node_ns_utf, object nsmap): + "Set namespace of node and register ns-prefix mappings." 
+ cdef xmlNs* c_ns + cdef xmlNode* c_node + cdef xmlDoc* c_doc + cdef char* c_prefix + cdef char* c_href + + if not nsmap: + if node_ns_utf is not None: + element._setNs(node_ns_utf) return + + c_node = element._c_node + c_doc = element._doc._c_doc for prefix, href in nsmap.items(): + href_utf = href.encode('UTF-8') + c_href = href_utf + if prefix is not None: + prefix_utf = prefix.encode('UTF-8') + c_prefix = prefix_utf + else: + c_prefix = NULL # add namespace with prefix if ns is not already known - c_ns = tree.xmlSearchNsByHref(c_doc, c_node, href) + c_ns = tree.xmlSearchNsByHref(c_doc, c_node, c_href) if c_ns is NULL: - if prefix is not None: - tree.xmlNewNs(c_node, href, prefix) - else: - tree.xmlNewNs(c_node, href, NULL) + c_ns = tree.xmlNewNs(c_node, c_href, c_prefix) + if href_utf == node_ns_utf: + tree.xmlSetNs(c_node, c_ns) + node_ns_utf = None + + if node_ns_utf is not None: + element._setNs(node_ns_utf) # include submodules Modified: lxml/branch/scoder2/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_elementtree.py Wed Mar 8 17:51:59 2006 @@ -568,6 +568,8 @@ b.tail = 'hoi' self.assertEquals('hoi', b.tail) + self.assertEquals('dag', + a.tail) def test_tail_append(self): Element = self.etree.Element From scoder at codespeak.net Wed Mar 8 20:54:07 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 20:54:08 2006 Subject: [Lxml-checkins] r24122 - lxml/trunk/src/lxml Message-ID: <20060308195407.1046F100AC@code0.codespeak.net> Author: scoder Date: Wed Mar 8 20:54:04 2006 New Revision: 24122 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/nsclasses.pxi lxml/trunk/src/lxml/tree.pxd lxml/trunk/src/lxml/xslt.pxi Log: use dedicated _utf8() function for UTF-8 encoding - a little faster and IMHO more readable Modified: lxml/trunk/src/lxml/etree.pyx 
============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Mar 8 20:54:04 2006 @@ -511,7 +511,7 @@ if value is None: return # now add new text node with value at start - text = value.encode('UTF-8') + text = _utf8(value) c_text_node = tree.xmlNewDocText(self._doc._c_doc, text) if self._c_node.children is NULL: @@ -531,7 +531,7 @@ _removeText(self._c_node.next) if value is None: return - text = value.encode('UTF-8') + text = _utf8(value) c_text_node = tree.xmlNewDocText(self._doc._c_doc, text) # XXX what if we're the top element? tree.xmlAddNextSibling(self._c_node, c_text_node) @@ -767,7 +767,7 @@ def __setitem__(self, key, value): cdef xmlNs* c_ns ns, tag = _getNsTag(key) - value = value.encode('UTF-8') + value = _utf8(value) if ns is None: tree.xmlSetProp(self._c_node, tag, value) else: @@ -946,8 +946,8 @@ attrib.update(extra) c_node = tree.xmlNewDocNode(c_doc, NULL, name_utf, NULL) for name, value in attrib.items(): - attr_name_utf = name.encode('UTF-8') - value_utf = value.encode('UTF-8') + attr_name_utf = _utf8(name) + value_utf = _utf8(value) tree.xmlNewProp(c_node, attr_name_utf, value_utf) return c_node @@ -978,8 +978,9 @@ cdef _Document doc cdef xmlNode* c_node if text is None: - text = '' - text = ' %s ' % text.encode('UTF-8') + text = ' ' + else: + text = ' %s ' % _utf8(text) doc = _documentFactory( theParser.newDoc() ) c_node = _createComment(doc._c_doc, text) tree.xmlAddChild(doc._c_doc, c_node) @@ -1025,7 +1026,7 @@ def XML(text): cdef xmlDoc* c_doc if isinstance(text, unicode): - text = _stripDeclaration(text.encode('UTF-8')) + text = _stripDeclaration(_utf8(text)) c_doc = theParser.parseDoc(text, None) return _documentFactory(c_doc).getroot() @@ -1091,10 +1092,10 @@ c_node = element._c_node c_doc = element._doc._c_doc for prefix, href in nsmap.items(): - href_utf = href.encode('UTF-8') + href_utf = _utf8(href) c_href = href_utf if prefix is not 
None: - prefix_utf = prefix.encode('UTF-8') + prefix_utf = _utf8(prefix) c_prefix = prefix_utf else: c_prefix = NULL @@ -1365,7 +1366,7 @@ """Given a tag, find namespace URI and tag name. Return None for NS uri if no namespace URI available. """ - tag = tag.encode('UTF-8') + tag = _utf8(tag) if tag[0] == '{': i = tag.find('}') assert i != -1 @@ -1387,6 +1388,14 @@ return tree.PyUnicode_DecodeUTF8(s, tree.strlen(s), "strict") return tree.PyString_FromStringAndSize(s, tree.strlen(s)) +cdef object _utf8(object s): + if tree.PyString_Check(s): + return s + elif tree.PyUnicode_Check(s): + return tree.PyUnicode_AsUTF8String(s) + else: + raise TypeError, "Argument must be string or unicode." + cdef object _namespacedName(xmlNode* c_node): if c_node.ns is NULL or c_node.ns.href is NULL: return funicode(c_node.name) Modified: lxml/trunk/src/lxml/nsclasses.pxi ============================================================================== --- lxml/trunk/src/lxml/nsclasses.pxi (original) +++ lxml/trunk/src/lxml/nsclasses.pxi Wed Mar 8 20:54:04 2006 @@ -20,7 +20,7 @@ def Namespace(ns_uri): if ns_uri: - ns_utf = ns_uri.encode('UTF-8') + ns_utf = _utf8(ns_uri) else: ns_utf = None try: @@ -70,7 +70,7 @@ if name is None: name_utf = None else: - name_utf = name.encode('UTF-8') + name_utf = _utf8(name) d[name_utf] = item def __getitem__(self, name): @@ -92,7 +92,7 @@ cdef _NamespaceRegistry _find_namespace_registry(object ns_uri): if ns_uri: - ns_utf = ns_uri.encode('UTF-8') + ns_utf = _utf8(ns_uri) else: ns_utf = None return __NAMESPACE_CLASSES[ns_utf] Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Wed Mar 8 20:54:04 2006 @@ -13,6 +13,11 @@ cdef void Py_INCREF(object o) cdef object PyUnicode_DecodeUTF8(char* s, int size, char* errors) cdef object PyString_FromStringAndSize(char* s, int size) + cdef object PyString_FromString(char* s) + + 
cdef int PyUnicode_Check(object obj) + cdef int PyString_Check(object obj) + cdef object PyUnicode_AsUTF8String(object ustring) cdef extern from "libxml/encoding.h": ctypedef struct xmlCharEncodingHandler Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Wed Mar 8 20:54:04 2006 @@ -84,9 +84,9 @@ i = 0 keep_ref = [] for key, value in _kw.items(): - k = key.encode('UTF-8') + k = _utf8(key) keep_ref.append(k) - v = value.encode('UTF-8') + v = _utf8(value) keep_ref.append(v) params[i] = k i = i + 1 @@ -198,8 +198,8 @@ def registerNamespace(self, prefix, uri): """Register a namespace with the XPath context. """ - s_prefix = prefix.encode('UTF8') - s_uri = uri.encode('UTF8') + s_prefix = _utf8(prefix) + s_uri = _utf8(uri) # XXX should check be done to verify namespace doesn't already exist? ns_register_status = xpath.xmlXPathRegisterNs( self._c_ctxt, s_prefix, s_uri) @@ -226,7 +226,7 @@ # if element context is requested; unfortunately need to modify ctxt self._c_ctxt.node = c_ctxt_node - path = path.encode('UTF-8') + path = _utf8(path) self._exc_info = None self._release() xpathObj = xpath.xmlXPathEvalExpression(path, self._c_ctxt) @@ -308,11 +308,11 @@ cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL: cdef xpath.xmlNodeSet* resultSet cdef _NodeBase node - if isinstance(obj, str): + if tree.PyString_Check(obj): # XXX use the Wrap variant? Or leak... 
return xpath.xmlXPathNewCString(obj) - if isinstance(obj, unicode): - obj = obj.encode("utf-8") + if tree.PyUnicode_Check(obj): + obj = _utf8(obj) return xpath.xmlXPathNewCString(obj) if isinstance(obj, types.BooleanType): return xpath.xmlXPathNewBoolean(obj) From scoder at codespeak.net Wed Mar 8 21:02:58 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 21:02:59 2006 Subject: [Lxml-checkins] r24123 - lxml/branch/scoder2/src/lxml Message-ID: <20060308200258.A169E100AC@code0.codespeak.net> Author: scoder Date: Wed Mar 8 21:02:57 2006 New Revision: 24123 Modified: lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/nsclasses.pxi lxml/branch/scoder2/src/lxml/tree.pxd lxml/branch/scoder2/src/lxml/xslt.pxi Log: merged in new _utf8() function from trunk, use it in a few more places Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Wed Mar 8 21:02:57 2006 @@ -511,7 +511,7 @@ if value is None: return # now add new text node with value at start - text = value.encode('UTF-8') + text = _utf8(value) c_text_node = tree.xmlNewDocText(self._doc._c_doc, text) if self._c_node.children is NULL: @@ -531,7 +531,7 @@ _removeText(self._c_node.next) if value is None: return - text = value.encode('UTF-8') + text = _utf8(value) c_text_node = tree.xmlNewDocText(self._doc._c_doc, text) # XXX what if we're the top element? 
tree.xmlAddNextSibling(self._c_node, c_text_node) @@ -767,7 +767,7 @@ def __setitem__(self, key, value): cdef xmlNs* c_ns ns, tag = _getNsTag(key) - value = value.encode('UTF-8') + value = _utf8(value) if ns is None: tree.xmlSetProp(self._c_node, tag, value) else: @@ -946,8 +946,8 @@ attrib.update(extra) c_node = tree.xmlNewDocNode(c_doc, NULL, name_utf, NULL) for name, value in attrib.items(): - attr_name_utf = name.encode('UTF-8') - value_utf = value.encode('UTF-8') + attr_name_utf = _utf8(name) + value_utf = _utf8(value) tree.xmlNewProp(c_node, attr_name_utf, value_utf) return c_node @@ -978,8 +978,9 @@ cdef _Document doc cdef xmlNode* c_node if text is None: - text = '' - text = ' %s ' % text.encode('UTF-8') + text = ' ' + else: + text = ' %s ' % _utf8(text) doc = _documentFactory( theParser.newDoc() ) c_node = _createComment(doc._c_doc, text) tree.xmlAddChild(doc._c_doc, c_node) @@ -1025,7 +1026,7 @@ def XML(text): cdef xmlDoc* c_doc if isinstance(text, unicode): - text = _stripDeclaration(text.encode('UTF-8')) + text = _stripDeclaration(_utf8(text)) c_doc = theParser.parseDoc(text, None) return _documentFactory(c_doc).getroot() @@ -1091,10 +1092,10 @@ c_node = element._c_node c_doc = element._doc._c_doc for prefix, href in nsmap.items(): - href_utf = href.encode('UTF-8') + href_utf = _utf8(href) c_href = href_utf if prefix is not None: - prefix_utf = prefix.encode('UTF-8') + prefix_utf = _utf8(prefix) c_prefix = prefix_utf else: c_prefix = NULL @@ -1365,7 +1366,7 @@ """Given a tag, find namespace URI and tag name. Return None for NS uri if no namespace URI available. 
""" - tag = tag.encode('UTF-8') + tag = _utf8(tag) if tag[0] == '{': i = tag.find('}') assert i != -1 @@ -1387,6 +1388,14 @@ return tree.PyUnicode_DecodeUTF8(s, tree.strlen(s), "strict") return tree.PyString_FromStringAndSize(s, tree.strlen(s)) +cdef object _utf8(object s): + if tree.PyString_Check(s): + return s + elif tree.PyUnicode_Check(s): + return tree.PyUnicode_AsUTF8String(s) + else: + raise TypeError, "Argument must be string or unicode." + cdef object _namespacedName(xmlNode* c_node): if c_node.ns is NULL or c_node.ns.href is NULL: return funicode(c_node.name) Modified: lxml/branch/scoder2/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/nsclasses.pxi (original) +++ lxml/branch/scoder2/src/lxml/nsclasses.pxi Wed Mar 8 21:02:57 2006 @@ -24,7 +24,7 @@ """Retrieve the namespace object associated with the given URI. Creates a new one if it does not yet exist.""" if ns_uri: - ns_utf = ns_uri.encode('UTF-8') + ns_utf = _utf8(ns_uri) else: ns_utf = None try: @@ -39,7 +39,7 @@ URI. Creates a new one if it does not yet exist. 
A function namespace can only be used to register extension functions.""" if ns_uri: - ns_utf = ns_uri.encode('UTF-8') + ns_utf = _utf8(ns_uri) else: ns_utf = None try: @@ -91,7 +91,7 @@ if name is None: name_utf = None else: - name_utf = name.encode('UTF-8') + name_utf = _utf8(name) d[name_utf] = item def __getitem__(self, name): @@ -116,7 +116,7 @@ def __set__(self, prefix): if prefix is None: prefix = '' # empty prefix - self._prefix_utf = prefix.encode('UTF-8') + self._prefix_utf = _utf8(prefix) self._prefix = prefix def __setitem__(self, name, item): @@ -125,7 +125,7 @@ if name is None: name_utf = None else: - name_utf = name.encode('UTF-8') + name_utf = _utf8(name) self._extensions[name_utf] = item def __getitem__(self, name): @@ -151,7 +151,7 @@ cdef _NamespaceRegistry _find_namespace_registry(object ns_uri): if ns_uri: - ns_utf = ns_uri.encode('UTF-8') + ns_utf = _utf8(ns_uri) else: ns_utf = None return __NAMESPACE_REGISTRIES[ns_utf] @@ -163,7 +163,7 @@ extension_dict = {} for ns_uri in namespaces: if ns_uri: - ns_utf = ns_uri.encode('UTF-8') + ns_utf = _utf8(ns_uri) else: ns_utf = None try: Modified: lxml/branch/scoder2/src/lxml/tree.pxd ============================================================================== --- lxml/branch/scoder2/src/lxml/tree.pxd (original) +++ lxml/branch/scoder2/src/lxml/tree.pxd Wed Mar 8 21:02:57 2006 @@ -13,6 +13,11 @@ cdef void Py_INCREF(object o) cdef object PyUnicode_DecodeUTF8(char* s, int size, char* errors) cdef object PyString_FromStringAndSize(char* s, int size) + cdef object PyString_FromString(char* s) + + cdef int PyUnicode_Check(object obj) + cdef int PyString_Check(object obj) + cdef object PyUnicode_AsUTF8String(object ustring) cdef extern from "libxml/encoding.h": ctypedef struct xmlCharEncodingHandler Modified: lxml/branch/scoder2/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/xslt.pxi (original) +++ 
lxml/branch/scoder2/src/lxml/xslt.pxi Wed Mar 8 21:02:57 2006 @@ -81,7 +81,7 @@ try: return self._utf_refs[s] except KeyError: - utf = self._utf_refs[s] = s.encode('UTF8') + utf = self._utf_refs[s] = _utf8(s) return utf cdef _register_context(self, _Document doc, int allow_none_namespace): @@ -312,9 +312,9 @@ i = 0 keep_ref = [] for key, value in _kw.items(): - k = key.encode('UTF-8') + k = _utf8(key) keep_ref.append(k) - v = value.encode('UTF-8') + v = _utf8(value) keep_ref.append(v) params[i] = k i = i + 1 @@ -563,7 +563,7 @@ self._context.register_context(xpathCtxt, self._doc) self._context.registerVariables(variable_dict) - path = path.encode('UTF-8') + path = _utf8(path) xpathObj = xpath.xmlXPathEvalExpression(path, xpathCtxt) self._context.unregister_context() @@ -609,7 +609,7 @@ def __init__(self, path, namespaces=None, extensions=None): self.path = path - path = path.encode('UTF-8') + path = _utf8(path) if namespaces is None: path, namespaces = self._nsextract_path(path) XPathEvaluatorBase.__init__(self, namespaces, extensions, None) @@ -658,9 +658,9 @@ cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL: cdef xpath.xmlNodeSet* resultSet cdef _NodeBase node - if isinstance(obj, unicode): - obj = obj.encode("utf-8") - if isinstance(obj, str): + if tree.PyUnicode_Check(obj): + obj = _utf8(obj) + if tree.PyString_Check(obj): # XXX use the Wrap variant? Or leak... 
return xpath.xmlXPathNewCString(obj) if isinstance(obj, types.BooleanType): From scoder at codespeak.net Wed Mar 8 22:18:25 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 22:18:26 2006 Subject: [Lxml-checkins] r24126 - lxml/trunk/src/lxml Message-ID: <20060308211825.58FF4100AB@code0.codespeak.net> Author: scoder Date: Wed Mar 8 22:18:23 2006 New Revision: 24126 Modified: lxml/trunk/src/lxml/etree.pyx Log: some clean up, short cut to exception in element.remove() if searched child does not have self as parent Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Mar 8 22:18:23 2006 @@ -405,17 +405,18 @@ def append(self, _Element element): cdef xmlNode* c_next - cdef xmlNode* c_next2 + cdef xmlNode* c_node cdef int foreign _raiseIfNone(element) foreign = self._doc is not element._doc + c_node = element._c_node # store possible text node - c_next = element._c_node.next + c_next = c_node.next # XXX what if element is coming from a different document? - tree.xmlUnlinkNode(element._c_node) + tree.xmlUnlinkNode(c_node) # move node itself - tree.xmlAddChild(self._c_node, element._c_node) - _moveTail(c_next, element._c_node) + tree.xmlAddChild(self._c_node, c_node) + _moveTail(c_next, c_node) # uh oh, elements may be pointing to different doc when # parent element has moved; change them too.. changeDocumentBelow(element, self._doc, foreign) @@ -462,11 +463,15 @@ def remove(self, _Element element): cdef xmlNode* c_node + cdef xmlNode* c_search_node _raiseIfNone(element) + c_search_node = element._c_node + if c_search_node.parent is not self._c_node: + raise ValueError, "Element is not a child of this node." 
c_node = self._c_node.children while c_node is not NULL: - if c_node is element._c_node: - _removeText(element._c_node.next) + if c_node is c_search_node: + _removeText(c_search_node.next) tree.xmlUnlinkNode(element._c_node) return c_node = c_node.next From scoder at codespeak.net Wed Mar 8 22:37:52 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 22:37:54 2006 Subject: [Lxml-checkins] r24128 - lxml/trunk/src/lxml Message-ID: <20060308213752.E83C8100AB@code0.codespeak.net> Author: scoder Date: Wed Mar 8 22:37:51 2006 New Revision: 24128 Modified: lxml/trunk/src/lxml/etree.pyx Log: clean up Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Mar 8 22:37:51 2006 @@ -366,8 +366,6 @@ # now delete the slice if start != stop: c_node = _deleteSlice(c_node, start, stop) - # now find start of slice again, for insertion (just before it) - #c_node = _findChild(self._c_node, start) # if the insertion point is at the end, append there if c_node is NULL: for node in value: From scoder at codespeak.net Wed Mar 8 22:38:16 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 8 22:38:17 2006 Subject: [Lxml-checkins] r24129 - lxml/trunk Message-ID: <20060308213816.017D9100AB@code0.codespeak.net> Author: scoder Date: Wed Mar 8 22:38:15 2006 New Revision: 24129 Modified: lxml/trunk/bench.py Log: more benchmarks for element methods Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Wed Mar 8 22:38:15 2006 @@ -190,6 +190,24 @@ el = Element('{test}test') child[:] = [el] + def bench_remove_children(self, root): + for child in root: + root.remove(child) + + def bench_remove_children_reversed(self, root): + for child in reversed(root[:]): + root.remove(child) + + def 
bench_set_attributes(self, root): + for child in root: + child.set('a', 'bla') + + def bench_setget_attributes(self, root): + for child in root: + child.set('a', 'bla') + for child in root: + child.get('a') + ############################################################ # Main program ############################################################ From scoder at codespeak.net Thu Mar 9 07:03:25 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 9 07:03:31 2006 Subject: [Lxml-checkins] r24136 - lxml/trunk Message-ID: <20060309060325.B72E21008E@code0.codespeak.net> Author: scoder Date: Thu Mar 9 07:03:19 2006 New Revision: 24136 Modified: lxml/trunk/bench.py Log: new benchmark that shows insufficient speed when creating elements (problem in nsclasses lookup) Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Thu Mar 9 07:03:19 2006 @@ -208,6 +208,10 @@ for child in root: child.get('a') + def bench_getchildren(self, root): + for child in root: + child.getchildren() + ############################################################ # Main program ############################################################ From scoder at codespeak.net Thu Mar 9 07:09:01 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 9 07:09:02 2006 Subject: [Lxml-checkins] r24137 - lxml/trunk/src/lxml Message-ID: <20060309060901.4E5261008E@code0.codespeak.net> Author: scoder Date: Thu Mar 9 07:09:00 2006 New Revision: 24137 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tree.pxd Log: some more C-isation and shortcuts Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 9 07:09:00 2006 @@ -552,6 +552,7 @@ def __getslice__(self, start, stop): cdef xmlNode* c_node + cdef _Document doc cdef 
int c # this does not work for negative start, stop, however, # python seems to convert these to positive start, stop before @@ -561,9 +562,12 @@ return [] c = start result = [] + doc = self._doc while c_node is not NULL and c < stop: if _isElement(c_node): - result.append(_elementFactory(self._doc, c_node)) + ret = tree.PyList_Append(result, _elementFactory(doc, c_node)) + if ret: + raise c = c + 1 c_node = c_node.next return result @@ -586,7 +590,13 @@ cdef int k cdef int l cdef xmlNode* c_child + cdef xmlNode* c_search_node _raiseIfNone(x) + + c_search_node = x._c_node + if c_search_node.parent is not self._c_node: + raise ValueError, "Element is not a child of this node." + k = 0 c_child = self._c_node.children @@ -602,7 +612,7 @@ while c_child is not NULL: if _isElement(c_child): - if c_child is x._c_node: + if c_child is c_search_node: if ((start is None or k >= start) and (stop is None or k < stop)): return k @@ -641,11 +651,16 @@ def getchildren(self): cdef xmlNode* c_node + cdef _Document doc + cdef int ret result = [] + doc = self._doc c_node = self._c_node.children while c_node is not NULL: if _isElement(c_node): - result.append(_elementFactory(self._doc, c_node)) + ret = tree.PyList_Append(result, _elementFactory(doc, c_node)) + if ret: + raise c_node = c_node.next return result Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Thu Mar 9 07:09:00 2006 @@ -12,13 +12,15 @@ cdef object PyFile_Name(object p) cdef void Py_INCREF(object o) cdef object PyUnicode_DecodeUTF8(char* s, int size, char* errors) - cdef object PyString_FromStringAndSize(char* s, int size) - cdef object PyString_FromString(char* s) - cdef int PyUnicode_Check(object obj) cdef int PyString_Check(object obj) cdef object PyUnicode_AsUTF8String(object ustring) - + cdef object PyString_FromStringAndSize(char* s, int size) + cdef object 
PyString_FromString(char* s) + cdef int PyList_Append(object l, object obj) + cdef PyObject* PyDict_GetItem(object d, object key) + cdef PyObject* PyDict_GetItemString(object d, char* key) + cdef extern from "libxml/encoding.h": ctypedef struct xmlCharEncodingHandler cdef xmlCharEncodingHandler* xmlFindCharEncodingHandler(char* name) @@ -183,4 +185,3 @@ cdef extern from "libxml/xmlstring.h": cdef char* xmlStrdup(char* cur) - From scoder at codespeak.net Thu Mar 9 07:10:48 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 9 07:10:49 2006 Subject: [Lxml-checkins] r24138 - lxml/trunk/src/lxml Message-ID: <20060309061048.3673C1008E@code0.codespeak.net> Author: scoder Date: Thu Mar 9 07:10:47 2006 New Revision: 24138 Modified: lxml/trunk/src/lxml/nsclasses.pxi Log: rewrote lookup functions in C-style, speed up of 10% to 4x in benchmarks Modified: lxml/trunk/src/lxml/nsclasses.pxi ============================================================================== --- lxml/trunk/src/lxml/nsclasses.pxi (original) +++ lxml/trunk/src/lxml/nsclasses.pxi Thu Mar 9 07:10:47 2006 @@ -73,29 +73,40 @@ name_utf = _utf8(name) d[name_utf] = item - def __getitem__(self, name): - try: - return self._classes[name] - except KeyError: - return self._extensions[name] + cdef __getitem__(self, name): + cdef tree.PyObject* dict_result + cdef char* c_name + c_name = name + dict_result = tree.PyDict_GetItem(self._classes, c_name) + if dict_result is NULL: + dict_result = tree.PyDict_GetItem(self._extensions, c_name) + if dict_result is NULL: + raise KeyError, "Name not registered." 
+ return dict_result def clear(self): self._classes.clear() self._extensions.clear() + #self.self._xslt_elements.clear() cdef object _find_all_namespaces(): "Hack to register all extension functions in XSLT" ns_uris = [] for s in __NAMESPACE_CLASSES.keys(): - ns_uris.append(unicode(s, 'UTF-8')) + ns_uris.append(funicode(s)) return ns_uris cdef _NamespaceRegistry _find_namespace_registry(object ns_uri): + cdef tree.PyObject* dict_result if ns_uri: ns_utf = _utf8(ns_uri) + dict_result = tree.PyDict_GetItemString(__NAMESPACE_CLASSES, ns_utf) + else: + dict_result = tree.PyDict_GetItem(__NAMESPACE_CLASSES, None) + if dict_result is NULL: + raise else: - ns_utf = None - return __NAMESPACE_CLASSES[ns_utf] + return <_NamespaceRegistry>dict_result cdef _find_extensions(namespaces): extension_dict = {} @@ -110,28 +121,29 @@ cdef object _find_element_class(char* c_namespace_utf, char* c_element_name_utf): + cdef tree.PyObject* dict_result cdef _NamespaceRegistry registry - element_name_utf = c_element_name_utf - if c_namespace_utf == NULL: - if element_name_utf[:1] == '{': - namespace_utf, element_name_utf = element_name_utf[1:].split('}', 1) - else: - namespace_utf = None + cdef object result + if c_namespace_utf is not NULL: + dict_result = tree.PyDict_GetItemString(__NAMESPACE_CLASSES, c_namespace_utf) else: - namespace_utf = c_namespace_utf - - try: - registry = __NAMESPACE_CLASSES[namespace_utf] - except KeyError: + dict_result = tree.PyDict_GetItem(__NAMESPACE_CLASSES, None) + if dict_result is NULL: return _Element + + registry = dict_result classes = registry._classes - try: - return classes[element_name_utf] - except KeyError: - pass - try: - return classes[None] - except KeyError: - return _Element + if c_element_name_utf is not NULL: + dict_result = tree.PyDict_GetItemString(classes, c_element_name_utf) + else: + dict_result = NULL + + if dict_result is NULL: + dict_result = tree.PyDict_GetItem(classes, None) + if dict_result is not NULL: + result = dict_result + 
return result + else: + return _Element From scoder at codespeak.net Thu Mar 9 07:26:57 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 9 07:26:59 2006 Subject: [Lxml-checkins] r24139 - lxml/trunk/src/lxml Message-ID: <20060309062657.801B51008E@code0.codespeak.net> Author: scoder Date: Thu Mar 9 07:26:56 2006 New Revision: 24139 Modified: lxml/trunk/src/lxml/nsclasses.pxi Log: clean up, another refactoring of NamespaceRegistry.__getitem__() Modified: lxml/trunk/src/lxml/nsclasses.pxi ============================================================================== --- lxml/trunk/src/lxml/nsclasses.pxi (original) +++ lxml/trunk/src/lxml/nsclasses.pxi Thu Mar 9 07:26:56 2006 @@ -73,13 +73,15 @@ name_utf = _utf8(name) d[name_utf] = item - cdef __getitem__(self, name): + def __getitem__(self, name): + name_utf = _utf8(name) + return self._get(name_utf) + + cdef object _get(self, char* c_name): cdef tree.PyObject* dict_result - cdef char* c_name - c_name = name - dict_result = tree.PyDict_GetItem(self._classes, c_name) + dict_result = tree.PyDict_GetItemString(self._classes, c_name) if dict_result is NULL: - dict_result = tree.PyDict_GetItem(self._extensions, c_name) + dict_result = tree.PyDict_GetItemString(self._extensions, c_name) if dict_result is NULL: raise KeyError, "Name not registered." 
return dict_result @@ -89,13 +91,6 @@ self._extensions.clear() #self.self._xslt_elements.clear() -cdef object _find_all_namespaces(): - "Hack to register all extension functions in XSLT" - ns_uris = [] - for s in __NAMESPACE_CLASSES.keys(): - ns_uris.append(funicode(s)) - return ns_uris - cdef _NamespaceRegistry _find_namespace_registry(object ns_uri): cdef tree.PyObject* dict_result if ns_uri: @@ -108,17 +103,6 @@ else: return <_NamespaceRegistry>dict_result -cdef _find_extensions(namespaces): - extension_dict = {} - for ns_uri in namespaces: - try: - extensions = _find_namespace_registry(ns_uri)._extensions - except KeyError: - continue - if extensions: - extension_dict[ns_uri] = extensions - return extension_dict - cdef object _find_element_class(char* c_namespace_utf, char* c_element_name_utf): cdef tree.PyObject* dict_result From scoder at codespeak.net Thu Mar 9 07:40:05 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 9 07:40:07 2006 Subject: [Lxml-checkins] r24140 - lxml/trunk/src/lxml Message-ID: <20060309064005.F3DDD1008E@code0.codespeak.net> Author: scoder Date: Thu Mar 9 07:40:04 2006 New Revision: 24140 Modified: lxml/trunk/src/lxml/nsclasses.pxi Log: more clean ups Modified: lxml/trunk/src/lxml/nsclasses.pxi ============================================================================== --- lxml/trunk/src/lxml/nsclasses.pxi (original) +++ lxml/trunk/src/lxml/nsclasses.pxi Thu Mar 9 07:40:04 2006 @@ -91,18 +91,6 @@ self._extensions.clear() #self.self._xslt_elements.clear() -cdef _NamespaceRegistry _find_namespace_registry(object ns_uri): - cdef tree.PyObject* dict_result - if ns_uri: - ns_utf = _utf8(ns_uri) - dict_result = tree.PyDict_GetItemString(__NAMESPACE_CLASSES, ns_utf) - else: - dict_result = tree.PyDict_GetItem(__NAMESPACE_CLASSES, None) - if dict_result is NULL: - raise - else: - return <_NamespaceRegistry>dict_result - cdef object _find_element_class(char* c_namespace_utf, char* c_element_name_utf): cdef tree.PyObject* 
dict_result From scoder at codespeak.net Thu Mar 9 07:49:00 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 9 07:49:02 2006 Subject: [Lxml-checkins] r24141 - in lxml/branch/scoder2: . src/lxml Message-ID: <20060309064900.02ECD1008E@code0.codespeak.net> Author: scoder Date: Thu Mar 9 07:48:54 2006 New Revision: 24141 Modified: lxml/branch/scoder2/bench.py lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/tree.pxd Log: loads of merges and speed ups from trunk Modified: lxml/branch/scoder2/bench.py ============================================================================== --- lxml/branch/scoder2/bench.py (original) +++ lxml/branch/scoder2/bench.py Thu Mar 9 07:48:54 2006 @@ -190,6 +190,28 @@ el = Element('{test}test') child[:] = [el] + def bench_remove_children(self, root): + for child in root: + root.remove(child) + + def bench_remove_children_reversed(self, root): + for child in reversed(root[:]): + root.remove(child) + + def bench_set_attributes(self, root): + for child in root: + child.set('a', 'bla') + + def bench_setget_attributes(self, root): + for child in root: + child.set('a', 'bla') + for child in root: + child.get('a') + + def bench_getchildren(self, root): + for child in root: + child.getchildren() + ############################################################ # Main program ############################################################ Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Thu Mar 9 07:48:54 2006 @@ -366,8 +366,6 @@ # now delete the slice if start != stop: c_node = _deleteSlice(c_node, start, stop) - # now find start of slice again, for insertion (just before it) - #c_node = _findChild(self._c_node, start) # if the insertion point is at the end, append there if c_node is NULL: for node in value: @@ -405,17 +403,18 @@ def 
append(self, _Element element): cdef xmlNode* c_next - cdef xmlNode* c_next2 + cdef xmlNode* c_node cdef int foreign _raiseIfNone(element) foreign = self._doc is not element._doc + c_node = element._c_node # store possible text node - c_next = element._c_node.next + c_next = c_node.next # XXX what if element is coming from a different document? - tree.xmlUnlinkNode(element._c_node) + tree.xmlUnlinkNode(c_node) # move node itself - tree.xmlAddChild(self._c_node, element._c_node) - _moveTail(c_next, element._c_node) + tree.xmlAddChild(self._c_node, c_node) + _moveTail(c_next, c_node) # uh oh, elements may be pointing to different doc when # parent element has moved; change them too.. changeDocumentBelow(element, self._doc, foreign) @@ -462,11 +461,15 @@ def remove(self, _Element element): cdef xmlNode* c_node + cdef xmlNode* c_search_node _raiseIfNone(element) + c_search_node = element._c_node + if c_search_node.parent is not self._c_node: + raise ValueError, "Element is not a child of this node." 
c_node = self._c_node.children while c_node is not NULL: - if c_node is element._c_node: - _removeText(element._c_node.next) + if c_node is c_search_node: + _removeText(c_search_node.next) tree.xmlUnlinkNode(element._c_node) return c_node = c_node.next @@ -549,6 +552,7 @@ def __getslice__(self, start, stop): cdef xmlNode* c_node + cdef _Document doc cdef int c # this does not work for negative start, stop, however, # python seems to convert these to positive start, stop before @@ -558,9 +562,12 @@ return [] c = start result = [] + doc = self._doc while c_node is not NULL and c < stop: if _isElement(c_node): - result.append(_elementFactory(self._doc, c_node)) + ret = tree.PyList_Append(result, _elementFactory(doc, c_node)) + if ret: + raise c = c + 1 c_node = c_node.next return result @@ -583,7 +590,13 @@ cdef int k cdef int l cdef xmlNode* c_child + cdef xmlNode* c_search_node _raiseIfNone(x) + + c_search_node = x._c_node + if c_search_node.parent is not self._c_node: + raise ValueError, "Element is not a child of this node." 
+ k = 0 c_child = self._c_node.children @@ -599,7 +612,7 @@ while c_child is not NULL: if _isElement(c_child): - if c_child is x._c_node: + if c_child is c_search_node: if ((start is None or k >= start) and (stop is None or k < stop)): return k @@ -638,11 +651,16 @@ def getchildren(self): cdef xmlNode* c_node + cdef _Document doc + cdef int ret result = [] + doc = self._doc c_node = self._c_node.children while c_node is not NULL: if _isElement(c_node): - result.append(_elementFactory(self._doc, c_node)) + ret = tree.PyList_Append(result, _elementFactory(doc, c_node)) + if ret: + raise c_node = c_node.next return result Modified: lxml/branch/scoder2/src/lxml/tree.pxd ============================================================================== --- lxml/branch/scoder2/src/lxml/tree.pxd (original) +++ lxml/branch/scoder2/src/lxml/tree.pxd Thu Mar 9 07:48:54 2006 @@ -12,13 +12,15 @@ cdef object PyFile_Name(object p) cdef void Py_INCREF(object o) cdef object PyUnicode_DecodeUTF8(char* s, int size, char* errors) - cdef object PyString_FromStringAndSize(char* s, int size) - cdef object PyString_FromString(char* s) - cdef int PyUnicode_Check(object obj) cdef int PyString_Check(object obj) cdef object PyUnicode_AsUTF8String(object ustring) - + cdef object PyString_FromStringAndSize(char* s, int size) + cdef object PyString_FromString(char* s) + cdef int PyList_Append(object l, object obj) + cdef PyObject* PyDict_GetItem(object d, object key) + cdef PyObject* PyDict_GetItemString(object d, char* key) + cdef extern from "libxml/encoding.h": ctypedef struct xmlCharEncodingHandler cdef xmlCharEncodingHandler* xmlFindCharEncodingHandler(char* name) @@ -183,4 +185,3 @@ cdef extern from "libxml/xmlstring.h": cdef char* xmlStrdup(char* cur) - From scoder at codespeak.net Thu Mar 9 07:51:23 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 9 07:51:25 2006 Subject: [Lxml-checkins] r24142 - lxml/branch/scoder2/src/lxml Message-ID: 
<20060309065123.EC41B1008E@code0.codespeak.net> Author: scoder Date: Thu Mar 9 07:51:22 2006 New Revision: 24142 Modified: lxml/branch/scoder2/src/lxml/nsclasses.pxi Log: merged changes in nsclasses from trunk, adapted to reimplementation Modified: lxml/branch/scoder2/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/nsclasses.pxi (original) +++ lxml/branch/scoder2/src/lxml/nsclasses.pxi Thu Mar 9 07:51:22 2006 @@ -95,14 +95,22 @@ d[name_utf] = item def __getitem__(self, name): - try: - return self._classes[name] - except KeyError: - return self._extensions[name] + name_utf = _utf8(name) + return self._get(name_utf) + + cdef object _get(self, char* c_name): + cdef tree.PyObject* dict_result + dict_result = tree.PyDict_GetItemString(self._classes, c_name) + if dict_result is NULL: + dict_result = tree.PyDict_GetItemString(self._extensions, c_name) + if dict_result is NULL: + raise KeyError, "Name not registered." + return dict_result def clear(self): self._classes.clear() self._extensions.clear() + #self.self._xslt_elements.clear() cdef class _FunctionNamespaceRegistry(_NamespaceRegistry): cdef object _prefix @@ -128,8 +136,12 @@ name_utf = _utf8(name) self._extensions[name_utf] = item - def __getitem__(self, name): - return self._extensions[name] + cdef object _get(self, char* c_name): + cdef tree.PyObject* dict_result + dict_result = tree.PyDict_GetItemString(self._extensions, c_name) + if dict_result is NULL: + raise KeyError, "Name not registered." + return dict_result cdef object _find_all_extensions(): "Internal lookup function to find all extension functions for XSLT/XPath." 
@@ -149,56 +161,49 @@ ns_prefixes[registry._prefix_utf] = ns_utf return ns_prefixes -cdef _NamespaceRegistry _find_namespace_registry(object ns_uri): - if ns_uri: - ns_utf = _utf8(ns_uri) - else: - ns_utf = None - return __NAMESPACE_REGISTRIES[ns_utf] - cdef _find_extensions(namespaces): """Returns a dictionary that maps each namespace in the provided list to a dictionary of name-function mappings defined under that namespace.""" - cdef _NamespaceRegistry registry + cdef tree.PyObject* dict_result extension_dict = {} for ns_uri in namespaces: if ns_uri: ns_utf = _utf8(ns_uri) else: ns_utf = None - try: - registry = __FUNCTION_NAMESPACE_REGISTRIES[ns_uri] - except KeyError: + dict_result = tree.PyDict_GetItemString( + __FUNCTION_NAMESPACE_REGISTRIES, ns_utf) + if dict_result is NULL: continue - extensions = registry._extensions + extensions = (<_NamespaceRegistry>dict_result)._extensions if extensions: extension_dict[ns_utf] = extensions return extension_dict cdef object _find_element_class(char* c_namespace_utf, char* c_element_name_utf): + cdef tree.PyObject* dict_result cdef _NamespaceRegistry registry - element_name_utf = c_element_name_utf - if c_namespace_utf == NULL: - if element_name_utf[:1] == '{': - namespace_utf, element_name_utf = element_name_utf[1:].split('}', 1) - else: - namespace_utf = None + cdef object result + if c_namespace_utf is not NULL: + dict_result = tree.PyDict_GetItemString(__NAMESPACE_REGISTRIES, c_namespace_utf) else: - namespace_utf = c_namespace_utf - - try: - registry = __NAMESPACE_REGISTRIES[namespace_utf] - except KeyError: + dict_result = tree.PyDict_GetItem(__NAMESPACE_REGISTRIES, None) + if dict_result is NULL: return _Element + + registry = <_NamespaceRegistry>dict_result classes = registry._classes - try: - return classes[element_name_utf] - except KeyError: - pass - try: - return classes[None] - except KeyError: - return _Element + if c_element_name_utf is not NULL: + dict_result = tree.PyDict_GetItemString(classes, 
c_element_name_utf) + else: + dict_result = NULL + + if dict_result is NULL: + dict_result = tree.PyDict_GetItem(classes, None) + if dict_result is not NULL: + return dict_result + else: + return _Element From scoder at codespeak.net Thu Mar 9 08:06:54 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 9 08:06:56 2006 Subject: [Lxml-checkins] r24143 - lxml/branch/scoder2/src/lxml Message-ID: <20060309070654.B719E1008E@code0.codespeak.net> Author: scoder Date: Thu Mar 9 08:06:53 2006 New Revision: 24143 Modified: lxml/branch/scoder2/src/lxml/nsclasses.pxi lxml/branch/scoder2/src/lxml/tree.pxd Log: some more clean ups and C-ifications in nsclasses Modified: lxml/branch/scoder2/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/nsclasses.pxi (original) +++ lxml/branch/scoder2/src/lxml/nsclasses.pxi Thu Mar 9 08:06:53 2006 @@ -165,19 +165,23 @@ """Returns a dictionary that maps each namespace in the provided list to a dictionary of name-function mappings defined under that namespace.""" cdef tree.PyObject* dict_result + cdef char* c_ns_utf extension_dict = {} for ns_uri in namespaces: - if ns_uri: - ns_utf = _utf8(ns_uri) + if ns_uri is None: + dict_result = tree.PyDict_GetItem( + __FUNCTION_NAMESPACE_REGISTRIES, None) else: - ns_utf = None - dict_result = tree.PyDict_GetItemString( - __FUNCTION_NAMESPACE_REGISTRIES, ns_utf) + ns_utf = _utf8(ns_uri) + c_ns_utf = ns_utf + dict_result = tree.PyDict_GetItemString( + __FUNCTION_NAMESPACE_REGISTRIES, c_ns_utf) if dict_result is NULL: continue extensions = (<_NamespaceRegistry>dict_result)._extensions if extensions: - extension_dict[ns_utf] = extensions + tree.PyDict_SetItemString( + extension_dict, c_ns_utf, extensions) return extension_dict cdef object _find_element_class(char* c_namespace_utf, @@ -186,9 +190,11 @@ cdef _NamespaceRegistry registry cdef object result if c_namespace_utf is not NULL: - dict_result = 
tree.PyDict_GetItemString(__NAMESPACE_REGISTRIES, c_namespace_utf) + dict_result = tree.PyDict_GetItemString( + __NAMESPACE_REGISTRIES, c_namespace_utf) else: - dict_result = tree.PyDict_GetItem(__NAMESPACE_REGISTRIES, None) + dict_result = tree.PyDict_GetItem( + __NAMESPACE_REGISTRIES, None) if dict_result is NULL: return _Element @@ -196,7 +202,8 @@ classes = registry._classes if c_element_name_utf is not NULL: - dict_result = tree.PyDict_GetItemString(classes, c_element_name_utf) + dict_result = tree.PyDict_GetItemString( + classes, c_element_name_utf) else: dict_result = NULL Modified: lxml/branch/scoder2/src/lxml/tree.pxd ============================================================================== --- lxml/branch/scoder2/src/lxml/tree.pxd (original) +++ lxml/branch/scoder2/src/lxml/tree.pxd Thu Mar 9 08:06:53 2006 @@ -18,8 +18,9 @@ cdef object PyString_FromStringAndSize(char* s, int size) cdef object PyString_FromString(char* s) cdef int PyList_Append(object l, object obj) - cdef PyObject* PyDict_GetItem(object d, object key) + cdef int PyDict_SetItemString(object d, char* key, object value) cdef PyObject* PyDict_GetItemString(object d, char* key) + cdef PyObject* PyDict_GetItem(object d, object key) cdef extern from "libxml/encoding.h": ctypedef struct xmlCharEncodingHandler From scoder at codespeak.net Thu Mar 9 08:09:58 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 9 08:10:00 2006 Subject: [Lxml-checkins] r24144 - lxml/trunk/src/lxml Message-ID: <20060309070958.DBDFD1008E@code0.codespeak.net> Author: scoder Date: Thu Mar 9 08:09:57 2006 New Revision: 24144 Modified: lxml/trunk/src/lxml/nsclasses.pxi lxml/trunk/src/lxml/tree.pxd Log: merged in clean ups from scoder2 branch Modified: lxml/trunk/src/lxml/nsclasses.pxi ============================================================================== --- lxml/trunk/src/lxml/nsclasses.pxi (original) +++ lxml/trunk/src/lxml/nsclasses.pxi Thu Mar 9 08:09:57 2006 @@ -97,9 +97,11 @@ cdef 
_NamespaceRegistry registry cdef object result if c_namespace_utf is not NULL: - dict_result = tree.PyDict_GetItemString(__NAMESPACE_CLASSES, c_namespace_utf) + dict_result = tree.PyDict_GetItemString( + __NAMESPACE_CLASSES, c_namespace_utf) else: - dict_result = tree.PyDict_GetItem(__NAMESPACE_CLASSES, None) + dict_result = tree.PyDict_GetItem( + __NAMESPACE_CLASSES, None) if dict_result is NULL: return _Element @@ -107,7 +109,8 @@ classes = registry._classes if c_element_name_utf is not NULL: - dict_result = tree.PyDict_GetItemString(classes, c_element_name_utf) + dict_result = tree.PyDict_GetItemString( + classes, c_element_name_utf) else: dict_result = NULL Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Thu Mar 9 08:09:57 2006 @@ -18,8 +18,9 @@ cdef object PyString_FromStringAndSize(char* s, int size) cdef object PyString_FromString(char* s) cdef int PyList_Append(object l, object obj) - cdef PyObject* PyDict_GetItem(object d, object key) + cdef int PyDict_SetItemString(object d, char* key, object value) cdef PyObject* PyDict_GetItemString(object d, char* key) + cdef PyObject* PyDict_GetItem(object d, object key) cdef extern from "libxml/encoding.h": ctypedef struct xmlCharEncodingHandler From scoder at codespeak.net Thu Mar 9 09:32:17 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 9 09:32:18 2006 Subject: [Lxml-checkins] r24146 - lxml/trunk Message-ID: <20060309083217.DAE8F100AB@code0.codespeak.net> Author: scoder Date: Thu Mar 9 09:32:16 2006 New Revision: 24146 Modified: lxml/trunk/bench.py Log: new benchmarks: deepcopy, get_children_slice Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Thu Mar 9 09:32:16 2006 @@ -212,6 +212,19 @@ for child in root: child.getchildren() + 
def bench_get_children_slice(self, root): + for child in root: + child[:] + + def bench_get_children_slice_2x(self, root): + for child in root: + children = child[:] + child[:] + + def bench_deepcopy(self, root): + for child in root: + copy.deepcopy(child) + ############################################################ # Main program ############################################################ From scoder at codespeak.net Thu Mar 9 09:33:27 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 9 09:33:28 2006 Subject: [Lxml-checkins] r24147 - lxml/branch/scoder2 Message-ID: <20060309083327.D5F11100AB@code0.codespeak.net> Author: scoder Date: Thu Mar 9 09:33:26 2006 New Revision: 24147 Modified: lxml/branch/scoder2/bench.py Log: merged in new benchmarks from trunk Modified: lxml/branch/scoder2/bench.py ============================================================================== --- lxml/branch/scoder2/bench.py (original) +++ lxml/branch/scoder2/bench.py Thu Mar 9 09:33:26 2006 @@ -212,6 +212,19 @@ for child in root: child.getchildren() + def bench_get_children_slice(self, root): + for child in root: + child[:] + + def bench_get_children_slice_2x(self, root): + for child in root: + children = child[:] + child[:] + + def bench_deepcopy(self, root): + for child in root: + copy.deepcopy(child) + ############################################################ # Main program ############################################################ From scoder at codespeak.net Thu Mar 9 11:01:53 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 9 11:01:54 2006 Subject: [Lxml-checkins] r24157 - lxml/trunk/src/lxml/tests Message-ID: <20060309100153.4C88C100B0@code0.codespeak.net> Author: scoder Date: Thu Mar 9 11:01:52 2006 New Revision: 24157 Modified: lxml/trunk/src/lxml/tests/test_unicode.py Log: test unicode namespace Modified: lxml/trunk/src/lxml/tests/test_unicode.py ============================================================================== 
--- lxml/trunk/src/lxml/tests/test_unicode.py (original) +++ lxml/trunk/src/lxml/tests/test_unicode.py Thu Mar 9 11:01:52 2006 @@ -6,7 +6,7 @@ ascii_uni = u'a' -uni = u'?' +uni = u'?\uF8D2' # klingon etc. class UnicodeTestCase(unittest.TestCase): def test_unicode_xml(self): @@ -17,6 +17,11 @@ el = etree.Element(uni) self.assertEquals(uni, el.tag) + def test_unicode_nstag(self): + tag = u"{%s}%s" % (uni, uni) + el = etree.Element(tag) + self.assertEquals(tag, el.tag) + def test_unicode_attr(self): el = etree.Element('foo', {'bar': uni}) self.assertEquals(uni, el.attrib['bar']) From scoder at codespeak.net Thu Mar 9 11:02:08 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 9 11:02:10 2006 Subject: [Lxml-checkins] r24158 - lxml/trunk Message-ID: <20060309100208.C3CAC100B0@code0.codespeak.net> Author: scoder Date: Thu Mar 9 11:02:07 2006 New Revision: 24158 Modified: lxml/trunk/bench.py Log: benchmark for element.tag Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Thu Mar 9 11:02:07 2006 @@ -225,6 +225,10 @@ for child in root: copy.deepcopy(child) + def bench_tag(self, root): + for child in root: + child.tag + ############################################################ # Main program ############################################################ From scoder at codespeak.net Thu Mar 9 11:16:24 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 9 11:16:25 2006 Subject: [Lxml-checkins] r24159 - lxml/trunk/src/lxml Message-ID: <20060309101624.67636100B5@code0.codespeak.net> Author: scoder Date: Thu Mar 9 11:16:23 2006 New Revision: 24159 Modified: lxml/trunk/src/lxml/etree.pyx Log: faster rewrite of _namespacedName Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu 
Mar 9 11:16:23 2006 @@ -1415,12 +1415,19 @@ raise TypeError, "Argument must be string or unicode." cdef object _namespacedName(xmlNode* c_node): + cdef char* href + cdef char* name + cdef object s + name = c_node.name if c_node.ns is NULL or c_node.ns.href is NULL: - return funicode(c_node.name) + return funicode(name) else: - # XXX optimize - s = "{%s}%s" % (c_node.ns.href, c_node.name) - return funicode(s) + href = c_node.ns.href + s = tree.PyString_FromFormat("{%s}%s", href, name) + if isutf8(href) or isutf8(name): + return unicode(s, 'UTF-8') + else: + return s def _getFilenameForFile(source): """Given a Python File or Gzip object, give filename back. From scoder at codespeak.net Thu Mar 9 11:16:47 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 9 11:16:48 2006 Subject: [Lxml-checkins] r24160 - lxml/trunk/src/lxml Message-ID: <20060309101647.426C8100B5@code0.codespeak.net> Author: scoder Date: Thu Mar 9 11:16:46 2006 New Revision: 24160 Modified: lxml/trunk/src/lxml/etree.pyx Log: raise assertion if non-ascii 8-bit strings are passed to API functions Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 9 11:16:46 2006 @@ -1408,6 +1408,7 @@ cdef object _utf8(object s): if tree.PyString_Check(s): + assert not isutf8(s), "All strings must be Unicode or ASCII" return s elif tree.PyUnicode_Check(s): return tree.PyUnicode_AsUTF8String(s) From scoder at codespeak.net Thu Mar 9 11:46:00 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 9 11:46:01 2006 Subject: [Lxml-checkins] r24163 - lxml/trunk/src/lxml Message-ID: <20060309104600.034BF10089@code0.codespeak.net> Author: scoder Date: Thu Mar 9 11:45:59 2006 New Revision: 24163 Modified: lxml/trunk/src/lxml/tree.pxd Log: forgot to commit function declaration of PyString_FromFormat Modified: lxml/trunk/src/lxml/tree.pxd 
============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Thu Mar 9 11:45:59 2006 @@ -21,6 +21,7 @@ cdef int PyDict_SetItemString(object d, char* key, object value) cdef PyObject* PyDict_GetItemString(object d, char* key) cdef PyObject* PyDict_GetItem(object d, object key) + cdef object PyString_FromFormat(char* format, ...) cdef extern from "libxml/encoding.h": ctypedef struct xmlCharEncodingHandler From scoder at codespeak.net Thu Mar 9 11:49:07 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 9 11:49:09 2006 Subject: [Lxml-checkins] r24164 - lxml/trunk/src/lxml Message-ID: <20060309104907.4BA0F10089@code0.codespeak.net> Author: scoder Date: Thu Mar 9 11:49:06 2006 New Revision: 24164 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tree.pxd Log: faster implementation of _getNsTag() Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 9 11:49:06 2006 @@ -1380,17 +1380,6 @@ c_node = c_next return c_node -def _getNsTag(tag): - """Given a tag, find namespace URI and tag name. - Return None for NS uri if no namespace URI available. - """ - tag = _utf8(tag) - if tag[0] == '{': - i = tag.find('}') - assert i != -1 - return tag[1:i], tag[i + 1:] - return None, tag - cdef int isutf8(char* string): cdef int i i = 0 @@ -1415,6 +1404,27 @@ else: raise TypeError, "Argument must be string or unicode." +def _getNsTag(tag): + """Given a tag, find namespace URI and tag name. + Return None for NS uri if no namespace URI available. 
+ """ + cdef char* c_tag + cdef char* c_pos + cdef int nslen + tag = _utf8(tag) + c_tag = tag + if c_tag[0] == c'{': + c_pos = tree.xmlStrchr(c_tag+1, c'}') + if c_pos is NULL: + raise ValueError, "Invalid tag name" + nslen = c_pos - c_tag - 1 + ns = tree.PyString_FromStringAndSize(c_tag+1, nslen) + c_tag = c_pos + 1 + else: + ns = None + tag = tree.PyString_FromString(c_tag) + return ns, tag + cdef object _namespacedName(xmlNode* c_node): cdef char* href cdef char* name Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Thu Mar 9 11:49:06 2006 @@ -187,3 +187,4 @@ cdef extern from "libxml/xmlstring.h": cdef char* xmlStrdup(char* cur) + cdef char* xmlStrchr(char* cur, char value) From scoder at codespeak.net Thu Mar 9 11:51:15 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 9 11:51:17 2006 Subject: [Lxml-checkins] r24166 - in lxml/branch/scoder2: . 
src/lxml src/lxml/tests Message-ID: <20060309105115.7F6A4100AB@code0.codespeak.net> Author: scoder Date: Thu Mar 9 11:51:13 2006 New Revision: 24166 Modified: lxml/branch/scoder2/bench.py lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/tests/test_unicode.py lxml/branch/scoder2/src/lxml/tree.pxd Log: loads of merges from trunk Modified: lxml/branch/scoder2/bench.py ============================================================================== --- lxml/branch/scoder2/bench.py (original) +++ lxml/branch/scoder2/bench.py Thu Mar 9 11:51:13 2006 @@ -225,6 +225,10 @@ for child in root: copy.deepcopy(child) + def bench_tag(self, root): + for child in root: + child.tag + ############################################################ # Main program ############################################################ Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Thu Mar 9 11:51:13 2006 @@ -1380,17 +1380,6 @@ c_node = c_next return c_node -def _getNsTag(tag): - """Given a tag, find namespace URI and tag name. - Return None for NS uri if no namespace URI available. - """ - tag = _utf8(tag) - if tag[0] == '{': - i = tag.find('}') - assert i != -1 - return tag[1:i], tag[i + 1:] - return None, tag - cdef int isutf8(char* string): cdef int i i = 0 @@ -1408,19 +1397,48 @@ cdef object _utf8(object s): if tree.PyString_Check(s): + assert not isutf8(s), "All strings must be Unicode or ASCII" return s elif tree.PyUnicode_Check(s): return tree.PyUnicode_AsUTF8String(s) else: raise TypeError, "Argument must be string or unicode." +def _getNsTag(tag): + """Given a tag, find namespace URI and tag name. + Return None for NS uri if no namespace URI available. 
+ """ + cdef char* c_tag + cdef char* c_pos + cdef int nslen + tag = _utf8(tag) + c_tag = tag + if c_tag[0] == c'{': + c_pos = tree.xmlStrchr(c_tag+1, c'}') + if c_pos is NULL: + raise ValueError, "Invalid tag name" + nslen = c_pos - c_tag - 1 + ns = tree.PyString_FromStringAndSize(c_tag+1, nslen) + c_tag = c_pos + 1 + else: + ns = None + tag = tree.PyString_FromString(c_tag) + return ns, tag + cdef object _namespacedName(xmlNode* c_node): + cdef char* href + cdef char* name + cdef object s + name = c_node.name if c_node.ns is NULL or c_node.ns.href is NULL: - return funicode(c_node.name) + return funicode(name) else: - # XXX optimize - s = "{%s}%s" % (c_node.ns.href, c_node.name) - return funicode(s) + href = c_node.ns.href + s = tree.PyString_FromFormat("{%s}%s", href, name) + if isutf8(href) or isutf8(name): + return unicode(s, 'UTF-8') + else: + return s def _getFilenameForFile(source): """Given a Python File or Gzip object, give filename back. Modified: lxml/branch/scoder2/src/lxml/tests/test_unicode.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_unicode.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_unicode.py Thu Mar 9 11:51:13 2006 @@ -6,7 +6,7 @@ ascii_uni = u'a' -uni = u'?' +uni = u'?\uF8D2' # klingon etc. 
class UnicodeTestCase(unittest.TestCase): def test_unicode_xml(self): @@ -17,6 +17,11 @@ el = etree.Element(uni) self.assertEquals(uni, el.tag) + def test_unicode_nstag(self): + tag = u"{%s}%s" % (uni, uni) + el = etree.Element(tag) + self.assertEquals(tag, el.tag) + def test_unicode_attr(self): el = etree.Element('foo', {'bar': uni}) self.assertEquals(uni, el.attrib['bar']) Modified: lxml/branch/scoder2/src/lxml/tree.pxd ============================================================================== --- lxml/branch/scoder2/src/lxml/tree.pxd (original) +++ lxml/branch/scoder2/src/lxml/tree.pxd Thu Mar 9 11:51:13 2006 @@ -21,6 +21,7 @@ cdef int PyDict_SetItemString(object d, char* key, object value) cdef PyObject* PyDict_GetItemString(object d, char* key) cdef PyObject* PyDict_GetItem(object d, object key) + cdef object PyString_FromFormat(char* format, ...) cdef extern from "libxml/encoding.h": ctypedef struct xmlCharEncodingHandler @@ -186,3 +187,4 @@ cdef extern from "libxml/xmlstring.h": cdef char* xmlStrdup(char* cur) + cdef char* xmlStrchr(char* cur, char value) From scoder at codespeak.net Fri Mar 10 09:51:48 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 10 09:51:49 2006 Subject: [Lxml-checkins] r24203 - lxml/trunk Message-ID: <20060310085148.CD804100BC@code0.codespeak.net> Author: scoder Date: Fri Mar 10 09:51:47 2006 New Revision: 24203 Modified: lxml/trunk/bench.py Log: partial rewrite to support text and attributes as selected by decorators, faster cloning of ElementTrees as suggested by FL Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Fri Mar 10 09:51:47 2006 @@ -1,30 +1,113 @@ import sys, string, time, copy, gc from itertools import * +_TEXT = "some ASCII text" +_UTEXT = u"some klingon: \F8D2" +_ATTRIBUTES = { + '{attr}test' : _UTEXT, + 'bla' : _TEXT + } + +def with_attributes(use_attributes): + "Decorator 
for benchmarks that use attributes" + value = {False : 0, True : 1}[ bool(use_attributes) ] + def set_value(function): + try: + function.ATTRIBUTES.add(value) + except AttributeError: + function.ATTRIBUTES = set([value]) + return function + return set_value + +def with_text(no_text=False, text=False, utext=False): + "Decorator for benchmarks that use text" + values = [] + if no_text: + values.append(0) + if text: + values.append(1) + if utext: + values.append(2) + def set_value(function): + try: + function.TEXT.add(values) + except AttributeError: + function.TEXT = set(values) + return function + return set_value + + class BenchMarkBase(object): atoz = string.ascii_lowercase + _LIB_NAME_MAP = { + 'etree' : 'lxe', + 'ElementTree' : 'ET', + 'cElementTree' : 'cET' + } + def __init__(self, etree): self.etree = etree - self.lib_name = etree.__name__.split('.')[-1] + libname = etree.__name__.split('.')[-1] + self.lib_name = self._LIB_NAME_MAP.get(libname, libname) - self.setup_times = times = [] + if libname == 'etree': + deepcopy = copy.deepcopy + def set_property(root, fname): + setattr(self, fname, lambda : deepcopy(root)) + else: + def set_property(root, fname): + setattr(self, fname, self.et_make_factory(root)) + + attribute_list = list(izip(count(), ({}, _ATTRIBUTES))) + text_list = list(izip(count(), (None, _TEXT, _UTEXT))) + build_name = self._tree_builder_name + + self.setup_times = [] for tree in self._all_trees(): + times = [] + self.setup_times.append(times) setup = getattr(self, '_setup_tree%d' % tree) - root, t = setup() - times.append(t) - setattr(self, '__root%d' % tree, root) - def set_property(root): - setattr(self, '_root%d' % tree, - lambda : copy.deepcopy(root)) - set_property(root) - - def setup(self, trees=()): - if not trees: - trees = self._all_trees() - - for tree in trees: - set_property( getattr(self, '__root%d' % tree) ) + for an, attributes in attribute_list: + for tn, text in text_list: + root, t = setup(text, attributes) + times.append(t) 
+ set_property(root, build_name(tree, tn, an)) + + def _tree_builder_name(self, tree, tn, an): + return '_root%d_T%d_A%d' % (tree, tn, an) + + def tree_builder(self, tree, tn, an): + return getattr(self, self._tree_builder_name(tree, tn, an)) + + def et_make_factory(self, elem): + def generate_elem(append, elem, level): + var = "e" + str(level) + arg = repr(elem.tag) + if elem.attrib: + arg += ", **%r" % elem.attrib + if level == 1: + append(" e1 = Element(%s)" % arg) + else: + append(" %s = SubElement(e%d, %s)" % (var, level-1, arg)) + if elem.text: + append(" %s.text = %r" % (var, elem.text)) + if elem.tail: + append(" %s.tail = %r" % (var, elem.tail)) + for e in elem: + generate_elem(append, e, level+1) + # generate code for a function that creates a tree + output = ["def element_factory():"] + generate_elem(output.append, elem, 1) + output.append(" return e1") + # setup global function namespace + namespace = { + "Element" : self.etree.Element, + "SubElement" : self.etree.SubElement + } + # create function object + exec "\n".join(output) in namespace + return namespace["element_factory"] def _all_trees(self): all_trees = [] @@ -33,7 +116,7 @@ all_trees.append(int(name[11:])) return all_trees - def _setup_tree1(self): + def _setup_tree1(self, text, attributes): "tree with 26 2nd level and 520 3rd level children" atoz = self.atoz SubElement = self.etree.SubElement @@ -41,14 +124,14 @@ t = current_time() root = self.etree.Element('{a}root') for ch1 in atoz: - el = SubElement(root, "{b}"+ch1) + el = SubElement(root, "{b}"+ch1, attributes) for ch2 in atoz: for i in range(20): SubElement(el, "{c}%s%03d" % (ch2, i)) t = current_time() - t return (root, t) - def _setup_tree2(self): + def _setup_tree2(self, text, attributes): "tree with 520 2nd level and 26 3rd level children" atoz = self.atoz SubElement = self.etree.SubElement @@ -57,13 +140,13 @@ root = self.etree.Element('{x}root') for ch1 in atoz: for i in range(20): - el = SubElement(root, "{y}%s%03d" % (ch1, i)) + 
el = SubElement(root, "{y}%s%03d" % (ch1, i), attributes) for ch2 in atoz: SubElement(el, "{z}"+ch2) t = current_time() - t return (root, t) - def _setup_tree3(self): + def _setup_tree3(self, text, attributes): "tree of depth 8 with 3 children per node" SubElement = self.etree.SubElement current_time = time.time @@ -72,12 +155,12 @@ children = [root] for i in range(7): tag_no = count().next - children = [ SubElement(c, "{y}z%d" % i) + children = [ SubElement(c, "{y}z%d" % i, attributes) for i,c in enumerate(chain(children, children, children)) ] t = current_time() - t return (root, t) - def _setup_tree4(self): + def _setup_tree4(self, text, attributes): "small tree with 26 2nd level and 2 3rd level children" atoz = self.atoz SubElement = self.etree.SubElement @@ -86,9 +169,9 @@ root = self.etree.Element('{x}root') children = [root] for ch1 in atoz: - el = SubElement(root, "{b}"+ch1) - SubElement(el, "{c}a") - SubElement(el, "{c}b") + el = SubElement(root, "{b}"+ch1, attributes) + SubElement(el, "{c}a", attributes) + SubElement(el, "{c}b", attributes) t = current_time() - t return (root, t) @@ -109,16 +192,21 @@ else: tree_sets = () if tree_sets: - for tree_set in tree_sets: - benchmarks.append((name, map(int, tree_set.split(',')))) + tree_tuples = [ map(int, tree_set.split(',')) + for tree_set in tree_sets ] else: try: function = getattr(method, 'im_func', method) arg_count = method.func_code.co_argcount - 1 except AttributeError: arg_count = 1 - for trees in self._permutations(all_trees, arg_count): - benchmarks.append((name, trees)) + tree_tuples = self._permutations(all_trees, arg_count) + + for tree_tuple in tree_tuples: + for tn in sorted(getattr(method, 'TEXT', (0,))): + for an in sorted(getattr(method, 'ATTRIBUTES', (0,))): + benchmarks.append((name, method, tree_tuple, tn, an)) + return benchmarks def _permutations(self, seq, count): @@ -202,6 +290,13 @@ for child in root: child.set('a', 'bla') + @with_attributes(True) + def bench_get_attributes(self, 
root): + for child in root: + child.set('a', 'bla') + for child in root: + child.get('a') + def bench_setget_attributes(self, root): for child in root: child.set('a', 'bla') @@ -229,6 +324,11 @@ for child in root: child.tag + @with_text(utext=True, text=True, no_text=True) + def bench_text(self, root): + for child in root: + child.text + ############################################################ # Main program ############################################################ @@ -279,12 +379,11 @@ for bs in benchmarks ] import time - def run_bench(suite, method_name, tree_set): + def run_bench(suite, method_name, method_call, tree_set, tn, an): current_time = time.time call_repeat = range(10) - call = getattr(suite, method_name) - tree_builders = [ getattr(suite, '_root%d' % tree) + tree_builders = [ suite.tree_builder(tree, tn, an) for tree in tree_set ] times = [] @@ -295,13 +394,18 @@ for i in call_repeat: args = [ build() for build in tree_builders ] t_one_call = current_time() - call(*args) + method_call(*args) t += current_time() - t_one_call t = 1000.0 * t / len(call_repeat) times.append(t) gc.enable() return times + def build_treeset_name(trees, tn, an): + text = {0:'-', 1:'S', 2:'U'}[tn] + attr = {0:'-', 1:'A'}[an] + return "%s%s T%s" % (text, attr, ',T'.join(imap(str, trees))[:6]) + print "Running benchmark on", ', '.join(b.lib_name for b in benchmark_suites) @@ -309,19 +413,23 @@ print "Setup times for trees in seconds:" for b in benchmark_suites: - print "%-12s : " % b.lib_name, ', '.join("%9.4f" % t - for t in b.setup_times) + print "%-3s: " % b.lib_name, + for an in (0,1): + for tn in (0,1,2): + print ' %s ' % build_treeset_name((), tn, an)[:2], + print + for i, tree_times in enumerate(b.setup_times): + print " T%d:" % (i+1), ' '.join("%6.4f" % t for t in tree_times) print for bench_calls in izip(*benchmarks): - for lib, config in enumerate(izip(benchmark_suites, bench_calls)): - bench, (bench_name, tree_set) = config - - print "%-12s %-25s (T%-6s)" % 
(bench.lib_name, bench_name[6:], - ',T'.join(imap(str, tree_set))[:6]), + for lib, (bench, benchmark_setup) in enumerate(izip(benchmark_suites, bench_calls)): + bench_name = benchmark_setup[0] + tree_set_name = build_treeset_name(*benchmark_setup[-3:]) + print "%-3s: %-22s (%-10s)" % (bench.lib_name, bench_name[6:29], tree_set_name), sys.stdout.flush() - result = run_bench(bench, bench_name, tree_set) + result = run_bench(bench, *benchmark_setup) for t in result: print "%9.4f" % t, From scoder at codespeak.net Fri Mar 10 09:54:47 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 10 09:54:49 2006 Subject: [Lxml-checkins] r24204 - lxml/branch/scoder2 Message-ID: <20060310085447.5B5AD100BC@code0.codespeak.net> Author: scoder Date: Fri Mar 10 09:54:45 2006 New Revision: 24204 Modified: lxml/branch/scoder2/bench.py Log: merged in benchmark updates from trunk Modified: lxml/branch/scoder2/bench.py ============================================================================== --- lxml/branch/scoder2/bench.py (original) +++ lxml/branch/scoder2/bench.py Fri Mar 10 09:54:45 2006 @@ -1,30 +1,113 @@ import sys, string, time, copy, gc from itertools import * +_TEXT = "some ASCII text" +_UTEXT = u"some klingon: \F8D2" +_ATTRIBUTES = { + '{attr}test' : _UTEXT, + 'bla' : _TEXT + } + +def with_attributes(use_attributes): + "Decorator for benchmarks that use attributes" + value = {False : 0, True : 1}[ bool(use_attributes) ] + def set_value(function): + try: + function.ATTRIBUTES.add(value) + except AttributeError: + function.ATTRIBUTES = set([value]) + return function + return set_value + +def with_text(no_text=False, text=False, utext=False): + "Decorator for benchmarks that use text" + values = [] + if no_text: + values.append(0) + if text: + values.append(1) + if utext: + values.append(2) + def set_value(function): + try: + function.TEXT.add(values) + except AttributeError: + function.TEXT = set(values) + return function + return set_value + + class 
BenchMarkBase(object): atoz = string.ascii_lowercase + _LIB_NAME_MAP = { + 'etree' : 'lxe', + 'ElementTree' : 'ET', + 'cElementTree' : 'cET' + } + def __init__(self, etree): self.etree = etree - self.lib_name = etree.__name__.split('.')[-1] + libname = etree.__name__.split('.')[-1] + self.lib_name = self._LIB_NAME_MAP.get(libname, libname) - self.setup_times = times = [] + if libname == 'etree': + deepcopy = copy.deepcopy + def set_property(root, fname): + setattr(self, fname, lambda : deepcopy(root)) + else: + def set_property(root, fname): + setattr(self, fname, self.et_make_factory(root)) + + attribute_list = list(izip(count(), ({}, _ATTRIBUTES))) + text_list = list(izip(count(), (None, _TEXT, _UTEXT))) + build_name = self._tree_builder_name + + self.setup_times = [] for tree in self._all_trees(): + times = [] + self.setup_times.append(times) setup = getattr(self, '_setup_tree%d' % tree) - root, t = setup() - times.append(t) - setattr(self, '__root%d' % tree, root) - def set_property(root): - setattr(self, '_root%d' % tree, - lambda : copy.deepcopy(root)) - set_property(root) - - def setup(self, trees=()): - if not trees: - trees = self._all_trees() - - for tree in trees: - set_property( getattr(self, '__root%d' % tree) ) + for an, attributes in attribute_list: + for tn, text in text_list: + root, t = setup(text, attributes) + times.append(t) + set_property(root, build_name(tree, tn, an)) + + def _tree_builder_name(self, tree, tn, an): + return '_root%d_T%d_A%d' % (tree, tn, an) + + def tree_builder(self, tree, tn, an): + return getattr(self, self._tree_builder_name(tree, tn, an)) + + def et_make_factory(self, elem): + def generate_elem(append, elem, level): + var = "e" + str(level) + arg = repr(elem.tag) + if elem.attrib: + arg += ", **%r" % elem.attrib + if level == 1: + append(" e1 = Element(%s)" % arg) + else: + append(" %s = SubElement(e%d, %s)" % (var, level-1, arg)) + if elem.text: + append(" %s.text = %r" % (var, elem.text)) + if elem.tail: + append(" 
%s.tail = %r" % (var, elem.tail)) + for e in elem: + generate_elem(append, e, level+1) + # generate code for a function that creates a tree + output = ["def element_factory():"] + generate_elem(output.append, elem, 1) + output.append(" return e1") + # setup global function namespace + namespace = { + "Element" : self.etree.Element, + "SubElement" : self.etree.SubElement + } + # create function object + exec "\n".join(output) in namespace + return namespace["element_factory"] def _all_trees(self): all_trees = [] @@ -33,7 +116,7 @@ all_trees.append(int(name[11:])) return all_trees - def _setup_tree1(self): + def _setup_tree1(self, text, attributes): "tree with 26 2nd level and 520 3rd level children" atoz = self.atoz SubElement = self.etree.SubElement @@ -41,14 +124,14 @@ t = current_time() root = self.etree.Element('{a}root') for ch1 in atoz: - el = SubElement(root, "{b}"+ch1) + el = SubElement(root, "{b}"+ch1, attributes) for ch2 in atoz: for i in range(20): SubElement(el, "{c}%s%03d" % (ch2, i)) t = current_time() - t return (root, t) - def _setup_tree2(self): + def _setup_tree2(self, text, attributes): "tree with 520 2nd level and 26 3rd level children" atoz = self.atoz SubElement = self.etree.SubElement @@ -57,13 +140,13 @@ root = self.etree.Element('{x}root') for ch1 in atoz: for i in range(20): - el = SubElement(root, "{y}%s%03d" % (ch1, i)) + el = SubElement(root, "{y}%s%03d" % (ch1, i), attributes) for ch2 in atoz: SubElement(el, "{z}"+ch2) t = current_time() - t return (root, t) - def _setup_tree3(self): + def _setup_tree3(self, text, attributes): "tree of depth 8 with 3 children per node" SubElement = self.etree.SubElement current_time = time.time @@ -72,12 +155,12 @@ children = [root] for i in range(7): tag_no = count().next - children = [ SubElement(c, "{y}z%d" % i) + children = [ SubElement(c, "{y}z%d" % i, attributes) for i,c in enumerate(chain(children, children, children)) ] t = current_time() - t return (root, t) - def _setup_tree4(self): + def 
_setup_tree4(self, text, attributes): "small tree with 26 2nd level and 2 3rd level children" atoz = self.atoz SubElement = self.etree.SubElement @@ -86,9 +169,9 @@ root = self.etree.Element('{x}root') children = [root] for ch1 in atoz: - el = SubElement(root, "{b}"+ch1) - SubElement(el, "{c}a") - SubElement(el, "{c}b") + el = SubElement(root, "{b}"+ch1, attributes) + SubElement(el, "{c}a", attributes) + SubElement(el, "{c}b", attributes) t = current_time() - t return (root, t) @@ -109,16 +192,21 @@ else: tree_sets = () if tree_sets: - for tree_set in tree_sets: - benchmarks.append((name, map(int, tree_set.split(',')))) + tree_tuples = [ map(int, tree_set.split(',')) + for tree_set in tree_sets ] else: try: function = getattr(method, 'im_func', method) arg_count = method.func_code.co_argcount - 1 except AttributeError: arg_count = 1 - for trees in self._permutations(all_trees, arg_count): - benchmarks.append((name, trees)) + tree_tuples = self._permutations(all_trees, arg_count) + + for tree_tuple in tree_tuples: + for tn in sorted(getattr(method, 'TEXT', (0,))): + for an in sorted(getattr(method, 'ATTRIBUTES', (0,))): + benchmarks.append((name, method, tree_tuple, tn, an)) + return benchmarks def _permutations(self, seq, count): @@ -202,6 +290,13 @@ for child in root: child.set('a', 'bla') + @with_attributes(True) + def bench_get_attributes(self, root): + for child in root: + child.set('a', 'bla') + for child in root: + child.get('a') + def bench_setget_attributes(self, root): for child in root: child.set('a', 'bla') @@ -229,6 +324,11 @@ for child in root: child.tag + @with_text(utext=True, text=True, no_text=True) + def bench_text(self, root): + for child in root: + child.text + ############################################################ # Main program ############################################################ @@ -279,12 +379,11 @@ for bs in benchmarks ] import time - def run_bench(suite, method_name, tree_set): + def run_bench(suite, method_name, method_call, 
tree_set, tn, an): current_time = time.time call_repeat = range(10) - call = getattr(suite, method_name) - tree_builders = [ getattr(suite, '_root%d' % tree) + tree_builders = [ suite.tree_builder(tree, tn, an) for tree in tree_set ] times = [] @@ -295,13 +394,18 @@ for i in call_repeat: args = [ build() for build in tree_builders ] t_one_call = current_time() - call(*args) + method_call(*args) t += current_time() - t_one_call t = 1000.0 * t / len(call_repeat) times.append(t) gc.enable() return times + def build_treeset_name(trees, tn, an): + text = {0:'-', 1:'S', 2:'U'}[tn] + attr = {0:'-', 1:'A'}[an] + return "%s%s T%s" % (text, attr, ',T'.join(imap(str, trees))[:6]) + print "Running benchmark on", ', '.join(b.lib_name for b in benchmark_suites) @@ -309,19 +413,23 @@ print "Setup times for trees in seconds:" for b in benchmark_suites: - print "%-12s : " % b.lib_name, ', '.join("%9.4f" % t - for t in b.setup_times) + print "%-3s: " % b.lib_name, + for an in (0,1): + for tn in (0,1,2): + print ' %s ' % build_treeset_name((), tn, an)[:2], + print + for i, tree_times in enumerate(b.setup_times): + print " T%d:" % (i+1), ' '.join("%6.4f" % t for t in tree_times) print for bench_calls in izip(*benchmarks): - for lib, config in enumerate(izip(benchmark_suites, bench_calls)): - bench, (bench_name, tree_set) = config - - print "%-12s %-25s (T%-6s)" % (bench.lib_name, bench_name[6:], - ',T'.join(imap(str, tree_set))[:6]), + for lib, (bench, benchmark_setup) in enumerate(izip(benchmark_suites, bench_calls)): + bench_name = benchmark_setup[0] + tree_set_name = build_treeset_name(*benchmark_setup[-3:]) + print "%-3s: %-22s (%-10s)" % (bench.lib_name, bench_name[6:29], tree_set_name), sys.stdout.flush() - result = run_bench(bench, bench_name, tree_set) + result = run_bench(bench, *benchmark_setup) for t in result: print "%9.4f" % t, From scoder at codespeak.net Fri Mar 10 13:05:14 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 10 13:05:15 2006 Subject: 
[Lxml-checkins] r24213 - lxml/trunk/src/lxml Message-ID: <20060310120514.4DB7A100CE@code0.codespeak.net> Author: scoder Date: Fri Mar 10 13:05:12 2006 New Revision: 24213 Modified: lxml/trunk/src/lxml/tree.pxd Log: clean up Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Fri Mar 10 13:05:12 2006 @@ -17,11 +17,12 @@ cdef object PyUnicode_AsUTF8String(object ustring) cdef object PyString_FromStringAndSize(char* s, int size) cdef object PyString_FromString(char* s) + cdef object PyString_FromFormat(char* format, ...) + cdef int PyList_Append(object l, object obj) cdef int PyDict_SetItemString(object d, char* key, object value) cdef PyObject* PyDict_GetItemString(object d, char* key) cdef PyObject* PyDict_GetItem(object d, object key) - cdef object PyString_FromFormat(char* format, ...) cdef extern from "libxml/encoding.h": ctypedef struct xmlCharEncodingHandler From scoder at codespeak.net Fri Mar 10 13:07:08 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 10 13:07:09 2006 Subject: [Lxml-checkins] r24214 - lxml/trunk Message-ID: <20060310120708.5D90A100CE@code0.codespeak.net> Author: scoder Date: Fri Mar 10 13:07:07 2006 New Revision: 24214 Modified: lxml/trunk/bench.py Log: stronger benchmark for element.text Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Fri Mar 10 13:07:07 2006 @@ -329,6 +329,13 @@ for child in root: child.text + @with_text(utext=True, text=True, no_text=True) + def bench_text_repeat(self, root): + repeat = range(500) + for child in root: + for i in repeat: + child.text + ############################################################ # Main program ############################################################ From scoder at codespeak.net Fri Mar 10 13:35:36 2006 
From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 10 13:35:38 2006 Subject: [Lxml-checkins] r24218 - lxml/trunk/src/lxml Message-ID: <20060310123536.38A7E100CF@code0.codespeak.net> Author: scoder Date: Fri Mar 10 13:35:35 2006 New Revision: 24218 Modified: lxml/trunk/src/lxml/etree.pyx Log: fast C-only path for common cases in _collectText Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Fri Mar 10 13:35:35 2006 @@ -504,7 +504,6 @@ property text: def __get__(self): - cdef xmlNode* c_node return _collectText(self._c_node.children) def __set__(self, value): @@ -525,7 +524,6 @@ property tail: def __get__(self): - cdef xmlNode* c_node return _collectText(self._c_node.next) def __set__(self, value): @@ -1269,14 +1267,31 @@ If there was no text to collect, return None """ + cdef int scount + cdef char* text + cdef xmlNode* c_node_cur + # check for multiple text nodes and count accumulated string length + scount = 0 + text = NULL + c_node_cur = c_node + while c_node_cur is not NULL and c_node_cur.type == tree.XML_TEXT_NODE: + if c_node_cur.content[0] != c'\0': + text = c_node_cur.content + scount = scount + 1 + c_node_cur = c_node_cur.next + + # handle two most common cases first + if text is NULL: + return None + if scount == 1: + return funicode(text) + + # the rest is not performance critical anymore result = '' while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE: result = result + c_node.content c_node = c_node.next - if result: - return funicode(result) - else: - return None + return funicode(result) cdef _removeText(xmlNode* c_node): """Remove all text nodes. From scoder at codespeak.net Fri Mar 10 13:39:43 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 10 13:39:44 2006 Subject: [Lxml-checkins] r24219 - in lxml/branch/scoder2: . 
src/lxml Message-ID: <20060310123943.77036100D3@code0.codespeak.net> Author: scoder Date: Fri Mar 10 13:39:41 2006 New Revision: 24219 Modified: lxml/branch/scoder2/bench.py lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/tree.pxd Log: merges from trunk Modified: lxml/branch/scoder2/bench.py ============================================================================== --- lxml/branch/scoder2/bench.py (original) +++ lxml/branch/scoder2/bench.py Fri Mar 10 13:39:41 2006 @@ -329,6 +329,13 @@ for child in root: child.text + @with_text(utext=True, text=True, no_text=True) + def bench_text_repeat(self, root): + repeat = range(500) + for child in root: + for i in repeat: + child.text + ############################################################ # Main program ############################################################ Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Fri Mar 10 13:39:41 2006 @@ -504,7 +504,6 @@ property text: def __get__(self): - cdef xmlNode* c_node return _collectText(self._c_node.children) def __set__(self, value): @@ -525,7 +524,6 @@ property tail: def __get__(self): - cdef xmlNode* c_node return _collectText(self._c_node.next) def __set__(self, value): @@ -1269,14 +1267,31 @@ If there was no text to collect, return None """ + cdef int scount + cdef char* text + cdef xmlNode* c_node_cur + # check for multiple text nodes and count accumulated string length + scount = 0 + text = NULL + c_node_cur = c_node + while c_node_cur is not NULL and c_node_cur.type == tree.XML_TEXT_NODE: + if c_node_cur.content[0] != c'\0': + text = c_node_cur.content + scount = scount + 1 + c_node_cur = c_node_cur.next + + # handle two most common cases first + if text is NULL: + return None + if scount == 1: + return funicode(text) + + # the rest is not performance critical anymore 
result = '' while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE: result = result + c_node.content c_node = c_node.next - if result: - return funicode(result) - else: - return None + return funicode(result) cdef _removeText(xmlNode* c_node): """Remove all text nodes. Modified: lxml/branch/scoder2/src/lxml/tree.pxd ============================================================================== --- lxml/branch/scoder2/src/lxml/tree.pxd (original) +++ lxml/branch/scoder2/src/lxml/tree.pxd Fri Mar 10 13:39:41 2006 @@ -17,11 +17,12 @@ cdef object PyUnicode_AsUTF8String(object ustring) cdef object PyString_FromStringAndSize(char* s, int size) cdef object PyString_FromString(char* s) + cdef object PyString_FromFormat(char* format, ...) + cdef int PyList_Append(object l, object obj) cdef int PyDict_SetItemString(object d, char* key, object value) cdef PyObject* PyDict_GetItemString(object d, char* key) cdef PyObject* PyDict_GetItem(object d, object key) - cdef object PyString_FromFormat(char* format, ...) 
cdef extern from "libxml/encoding.h": ctypedef struct xmlCharEncodingHandler From scoder at codespeak.net Fri Mar 10 13:48:28 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 10 13:48:28 2006 Subject: [Lxml-checkins] r24222 - lxml/trunk/src/lxml Message-ID: <20060310124828.39182100D4@code0.codespeak.net> Author: scoder Date: Fri Mar 10 13:48:26 2006 New Revision: 24222 Modified: lxml/trunk/src/lxml/etree.pyx Log: fix comment Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Fri Mar 10 13:48:26 2006 @@ -1270,7 +1270,7 @@ cdef int scount cdef char* text cdef xmlNode* c_node_cur - # check for multiple text nodes and count accumulated string length + # check for multiple text nodes scount = 0 text = NULL c_node_cur = c_node From scoder at codespeak.net Fri Mar 10 13:49:14 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 10 13:49:15 2006 Subject: [Lxml-checkins] r24223 - lxml/branch/scoder2/src/lxml Message-ID: <20060310124914.C3F59100D4@code0.codespeak.net> Author: scoder Date: Fri Mar 10 13:49:03 2006 New Revision: 24223 Modified: lxml/branch/scoder2/src/lxml/etree.pyx Log: merges from trunk Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Fri Mar 10 13:49:03 2006 @@ -1270,7 +1270,7 @@ cdef int scount cdef char* text cdef xmlNode* c_node_cur - # check for multiple text nodes and count accumulated string length + # check for multiple text nodes scount = 0 text = NULL c_node_cur = c_node From scoder at codespeak.net Fri Mar 10 13:56:17 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 10 13:56:19 2006 Subject: [Lxml-checkins] r24224 - lxml/pyrex Message-ID: 
<20060310125617.CAEB6100D4@code0.codespeak.net> Author: scoder Date: Fri Mar 10 13:56:16 2006 New Revision: 24224 Added: lxml/pyrex/ Log: import local version of patched Pyrex From scoder at codespeak.net Fri Mar 10 14:02:42 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 10 14:02:43 2006 Subject: [Lxml-checkins] r24226 - in lxml/pyrex: Pyrex Pyrex/Compiler Pyrex/Plex build Message-ID: <20060310130242.BFE41100D4@code0.codespeak.net> Author: scoder Date: Fri Mar 10 14:02:35 2006 New Revision: 24226 Removed: lxml/pyrex/Pyrex/Compiler/Code.pyc lxml/pyrex/Pyrex/Compiler/DebugFlags.pyc lxml/pyrex/Pyrex/Compiler/Errors.pyc lxml/pyrex/Pyrex/Compiler/ExprNodes.pyc lxml/pyrex/Pyrex/Compiler/Lexicon.pyc lxml/pyrex/Pyrex/Compiler/Main.pyc lxml/pyrex/Pyrex/Compiler/Naming.pyc lxml/pyrex/Pyrex/Compiler/Nodes.pyc lxml/pyrex/Pyrex/Compiler/Options.pyc lxml/pyrex/Pyrex/Compiler/Parsing.pyc lxml/pyrex/Pyrex/Compiler/PyrexTypes.pyc lxml/pyrex/Pyrex/Compiler/Scanning.pyc lxml/pyrex/Pyrex/Compiler/Symtab.pyc lxml/pyrex/Pyrex/Compiler/TypeSlots.pyc lxml/pyrex/Pyrex/Compiler/Version.pyc lxml/pyrex/Pyrex/Compiler/__init__.pyc lxml/pyrex/Pyrex/Debugging.pyc lxml/pyrex/Pyrex/Plex/Actions.pyc lxml/pyrex/Pyrex/Plex/DFA.pyc lxml/pyrex/Pyrex/Plex/Errors.pyc lxml/pyrex/Pyrex/Plex/Lexicons.pyc lxml/pyrex/Pyrex/Plex/Machines.pyc lxml/pyrex/Pyrex/Plex/Regexps.pyc lxml/pyrex/Pyrex/Plex/Scanners.pyc lxml/pyrex/Pyrex/Plex/Transitions.pyc lxml/pyrex/Pyrex/Plex/__init__.pyc lxml/pyrex/Pyrex/Utils.pyc lxml/pyrex/Pyrex/__init__.pyc lxml/pyrex/build/ Log: removed accidentally added files Deleted: /lxml/pyrex/Pyrex/Compiler/Code.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Compiler/DebugFlags.pyc ============================================================================== Binary file. No diff available. 
Deleted: /lxml/pyrex/Pyrex/Compiler/Errors.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Compiler/ExprNodes.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Compiler/Lexicon.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Compiler/Main.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Compiler/Naming.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Compiler/Nodes.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Compiler/Options.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Compiler/Parsing.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Compiler/PyrexTypes.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Compiler/Scanning.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Compiler/Symtab.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Compiler/TypeSlots.pyc ============================================================================== Binary file. No diff available. 
Deleted: /lxml/pyrex/Pyrex/Compiler/Version.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Compiler/__init__.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Debugging.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Plex/Actions.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Plex/DFA.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Plex/Errors.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Plex/Lexicons.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Plex/Machines.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Plex/Regexps.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Plex/Scanners.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Plex/Transitions.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Plex/__init__.pyc ============================================================================== Binary file. No diff available. Deleted: /lxml/pyrex/Pyrex/Utils.pyc ============================================================================== Binary file. 
No diff available. Deleted: /lxml/pyrex/Pyrex/__init__.pyc ============================================================================== Binary file. No diff available. From scoder at codespeak.net Fri Mar 10 14:26:47 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 10 14:26:48 2006 Subject: [Lxml-checkins] r24227 - lxml/trunk Message-ID: <20060310132647.F01AE100D6@code0.codespeak.net> Author: scoder Date: Fri Mar 10 14:26:46 2006 New Revision: 24227 Modified: lxml/trunk/INSTALL.txt Log: updated doc on Pyrex problems and downloadable versions from lxml SVN Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Fri Mar 10 14:26:46 2006 @@ -12,11 +12,14 @@ * libxslt 1.1.12 (newer versions should work). It can be found here: http://xmlsoft.org/XSLT/downloads.html -You also need Pyrex (0.9.3) to compile the software. It can be found -here: +You also need Pyrex (0.9.3) to compile the software. The official +homepage can be found here: * http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ +However, see below for an updated version if you have any trouble +using it, especially with GCC 4.x. + You also need Python 2.3 (Python 2.4 also ought to work). Installation @@ -42,10 +45,17 @@ Building lxml with gcc 4.0 -------------------------- -Pyrex 0.9.3 generates C code that gcc 4.0 does not accept. Pending an -official release of a version of Pyrex that does work with gcc 4.0, -here's a patch to Pyrex that makes lxml compile and appear to work -with gcc 4.0: +Pyrex 0.9.3 generates C code that gcc 4.0 does not accept. 
Pending an official +release of a version of Pyrex that does work with gcc 4.0, the lxml project +currently provides an updated version of Pyrex: + +http://codespeak.net/svn/lxml/pyrex/dist/Pyrex-0.9.3.1.tar.gz +http://codespeak.net/svn/lxml/pyrex/dist/Pyrex-0.9.3.1-1.src.rpm + +It is based on Pyrex 0.9.3.1 and contains a number of patches that make lxml +compile and appear to work with gcc 4.0. If you use this version, you can +simply skip the rest of the section. In case you want to apply them yourself, +the first one is: http://codespeak.net/lxml/Pyrex-0.9.3-gcc4.patch @@ -56,7 +66,21 @@ http://codespeak.net/lxml/Pyrex-0.9.3-gcc4-small.patch It may however actually be that at the time you read this, this extra -patch has been applied by the distributions as well. +patch has been applied by the distributions as well. You may still +encounter the following problem when building the extension:: + + TypeError: swig_sources() takes exactly 2 arguments (3 given) + +To fix this, look for the following line in Pyrex/Distutils/build_ext.py +(around line 35):: + + def swig_sources (self, sources): + +and change it to:: + + def swig_sources (self, sources, *otherargs): + +The above install files have these three changes applied. Troubleshooting --------------- From scoder at codespeak.net Fri Mar 10 14:36:27 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 10 14:36:28 2006 Subject: [Lxml-checkins] r24228 - lxml/branch/scoder2 Message-ID: <20060310133627.1940B100D6@code0.codespeak.net> Author: scoder Date: Fri Mar 10 14:36:25 2006 New Revision: 24228 Modified: lxml/branch/scoder2/INSTALL.txt Log: merges from trunk Modified: lxml/branch/scoder2/INSTALL.txt ============================================================================== --- lxml/branch/scoder2/INSTALL.txt (original) +++ lxml/branch/scoder2/INSTALL.txt Fri Mar 10 14:36:25 2006 @@ -12,11 +12,14 @@ * libxslt 1.1.12 (newer versions should work). 
It can be found here: http://xmlsoft.org/XSLT/downloads.html -You also need Pyrex (0.9.3) to compile the software. It can be found -here: +You also need Pyrex (0.9.3) to compile the software. The official +homepage can be found here: * http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ +However, see below for an updated version if you have any trouble +using it, especially with GCC 4.x. + You also need Python 2.3 (Python 2.4 also ought to work). Installation @@ -42,10 +45,17 @@ Building lxml with gcc 4.0 -------------------------- -Pyrex 0.9.3 generates C code that gcc 4.0 does not accept. Pending an -official release of a version of Pyrex that does work with gcc 4.0, -here's a patch to Pyrex that makes lxml compile and appear to work -with gcc 4.0: +Pyrex 0.9.3 generates C code that gcc 4.0 does not accept. Pending an official +release of a version of Pyrex that does work with gcc 4.0, the lxml project +currently provides an updated version of Pyrex: + +http://codespeak.net/svn/lxml/pyrex/dist/Pyrex-0.9.3.1.tar.gz +http://codespeak.net/svn/lxml/pyrex/dist/Pyrex-0.9.3.1-1.src.rpm + +It is based on Pyrex 0.9.3.1 and contains a number of patches that make lxml +compile and appear to work with gcc 4.0. If you use this version, you can +simply skip the rest of the section. In case you want to apply them yourself, +the first one is: http://codespeak.net/lxml/Pyrex-0.9.3-gcc4.patch @@ -56,7 +66,21 @@ http://codespeak.net/lxml/Pyrex-0.9.3-gcc4-small.patch It may however actually be that at the time you read this, this extra -patch has been applied by the distributions as well. +patch has been applied by the distributions as well. 
You may still +encounter the following problem when building the extension:: + + TypeError: swig_sources() takes exactly 2 arguments (3 given) + +To fix this, look for the following line in Pyrex/Distutils/build_ext.py +(around line 35):: + + def swig_sources (self, sources): + +and change it to:: + + def swig_sources (self, sources, *otherargs): + +The above install files have these three changes applied. Troubleshooting --------------- From scoder at codespeak.net Fri Mar 10 15:14:28 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 10 15:14:30 2006 Subject: [Lxml-checkins] r24229 - lxml/trunk Message-ID: <20060310141428.D2007100D7@code0.codespeak.net> Author: scoder Date: Fri Mar 10 15:14:27 2006 New Revision: 24229 Modified: lxml/trunk/INSTALL.txt Log: separate URLs in doc Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Fri Mar 10 15:14:27 2006 @@ -50,6 +50,7 @@ currently provides an updated version of Pyrex: http://codespeak.net/svn/lxml/pyrex/dist/Pyrex-0.9.3.1.tar.gz + http://codespeak.net/svn/lxml/pyrex/dist/Pyrex-0.9.3.1-1.src.rpm It is based on Pyrex 0.9.3.1 and contains a number of patches that make lxml From scoder at codespeak.net Fri Mar 10 15:16:00 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 10 15:16:01 2006 Subject: [Lxml-checkins] r24230 - lxml/branch/scoder2 Message-ID: <20060310141600.908B9100D7@code0.codespeak.net> Author: scoder Date: Fri Mar 10 15:15:59 2006 New Revision: 24230 Modified: lxml/branch/scoder2/INSTALL.txt Log: merges from trunk Modified: lxml/branch/scoder2/INSTALL.txt ============================================================================== --- lxml/branch/scoder2/INSTALL.txt (original) +++ lxml/branch/scoder2/INSTALL.txt Fri Mar 10 15:15:59 2006 @@ -50,6 +50,7 @@ currently provides an updated version of Pyrex: 
http://codespeak.net/svn/lxml/pyrex/dist/Pyrex-0.9.3.1.tar.gz + http://codespeak.net/svn/lxml/pyrex/dist/Pyrex-0.9.3.1-1.src.rpm It is based on Pyrex 0.9.3.1 and contains a number of patches that make lxml From scoder at codespeak.net Sun Mar 12 07:51:53 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 07:51:55 2006 Subject: [Lxml-checkins] r24245 - lxml/trunk/src/lxml Message-ID: <20060312065153.EB53B100A6@code0.codespeak.net> Author: scoder Date: Sun Mar 12 07:51:46 2006 New Revision: 24245 Added: lxml/trunk/src/lxml/python.pxd - copied unchanged from r24244, lxml/trunk/src/lxml/tree.pxd Log: split of tree.pxd: move Python.h stuff into python.pxd From scoder at codespeak.net Sun Mar 12 07:57:52 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 07:57:55 2006 Subject: [Lxml-checkins] r24246 - lxml/trunk/src/lxml Message-ID: <20060312065752.31FEB100A6@code0.codespeak.net> Author: scoder Date: Sun Mar 12 07:57:51 2006 New Revision: 24246 Modified: lxml/trunk/src/lxml/proxy.pxi Log: clean up, prepend new proxies to _private list instead of appending them, fold createProxyRef into registerProxy (only place where it is used) Modified: lxml/trunk/src/lxml/proxy.pxi ============================================================================== --- lxml/trunk/src/lxml/proxy.pxi (original) +++ lxml/trunk/src/lxml/proxy.pxi Sun Mar 12 07:57:51 2006 @@ -30,34 +30,22 @@ cdef int hasProxy(xmlNode* c_node): return c_node._private is not NULL -cdef ProxyRef* createProxyRef(_NodeBase proxy, int proxy_type): - """Create a backpointer proxy reference for a proxy and type. - """ - cdef ProxyRef* result - result = cstd.malloc(sizeof(ProxyRef)) - result.proxy = proxy - result.type = proxy_type - result.next = NULL - return result - cdef void registerProxy(_NodeBase proxy, int proxy_type): """Register a proxy and type for the node it's proxying for. 
""" + cdef xmlNode* c_node cdef ProxyRef* ref - cdef ProxyRef* prev_ref # cannot register for NULL - if proxy._c_node is NULL: + c_node = proxy._c_node + if c_node is NULL: return # XXX should we check whether we ran into proxy_type before? #print "registering for:", proxy._c_node - ref = proxy._c_node._private - if ref is NULL: - proxy._c_node._private = createProxyRef(proxy, proxy_type) - return - while ref is not NULL: - prev_ref = ref - ref = ref.next - prev_ref.next = createProxyRef(proxy, proxy_type) + ref = cstd.malloc(sizeof(ProxyRef)) + ref.proxy = proxy + ref.type = proxy_type + ref.next = c_node._private + c_node._private = ref # prepend cdef void unregisterProxy(_NodeBase proxy): """Unregister a proxy for the node it's proxying for. From scoder at codespeak.net Sun Mar 12 08:07:40 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 08:07:40 2006 Subject: [Lxml-checkins] r24247 - lxml/trunk Message-ID: <20060312070740.09027100A6@code0.codespeak.net> Author: scoder Date: Sun Mar 12 08:07:38 2006 New Revision: 24247 Modified: lxml/trunk/MANIFEST.in Log: include non .txt files from doc/ Modified: lxml/trunk/MANIFEST.in ============================================================================== --- lxml/trunk/MANIFEST.in (original) +++ lxml/trunk/MANIFEST.in Sun Mar 12 08:07:38 2006 @@ -1,4 +1,4 @@ include setup.py MANIFEST.in *.txt recursive-include src *.pyx *.pxd *.pxi *.py etree.c recursive-include src/lxml/tests *.rng *.xslt *.xml -recursive-include doc *.txt +recursive-include doc *.txt *.xml *.mgp From scoder at codespeak.net Sun Mar 12 08:08:07 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 08:08:08 2006 Subject: [Lxml-checkins] r24248 - lxml/trunk/src/lxml Message-ID: <20060312070807.3F94E100A6@code0.codespeak.net> Author: scoder Date: Sun Mar 12 08:08:00 2006 New Revision: 24248 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/nsclasses.pxi lxml/trunk/src/lxml/proxy.pxi 
lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/tree.pxd lxml/trunk/src/lxml/xslt.pxi Log: factored out Python.h declarations into python.pxd Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Mar 12 08:08:00 2006 @@ -1,4 +1,4 @@ -cimport tree +cimport tree, python from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs cimport xpath cimport xslt @@ -563,7 +563,7 @@ doc = self._doc while c_node is not NULL and c < stop: if _isElement(c_node): - ret = tree.PyList_Append(result, _elementFactory(doc, c_node)) + ret = python.PyList_Append(result, _elementFactory(doc, c_node)) if ret: raise c = c + 1 @@ -656,7 +656,7 @@ c_node = self._c_node.children while c_node is not NULL: if _isElement(c_node): - ret = tree.PyList_Append(result, _elementFactory(doc, c_node)) + ret = python.PyList_Append(result, _elementFactory(doc, c_node)) if ret: raise c_node = c_node.next @@ -711,11 +711,11 @@ else: c_ns_href = c_node.ns.href element_class = _find_element_class(c_ns_href, c_node.name) - result = element_class() elif c_node.type == tree.XML_COMMENT_NODE: - result = _Comment() + element_class = _Comment else: assert 0, "Unknown node type: %s" % c_node.type + result = element_class() result._doc = doc result._c_node = c_node result._proxy_type = PROXY_ELEMENT @@ -1228,13 +1228,13 @@ cdef _dumpToFile(f, xmlDoc* c_doc, xmlNode* c_node): - cdef tree.PyObject* o + cdef python.PyObject* o cdef tree.xmlOutputBuffer* c_buffer - if not tree.PyFile_Check(f): + if not python.PyFile_Check(f): raise ValueError, "Not a file" - o = f - c_buffer = tree.xmlOutputBufferCreateFile(tree.PyFile_AsFile(o), NULL) + o = f + c_buffer = tree.xmlOutputBufferCreateFile(python.PyFile_AsFile(o), NULL) tree.xmlNodeDumpOutput(c_buffer, c_doc, c_node, 0, 0, NULL) # dump next node if it's a text node _dumpNextNode(c_buffer, c_doc, c_node, NULL) @@ -1407,15 +1407,15 @@ 
cdef object funicode(char* s): if isutf8(s): - return tree.PyUnicode_DecodeUTF8(s, tree.strlen(s), "strict") - return tree.PyString_FromStringAndSize(s, tree.strlen(s)) + return python.PyUnicode_DecodeUTF8(s, tree.strlen(s), "strict") + return python.PyString_FromStringAndSize(s, tree.strlen(s)) cdef object _utf8(object s): - if tree.PyString_Check(s): + if python.PyString_Check(s): assert not isutf8(s), "All strings must be Unicode or ASCII" return s - elif tree.PyUnicode_Check(s): - return tree.PyUnicode_AsUTF8String(s) + elif python.PyUnicode_Check(s): + return python.PyUnicode_AsUTF8String(s) else: raise TypeError, "Argument must be string or unicode." @@ -1433,11 +1433,10 @@ if c_pos is NULL: raise ValueError, "Invalid tag name" nslen = c_pos - c_tag - 1 - ns = tree.PyString_FromStringAndSize(c_tag+1, nslen) - c_tag = c_pos + 1 + ns = python.PyString_FromStringAndSize(c_tag+1, nslen) + tag = python.PyString_FromString(c_pos+1) else: ns = None - tag = tree.PyString_FromString(c_tag) return ns, tag cdef object _namespacedName(xmlNode* c_node): @@ -1449,7 +1448,7 @@ return funicode(name) else: href = c_node.ns.href - s = tree.PyString_FromFormat("{%s}%s", href, name) + s = python.PyString_FromFormat("{%s}%s", href, name) if isutf8(href) or isutf8(name): return unicode(s, 'UTF-8') else: Modified: lxml/trunk/src/lxml/nsclasses.pxi ============================================================================== --- lxml/trunk/src/lxml/nsclasses.pxi (original) +++ lxml/trunk/src/lxml/nsclasses.pxi Sun Mar 12 08:08:00 2006 @@ -78,10 +78,10 @@ return self._get(name_utf) cdef object _get(self, char* c_name): - cdef tree.PyObject* dict_result - dict_result = tree.PyDict_GetItemString(self._classes, c_name) + cdef python.PyObject* dict_result + dict_result = python.PyDict_GetItemString(self._classes, c_name) if dict_result is NULL: - dict_result = tree.PyDict_GetItemString(self._extensions, c_name) + dict_result = python.PyDict_GetItemString(self._extensions, c_name) if 
dict_result is NULL: raise KeyError, "Name not registered." return dict_result @@ -93,14 +93,14 @@ cdef object _find_element_class(char* c_namespace_utf, char* c_element_name_utf): - cdef tree.PyObject* dict_result + cdef python.PyObject* dict_result cdef _NamespaceRegistry registry cdef object result if c_namespace_utf is not NULL: - dict_result = tree.PyDict_GetItemString( + dict_result = python.PyDict_GetItemString( __NAMESPACE_CLASSES, c_namespace_utf) else: - dict_result = tree.PyDict_GetItem( + dict_result = python.PyDict_GetItem( __NAMESPACE_CLASSES, None) if dict_result is NULL: return _Element @@ -109,13 +109,13 @@ classes = registry._classes if c_element_name_utf is not NULL: - dict_result = tree.PyDict_GetItemString( + dict_result = python.PyDict_GetItemString( classes, c_element_name_utf) else: dict_result = NULL if dict_result is NULL: - dict_result = tree.PyDict_GetItem(classes, None) + dict_result = python.PyDict_GetItem(classes, None) if dict_result is not NULL: result = dict_result Modified: lxml/trunk/src/lxml/proxy.pxi ============================================================================== --- lxml/trunk/src/lxml/proxy.pxi (original) +++ lxml/trunk/src/lxml/proxy.pxi Sun Mar 12 08:08:00 2006 @@ -7,7 +7,7 @@ cdef struct _ProxyRef cdef struct _ProxyRef: - tree.PyObject* proxy + python.PyObject* proxy int type _ProxyRef* next @@ -42,7 +42,7 @@ # XXX should we check whether we ran into proxy_type before? #print "registering for:", proxy._c_node ref = cstd.malloc(sizeof(ProxyRef)) - ref.proxy = proxy + ref.proxy = proxy ref.type = proxy_type ref.next = c_node._private c_node._private = ref # prepend @@ -50,11 +50,11 @@ cdef void unregisterProxy(_NodeBase proxy): """Unregister a proxy for the node it's proxying for. 
""" - cdef tree.PyObject* proxy_ref + cdef python.PyObject* proxy_ref cdef ProxyRef* ref cdef ProxyRef* prev_ref cdef xmlNode* c_node - proxy_ref = proxy + proxy_ref = proxy c_node = proxy._c_node ref = c_node._private if ref.proxy == proxy_ref: Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Sun Mar 12 08:08:00 2006 @@ -1,19 +1,16 @@ -#from xmlparser cimport xmlDict +from tree cimport FILE -cdef extern from "stdio.h": - ctypedef struct FILE - cdef int strlen(char* s) - cdef extern from "Python.h": ctypedef struct PyObject cdef FILE* PyFile_AsFile(PyObject* p) cdef int PyFile_Check(object p) cdef object PyFile_Name(object p) - cdef void Py_INCREF(object o) - cdef object PyUnicode_DecodeUTF8(char* s, int size, char* errors) + cdef int PyUnicode_Check(object obj) cdef int PyString_Check(object obj) + + cdef object PyUnicode_DecodeUTF8(char* s, int size, char* errors) cdef object PyUnicode_AsUTF8String(object ustring) cdef object PyString_FromStringAndSize(char* s, int size) cdef object PyString_FromString(char* s) @@ -24,168 +21,5 @@ cdef PyObject* PyDict_GetItemString(object d, char* key) cdef PyObject* PyDict_GetItem(object d, object key) -cdef extern from "libxml/encoding.h": - ctypedef struct xmlCharEncodingHandler - cdef xmlCharEncodingHandler* xmlFindCharEncodingHandler(char* name) - -cdef extern from "libxml/tree.h": - - # for some reason need to define this in this section; - # libxml/dict.h appears to be broken to include in C - ctypedef struct xmlDict - - ctypedef struct xmlDoc - ctypedef struct xmlAttr - - ctypedef enum xmlElementType: - XML_ELEMENT_NODE= 1 - XML_ATTRIBUTE_NODE= 2 - XML_TEXT_NODE= 3 - XML_CDATA_SECTION_NODE= 4 - XML_ENTITY_REF_NODE= 5 - XML_ENTITY_NODE= 6 - XML_PI_NODE= 7 - XML_COMMENT_NODE= 8 - XML_DOCUMENT_NODE= 9 - XML_DOCUMENT_TYPE_NODE= 10 - XML_DOCUMENT_FRAG_NODE= 11 - 
XML_NOTATION_NODE= 12 - XML_HTML_DOCUMENT_NODE= 13 - XML_DTD_NODE= 14 - XML_ELEMENT_DECL= 15 - XML_ATTRIBUTE_DECL= 16 - XML_ENTITY_DECL= 17 - XML_NAMESPACE_DECL= 18 - XML_XINCLUDE_START= 19 - XML_XINCLUDE_END= 20 - - - ctypedef struct xmlNs: - char* href - char* prefix - - ctypedef struct xmlNode: - void* _private - xmlElementType type - char* name - xmlNode* children - xmlNode* last - xmlNode* parent - xmlNode* next - xmlNode* prev - xmlDoc* doc - char* content - xmlAttr* properties - xmlNs* ns - - ctypedef struct xmlDoc: - xmlElementType type - char* name - xmlNode* children - xmlNode* last - xmlNode* parent - xmlNode* next - xmlNode* prev - xmlDoc* doc - xmlDict* dict - char* URL - - ctypedef struct xmlAttr: - void* _private - xmlElementType type - char* name - xmlNode* children - xmlNode* last - xmlNode* parent - xmlAttr* next - xmlAttr* prev - xmlDoc* doc - xmlNs* ns - - ctypedef struct xmlElement: - xmlElementType type - char* name - xmlNode* children - xmlNode* last - xmlNode* parent - xmlNode* next - xmlNode* prev - xmlDoc* doc - - ctypedef struct xmlBuffer - - ctypedef struct xmlOutputBuffer: - xmlBuffer* buffer - xmlBuffer* conv - - cdef void xmlFreeDoc(xmlDoc *cur) - cdef void xmlFreeNode(xmlNode* cur) - cdef void xmlFree(char* buf) - - cdef xmlNode* xmlNewNode(xmlNs* ns, char* name) - cdef xmlNode* xmlNewDocText(xmlDoc* doc, char* content) - cdef xmlNode* xmlNewDocComment(xmlDoc* doc, char* content) - cdef xmlNs* xmlNewNs(xmlNode* node, char* href, char* prefix) - cdef xmlNode* xmlAddChild(xmlNode* parent, xmlNode* cur) - cdef xmlNode* xmlReplaceNode(xmlNode* old, xmlNode* cur) - cdef xmlNode* xmlAddPrevSibling(xmlNode* cur, xmlNode* elem) - cdef xmlNode* xmlAddNextSibling(xmlNode* cur, xmlNode* elem) - cdef xmlNode* xmlNewDocNode(xmlDoc* doc, xmlNs* ns, - char* name, char* content) - cdef xmlDoc* xmlNewDoc(char* version) - cdef xmlAttr* xmlNewProp(xmlNode* node, char* name, char* value) - cdef char* xmlGetNoNsProp(xmlNode* node, char* name) - cdef 
char* xmlGetNsProp(xmlNode* node, char* name, char* nameSpace) - cdef void xmlSetNs(xmlNode* node, xmlNs* ns) - cdef void xmlSetProp(xmlNode* node, char* name, char* value) - cdef void xmlSetNsProp(xmlNode* node, xmlNs* ns, char* name, char* value) - cdef void xmlRemoveProp(xmlAttr* cur) - cdef void xmlDocDumpMemory(xmlDoc* cur, char** mem, int* size) - cdef void xmlDocDumpMemoryEnc(xmlDoc* cur, char** mem, int* size, - char* encoding) - cdef int xmlSaveFileTo(xmlOutputBuffer* out, xmlDoc* cur, char* encoding) - - cdef void xmlUnlinkNode(xmlNode* cur) - cdef xmlNode* xmlDocSetRootElement(xmlDoc* doc, xmlNode* root) - cdef xmlNode* xmlDocGetRootElement(xmlDoc* doc) - cdef void xmlSetTreeDoc(xmlNode* tree, xmlDoc* doc) - cdef xmlNode* xmlDocCopyNode(xmlNode* node, xmlDoc* doc, int extended) - cdef xmlAttr* xmlHasProp(xmlNode* node, char* name) - cdef xmlAttr* xmlHasNsProp(xmlNode* node, char* name, char* nameSpace) - cdef char* xmlNodeGetContent(xmlNode* cur) - cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, char* nameSpace) - cdef xmlNs* xmlSearchNsByHref(xmlDoc* doc, xmlNode* node, char* href) - cdef int xmlIsBlankNode(xmlNode* node) - cdef void xmlElemDump(FILE* f, xmlDoc* doc, xmlNode* cur) - cdef void xmlNodeDumpOutput(xmlOutputBuffer* buf, - xmlDoc* doc, xmlNode* cur, int level, - int format, char* encoding) - cdef void xmlNodeSetName(xmlNode* cur, char* name) - cdef xmlDoc* xmlCopyDoc(xmlDoc* doc, int recursive) - cdef xmlNode* xmlCopyNode(xmlNode* node, int extended) - cdef int xmlReconciliateNs(xmlDoc* doc, xmlNode* tree) - cdef xmlBuffer* xmlBufferCreate() - cdef char* xmlBufferContent(xmlBuffer* buf) - -cdef extern from "libxml/xmlIO.h": - - cdef xmlOutputBuffer* xmlAllocOutputBuffer(xmlCharEncodingHandler* encoder) - cdef xmlOutputBuffer* xmlOutputBufferCreateFile( - FILE* file, - xmlCharEncodingHandler* encoder) - cdef int xmlOutputBufferWriteString(xmlOutputBuffer* out, char* str) - cdef int xmlOutputBufferFlush(xmlOutputBuffer* out) - cdef int 
xmlOutputBufferClose(xmlOutputBuffer* out) - -cdef extern from "libxml/xmlsave.h": - ctypedef struct xmlSaveCtxt: - pass - - cdef xmlSaveCtxt* xmlSaveToFilename(char* filename, char* encoding, - int options) - cdef long xmlSaveDoc(xmlSaveCtxt* ctxt, xmlDoc* doc) - cdef int xmlSaveClose(xmlSaveCtxt* ctxt) - -cdef extern from "libxml/xmlstring.h": - cdef char* xmlStrdup(char* cur) - cdef char* xmlStrchr(char* cur, char value) + cdef int PyObject_IsInstance(object instance, object classes) + cdef int PyObject_HasAttrString(object obj, char* attr) Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Sun Mar 12 08:08:00 2006 @@ -4,26 +4,6 @@ ctypedef struct FILE cdef int strlen(char* s) -cdef extern from "Python.h": - ctypedef struct PyObject - - cdef FILE* PyFile_AsFile(PyObject* p) - cdef int PyFile_Check(object p) - cdef object PyFile_Name(object p) - cdef void Py_INCREF(object o) - cdef object PyUnicode_DecodeUTF8(char* s, int size, char* errors) - cdef int PyUnicode_Check(object obj) - cdef int PyString_Check(object obj) - cdef object PyUnicode_AsUTF8String(object ustring) - cdef object PyString_FromStringAndSize(char* s, int size) - cdef object PyString_FromString(char* s) - cdef object PyString_FromFormat(char* format, ...) 
- - cdef int PyList_Append(object l, object obj) - cdef int PyDict_SetItemString(object d, char* key, object value) - cdef PyObject* PyDict_GetItemString(object d, char* key) - cdef PyObject* PyDict_GetItem(object d, object key) - cdef extern from "libxml/encoding.h": ctypedef struct xmlCharEncodingHandler cdef xmlCharEncodingHandler* xmlFindCharEncodingHandler(char* name) Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Sun Mar 12 08:08:00 2006 @@ -308,10 +308,10 @@ cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL: cdef xpath.xmlNodeSet* resultSet cdef _NodeBase node - if tree.PyString_Check(obj): + if python.PyString_Check(obj): # XXX use the Wrap variant? Or leak... return xpath.xmlXPathNewCString(obj) - if tree.PyUnicode_Check(obj): + if python.PyUnicode_Check(obj): obj = _utf8(obj) return xpath.xmlXPathNewCString(obj) if isinstance(obj, types.BooleanType): From scoder at codespeak.net Sun Mar 12 08:27:37 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 08:27:41 2006 Subject: [Lxml-checkins] r24249 - in lxml/trunk: . 
src/lxml Message-ID: <20060312072737.0CE40100A6@code0.codespeak.net> Author: scoder Date: Sun Mar 12 08:27:33 2006 New Revision: 24249 Added: lxml/trunk/src/lxml/etree.h Modified: lxml/trunk/MANIFEST.in lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/python.pxd Log: replace isinstance() and hasattr() by Python API calls (redefined as C macros in new etree.h) Modified: lxml/trunk/MANIFEST.in ============================================================================== --- lxml/trunk/MANIFEST.in (original) +++ lxml/trunk/MANIFEST.in Sun Mar 12 08:27:33 2006 @@ -1,4 +1,4 @@ include setup.py MANIFEST.in *.txt -recursive-include src *.pyx *.pxd *.pxi *.py etree.c +recursive-include src *.pyx *.pxd *.pxi *.py etree.c etree.h recursive-include src/lxml/tests *.rng *.xslt *.xml recursive-include doc *.txt *.xml *.mgp Added: lxml/trunk/src/lxml/etree.h ============================================================================== --- (empty file) +++ lxml/trunk/src/lxml/etree.h Sun Mar 12 08:27:33 2006 @@ -0,0 +1,7 @@ +#ifndef HAS_ETREE_H +#define HAS_ETREE_H + +#define isinstance(a,b) PyObject_IsInstance(a,b) +#define hasattr(a,b) PyObject_HasAttrString(a,b) + +#endif /*HAS_ETREE_H*/ Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Mar 12 08:27:33 2006 @@ -1,5 +1,6 @@ cimport tree, python from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs +from python cimport isinstance, hasattr cimport xpath cimport xslt cimport xmlerror @@ -1041,7 +1042,7 @@ def XML(text): cdef xmlDoc* c_doc - if isinstance(text, unicode): + if python.PyUnicode_Check(text): text = _stripDeclaration(_utf8(text)) c_doc = theParser.parseDoc(text, None) return _documentFactory(c_doc).getroot() @@ -1407,7 +1408,7 @@ cdef object funicode(char* s): if isutf8(s): - return python.PyUnicode_DecodeUTF8(s, tree.strlen(s), "strict") + return 
python.PyUnicode_DecodeUTF8(s, tree.strlen(s), NULL) return python.PyString_FromStringAndSize(s, tree.strlen(s)) cdef object _utf8(object s): @@ -1442,7 +1443,6 @@ cdef object _namespacedName(xmlNode* c_node): cdef char* href cdef char* name - cdef object s name = c_node.name if c_node.ns is NULL or c_node.ns.href is NULL: return funicode(name) @@ -1450,7 +1450,7 @@ href = c_node.ns.href s = python.PyString_FromFormat("{%s}%s", href, name) if isutf8(href) or isutf8(name): - return unicode(s, 'UTF-8') + return python.PyUnicode_FromEncodedObject(s, 'UTF-8', NULL) else: return s Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Sun Mar 12 08:27:33 2006 @@ -10,6 +10,8 @@ cdef int PyUnicode_Check(object obj) cdef int PyString_Check(object obj) + cdef object PyUnicode_FromEncodedObject(object s, char* encoding, + char* errors) cdef object PyUnicode_DecodeUTF8(char* s, int size, char* errors) cdef object PyUnicode_AsUTF8String(object ustring) cdef object PyString_FromStringAndSize(char* s, int size) @@ -23,3 +25,7 @@ cdef int PyObject_IsInstance(object instance, object classes) cdef int PyObject_HasAttrString(object obj, char* attr) + +cdef extern from "etree.h": # redefines some functions as macros + cdef int isinstance(object instance, object classes) + cdef int hasattr(object obj, char* attr) From scoder at codespeak.net Sun Mar 12 09:07:47 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 09:07:48 2006 Subject: [Lxml-checkins] r24250 - lxml/pyrex/Pyrex/Compiler Message-ID: <20060312080747.066C9100A6@code0.codespeak.net> Author: scoder Date: Sun Mar 12 09:07:36 2006 New Revision: 24250 Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py Log: do not emit empty tuple creation for object calls without arguments Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py 
============================================================================== --- lxml/pyrex/Pyrex/Compiler/ExprNodes.py (original) +++ lxml/pyrex/Pyrex/Compiler/ExprNodes.py Sun Mar 12 09:07:36 2006 @@ -1191,9 +1191,12 @@ self.self = function.obj function.obj = CloneNode(self.self) if self.function.type.is_pyobject: - self.arg_tuple = TupleNode(self.pos, args = self.args) + if self.args: + self.arg_tuple = TupleNode(self.pos, args = self.args) + self.arg_tuple.analyse_types(env) + else: + self.arg_tuple = () self.args = None - self.arg_tuple.analyse_types(env) self.type = PyrexTypes.py_object_type self.is_temp = 1 else: @@ -1285,11 +1288,15 @@ def generate_result_code(self, code): #print_call_chain("SimpleCallNode.generate_result_code") ### if self.function.type.is_pyobject: + if self.arg_tuple: + arg_result = self.arg_tuple.result + else: + arg_result = "0" code.putln( "%s = PyObject_CallObject(%s, %s); if (!%s) %s" % ( self.result, self.function.result, - self.arg_tuple.result, + arg_result, self.result, code.error_goto(self.pos))) elif self.function.type.is_cfunction: From scoder at codespeak.net Sun Mar 12 09:10:48 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 09:10:50 2006 Subject: [Lxml-checkins] r24251 - lxml/pyrex/Pyrex/Compiler Message-ID: <20060312081048.607011009F@code0.codespeak.net> Author: scoder Date: Sun Mar 12 09:10:42 2006 New Revision: 24251 Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py lxml/pyrex/Pyrex/Compiler/Nodes.py Log: applied patch 3 by David M. 
Cooke Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/ExprNodes.py (original) +++ lxml/pyrex/Pyrex/Compiler/ExprNodes.py Sun Mar 12 09:10:42 2006 @@ -3092,23 +3092,23 @@ } static PyObject *__Pyx_UnpackItem(PyObject *seq, int i) { - PyObject *item; - if (!(item = PySequence_GetItem(seq, i))) { + PyObject *item = PySequence_GetItem(seq, i); + if (!item) { if (PyErr_ExceptionMatches(PyExc_IndexError)) - __Pyx_UnpackError(); + __Pyx_UnpackError(); } return item; } static int __Pyx_EndUnpack(PyObject *seq, int i) { - PyObject *item; - if (item = PySequence_GetItem(seq, i)) { + PyObject *item = PySequence_GetItem(seq, i); + if (item) { Py_DECREF(item); __Pyx_UnpackError(); return -1; } PyErr_Clear(); - return 0; + return 0; } """ Modified: lxml/pyrex/Pyrex/Compiler/Nodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Nodes.py (original) +++ lxml/pyrex/Pyrex/Compiler/Nodes.py Sun Mar 12 09:10:42 2006 @@ -3764,10 +3764,8 @@ bad: Py_XDECREF(args1); Py_XDECREF(kwds1); - if (*args2) - Py_XDECREF(*args2); - if (*kwds2) - Py_XDECREF(*kwds2); + Py_XDECREF(*args2); + Py_XDECREF(*kwds2); return -1; } """ From scoder at codespeak.net Sun Mar 12 09:16:58 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 09:17:00 2006 Subject: [Lxml-checkins] r24252 - lxml/pyrex/Pyrex/Compiler Message-ID: <20060312081658.D7933100A6@code0.codespeak.net> Author: scoder Date: Sun Mar 12 09:16:57 2006 New Revision: 24252 Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py Log: clean up Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/ExprNodes.py (original) +++ lxml/pyrex/Pyrex/Compiler/ExprNodes.py Sun Mar 12 09:16:57 2006 @@ -1194,8 +1194,6 @@ if self.args: self.arg_tuple = TupleNode(self.pos, args = 
self.args) self.arg_tuple.analyse_types(env) - else: - self.arg_tuple = () self.args = None self.type = PyrexTypes.py_object_type self.is_temp = 1 From scoder at codespeak.net Sun Mar 12 09:52:48 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 09:52:49 2006 Subject: [Lxml-checkins] r24253 - lxml/trunk/src/lxml Message-ID: <20060312085248.32FB210098@code0.codespeak.net> Author: scoder Date: Sun Mar 12 09:52:46 2006 New Revision: 24253 Modified: lxml/trunk/src/lxml/nsclasses.pxi lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/xslt.pxi Log: use Python API type checks instead of calls to isinstance() for standard types Modified: lxml/trunk/src/lxml/nsclasses.pxi ============================================================================== --- lxml/trunk/src/lxml/nsclasses.pxi (original) +++ lxml/trunk/src/lxml/nsclasses.pxi Sun Mar 12 09:52:46 2006 @@ -56,11 +56,11 @@ self[name] = item def __setitem__(self, name, item): - if isinstance(item, (type, types.ClassType)) and issubclass(item, ElementBase): + if python.PyType_Check(item) and issubclass(item, ElementBase): d = self._classes elif name is None: raise NamespaceRegistryError, "Registered name can only be None for elements." 
- elif isinstance(item, (type, types.ClassType)) and issubclass(item, XSLTElement): + elif python.PyType_Check(item) and issubclass(item, XSLTElement): d = self._xslt_elements elif callable(item): d = self._extensions Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Sun Mar 12 09:52:46 2006 @@ -23,6 +23,10 @@ cdef PyObject* PyDict_GetItemString(object d, char* key) cdef PyObject* PyDict_GetItem(object d, object key) + cdef int PyNumber_Check(object instance) + cdef int PyBool_Check(object instance) + cdef int PySequence_Check(object instance) + cdef int PyType_Check(object instance) cdef int PyObject_IsInstance(object instance, object classes) cdef int PyObject_HasAttrString(object obj, char* attr) Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Sun Mar 12 09:52:46 2006 @@ -314,13 +314,13 @@ if python.PyUnicode_Check(obj): obj = _utf8(obj) return xpath.xmlXPathNewCString(obj) - if isinstance(obj, types.BooleanType): + if python.PyBool_Check(obj): return xpath.xmlXPathNewBoolean(obj) - if isinstance(obj, (int, float)): + if python.PyNumber_Check(obj): return xpath.xmlXPathNewFloat(obj) if isinstance(obj, _NodeBase): obj = [obj] - if isinstance(obj, (types.ListType, types.TupleType)): + if python.PySequence_Check(obj): resultSet = xpath.xmlXPathNodeSetCreate(NULL) for element in obj: if isinstance(element, _NodeBase): From scoder at codespeak.net Sun Mar 12 09:54:11 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 09:54:12 2006 Subject: [Lxml-checkins] r24254 - lxml/trunk/src/lxml Message-ID: <20060312085411.4531410098@code0.codespeak.net> Author: scoder Date: Sun Mar 12 09:54:06 2006 New Revision: 24254 Modified: lxml/trunk/src/lxml/etree.pyx Log: 
clean up Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Mar 12 09:54:06 2006 @@ -7,7 +7,7 @@ cimport xinclude cimport c14n cimport cstd -import re, types +import re import _elementpath from StringIO import StringIO From scoder at codespeak.net Sun Mar 12 11:43:36 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 11:43:43 2006 Subject: [Lxml-checkins] r24256 - lxml/trunk Message-ID: <20060312104336.089401009C@code0.codespeak.net> Author: scoder Date: Sun Mar 12 11:43:35 2006 New Revision: 24256 Modified: lxml/trunk/bench.py Log: allow running without lxml by passing -nolxml option Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Sun Mar 12 11:43:35 2006 @@ -341,6 +341,7 @@ ############################################################ if __name__ == '__main__': + import_lxml = True if len(sys.argv) > 1: try: sys.argv.remove('-i') @@ -348,8 +349,16 @@ except ValueError: pass - from lxml import etree - _etrees = [etree] + try: + sys.argv.remove('-nolxml') + import_lxml = False + except ValueError: + pass + + _etrees = [] + if import_lxml: + from lxml import etree + _etrees.append(etree) if len(sys.argv) > 1: try: @@ -369,6 +378,10 @@ except ImportError: pass + if not _etrees: + print "No library to test. Exiting." + sys.exit(1) + print "Preparing test suites and trees ..." 
benchmark_suites = map(BenchMark, _etrees) From scoder at codespeak.net Sun Mar 12 12:47:40 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 12:47:42 2006 Subject: [Lxml-checkins] r24257 - lxml/trunk Message-ID: <20060312114740.2874D1009C@code0.codespeak.net> Author: scoder Date: Sun Mar 12 12:47:38 2006 New Revision: 24257 Modified: lxml/trunk/bench.py Log: output formatting Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Sun Mar 12 12:47:38 2006 @@ -446,7 +446,7 @@ for lib, (bench, benchmark_setup) in enumerate(izip(benchmark_suites, bench_calls)): bench_name = benchmark_setup[0] tree_set_name = build_treeset_name(*benchmark_setup[-3:]) - print "%-3s: %-22s (%-10s)" % (bench.lib_name, bench_name[6:29], tree_set_name), + print "%-3s: %-23s (%-10s)" % (bench.lib_name, bench_name[6:29], tree_set_name), sys.stdout.flush() result = run_bench(bench, *benchmark_setup) From scoder at codespeak.net Sun Mar 12 12:52:42 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 12:52:44 2006 Subject: [Lxml-checkins] r24259 - lxml/trunk/src/lxml Message-ID: <20060312115242.C6443100A6@code0.codespeak.net> Author: scoder Date: Sun Mar 12 12:52:40 2006 New Revision: 24259 Modified: lxml/trunk/src/lxml/etree.h lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tree.pxd Log: make _isElement() a C macro to enforce its inlining in C helper functions, some clean up Modified: lxml/trunk/src/lxml/etree.h ============================================================================== --- lxml/trunk/src/lxml/etree.h (original) +++ lxml/trunk/src/lxml/etree.h Sun Mar 12 12:52:40 2006 @@ -3,5 +3,8 @@ #define isinstance(a,b) PyObject_IsInstance(a,b) #define hasattr(a,b) PyObject_HasAttrString(a,b) +#define _isElement(c_node) \ + ((c_node)->type == XML_ELEMENT_NODE || \ + (c_node)->type == XML_COMMENT_NODE) #endif /*HAS_ETREE_H*/ 
Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Mar 12 12:52:40 2006 @@ -1,5 +1,5 @@ cimport tree, python -from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs +from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement from python cimport isinstance, hasattr cimport xpath cimport xslt @@ -18,13 +18,9 @@ cdef int PROXY_ELEMENT cdef int PROXY_ATTRIB -cdef int PROXY_ATTRIB_ITER -cdef int PROXY_ELEMENT_ITER PROXY_ELEMENT = 0 PROXY_ATTRIB = 1 -PROXY_ATTRIB_ITER = 2 -PROXY_ELEMENT_ITER = 3 # the rules @@ -1373,9 +1369,10 @@ c_target = c_tail c_tail = c_next -cdef int _isElement(xmlNode* c_node): - return (c_node.type == tree.XML_ELEMENT_NODE or - c_node.type == tree.XML_COMMENT_NODE) +### see etree.h: +## cdef int _isElement(xmlNode* c_node): +## return (c_node.type == tree.XML_ELEMENT_NODE or +## c_node.type == tree.XML_COMMENT_NODE) cdef xmlNode* _deleteSlice(xmlNode* c_node, int start, int stop): """Delete slice, starting with c_node, start counting at start, end at stop. 
@@ -1409,7 +1406,7 @@ cdef object funicode(char* s): if isutf8(s): return python.PyUnicode_DecodeUTF8(s, tree.strlen(s), NULL) - return python.PyString_FromStringAndSize(s, tree.strlen(s)) + return python.PyString_FromString(s) cdef object _utf8(object s): if python.PyString_Check(s): Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Sun Mar 12 12:52:40 2006 @@ -169,3 +169,6 @@ cdef extern from "libxml/xmlstring.h": cdef char* xmlStrdup(char* cur) cdef char* xmlStrchr(char* cur, char value) + +cdef extern from "etree.h": + cdef int _isElement(xmlNode* node) From scoder at codespeak.net Sun Mar 12 13:59:55 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 13:59:57 2006 Subject: [Lxml-checkins] r24260 - lxml/trunk/src/lxml Message-ID: <20060312125955.8D54B1009C@code0.codespeak.net> Author: scoder Date: Sun Mar 12 13:59:53 2006 New Revision: 24260 Modified: lxml/trunk/src/lxml/etree.h lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/proxy.pxi lxml/trunk/src/lxml/tree.pxd Log: moved PROXY_ELEMENT and PROXY_ATTRIB into C enum in etree.h Modified: lxml/trunk/src/lxml/etree.h ============================================================================== --- lxml/trunk/src/lxml/etree.h (original) +++ lxml/trunk/src/lxml/etree.h Sun Mar 12 13:59:53 2006 @@ -7,4 +7,9 @@ ((c_node)->type == XML_ELEMENT_NODE || \ (c_node)->type == XML_COMMENT_NODE) +typedef enum { + PROXY_ELEMENT, + PROXY_ATTRIB +} LXML_PROXY_TYPE; + #endif /*HAS_ETREE_H*/ Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Mar 12 13:59:53 2006 @@ -1,5 +1,6 @@ cimport tree, python -from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement +from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs 
+from tree cimport _isElement, LXML_PROXY_TYPE, PROXY_ATTRIB, PROXY_ELEMENT from python cimport isinstance, hasattr cimport xpath cimport xslt @@ -16,12 +17,6 @@ # should libxml2/libxslt be allowed to shout? DEBUG = False -cdef int PROXY_ELEMENT -cdef int PROXY_ATTRIB - -PROXY_ELEMENT = 0 -PROXY_ATTRIB = 1 - # the rules # any libxml C argument/variable is prefixed with c_ Modified: lxml/trunk/src/lxml/proxy.pxi ============================================================================== --- lxml/trunk/src/lxml/proxy.pxi (original) +++ lxml/trunk/src/lxml/proxy.pxi Sun Mar 12 13:59:53 2006 @@ -8,7 +8,7 @@ cdef struct _ProxyRef: python.PyObject* proxy - int type + LXML_PROXY_TYPE type _ProxyRef* next ctypedef _ProxyRef ProxyRef Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Sun Mar 12 13:59:53 2006 @@ -172,3 +172,6 @@ cdef extern from "etree.h": cdef int _isElement(xmlNode* node) + ctypedef enum LXML_PROXY_TYPE: + PROXY_ELEMENT + PROXY_ATTRIB From scoder at codespeak.net Sun Mar 12 14:13:19 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 14:13:20 2006 Subject: [Lxml-checkins] r24261 - lxml/trunk/src/lxml Message-ID: <20060312131319.824091009C@code0.codespeak.net> Author: scoder Date: Sun Mar 12 14:13:17 2006 New Revision: 24261 Modified: lxml/trunk/src/lxml/xslt.pxi Log: small simplification in _wrapXPathObject Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Sun Mar 12 14:13:17 2006 @@ -308,11 +308,10 @@ cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL: cdef xpath.xmlNodeSet* resultSet cdef _NodeBase node - if python.PyString_Check(obj): - # XXX use the Wrap variant? Or leak... 
- return xpath.xmlXPathNewCString(obj) if python.PyUnicode_Check(obj): obj = _utf8(obj) + if python.PyString_Check(obj): + # XXX use the Wrap variant? Or leak... return xpath.xmlXPathNewCString(obj) if python.PyBool_Check(obj): return xpath.xmlXPathNewBoolean(obj) From scoder at codespeak.net Sun Mar 12 14:14:36 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 14:14:37 2006 Subject: [Lxml-checkins] r24262 - in lxml/branch/scoder2: . src/lxml Message-ID: <20060312131436.787241009C@code0.codespeak.net> Author: scoder Date: Sun Mar 12 14:14:33 2006 New Revision: 24262 Added: lxml/branch/scoder2/src/lxml/etree.h - copied unchanged from r24260, lxml/trunk/src/lxml/etree.h lxml/branch/scoder2/src/lxml/python.pxd - copied unchanged from r24260, lxml/trunk/src/lxml/python.pxd Modified: lxml/branch/scoder2/MANIFEST.in lxml/branch/scoder2/bench.py lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/nsclasses.pxi lxml/branch/scoder2/src/lxml/proxy.pxi lxml/branch/scoder2/src/lxml/tree.pxd lxml/branch/scoder2/src/lxml/xslt.pxi Log: loads of merges from trunk Modified: lxml/branch/scoder2/MANIFEST.in ============================================================================== --- lxml/branch/scoder2/MANIFEST.in (original) +++ lxml/branch/scoder2/MANIFEST.in Sun Mar 12 14:14:33 2006 @@ -1,4 +1,4 @@ include setup.py MANIFEST.in *.txt -recursive-include src *.pyx *.pxd *.pxi *.py etree.c +recursive-include src *.pyx *.pxd *.pxi *.py etree.c etree.h recursive-include src/lxml/tests *.rng *.xslt *.xml -recursive-include doc *.txt +recursive-include doc *.txt *.xml *.mgp Modified: lxml/branch/scoder2/bench.py ============================================================================== --- lxml/branch/scoder2/bench.py (original) +++ lxml/branch/scoder2/bench.py Sun Mar 12 14:14:33 2006 @@ -341,6 +341,7 @@ ############################################################ if __name__ == '__main__': + import_lxml = True if len(sys.argv) > 1: try: 
sys.argv.remove('-i') @@ -348,8 +349,16 @@ except ValueError: pass - from lxml import etree - _etrees = [etree] + try: + sys.argv.remove('-nolxml') + import_lxml = False + except ValueError: + pass + + _etrees = [] + if import_lxml: + from lxml import etree + _etrees.append(etree) if len(sys.argv) > 1: try: @@ -369,6 +378,10 @@ except ImportError: pass + if not _etrees: + print "No library to test. Exiting." + sys.exit(1) + print "Preparing test suites and trees ..." benchmark_suites = map(BenchMark, _etrees) @@ -433,7 +446,7 @@ for lib, (bench, benchmark_setup) in enumerate(izip(benchmark_suites, bench_calls)): bench_name = benchmark_setup[0] tree_set_name = build_treeset_name(*benchmark_setup[-3:]) - print "%-3s: %-22s (%-10s)" % (bench.lib_name, bench_name[6:29], tree_set_name), + print "%-3s: %-23s (%-10s)" % (bench.lib_name, bench_name[6:29], tree_set_name), sys.stdout.flush() result = run_bench(bench, *benchmark_setup) Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Sun Mar 12 14:14:33 2006 @@ -1,12 +1,14 @@ -cimport tree +cimport tree, python from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs +from tree cimport _isElement, LXML_PROXY_TYPE, PROXY_ATTRIB, PROXY_ELEMENT +from python cimport isinstance, hasattr cimport xpath cimport xslt cimport xmlerror cimport xinclude cimport c14n cimport cstd -import re, types +import re import _elementpath from StringIO import StringIO @@ -15,16 +17,6 @@ # should libxml2/libxslt be allowed to shout? 
DEBUG = False -cdef int PROXY_ELEMENT -cdef int PROXY_ATTRIB -cdef int PROXY_ATTRIB_ITER -cdef int PROXY_ELEMENT_ITER - -PROXY_ELEMENT = 0 -PROXY_ATTRIB = 1 -PROXY_ATTRIB_ITER = 2 -PROXY_ELEMENT_ITER = 3 - # the rules # any libxml C argument/variable is prefixed with c_ @@ -563,7 +555,7 @@ doc = self._doc while c_node is not NULL and c < stop: if _isElement(c_node): - ret = tree.PyList_Append(result, _elementFactory(doc, c_node)) + ret = python.PyList_Append(result, _elementFactory(doc, c_node)) if ret: raise c = c + 1 @@ -656,7 +648,7 @@ c_node = self._c_node.children while c_node is not NULL: if _isElement(c_node): - ret = tree.PyList_Append(result, _elementFactory(doc, c_node)) + ret = python.PyList_Append(result, _elementFactory(doc, c_node)) if ret: raise c_node = c_node.next @@ -711,11 +703,11 @@ else: c_ns_href = c_node.ns.href element_class = _find_element_class(c_ns_href, c_node.name) - result = element_class() elif c_node.type == tree.XML_COMMENT_NODE: - result = _Comment() + element_class = _Comment else: assert 0, "Unknown node type: %s" % c_node.type + result = element_class() result._doc = doc result._c_node = c_node result._proxy_type = PROXY_ELEMENT @@ -1041,7 +1033,7 @@ def XML(text): cdef xmlDoc* c_doc - if isinstance(text, unicode): + if python.PyUnicode_Check(text): text = _stripDeclaration(_utf8(text)) c_doc = theParser.parseDoc(text, None) return _documentFactory(c_doc).getroot() @@ -1228,13 +1220,13 @@ cdef _dumpToFile(f, xmlDoc* c_doc, xmlNode* c_node): - cdef tree.PyObject* o + cdef python.PyObject* o cdef tree.xmlOutputBuffer* c_buffer - if not tree.PyFile_Check(f): + if not python.PyFile_Check(f): raise ValueError, "Not a file" - o = f - c_buffer = tree.xmlOutputBufferCreateFile(tree.PyFile_AsFile(o), NULL) + o = f + c_buffer = tree.xmlOutputBufferCreateFile(python.PyFile_AsFile(o), NULL) tree.xmlNodeDumpOutput(c_buffer, c_doc, c_node, 0, 0, NULL) # dump next node if it's a text node _dumpNextNode(c_buffer, c_doc, c_node, NULL) @@ -1372,9 
+1364,10 @@ c_target = c_tail c_tail = c_next -cdef int _isElement(xmlNode* c_node): - return (c_node.type == tree.XML_ELEMENT_NODE or - c_node.type == tree.XML_COMMENT_NODE) +### see etree.h: +## cdef int _isElement(xmlNode* c_node): +## return (c_node.type == tree.XML_ELEMENT_NODE or +## c_node.type == tree.XML_COMMENT_NODE) cdef xmlNode* _deleteSlice(xmlNode* c_node, int start, int stop): """Delete slice, starting with c_node, start counting at start, end at stop. @@ -1407,15 +1400,15 @@ cdef object funicode(char* s): if isutf8(s): - return tree.PyUnicode_DecodeUTF8(s, tree.strlen(s), "strict") - return tree.PyString_FromStringAndSize(s, tree.strlen(s)) + return python.PyUnicode_DecodeUTF8(s, tree.strlen(s), NULL) + return python.PyString_FromString(s) cdef object _utf8(object s): - if tree.PyString_Check(s): + if python.PyString_Check(s): assert not isutf8(s), "All strings must be Unicode or ASCII" return s - elif tree.PyUnicode_Check(s): - return tree.PyUnicode_AsUTF8String(s) + elif python.PyUnicode_Check(s): + return python.PyUnicode_AsUTF8String(s) else: raise TypeError, "Argument must be string or unicode." 
@@ -1433,25 +1426,23 @@ if c_pos is NULL: raise ValueError, "Invalid tag name" nslen = c_pos - c_tag - 1 - ns = tree.PyString_FromStringAndSize(c_tag+1, nslen) - c_tag = c_pos + 1 + ns = python.PyString_FromStringAndSize(c_tag+1, nslen) + tag = python.PyString_FromString(c_pos+1) else: ns = None - tag = tree.PyString_FromString(c_tag) return ns, tag cdef object _namespacedName(xmlNode* c_node): cdef char* href cdef char* name - cdef object s name = c_node.name if c_node.ns is NULL or c_node.ns.href is NULL: return funicode(name) else: href = c_node.ns.href - s = tree.PyString_FromFormat("{%s}%s", href, name) + s = python.PyString_FromFormat("{%s}%s", href, name) if isutf8(href) or isutf8(name): - return unicode(s, 'UTF-8') + return python.PyUnicode_FromEncodedObject(s, 'UTF-8', NULL) else: return s Modified: lxml/branch/scoder2/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/nsclasses.pxi (original) +++ lxml/branch/scoder2/src/lxml/nsclasses.pxi Sun Mar 12 14:14:33 2006 @@ -77,11 +77,11 @@ self[name] = item def __setitem__(self, name, item): - if isinstance(item, (type, types.ClassType)) and issubclass(item, ElementBase): + if python.PyType_Check(item) and issubclass(item, ElementBase): d = self._classes elif name is None: raise NamespaceRegistryError, "Registered name can only be None for elements." 
- elif isinstance(item, (type, types.ClassType)) and issubclass(item, XSLTElement): + elif python.PyType_Check(item) and issubclass(item, XSLTElement): d = self._xslt_elements elif callable(item): d = self._extensions @@ -99,10 +99,10 @@ return self._get(name_utf) cdef object _get(self, char* c_name): - cdef tree.PyObject* dict_result - dict_result = tree.PyDict_GetItemString(self._classes, c_name) + cdef python.PyObject* dict_result + dict_result = python.PyDict_GetItemString(self._classes, c_name) if dict_result is NULL: - dict_result = tree.PyDict_GetItemString(self._extensions, c_name) + dict_result = python.PyDict_GetItemString(self._extensions, c_name) if dict_result is NULL: raise KeyError, "Name not registered." return dict_result @@ -137,8 +137,8 @@ self._extensions[name_utf] = item cdef object _get(self, char* c_name): - cdef tree.PyObject* dict_result - dict_result = tree.PyDict_GetItemString(self._extensions, c_name) + cdef python.PyObject* dict_result + dict_result = python.PyDict_GetItemString(self._extensions, c_name) if dict_result is NULL: raise KeyError, "Name not registered." 
return dict_result @@ -164,36 +164,36 @@ cdef _find_extensions(namespaces): """Returns a dictionary that maps each namespace in the provided list to a dictionary of name-function mappings defined under that namespace.""" - cdef tree.PyObject* dict_result + cdef python.PyObject* dict_result cdef char* c_ns_utf extension_dict = {} for ns_uri in namespaces: if ns_uri is None: - dict_result = tree.PyDict_GetItem( + dict_result = python.PyDict_GetItem( __FUNCTION_NAMESPACE_REGISTRIES, None) else: ns_utf = _utf8(ns_uri) c_ns_utf = ns_utf - dict_result = tree.PyDict_GetItemString( + dict_result = python.PyDict_GetItemString( __FUNCTION_NAMESPACE_REGISTRIES, c_ns_utf) if dict_result is NULL: continue extensions = (<_NamespaceRegistry>dict_result)._extensions if extensions: - tree.PyDict_SetItemString( + python.PyDict_SetItemString( extension_dict, c_ns_utf, extensions) return extension_dict cdef object _find_element_class(char* c_namespace_utf, char* c_element_name_utf): - cdef tree.PyObject* dict_result + cdef python.PyObject* dict_result cdef _NamespaceRegistry registry cdef object result if c_namespace_utf is not NULL: - dict_result = tree.PyDict_GetItemString( + dict_result = python.PyDict_GetItemString( __NAMESPACE_REGISTRIES, c_namespace_utf) else: - dict_result = tree.PyDict_GetItem( + dict_result = python.PyDict_GetItem( __NAMESPACE_REGISTRIES, None) if dict_result is NULL: return _Element @@ -202,13 +202,13 @@ classes = registry._classes if c_element_name_utf is not NULL: - dict_result = tree.PyDict_GetItemString( + dict_result = python.PyDict_GetItemString( classes, c_element_name_utf) else: dict_result = NULL if dict_result is NULL: - dict_result = tree.PyDict_GetItem(classes, None) + dict_result = python.PyDict_GetItem(classes, None) if dict_result is not NULL: return dict_result Modified: lxml/branch/scoder2/src/lxml/proxy.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/proxy.pxi (original) 
+++ lxml/branch/scoder2/src/lxml/proxy.pxi Sun Mar 12 14:14:33 2006 @@ -7,8 +7,8 @@ cdef struct _ProxyRef cdef struct _ProxyRef: - tree.PyObject* proxy - int type + python.PyObject* proxy + LXML_PROXY_TYPE type _ProxyRef* next ctypedef _ProxyRef ProxyRef @@ -30,43 +30,31 @@ cdef int hasProxy(xmlNode* c_node): return c_node._private is not NULL -cdef ProxyRef* createProxyRef(_NodeBase proxy, int proxy_type): - """Create a backpointer proxy reference for a proxy and type. - """ - cdef ProxyRef* result - result = cstd.malloc(sizeof(ProxyRef)) - result.proxy = proxy - result.type = proxy_type - result.next = NULL - return result - cdef void registerProxy(_NodeBase proxy, int proxy_type): """Register a proxy and type for the node it's proxying for. """ + cdef xmlNode* c_node cdef ProxyRef* ref - cdef ProxyRef* prev_ref # cannot register for NULL - if proxy._c_node is NULL: + c_node = proxy._c_node + if c_node is NULL: return # XXX should we check whether we ran into proxy_type before? #print "registering for:", proxy._c_node - ref = proxy._c_node._private - if ref is NULL: - proxy._c_node._private = createProxyRef(proxy, proxy_type) - return - while ref is not NULL: - prev_ref = ref - ref = ref.next - prev_ref.next = createProxyRef(proxy, proxy_type) + ref = cstd.malloc(sizeof(ProxyRef)) + ref.proxy = proxy + ref.type = proxy_type + ref.next = c_node._private + c_node._private = ref # prepend cdef void unregisterProxy(_NodeBase proxy): """Unregister a proxy for the node it's proxying for. 
""" - cdef tree.PyObject* proxy_ref + cdef python.PyObject* proxy_ref cdef ProxyRef* ref cdef ProxyRef* prev_ref cdef xmlNode* c_node - proxy_ref = proxy + proxy_ref = proxy c_node = proxy._c_node ref = c_node._private if ref.proxy == proxy_ref: Modified: lxml/branch/scoder2/src/lxml/tree.pxd ============================================================================== --- lxml/branch/scoder2/src/lxml/tree.pxd (original) +++ lxml/branch/scoder2/src/lxml/tree.pxd Sun Mar 12 14:14:33 2006 @@ -4,26 +4,6 @@ ctypedef struct FILE cdef int strlen(char* s) -cdef extern from "Python.h": - ctypedef struct PyObject - - cdef FILE* PyFile_AsFile(PyObject* p) - cdef int PyFile_Check(object p) - cdef object PyFile_Name(object p) - cdef void Py_INCREF(object o) - cdef object PyUnicode_DecodeUTF8(char* s, int size, char* errors) - cdef int PyUnicode_Check(object obj) - cdef int PyString_Check(object obj) - cdef object PyUnicode_AsUTF8String(object ustring) - cdef object PyString_FromStringAndSize(char* s, int size) - cdef object PyString_FromString(char* s) - cdef object PyString_FromFormat(char* format, ...) 
- - cdef int PyList_Append(object l, object obj) - cdef int PyDict_SetItemString(object d, char* key, object value) - cdef PyObject* PyDict_GetItemString(object d, char* key) - cdef PyObject* PyDict_GetItem(object d, object key) - cdef extern from "libxml/encoding.h": ctypedef struct xmlCharEncodingHandler cdef xmlCharEncodingHandler* xmlFindCharEncodingHandler(char* name) @@ -189,3 +169,9 @@ cdef extern from "libxml/xmlstring.h": cdef char* xmlStrdup(char* cur) cdef char* xmlStrchr(char* cur, char value) + +cdef extern from "etree.h": + cdef int _isElement(xmlNode* node) + ctypedef enum LXML_PROXY_TYPE: + PROXY_ELEMENT + PROXY_ATTRIB Modified: lxml/branch/scoder2/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/xslt.pxi (original) +++ lxml/branch/scoder2/src/lxml/xslt.pxi Sun Mar 12 14:14:33 2006 @@ -658,18 +658,18 @@ cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL: cdef xpath.xmlNodeSet* resultSet cdef _NodeBase node - if tree.PyUnicode_Check(obj): + if python.PyUnicode_Check(obj): obj = _utf8(obj) - if tree.PyString_Check(obj): + if python.PyString_Check(obj): # XXX use the Wrap variant? Or leak... 
return xpath.xmlXPathNewCString(obj) - if isinstance(obj, types.BooleanType): + if python.PyBool_Check(obj): return xpath.xmlXPathNewBoolean(obj) - if isinstance(obj, (int, float)): + if python.PyNumber_Check(obj): return xpath.xmlXPathNewFloat(obj) if isinstance(obj, _NodeBase): obj = [obj] - if isinstance(obj, (types.ListType, types.TupleType)): + if python.PySequence_Check(obj): resultSet = xpath.xmlXPathNodeSetCreate(NULL) for element in obj: if isinstance(element, _NodeBase): From scoder at codespeak.net Sun Mar 12 18:47:25 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 18:47:27 2006 Subject: [Lxml-checkins] r24264 - lxml/trunk/src/lxml Message-ID: <20060312174725.5654F100BC@code0.codespeak.net> Author: scoder Date: Sun Mar 12 18:47:17 2006 New Revision: 24264 Modified: lxml/trunk/src/lxml/etree.h lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tree.pxd Log: removed duplication of LXML_PROXY_TYPE declaration: declare it in etree.pyx Modified: lxml/trunk/src/lxml/etree.h ============================================================================== --- lxml/trunk/src/lxml/etree.h (original) +++ lxml/trunk/src/lxml/etree.h Sun Mar 12 18:47:17 2006 @@ -7,9 +7,4 @@ ((c_node)->type == XML_ELEMENT_NODE || \ (c_node)->type == XML_COMMENT_NODE) -typedef enum { - PROXY_ELEMENT, - PROXY_ATTRIB -} LXML_PROXY_TYPE; - #endif /*HAS_ETREE_H*/ Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Mar 12 18:47:17 2006 @@ -1,6 +1,5 @@ cimport tree, python -from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs -from tree cimport _isElement, LXML_PROXY_TYPE, PROXY_ATTRIB, PROXY_ELEMENT +from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement from python cimport isinstance, hasattr cimport xpath cimport xslt @@ -17,6 +16,10 @@ # should libxml2/libxslt be allowed to shout? 
DEBUG = False +ctypedef enum LXML_PROXY_TYPE: + PROXY_ELEMENT + PROXY_ATTRIB + # the rules # any libxml C argument/variable is prefixed with c_ Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Sun Mar 12 18:47:17 2006 @@ -172,6 +172,3 @@ cdef extern from "etree.h": cdef int _isElement(xmlNode* node) - ctypedef enum LXML_PROXY_TYPE: - PROXY_ELEMENT - PROXY_ATTRIB From scoder at codespeak.net Sun Mar 12 18:47:54 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 18:47:55 2006 Subject: [Lxml-checkins] r24265 - lxml/branch/scoder2/src/lxml Message-ID: <20060312174754.27A67100BC@code0.codespeak.net> Author: scoder Date: Sun Mar 12 18:47:52 2006 New Revision: 24265 Modified: lxml/branch/scoder2/src/lxml/etree.h lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/tree.pxd Log: merges from trunk Modified: lxml/branch/scoder2/src/lxml/etree.h ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.h (original) +++ lxml/branch/scoder2/src/lxml/etree.h Sun Mar 12 18:47:52 2006 @@ -7,9 +7,4 @@ ((c_node)->type == XML_ELEMENT_NODE || \ (c_node)->type == XML_COMMENT_NODE) -typedef enum { - PROXY_ELEMENT, - PROXY_ATTRIB -} LXML_PROXY_TYPE; - #endif /*HAS_ETREE_H*/ Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Sun Mar 12 18:47:52 2006 @@ -1,6 +1,5 @@ cimport tree, python -from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs -from tree cimport _isElement, LXML_PROXY_TYPE, PROXY_ATTRIB, PROXY_ELEMENT +from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement from python cimport isinstance, hasattr cimport xpath cimport xslt @@ -17,6 +16,10 @@ # should 
libxml2/libxslt be allowed to shout? DEBUG = False +ctypedef enum LXML_PROXY_TYPE: + PROXY_ELEMENT + PROXY_ATTRIB + # the rules # any libxml C argument/variable is prefixed with c_ Modified: lxml/branch/scoder2/src/lxml/tree.pxd ============================================================================== --- lxml/branch/scoder2/src/lxml/tree.pxd (original) +++ lxml/branch/scoder2/src/lxml/tree.pxd Sun Mar 12 18:47:52 2006 @@ -172,6 +172,3 @@ cdef extern from "etree.h": cdef int _isElement(xmlNode* node) - ctypedef enum LXML_PROXY_TYPE: - PROXY_ELEMENT - PROXY_ATTRIB From scoder at codespeak.net Sun Mar 12 20:19:56 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 20:19:58 2006 Subject: [Lxml-checkins] r24267 - lxml/trunk/src/lxml Message-ID: <20060312191956.283B8100BC@code0.codespeak.net> Author: scoder Date: Sun Mar 12 20:19:54 2006 New Revision: 24267 Modified: lxml/trunk/src/lxml/etree.pyx Log: checked in an alternative implementation of funicode - not faster, to be reverted in next revision Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Mar 12 20:19:54 2006 @@ -1406,6 +1406,24 @@ return python.PyUnicode_DecodeUTF8(s, tree.strlen(s), NULL) return python.PyString_FromString(s) +cdef object funicode_alt(char* string): + cdef char* s + cdef int slen + cdef int is_ascii + is_ascii = 1 + s = string + while s[0] != c'\0': + if s[0] & 0x80: + is_ascii = 0 + break + s = s + 1 + slen = (s - string) + if is_ascii: + return python.PyString_FromStringAndSize(string, slen) + + slen = slen + tree.strlen(s) + return python.PyUnicode_DecodeUTF8(string, slen, NULL) + cdef object _utf8(object s): if python.PyString_Check(s): assert not isutf8(s), "All strings must be Unicode or ASCII" From scoder at codespeak.net Sun Mar 12 20:20:28 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun 
Mar 12 20:20:30 2006 Subject: [Lxml-checkins] r24268 - lxml/trunk/src/lxml Message-ID: <20060312192028.8E298100BC@code0.codespeak.net> Author: scoder Date: Sun Mar 12 20:20:27 2006 New Revision: 24268 Modified: lxml/trunk/src/lxml/etree.pyx Log: reverted: removed funicode_alt Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Mar 12 20:20:27 2006 @@ -1406,24 +1406,6 @@ return python.PyUnicode_DecodeUTF8(s, tree.strlen(s), NULL) return python.PyString_FromString(s) -cdef object funicode_alt(char* string): - cdef char* s - cdef int slen - cdef int is_ascii - is_ascii = 1 - s = string - while s[0] != c'\0': - if s[0] & 0x80: - is_ascii = 0 - break - s = s + 1 - slen = (s - string) - if is_ascii: - return python.PyString_FromStringAndSize(string, slen) - - slen = slen + tree.strlen(s) - return python.PyUnicode_DecodeUTF8(string, slen, NULL) - cdef object _utf8(object s): if python.PyString_Check(s): assert not isutf8(s), "All strings must be Unicode or ASCII" From scoder at codespeak.net Sun Mar 12 21:03:32 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 12 21:03:33 2006 Subject: [Lxml-checkins] r24269 - lxml/trunk/src/lxml Message-ID: <20060312200332.881E0100BC@code0.codespeak.net> Author: scoder Date: Sun Mar 12 21:03:30 2006 New Revision: 24269 Modified: lxml/trunk/src/lxml/etree.pyx Log: element.remove() : do not search for the child, we know it is a child if we are the parent - just unlink Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Mar 12 21:03:30 2006 @@ -456,20 +456,12 @@ def remove(self, _Element element): cdef xmlNode* c_node - cdef xmlNode* c_search_node _raiseIfNone(element) - c_search_node = element._c_node - if 
c_search_node.parent is not self._c_node: + c_node = element._c_node + if c_node.parent is not self._c_node: raise ValueError, "Element is not a child of this node." - c_node = self._c_node.children - while c_node is not NULL: - if c_node is c_search_node: - _removeText(c_search_node.next) - tree.xmlUnlinkNode(element._c_node) - return - c_node = c_node.next - else: - raise ValueError, "Matching element could not be found" + _removeText(c_node.next) + tree.xmlUnlinkNode(c_node) # PROPERTIES property tag: From scoder at codespeak.net Mon Mar 13 11:07:17 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 13 11:07:18 2006 Subject: [Lxml-checkins] r24282 - lxml/trunk Message-ID: <20060313100717.0BED6100C3@code0.codespeak.net> Author: scoder Date: Mon Mar 13 11:07:12 2006 New Revision: 24282 Modified: lxml/trunk/bench.py Log: benchmarks for element.index(), including slice searching Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Mon Mar 13 11:07:12 2006 @@ -336,6 +336,18 @@ for i in repeat: child.text + def bench_index(self, root): + for child in root: + root.index(child) + + def bench_index_slice(self, root): + for child in root[5:100]: + root.index(child, 5, 100) + + def bench_index_slice_neg(self, root): + for child in root[-100:-5]: + root.index(child, start=-100, stop=-5) + ############################################################ # Main program ############################################################ From scoder at codespeak.net Mon Mar 13 11:15:22 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 13 11:15:23 2006 Subject: [Lxml-checkins] r24283 - lxml/trunk/src/lxml Message-ID: <20060313101522.8E6CF100C3@code0.codespeak.net> Author: scoder Date: Mon Mar 13 11:15:21 2006 New Revision: 24283 Modified: lxml/trunk/src/lxml/etree.pyx Log: new implementation of element.index() - never touch a child 
more than once (esp: don't call len()) - for negative slice indices, check the slice before traversing everything - not slower if no slice given or element found in slice - faster for negative indices and if element not in slice Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Mon Mar 13 11:15:21 2006 @@ -574,43 +574,73 @@ def index(self, _Element x, start=None, stop=None): cdef int k cdef int l + cdef int c_stop + cdef int c_start cdef xmlNode* c_child - cdef xmlNode* c_search_node + cdef xmlNode* c_start_node _raiseIfNone(x) - - c_search_node = x._c_node - if c_search_node.parent is not self._c_node: + c_child = x._c_node + if c_child.parent is not self._c_node: raise ValueError, "Element is not a child of this node." - k = 0 - c_child = self._c_node.children + if start is None: + c_start = 0 + else: + c_start = start + if stop is None: + c_stop = 0 + else: + c_stop = stop + if c_stop == 0 or \ + c_start >= c_stop and (c_stop > 0 or c_start < 0): + raise ValueError, "list.index(x): x not in slice" + + # for negative slice indices, check slice before searching index + if c_start < 0 or c_stop < 0: + # start from right, at most up to leftmost(c_start, c_stop) + if c_start < c_stop: + k = -c_start + else: + k = -c_stop + c_start_node = self._c_node.last + l = 1 + while c_start_node != c_child and l < k: + if _isElement(c_start_node): + l = l + 1 + c_start_node = c_start_node.prev + if c_start_node == c_child: + # found! before slice end? 
+ if c_stop < 0 and l <= -c_stop: + raise ValueError, "list.index(x): x not in slice" + elif c_start < 0: + raise ValueError, "list.index(x): x not in slice" - # account for negative start and stop by turning them into positive - l = -1 - if start is not None and start < 0: - l = self.__len__() - start = l + start - if stop is not None and stop < 0: - if l < 0: - l = self.__len__() - stop = l + stop - - while c_child is not NULL: - if _isElement(c_child): - if c_child is c_search_node: - if ((start is None or k >= start) and - (stop is None or k < stop)): - return k - else: - # since there is only a single element to be found - # if we found it out of range, we will not find - # it anymore in the range, so we bail out - raise ValueError, "list.index(x): x not in list" - else: + # now determine the index backwards from child + c_child = c_child.prev + k = 0 + if c_stop > 0: + # we can optimize: stop after c_stop elements if not found + while c_child != NULL and k < c_stop: + if _isElement(c_child): k = k + 1 - c_child = c_child.next - - raise ValueError, "list index(x): x not in list" + c_child = c_child.prev + if k < c_stop: + return k + else: + # traverse all + while c_child != NULL: + if _isElement(c_child): + k = k + 1 + c_child = c_child.prev + if c_start > 0: + if k >= c_start: + return k + else: + return k + if c_start or c_stop: + raise ValueError, "list.index(x): x not in slice" + else: + raise ValueError, "list.index(x): x not in list" def get(self, key, default=None): # XXX more redundancy, but might be slightly faster than From scoder at codespeak.net Mon Mar 13 14:01:18 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 13 14:01:20 2006 Subject: [Lxml-checkins] r24291 - lxml/trunk/src/lxml Message-ID: <20060313130118.AFFE2100CB@code0.codespeak.net> Author: scoder Date: Mon Mar 13 14:01:16 2006 New Revision: 24291 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/nsclasses.pxi Log: clean up Modified: 
lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Mon Mar 13 14:01:16 2006 @@ -1070,6 +1070,7 @@ def dump(_NodeBase elem): assert elem is not None, "Must supply element." + # better, but not ET compatible : _raiseIfNone(elem) _dumpToFile(sys.stdout, elem._doc._c_doc, elem._c_node) def tostring(_NodeBase element, encoding='us-ascii'): Modified: lxml/trunk/src/lxml/nsclasses.pxi ============================================================================== --- lxml/trunk/src/lxml/nsclasses.pxi (original) +++ lxml/trunk/src/lxml/nsclasses.pxi Mon Mar 13 14:01:16 2006 @@ -95,7 +95,6 @@ char* c_element_name_utf): cdef python.PyObject* dict_result cdef _NamespaceRegistry registry - cdef object result if c_namespace_utf is not NULL: dict_result = python.PyDict_GetItemString( __NAMESPACE_CLASSES, c_namespace_utf) @@ -105,7 +104,7 @@ if dict_result is NULL: return _Element - registry = dict_result + registry = <_NamespaceRegistry>dict_result classes = registry._classes if c_element_name_utf is not NULL: @@ -118,7 +117,6 @@ dict_result = python.PyDict_GetItem(classes, None) if dict_result is not NULL: - result = dict_result - return result + return dict_result else: return _Element From scoder at codespeak.net Mon Mar 13 14:03:07 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 13 14:03:09 2006 Subject: [Lxml-checkins] r24292 - lxml/branch/scoder2/src/lxml Message-ID: <20060313130307.680E8100CB@code0.codespeak.net> Author: scoder Date: Mon Mar 13 14:03:05 2006 New Revision: 24292 Modified: lxml/branch/scoder2/src/lxml/python.pxd lxml/branch/scoder2/src/lxml/xslt.pxi Log: some more C-isms for frequently called functions Modified: lxml/branch/scoder2/src/lxml/python.pxd ============================================================================== --- lxml/branch/scoder2/src/lxml/python.pxd (original) +++ 
lxml/branch/scoder2/src/lxml/python.pxd Mon Mar 13 14:03:05 2006 @@ -19,9 +19,11 @@ cdef object PyString_FromFormat(char* format, ...) cdef int PyList_Append(object l, object obj) + cdef int PyDict_SetItem(object d, object key, object value) cdef int PyDict_SetItemString(object d, char* key, object value) cdef PyObject* PyDict_GetItemString(object d, char* key) cdef PyObject* PyDict_GetItem(object d, object key) + cdef int PyDict_DelItem(object d, object key) cdef int PyNumber_Check(object instance) cdef int PyBool_Check(object instance) Modified: lxml/branch/scoder2/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/xslt.pxi (original) +++ lxml/branch/scoder2/src/lxml/xslt.pxi Mon Mar 13 14:03:05 2006 @@ -74,15 +74,17 @@ self._temp_elements = {} self._temp_docs = {} - def _to_utf(self, s): + cdef _to_utf(self, s): "Convert to UTF-8 and keep a reference to the encoded string" + cdef python.PyObject* dict_result if s is None: return None - try: - return self._utf_refs[s] - except KeyError: - utf = self._utf_refs[s] = _utf8(s) - return utf + dict_result = python.PyDict_GetItem(self._utf_refs, s) + if dict_result is not NULL: + return dict_result + utf = _utf8(s) + python.PyDict_SetItem(self._utf_refs, s, utf) + return utf cdef _register_context(self, _Document doc, int allow_none_namespace): self._doc = doc @@ -95,10 +97,7 @@ extensions = _find_all_extensions() if extensions: if not allow_none_namespace: - try: - del extensions[None] - except KeyError: - pass + python.PyDict_DelItem(extensions, None) self._registerExtensionFunctions(extensions) if self._extensions is not None: self.registerExtensionFunctions(self._extensions) From scoder at codespeak.net Mon Mar 13 14:03:41 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 13 14:03:42 2006 Subject: [Lxml-checkins] r24293 - lxml/trunk/src/lxml/tests Message-ID: <20060313130341.81C5C100A0@code0.codespeak.net> Author: 
scoder Date: Mon Mar 13 14:03:40 2006 New Revision: 24293 Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py Log: test if element.xpath() raises TypeError for None prefixes in namespace dict Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Mon Mar 13 14:03:40 2006 @@ -89,6 +89,9 @@ self.assertEquals( [root[0]], root.xpath('//baz:b', {'baz': 'uri:a'})) + self.assertRaises( + TypeError, + root.xpath, '//b', {None: 'uri:a'}) def test_xpath_error(self): tree = self.parse('') From scoder at codespeak.net Mon Mar 13 14:05:53 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 13 14:05:54 2006 Subject: [Lxml-checkins] r24294 - lxml/branch/scoder2/src/lxml Message-ID: <20060313130553.3CF65100CB@code0.codespeak.net> Author: scoder Date: Mon Mar 13 14:05:51 2006 New Revision: 24294 Modified: lxml/branch/scoder2/src/lxml/nsclasses.pxi Log: clean up, fix potential race condition in _find_extensions() Modified: lxml/branch/scoder2/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/nsclasses.pxi (original) +++ lxml/branch/scoder2/src/lxml/nsclasses.pxi Mon Mar 13 14:05:51 2006 @@ -169,19 +169,16 @@ extension_dict = {} for ns_uri in namespaces: if ns_uri is None: - dict_result = python.PyDict_GetItem( - __FUNCTION_NAMESPACE_REGISTRIES, None) + ns_utf = None else: ns_utf = _utf8(ns_uri) - c_ns_utf = ns_utf - dict_result = python.PyDict_GetItemString( - __FUNCTION_NAMESPACE_REGISTRIES, c_ns_utf) + dict_result = python.PyDict_GetItem( + __FUNCTION_NAMESPACE_REGISTRIES, ns_utf) if dict_result is NULL: continue extensions = (<_NamespaceRegistry>dict_result)._extensions if extensions: - python.PyDict_SetItemString( - extension_dict, c_ns_utf, extensions) + 
python.PyDict_SetItem(extension_dict, ns_utf, extensions) return extension_dict cdef object _find_element_class(char* c_namespace_utf, From scoder at codespeak.net Mon Mar 13 14:16:11 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 13 14:16:12 2006 Subject: [Lxml-checkins] r24295 - in lxml/branch/scoder2: . src/lxml src/lxml/tests Message-ID: <20060313131611.BA915100D8@code0.codespeak.net> Author: scoder Date: Mon Mar 13 14:16:04 2006 New Revision: 24295 Modified: lxml/branch/scoder2/bench.py lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/nsclasses.pxi lxml/branch/scoder2/src/lxml/tests/test_xpathevaluator.py Log: merges from trunk Modified: lxml/branch/scoder2/bench.py ============================================================================== --- lxml/branch/scoder2/bench.py (original) +++ lxml/branch/scoder2/bench.py Mon Mar 13 14:16:04 2006 @@ -336,6 +336,18 @@ for i in repeat: child.text + def bench_index(self, root): + for child in root: + root.index(child) + + def bench_index_slice(self, root): + for child in root[5:100]: + root.index(child, 5, 100) + + def bench_index_slice_neg(self, root): + for child in root[-100:-5]: + root.index(child, start=-100, stop=-5) + ############################################################ # Main program ############################################################ Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Mon Mar 13 14:16:04 2006 @@ -456,20 +456,12 @@ def remove(self, _Element element): cdef xmlNode* c_node - cdef xmlNode* c_search_node _raiseIfNone(element) - c_search_node = element._c_node - if c_search_node.parent is not self._c_node: + c_node = element._c_node + if c_node.parent is not self._c_node: raise ValueError, "Element is not a child of this node." 
- c_node = self._c_node.children - while c_node is not NULL: - if c_node is c_search_node: - _removeText(c_search_node.next) - tree.xmlUnlinkNode(element._c_node) - return - c_node = c_node.next - else: - raise ValueError, "Matching element could not be found" + _removeText(c_node.next) + tree.xmlUnlinkNode(c_node) # PROPERTIES property tag: @@ -582,43 +574,73 @@ def index(self, _Element x, start=None, stop=None): cdef int k cdef int l + cdef int c_stop + cdef int c_start cdef xmlNode* c_child - cdef xmlNode* c_search_node + cdef xmlNode* c_start_node _raiseIfNone(x) - - c_search_node = x._c_node - if c_search_node.parent is not self._c_node: + c_child = x._c_node + if c_child.parent is not self._c_node: raise ValueError, "Element is not a child of this node." - k = 0 - c_child = self._c_node.children + if start is None: + c_start = 0 + else: + c_start = start + if stop is None: + c_stop = 0 + else: + c_stop = stop + if c_stop == 0 or \ + c_start >= c_stop and (c_stop > 0 or c_start < 0): + raise ValueError, "list.index(x): x not in slice" + + # for negative slice indices, check slice before searching index + if c_start < 0 or c_stop < 0: + # start from right, at most up to leftmost(c_start, c_stop) + if c_start < c_stop: + k = -c_start + else: + k = -c_stop + c_start_node = self._c_node.last + l = 1 + while c_start_node != c_child and l < k: + if _isElement(c_start_node): + l = l + 1 + c_start_node = c_start_node.prev + if c_start_node == c_child: + # found! before slice end? 
+ if c_stop < 0 and l <= -c_stop: + raise ValueError, "list.index(x): x not in slice" + elif c_start < 0: + raise ValueError, "list.index(x): x not in slice" - # account for negative start and stop by turning them into positive - l = -1 - if start is not None and start < 0: - l = self.__len__() - start = l + start - if stop is not None and stop < 0: - if l < 0: - l = self.__len__() - stop = l + stop - - while c_child is not NULL: - if _isElement(c_child): - if c_child is c_search_node: - if ((start is None or k >= start) and - (stop is None or k < stop)): - return k - else: - # since there is only a single element to be found - # if we found it out of range, we will not find - # it anymore in the range, so we bail out - raise ValueError, "list.index(x): x not in list" - else: + # now determine the index backwards from child + c_child = c_child.prev + k = 0 + if c_stop > 0: + # we can optimize: stop after c_stop elements if not found + while c_child != NULL and k < c_stop: + if _isElement(c_child): k = k + 1 - c_child = c_child.next - - raise ValueError, "list index(x): x not in list" + c_child = c_child.prev + if k < c_stop: + return k + else: + # traverse all + while c_child != NULL: + if _isElement(c_child): + k = k + 1 + c_child = c_child.prev + if c_start > 0: + if k >= c_start: + return k + else: + return k + if c_start or c_stop: + raise ValueError, "list.index(x): x not in slice" + else: + raise ValueError, "list.index(x): x not in list" def get(self, key, default=None): # XXX more redundancy, but might be slightly faster than @@ -1048,6 +1070,7 @@ def dump(_NodeBase elem): assert elem is not None, "Must supply element." 
+ # better, but not ET compatible : _raiseIfNone(elem) _dumpToFile(sys.stdout, elem._doc._c_doc, elem._c_node) def tostring(_NodeBase element, encoding='us-ascii'): Modified: lxml/branch/scoder2/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/nsclasses.pxi (original) +++ lxml/branch/scoder2/src/lxml/nsclasses.pxi Mon Mar 13 14:16:04 2006 @@ -185,7 +185,6 @@ char* c_element_name_utf): cdef python.PyObject* dict_result cdef _NamespaceRegistry registry - cdef object result if c_namespace_utf is not NULL: dict_result = python.PyDict_GetItemString( __NAMESPACE_REGISTRIES, c_namespace_utf) Modified: lxml/branch/scoder2/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_xpathevaluator.py Mon Mar 13 14:16:04 2006 @@ -89,6 +89,9 @@ self.assertEquals( [root[0]], root.xpath('//baz:b', {'baz': 'uri:a'})) + self.assertRaises( + TypeError, + root.xpath, '//b', {None: 'uri:a'}) def test_xpath_error(self): tree = self.parse('') From scoder at codespeak.net Mon Mar 13 14:25:29 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 13 14:25:31 2006 Subject: [Lxml-checkins] r24296 - lxml/trunk/src/lxml/tests Message-ID: <20060313132529.A3A16100D8@code0.codespeak.net> Author: scoder Date: Mon Mar 13 14:25:28 2006 New Revision: 24296 Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py Log: call doctest file for namespace classes Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_nsclasses.py (original) +++ lxml/trunk/src/lxml/tests/test_nsclasses.py Mon Mar 13 14:25:28 2006 @@ -5,7 +5,7 @@ namespace registry mechanism """ -import unittest +import unittest, doctest from common_imports 
import etree, HelperTestCase @@ -68,6 +68,8 @@ def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeNamespaceClassesTestCase)]) + suite.addTests( + [doctest.DocFileSuite('../../../doc/namespace_extensions.txt')]) return suite if __name__ == '__main__': From scoder at codespeak.net Mon Mar 13 14:25:55 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 13 14:25:55 2006 Subject: [Lxml-checkins] r24297 - lxml/branch/scoder2/src/lxml/tests Message-ID: <20060313132555.4378A100D8@code0.codespeak.net> Author: scoder Date: Mon Mar 13 14:25:53 2006 New Revision: 24297 Modified: lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py Log: merges from trunk Modified: lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py Mon Mar 13 14:25:53 2006 @@ -5,7 +5,7 @@ namespace registry mechanism """ -import unittest +import unittest, doctest from common_imports import etree, HelperTestCase @@ -68,6 +68,8 @@ def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeNamespaceClassesTestCase)]) + suite.addTests( + [doctest.DocFileSuite('../../../doc/namespace_extensions.txt')]) return suite if __name__ == '__main__': From scoder at codespeak.net Mon Mar 13 14:40:29 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 13 14:40:31 2006 Subject: [Lxml-checkins] r24298 - lxml/trunk/doc Message-ID: <20060313134029.E9F6F100D2@code0.codespeak.net> Author: scoder Date: Mon Mar 13 14:40:28 2006 New Revision: 24298 Modified: lxml/trunk/doc/namespace_extensions.txt Log: fix doctests Modified: lxml/trunk/doc/namespace_extensions.txt ============================================================================== --- lxml/trunk/doc/namespace_extensions.txt (original) +++ 
lxml/trunk/doc/namespace_extensions.txt Mon Mar 13 14:40:28 2006 @@ -43,7 +43,7 @@ >>> namespace = Namespace('http://hui.de/honk') -and then register the new element with that namespace:: +and then register the new element type with that namespace:: >>> namespace['honk'] = HonkElement @@ -178,5 +178,5 @@ You can then use your new function in XPath expressions: >>> element = XML('') ->>> element.xpath('f:tagname(//honk)', {'f' : 'myfunctions'}) -'honk' +>>> [el.tag for el in element.xpath('f:tagname(//honk)', {'f' : 'myfunctions'})] +['honk'] From scoder at codespeak.net Mon Mar 13 14:41:30 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 13 14:41:32 2006 Subject: [Lxml-checkins] r24299 - lxml/trunk/src/lxml/tests Message-ID: <20060313134130.D1056100D2@code0.codespeak.net> Author: scoder Date: Mon Mar 13 14:41:29 2006 New Revision: 24299 Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py Log: added test for element creation with namespace classes Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_nsclasses.py (original) +++ lxml/trunk/src/lxml/tests/test_nsclasses.py Mon Mar 13 14:41:29 2006 @@ -65,6 +65,20 @@ etree.Namespace(u'ns11').clear() + def test_create_element(self): + bluff_dict = {u'bluff' : self.bluff_class} + maeh_dict = {u'maeh' : self.maeh_class} + + etree.Namespace(u'ns20').update(bluff_dict) + etree.Namespace(u'ns21').update(maeh_dict) + + el = etree.Element("{ns20}bluff") + etree.SubElement(el, "{ns21}maeh") + self.assert_(hasattr(el, 'bluff')) + self.assert_(hasattr(el[0], 'maeh')) + self.assertEquals(el.bluff(), u'bluff') + self.assertEquals(el[0].maeh(), u'maeh') + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeNamespaceClassesTestCase)]) From scoder at codespeak.net Mon Mar 13 14:43:17 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 13 
14:43:18 2006 Subject: [Lxml-checkins] r24300 - in lxml/branch/scoder2: doc src/lxml/tests Message-ID: <20060313134317.1A76B100D2@code0.codespeak.net> Author: scoder Date: Mon Mar 13 14:43:05 2006 New Revision: 24300 Modified: lxml/branch/scoder2/doc/namespace_extensions.txt lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py Log: merges from trunk Modified: lxml/branch/scoder2/doc/namespace_extensions.txt ============================================================================== --- lxml/branch/scoder2/doc/namespace_extensions.txt (original) +++ lxml/branch/scoder2/doc/namespace_extensions.txt Mon Mar 13 14:43:05 2006 @@ -45,7 +45,7 @@ >>> namespace = Namespace('http://hui.de/honk') -and then register the new element with that namespace:: +and then register the new element type with that namespace:: >>> namespace['honk'] = HonkElement Modified: lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py Mon Mar 13 14:43:05 2006 @@ -65,6 +65,20 @@ etree.Namespace(u'ns11').clear() + def test_create_element(self): + bluff_dict = {u'bluff' : self.bluff_class} + maeh_dict = {u'maeh' : self.maeh_class} + + etree.Namespace(u'ns20').update(bluff_dict) + etree.Namespace(u'ns21').update(maeh_dict) + + el = etree.Element("{ns20}bluff") + etree.SubElement(el, "{ns21}maeh") + self.assert_(hasattr(el, 'bluff')) + self.assert_(hasattr(el[0], 'maeh')) + self.assertEquals(el.bluff(), u'bluff') + self.assertEquals(el[0].maeh(), u'maeh') + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeNamespaceClassesTestCase)]) From scoder at codespeak.net Mon Mar 13 15:12:24 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 13 15:12:25 2006 Subject: [Lxml-checkins] r24302 - lxml/trunk/src/lxml Message-ID: 
<20060313141224.28FA2100D8@code0.codespeak.net> Author: scoder Date: Mon Mar 13 15:12:18 2006 New Revision: 24302 Modified: lxml/trunk/src/lxml/etree.pyx Log: bug fix for element namespace setting - revision 24115 introduced a reimplementation of the element namespace setting that broke the namespace classes lookup by setting the element ns too late - test cases that show the bug were added in last revisions - namespaces are now set on xmlNode structures, not on _Elements - requires slight code duplication, but is The Right Thing to do Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Mon Mar 13 15:12:18 2006 @@ -137,24 +137,10 @@ cdef xmlNs* _getNs(self, char* href): """Get or create namespace structure. """ - cdef xmlDoc* c_doc - cdef xmlNode* c_node - cdef xmlNs* c_ns - - c_doc = self._doc._c_doc - c_node = self._c_node - # look for existing ns - c_ns = tree.xmlSearchNsByHref(c_doc, c_node, href) - if c_ns is not NULL: - return c_ns - # create ns if existing ns cannot be found - # try to simulate ElementTree's namespace prefix creation - prefix = self._doc.buildNewPrefix() - c_ns = tree.xmlNewNs(c_node, href, prefix) - return c_ns + return _getNodeNs(self._doc, self._c_node, href) cdef void _setNs(self, char* href): - tree.xmlSetNs(self._c_node, self._getNs(href)) + _setNodeNs(self._doc, self._c_node, href) cdef class _ElementTree: cdef _Document _doc @@ -1002,10 +988,9 @@ c_node = _createElement(c_doc, name_utf, attrib, extra) tree.xmlDocSetRootElement(c_doc, c_node) doc = _documentFactory(c_doc) - result = _elementFactory(doc, c_node) # add namespaces to node if necessary - _setNamespaces(result, ns_utf, nsmap) - return result + _setNamespaces(doc, c_node, ns_utf, nsmap) + return _elementFactory(doc, c_node) def Comment(text=None): cdef _Document doc @@ -1020,16 +1005,17 @@ return _commentFactory(doc, c_node) def 
SubElement(_Element parent, tag, attrib=None, nsmap=None, **extra): - cdef xmlNode* c_node - cdef _Element element + cdef xmlNode* c_node + cdef _Element element + cdef _Document doc _raiseIfNone(parent) ns_utf, name_utf = _getNsTag(tag) - c_node = _createElement(parent._doc._c_doc, name_utf, attrib, extra) - element = _elementFactory(parent._doc, c_node) + doc = parent._doc + c_node = _createElement(doc._c_doc, name_utf, attrib, extra) tree.xmlAddChild(parent._c_node, c_node) # add namespaces to node if necessary - _setNamespaces(element, ns_utf, nsmap) - return element + _setNamespaces(doc, c_node, ns_utf, nsmap) + return _elementFactory(doc, c_node) def ElementTree(_Element element=None, file=None, parser=None): cdef xmlNode* c_next @@ -1110,21 +1096,20 @@ doc = _parseDocument(source, parser) return ElementTree(doc.getroot()) -cdef void _setNamespaces(_NodeBase element, object node_ns_utf, object nsmap): +cdef void _setNamespaces(_Document doc, xmlNode* c_node, + object node_ns_utf, object nsmap): "Set namespace of node and register ns-prefix mappings." - cdef xmlNs* c_ns - cdef xmlNode* c_node - cdef xmlDoc* c_doc - cdef char* c_prefix - cdef char* c_href + cdef xmlNs* c_ns + cdef xmlDoc* c_doc + cdef char* c_prefix + cdef char* c_href if not nsmap: if node_ns_utf is not None: - element._setNs(node_ns_utf) + _setNodeNs(doc, c_node, node_ns_utf) return - c_node = element._c_node - c_doc = element._doc._c_doc + c_doc = doc._c_doc for prefix, href in nsmap.items(): href_utf = _utf8(href) c_href = href_utf @@ -1142,7 +1127,29 @@ node_ns_utf = None if node_ns_utf is not None: - element._setNs(node_ns_utf) + _setNodeNs(doc, c_node, node_ns_utf) + +cdef xmlNs* _getNodeNs(_Document doc, xmlNode* c_node, char* href): + """Get or create namespace structure. 
+ """ + cdef xmlDoc* c_doc + cdef xmlNs* c_ns + + c_doc = doc._c_doc + # look for existing ns + c_ns = tree.xmlSearchNsByHref(c_doc, c_node, href) + if c_ns is not NULL: + return c_ns + # create ns if existing ns cannot be found + # try to simulate ElementTree's namespace prefix creation + prefix = doc.buildNewPrefix() + c_ns = tree.xmlNewNs(c_node, href, prefix) + return c_ns + +cdef void _setNodeNs(_Document doc, xmlNode* c_node, char* href): + cdef xmlNs* c_ns + c_ns = _getNodeNs(doc, c_node, href) + tree.xmlSetNs(c_node, c_ns) # include submodules From scoder at codespeak.net Mon Mar 13 15:23:07 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 13 15:23:09 2006 Subject: [Lxml-checkins] r24303 - lxml/trunk/src/lxml Message-ID: <20060313142307.539D0100D8@code0.codespeak.net> Author: scoder Date: Mon Mar 13 15:22:56 2006 New Revision: 24303 Modified: lxml/trunk/src/lxml/etree.pyx Log: refactoring: make namespace lookup and setting methods in _Document Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Mon Mar 13 15:22:56 2006 @@ -68,6 +68,25 @@ self._ns_counter = self._ns_counter + 1 return ns + cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, char* href): + """Get or create namespace structure. 
+ """ + cdef xmlNs* c_ns + # look for existing ns + c_ns = tree.xmlSearchNsByHref(self._c_doc, c_node, href) + if c_ns is not NULL: + return c_ns + # create ns if existing ns cannot be found + # try to simulate ElementTree's namespace prefix creation + prefix = self.buildNewPrefix() + c_ns = tree.xmlNewNs(c_node, href, prefix) + return c_ns + + cdef void _setNodeNs(self, xmlNode* c_node, char* href): + cdef xmlNs* c_ns + c_ns = self._findOrBuildNodeNs(c_node, href) + tree.xmlSetNs(c_node, c_ns) + def getroot(self): cdef xmlNode* c_node c_node = tree.xmlDocGetRootElement(self._c_doc) @@ -134,14 +153,6 @@ this if they recursively call _init() in the superclasses. """ - cdef xmlNs* _getNs(self, char* href): - """Get or create namespace structure. - """ - return _getNodeNs(self._doc, self._c_node, href) - - cdef void _setNs(self, char* href): - _setNodeNs(self._doc, self._c_node, href) - cdef class _ElementTree: cdef _Document _doc cdef _NodeBase _context_node @@ -460,8 +471,7 @@ tree.xmlNodeSetName(self._c_node, text) if ns is None: return - c_ns = self._getNs(ns) - tree.xmlSetNs(self._c_node, c_ns) + self._doc._setNodeNs(self._c_node, ns) # not in ElementTree, read-only property prefix: @@ -790,7 +800,7 @@ if ns is None: tree.xmlSetProp(self._c_node, tag, value) else: - c_ns = self._getNs(ns) + c_ns = self._doc._findOrBuildNodeNs(self._c_node, ns) tree.xmlSetNsProp(self._c_node, c_ns, tag, value) def __delitem__(self, key): @@ -1106,7 +1116,7 @@ if not nsmap: if node_ns_utf is not None: - _setNodeNs(doc, c_node, node_ns_utf) + doc._setNodeNs(c_node, node_ns_utf) return c_doc = doc._c_doc @@ -1127,29 +1137,7 @@ node_ns_utf = None if node_ns_utf is not None: - _setNodeNs(doc, c_node, node_ns_utf) - -cdef xmlNs* _getNodeNs(_Document doc, xmlNode* c_node, char* href): - """Get or create namespace structure. 
- """ - cdef xmlDoc* c_doc - cdef xmlNs* c_ns - - c_doc = doc._c_doc - # look for existing ns - c_ns = tree.xmlSearchNsByHref(c_doc, c_node, href) - if c_ns is not NULL: - return c_ns - # create ns if existing ns cannot be found - # try to simulate ElementTree's namespace prefix creation - prefix = doc.buildNewPrefix() - c_ns = tree.xmlNewNs(c_node, href, prefix) - return c_ns - -cdef void _setNodeNs(_Document doc, xmlNode* c_node, char* href): - cdef xmlNs* c_ns - c_ns = _getNodeNs(doc, c_node, href) - tree.xmlSetNs(c_node, c_ns) + doc._setNodeNs(c_node, node_ns_utf) # include submodules From scoder at codespeak.net Mon Mar 13 15:36:15 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 13 15:36:16 2006 Subject: [Lxml-checkins] r24304 - lxml/trunk/src/lxml Message-ID: <20060313143615.F2263100E0@code0.codespeak.net> Author: scoder Date: Mon Mar 13 15:36:14 2006 New Revision: 24304 Modified: lxml/trunk/src/lxml/etree.pyx Log: moved _setNamespace function into _Document, following previous refactoring - namespace structure lookup and setting for nodes is now entirely done by _Document methods Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Mon Mar 13 15:36:14 2006 @@ -63,13 +63,20 @@ #print self._c_doc.dict is theParser._c_dict tree.xmlFreeDoc(self._c_doc) + def getroot(self): + cdef xmlNode* c_node + c_node = tree.xmlDocGetRootElement(self._c_doc) + if c_node is NULL: + return None + return _elementFactory(self, c_node) + def buildNewPrefix(self): ns = "ns%d" % self._ns_counter self._ns_counter = self._ns_counter + 1 return ns cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, char* href): - """Get or create namespace structure. + """Get or create namespace structure for a node. 
""" cdef xmlNs* c_ns # look for existing ns @@ -83,16 +90,44 @@ return c_ns cdef void _setNodeNs(self, xmlNode* c_node, char* href): + "Lookup namespace structure and set it for the node." cdef xmlNs* c_ns c_ns = self._findOrBuildNodeNs(c_node, href) tree.xmlSetNs(c_node, c_ns) - def getroot(self): - cdef xmlNode* c_node - c_node = tree.xmlDocGetRootElement(self._c_doc) - if c_node is NULL: - return None - return _elementFactory(self, c_node) + cdef void _setNodeNamespaces(self, xmlNode* c_node, + object node_ns_utf, object nsmap): + """Lookup current namespace prefixes, then set namespace structure for + node and register new ns-prefix mappings. + """ + cdef xmlNs* c_ns + cdef xmlDoc* c_doc + cdef char* c_prefix + cdef char* c_href + if not nsmap: + if node_ns_utf is not None: + self._setNodeNs(c_node, node_ns_utf) + return + + c_doc = self._c_doc + for prefix, href in nsmap.items(): + href_utf = _utf8(href) + c_href = href_utf + if prefix is not None: + prefix_utf = _utf8(prefix) + c_prefix = prefix_utf + else: + c_prefix = NULL + # add namespace with prefix if ns is not already known + c_ns = tree.xmlSearchNsByHref(c_doc, c_node, c_href) + if c_ns is NULL: + c_ns = tree.xmlNewNs(c_node, c_href, c_prefix) + if href_utf == node_ns_utf: + tree.xmlSetNs(c_node, c_ns) + node_ns_utf = None + + if node_ns_utf is not None: + self._setNodeNs(c_node, node_ns_utf) cdef _Document _parseDocument(source, parser): cdef xmlDoc* c_doc @@ -999,7 +1034,7 @@ tree.xmlDocSetRootElement(c_doc, c_node) doc = _documentFactory(c_doc) # add namespaces to node if necessary - _setNamespaces(doc, c_node, ns_utf, nsmap) + doc._setNodeNamespaces(c_node, ns_utf, nsmap) return _elementFactory(doc, c_node) def Comment(text=None): @@ -1024,7 +1059,7 @@ c_node = _createElement(doc._c_doc, name_utf, attrib, extra) tree.xmlAddChild(parent._c_node, c_node) # add namespaces to node if necessary - _setNamespaces(doc, c_node, ns_utf, nsmap) + doc._setNodeNamespaces(c_node, ns_utf, nsmap) return 
_elementFactory(doc, c_node) def ElementTree(_Element element=None, file=None, parser=None): @@ -1106,39 +1141,6 @@ doc = _parseDocument(source, parser) return ElementTree(doc.getroot()) -cdef void _setNamespaces(_Document doc, xmlNode* c_node, - object node_ns_utf, object nsmap): - "Set namespace of node and register ns-prefix mappings." - cdef xmlNs* c_ns - cdef xmlDoc* c_doc - cdef char* c_prefix - cdef char* c_href - - if not nsmap: - if node_ns_utf is not None: - doc._setNodeNs(c_node, node_ns_utf) - return - - c_doc = doc._c_doc - for prefix, href in nsmap.items(): - href_utf = _utf8(href) - c_href = href_utf - if prefix is not None: - prefix_utf = _utf8(prefix) - c_prefix = prefix_utf - else: - c_prefix = NULL - # add namespace with prefix if ns is not already known - c_ns = tree.xmlSearchNsByHref(c_doc, c_node, c_href) - if c_ns is NULL: - c_ns = tree.xmlNewNs(c_node, c_href, c_prefix) - if href_utf == node_ns_utf: - tree.xmlSetNs(c_node, c_ns) - node_ns_utf = None - - if node_ns_utf is not None: - doc._setNodeNs(c_node, node_ns_utf) - # include submodules include "nsclasses.pxi" # Namespace implementation and registry From scoder at codespeak.net Mon Mar 13 15:40:21 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 13 15:40:22 2006 Subject: [Lxml-checkins] r24305 - lxml/trunk/src/lxml Message-ID: <20060313144021.57E86100E0@code0.codespeak.net> Author: scoder Date: Mon Mar 13 15:40:19 2006 New Revision: 24305 Modified: lxml/trunk/src/lxml/etree.pyx Log: clean up Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Mon Mar 13 15:40:19 2006 @@ -1024,10 +1024,9 @@ # module-level API for ElementTree def Element(tag, attrib=None, nsmap=None, **extra): - cdef _Document doc - cdef _Element result cdef xmlNode* c_node cdef xmlDoc* c_doc + cdef _Document doc ns_utf, name_utf = _getNsTag(tag) c_doc = 
theParser.newDoc() c_node = _createElement(c_doc, name_utf, attrib, extra) @@ -1051,7 +1050,6 @@ def SubElement(_Element parent, tag, attrib=None, nsmap=None, **extra): cdef xmlNode* c_node - cdef _Element element cdef _Document doc _raiseIfNone(parent) ns_utf, name_utf = _getNsTag(tag) From scoder at codespeak.net Mon Mar 13 15:42:47 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 13 15:42:48 2006 Subject: [Lxml-checkins] r24306 - lxml/branch/scoder2/src/lxml Message-ID: <20060313144247.0A4EF100E0@code0.codespeak.net> Author: scoder Date: Mon Mar 13 15:42:46 2006 New Revision: 24306 Modified: lxml/branch/scoder2/src/lxml/etree.pyx Log: merges from trunk Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Mon Mar 13 15:42:46 2006 @@ -63,11 +63,6 @@ #print self._c_doc.dict is theParser._c_dict tree.xmlFreeDoc(self._c_doc) - def buildNewPrefix(self): - ns = "ns%d" % self._ns_counter - self._ns_counter = self._ns_counter + 1 - return ns - def getroot(self): cdef xmlNode* c_node c_node = tree.xmlDocGetRootElement(self._c_doc) @@ -75,6 +70,65 @@ return None return _elementFactory(self, c_node) + def buildNewPrefix(self): + ns = "ns%d" % self._ns_counter + self._ns_counter = self._ns_counter + 1 + return ns + + cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, char* href): + """Get or create namespace structure for a node. + """ + cdef xmlNs* c_ns + # look for existing ns + c_ns = tree.xmlSearchNsByHref(self._c_doc, c_node, href) + if c_ns is not NULL: + return c_ns + # create ns if existing ns cannot be found + # try to simulate ElementTree's namespace prefix creation + prefix = self.buildNewPrefix() + c_ns = tree.xmlNewNs(c_node, href, prefix) + return c_ns + + cdef void _setNodeNs(self, xmlNode* c_node, char* href): + "Lookup namespace structure and set it for the node." 
+ cdef xmlNs* c_ns + c_ns = self._findOrBuildNodeNs(c_node, href) + tree.xmlSetNs(c_node, c_ns) + + cdef void _setNodeNamespaces(self, xmlNode* c_node, + object node_ns_utf, object nsmap): + """Lookup current namespace prefixes, then set namespace structure for + node and register new ns-prefix mappings. + """ + cdef xmlNs* c_ns + cdef xmlDoc* c_doc + cdef char* c_prefix + cdef char* c_href + if not nsmap: + if node_ns_utf is not None: + self._setNodeNs(c_node, node_ns_utf) + return + + c_doc = self._c_doc + for prefix, href in nsmap.items(): + href_utf = _utf8(href) + c_href = href_utf + if prefix is not None: + prefix_utf = _utf8(prefix) + c_prefix = prefix_utf + else: + c_prefix = NULL + # add namespace with prefix if ns is not already known + c_ns = tree.xmlSearchNsByHref(c_doc, c_node, c_href) + if c_ns is NULL: + c_ns = tree.xmlNewNs(c_node, c_href, c_prefix) + if href_utf == node_ns_utf: + tree.xmlSetNs(c_node, c_ns) + node_ns_utf = None + + if node_ns_utf is not None: + self._setNodeNs(c_node, node_ns_utf) + cdef _Document _parseDocument(source, parser): cdef xmlDoc* c_doc # XXX simplistic (c)StringIO support @@ -134,28 +188,6 @@ this if they recursively call _init() in the superclasses. """ - cdef xmlNs* _getNs(self, char* href): - """Get or create namespace structure. 
- """ - cdef xmlDoc* c_doc - cdef xmlNode* c_node - cdef xmlNs* c_ns - - c_doc = self._doc._c_doc - c_node = self._c_node - # look for existing ns - c_ns = tree.xmlSearchNsByHref(c_doc, c_node, href) - if c_ns is not NULL: - return c_ns - # create ns if existing ns cannot be found - # try to simulate ElementTree's namespace prefix creation - prefix = self._doc.buildNewPrefix() - c_ns = tree.xmlNewNs(c_node, href, prefix) - return c_ns - - cdef void _setNs(self, char* href): - tree.xmlSetNs(self._c_node, self._getNs(href)) - cdef class _ElementTree: cdef _Document _doc cdef _NodeBase _context_node @@ -474,8 +506,7 @@ tree.xmlNodeSetName(self._c_node, text) if ns is None: return - c_ns = self._getNs(ns) - tree.xmlSetNs(self._c_node, c_ns) + self._doc._setNodeNs(self._c_node, ns) # not in ElementTree, read-only property prefix: @@ -804,7 +835,7 @@ if ns is None: tree.xmlSetProp(self._c_node, tag, value) else: - c_ns = self._getNs(ns) + c_ns = self._doc._findOrBuildNodeNs(self._c_node, ns) tree.xmlSetNsProp(self._c_node, c_ns, tag, value) def __delitem__(self, key): @@ -993,19 +1024,17 @@ # module-level API for ElementTree def Element(_tag, attrib=None, nsmap=None, **_extra): - cdef _Document doc - cdef _Element result cdef xmlNode* c_node cdef xmlDoc* c_doc + cdef _Document doc ns_utf, name_utf = _getNsTag(_tag) c_doc = theParser.newDoc() c_node = _createElement(c_doc, name_utf, attrib, _extra) tree.xmlDocSetRootElement(c_doc, c_node) doc = _documentFactory(c_doc) - result = _elementFactory(doc, c_node) # add namespaces to node if necessary - _setNamespaces(result, ns_utf, nsmap) - return result + doc._setNodeNamespaces(c_node, ns_utf, nsmap) + return _elementFactory(doc, c_node) def Comment(text=None): cdef _Document doc @@ -1020,16 +1049,17 @@ return _commentFactory(doc, c_node) def SubElement(_Element _parent, _tag, attrib=None, nsmap=None, **_extra): - cdef xmlNode* c_node - cdef _Element element + cdef xmlNode* c_node + cdef _Document doc _raiseIfNone(_parent) 
ns_utf, name_utf = _getNsTag(_tag) - c_node = _createElement(_parent._doc._c_doc, name_utf, attrib, _extra) - element = _elementFactory(_parent._doc, c_node) + doc = _parent._doc + c_node = _createElement(doc._c_doc, name_utf, attrib, _extra) + element = _elementFactory(doc, c_node) tree.xmlAddChild(_parent._c_node, c_node) # add namespaces to node if necessary - _setNamespaces(element, ns_utf, nsmap) - return element + doc._setNodeNamespaces(c_node, ns_utf, nsmap) + return _elementFactory(doc, c_node) def ElementTree(_Element element=None, file=None, parser=None): cdef xmlNode* c_next @@ -1110,40 +1140,6 @@ doc = _parseDocument(source, parser) return ElementTree(doc.getroot()) -cdef void _setNamespaces(_NodeBase element, object node_ns_utf, object nsmap): - "Set namespace of node and register ns-prefix mappings." - cdef xmlNs* c_ns - cdef xmlNode* c_node - cdef xmlDoc* c_doc - cdef char* c_prefix - cdef char* c_href - - if not nsmap: - if node_ns_utf is not None: - element._setNs(node_ns_utf) - return - - c_node = element._c_node - c_doc = element._doc._c_doc - for prefix, href in nsmap.items(): - href_utf = _utf8(href) - c_href = href_utf - if prefix is not None: - prefix_utf = _utf8(prefix) - c_prefix = prefix_utf - else: - c_prefix = NULL - # add namespace with prefix if ns is not already known - c_ns = tree.xmlSearchNsByHref(c_doc, c_node, c_href) - if c_ns is NULL: - c_ns = tree.xmlNewNs(c_node, c_href, c_prefix) - if href_utf == node_ns_utf: - tree.xmlSetNs(c_node, c_ns) - node_ns_utf = None - - if node_ns_utf is not None: - element._setNs(node_ns_utf) - # include submodules include "nsclasses.pxi" # Namespace implementation and registry From scoder at codespeak.net Mon Mar 13 16:39:03 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 13 16:39:04 2006 Subject: [Lxml-checkins] r24309 - lxml/trunk/src/lxml Message-ID: <20060313153903.5E7B9100D7@code0.codespeak.net> Author: scoder Date: Mon Mar 13 16:39:01 2006 New Revision: 24309 
Modified: lxml/trunk/src/lxml/etree.h lxml/trunk/src/lxml/nsclasses.pxi lxml/trunk/src/lxml/python.pxd Log: clean up Modified: lxml/trunk/src/lxml/etree.h ============================================================================== --- lxml/trunk/src/lxml/etree.h (original) +++ lxml/trunk/src/lxml/etree.h Mon Mar 13 16:39:01 2006 @@ -2,7 +2,7 @@ #define HAS_ETREE_H #define isinstance(a,b) PyObject_IsInstance(a,b) -#define hasattr(a,b) PyObject_HasAttrString(a,b) +#define hasattr(a,b) PyObject_HasAttr(a,b) #define _isElement(c_node) \ ((c_node)->type == XML_ELEMENT_NODE || \ (c_node)->type == XML_COMMENT_NODE) Modified: lxml/trunk/src/lxml/nsclasses.pxi ============================================================================== --- lxml/trunk/src/lxml/nsclasses.pxi (original) +++ lxml/trunk/src/lxml/nsclasses.pxi Mon Mar 13 16:39:01 2006 @@ -77,11 +77,11 @@ name_utf = _utf8(name) return self._get(name_utf) - cdef object _get(self, char* c_name): + cdef object _get(self, object name): cdef python.PyObject* dict_result - dict_result = python.PyDict_GetItemString(self._classes, c_name) + dict_result = python.PyDict_GetItem(self._classes, name) if dict_result is NULL: - dict_result = python.PyDict_GetItemString(self._extensions, c_name) + dict_result = python.PyDict_GetItem(self._extensions, name) if dict_result is NULL: raise KeyError, "Name not registered." 
return dict_result Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Mon Mar 13 16:39:01 2006 @@ -28,8 +28,8 @@ cdef int PySequence_Check(object instance) cdef int PyType_Check(object instance) cdef int PyObject_IsInstance(object instance, object classes) - cdef int PyObject_HasAttrString(object obj, char* attr) + cdef int PyObject_HasAttr(object obj, object attr) cdef extern from "etree.h": # redefines some functions as macros cdef int isinstance(object instance, object classes) - cdef int hasattr(object obj, char* attr) + cdef int hasattr(object obj, object attr) From scoder at codespeak.net Mon Mar 13 16:41:36 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 13 16:41:37 2006 Subject: [Lxml-checkins] r24310 - lxml/branch/scoder2/src/lxml Message-ID: <20060313154136.33C4B100D7@code0.codespeak.net> Author: scoder Date: Mon Mar 13 16:41:34 2006 New Revision: 24310 Modified: lxml/branch/scoder2/src/lxml/etree.h lxml/branch/scoder2/src/lxml/nsclasses.pxi lxml/branch/scoder2/src/lxml/python.pxd Log: merges from trunk Modified: lxml/branch/scoder2/src/lxml/etree.h ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.h (original) +++ lxml/branch/scoder2/src/lxml/etree.h Mon Mar 13 16:41:34 2006 @@ -2,7 +2,7 @@ #define HAS_ETREE_H #define isinstance(a,b) PyObject_IsInstance(a,b) -#define hasattr(a,b) PyObject_HasAttrString(a,b) +#define hasattr(a,b) PyObject_HasAttr(a,b) #define _isElement(c_node) \ ((c_node)->type == XML_ELEMENT_NODE || \ (c_node)->type == XML_COMMENT_NODE) Modified: lxml/branch/scoder2/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/nsclasses.pxi (original) +++ lxml/branch/scoder2/src/lxml/nsclasses.pxi Mon Mar 13 16:41:34 2006 @@ 
-98,11 +98,11 @@ name_utf = _utf8(name) return self._get(name_utf) - cdef object _get(self, char* c_name): + cdef object _get(self, object name): cdef python.PyObject* dict_result - dict_result = python.PyDict_GetItemString(self._classes, c_name) + dict_result = python.PyDict_GetItem(self._classes, name) if dict_result is NULL: - dict_result = python.PyDict_GetItemString(self._extensions, c_name) + dict_result = python.PyDict_GetItem(self._extensions, name) if dict_result is NULL: raise KeyError, "Name not registered." return dict_result @@ -136,9 +136,9 @@ name_utf = _utf8(name) self._extensions[name_utf] = item - cdef object _get(self, char* c_name): + cdef object _get(self, object name): cdef python.PyObject* dict_result - dict_result = python.PyDict_GetItemString(self._extensions, c_name) + dict_result = python.PyDict_GetItem(self._extensions, name) if dict_result is NULL: raise KeyError, "Name not registered." return dict_result Modified: lxml/branch/scoder2/src/lxml/python.pxd ============================================================================== --- lxml/branch/scoder2/src/lxml/python.pxd (original) +++ lxml/branch/scoder2/src/lxml/python.pxd Mon Mar 13 16:41:34 2006 @@ -30,8 +30,8 @@ cdef int PySequence_Check(object instance) cdef int PyType_Check(object instance) cdef int PyObject_IsInstance(object instance, object classes) - cdef int PyObject_HasAttrString(object obj, char* attr) + cdef int PyObject_HasAttr(object obj, object attr) cdef extern from "etree.h": # redefines some functions as macros cdef int isinstance(object instance, object classes) - cdef int hasattr(object obj, char* attr) + cdef int hasattr(object obj, object attr) From scoder at codespeak.net Tue Mar 14 06:23:14 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 14 06:23:16 2006 Subject: [Lxml-checkins] r24327 - lxml/trunk/src/lxml/tests Message-ID: <20060314052314.2F3FE10098@code0.codespeak.net> Author: scoder Date: Tue Mar 14 06:23:09 2006 New Revision: 
24327 Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py Log: test default namespace classes (None namespace or element name) Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_nsclasses.py (original) +++ lxml/trunk/src/lxml/tests/test_nsclasses.py Tue Mar 14 06:23:09 2006 @@ -67,9 +67,9 @@ def test_create_element(self): bluff_dict = {u'bluff' : self.bluff_class} - maeh_dict = {u'maeh' : self.maeh_class} - etree.Namespace(u'ns20').update(bluff_dict) + + maeh_dict = {u'maeh' : self.maeh_class} etree.Namespace(u'ns21').update(maeh_dict) el = etree.Element("{ns20}bluff") @@ -79,6 +79,26 @@ self.assertEquals(el.bluff(), u'bluff') self.assertEquals(el[0].maeh(), u'maeh') + etree.Namespace(u'ns20').clear() + etree.Namespace(u'ns21').clear() + + def test_create_element_default(self): + bluff_dict = {None : self.bluff_class} + etree.Namespace(u'ns30').update(bluff_dict) + + maeh_dict = {u'maeh' : self.maeh_class} + etree.Namespace(None).update(maeh_dict) + + el = etree.Element("{ns30}bluff") + etree.SubElement(el, "maeh") + self.assert_(hasattr(el, 'bluff')) + self.assert_(hasattr(el[0], 'maeh')) + self.assertEquals(el.bluff(), u'bluff') + self.assertEquals(el[0].maeh(), u'maeh') + + etree.Namespace(None).clear() + etree.Namespace(u'ns30').clear() + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeNamespaceClassesTestCase)]) From scoder at codespeak.net Tue Mar 14 09:21:26 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 14 09:21:27 2006 Subject: [Lxml-checkins] r24332 - lxml/branch/scoder2/src/lxml/tests Message-ID: <20060314082126.67E3210098@code0.codespeak.net> Author: scoder Date: Tue Mar 14 09:21:19 2006 New Revision: 24332 Modified: lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py Log: merges from trunk Modified: lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py 
============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py Tue Mar 14 09:21:19 2006 @@ -67,9 +67,9 @@ def test_create_element(self): bluff_dict = {u'bluff' : self.bluff_class} - maeh_dict = {u'maeh' : self.maeh_class} - etree.Namespace(u'ns20').update(bluff_dict) + + maeh_dict = {u'maeh' : self.maeh_class} etree.Namespace(u'ns21').update(maeh_dict) el = etree.Element("{ns20}bluff") @@ -79,6 +79,26 @@ self.assertEquals(el.bluff(), u'bluff') self.assertEquals(el[0].maeh(), u'maeh') + etree.Namespace(u'ns20').clear() + etree.Namespace(u'ns21').clear() + + def test_create_element_default(self): + bluff_dict = {None : self.bluff_class} + etree.Namespace(u'ns30').update(bluff_dict) + + maeh_dict = {u'maeh' : self.maeh_class} + etree.Namespace(None).update(maeh_dict) + + el = etree.Element("{ns30}bluff") + etree.SubElement(el, "maeh") + self.assert_(hasattr(el, 'bluff')) + self.assert_(hasattr(el[0], 'maeh')) + self.assertEquals(el.bluff(), u'bluff') + self.assertEquals(el[0].maeh(), u'maeh') + + etree.Namespace(None).clear() + etree.Namespace(u'ns30').clear() + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeNamespaceClassesTestCase)]) From scoder at codespeak.net Tue Mar 14 09:25:37 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 14 09:25:39 2006 Subject: [Lxml-checkins] r24333 - lxml/branch/scoder2/src/lxml Message-ID: <20060314082537.831EB10098@code0.codespeak.net> Author: scoder Date: Tue Mar 14 09:25:35 2006 New Revision: 24333 Modified: lxml/branch/scoder2/src/lxml/nsclasses.pxi Log: __repr__ for namespace registry class Modified: lxml/branch/scoder2/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/nsclasses.pxi (original) +++ lxml/branch/scoder2/src/lxml/nsclasses.pxi Tue Mar 
14 09:25:35 2006 @@ -112,6 +112,9 @@ self._extensions.clear() #self.self._xslt_elements.clear() + def __repr__(self): + return "Namespace(%r)" % self._ns_uri + cdef class _FunctionNamespaceRegistry(_NamespaceRegistry): cdef object _prefix cdef object _prefix_utf @@ -143,6 +146,9 @@ raise KeyError, "Name not registered." return dict_result + def __repr__(self): + return "FunctionNamespace(%r)" % self._ns_uri + cdef object _find_all_extensions(): "Internal lookup function to find all extension functions for XSLT/XPath." cdef _NamespaceRegistry registry From scoder at codespeak.net Tue Mar 14 09:29:08 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 14 09:29:19 2006 Subject: [Lxml-checkins] r24334 - lxml/trunk/src/lxml Message-ID: <20060314082908.4DD1910098@code0.codespeak.net> Author: scoder Date: Tue Mar 14 09:28:57 2006 New Revision: 24334 Modified: lxml/trunk/src/lxml/nsclasses.pxi Log: merged in _NamespaceRegistry.__repr__ from branch Modified: lxml/trunk/src/lxml/nsclasses.pxi ============================================================================== --- lxml/trunk/src/lxml/nsclasses.pxi (original) +++ lxml/trunk/src/lxml/nsclasses.pxi Tue Mar 14 09:28:57 2006 @@ -91,6 +91,9 @@ self._extensions.clear() #self.self._xslt_elements.clear() + def __repr__(self): + return "Namespace(%r)" % self._ns_uri + cdef object _find_element_class(char* c_namespace_utf, char* c_element_name_utf): cdef python.PyObject* dict_result From scoder at codespeak.net Tue Mar 14 09:52:08 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 14 09:52:09 2006 Subject: [Lxml-checkins] r24335 - lxml/trunk/src/lxml/tests Message-ID: <20060314085208.2C78110098@code0.codespeak.net> Author: scoder Date: Tue Mar 14 09:52:07 2006 New Revision: 24335 Modified: lxml/trunk/src/lxml/tests/test_elementtree.py Log: test case for element ns setup from parsed XML Modified: lxml/trunk/src/lxml/tests/test_elementtree.py 
============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Tue Mar 14 09:52:07 2006 @@ -1443,6 +1443,24 @@ self.assertEquals( '{%s}b' % ns2, b.tag) + def test_ns_tag_parse(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + ElementTree = self.etree.ElementTree + + ns = 'http://xml.infrae.com/1' + ns2 = 'http://xml.infrae.com/2' + f = StringIO('' % (ns, ns2)) + t = ElementTree(file=f) + + a = t.getroot() + self.assertEquals('{%s}a' % ns, + a.tag) + self.assertEquals('{%s}b' % ns2, + a[0].tag) + self.assertEquals('{%s}b' % ns, + a[1].tag) + def test_ns_attr(self): Element = self.etree.Element ns = 'http://xml.infrae.com/1' From scoder at codespeak.net Tue Mar 14 10:05:48 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 14 10:05:50 2006 Subject: [Lxml-checkins] r24337 - lxml/trunk/src/lxml/tests Message-ID: <20060314090548.842071008E@code0.codespeak.net> Author: scoder Date: Tue Mar 14 10:05:47 2006 New Revision: 24337 Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py Log: test case for namespace default class Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_nsclasses.py (original) +++ lxml/trunk/src/lxml/tests/test_nsclasses.py Tue Mar 14 10:05:47 2006 @@ -65,6 +65,36 @@ etree.Namespace(u'ns11').clear() + def test_default_class(self): + bluff_dict = { + None : self.bluff_class, + 'maeh' : self.maeh_class + } + + ns = etree.Namespace("uri:nsDefClass") + ns.update(bluff_dict) + + tree = self.parse(u''' + + + + ''') + + el = tree.getroot() + self.assertFalse(isinstance(el, etree.ElementBase)) + for child in el[:-1]: + self.assert_(isinstance(child, etree.ElementBase), child.tag) + self.assertFalse(isinstance(el[-1], etree.ElementBase)) + + self.assert_(hasattr(el[0], 
'bluff')) + self.assert_(hasattr(el[1], 'bluff')) + self.assert_(hasattr(el[2], 'maeh')) + self.assert_(hasattr(el[3], 'maeh')) + self.assertFalse(hasattr(el[4], 'maeh')) + del el + + ns.clear() + def test_create_element(self): bluff_dict = {u'bluff' : self.bluff_class} etree.Namespace(u'ns20').update(bluff_dict) From scoder at codespeak.net Tue Mar 14 10:12:16 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 14 10:12:17 2006 Subject: [Lxml-checkins] r24338 - lxml/trunk/src/lxml/tests Message-ID: <20060314091216.61A7C1008E@code0.codespeak.net> Author: scoder Date: Tue Mar 14 10:12:15 2006 New Revision: 24338 Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py Log: longer test case Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_nsclasses.py (original) +++ lxml/trunk/src/lxml/tests/test_nsclasses.py Tue Mar 14 10:12:15 2006 @@ -104,10 +104,18 @@ el = etree.Element("{ns20}bluff") etree.SubElement(el, "{ns21}maeh") + etree.SubElement(el, "{ns20}bluff") + etree.SubElement(el, "{ns21}bluff") + self.assert_(hasattr(el, 'bluff')) self.assert_(hasattr(el[0], 'maeh')) + self.assert_(hasattr(el[1], 'bluff')) + self.assertFalse(hasattr(el[2], 'bluff')) + self.assertFalse(hasattr(el[2], 'maeh')) + self.assertEquals(el.bluff(), u'bluff') self.assertEquals(el[0].maeh(), u'maeh') + self.assertEquals(el[1].bluff(), u'bluff') etree.Namespace(u'ns20').clear() etree.Namespace(u'ns21').clear() From scoder at codespeak.net Tue Mar 14 10:21:09 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 14 10:21:21 2006 Subject: [Lxml-checkins] r24339 - lxml/branch/scoder2/src/lxml/tests Message-ID: <20060314092109.205D210092@code0.codespeak.net> Author: scoder Date: Tue Mar 14 10:21:07 2006 New Revision: 24339 Modified: lxml/branch/scoder2/src/lxml/tests/test_elementtree.py lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py Log: 
merges from trunk Modified: lxml/branch/scoder2/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_elementtree.py Tue Mar 14 10:21:07 2006 @@ -1443,6 +1443,24 @@ self.assertEquals( '{%s}b' % ns2, b.tag) + def test_ns_tag_parse(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + ElementTree = self.etree.ElementTree + + ns = 'http://xml.infrae.com/1' + ns2 = 'http://xml.infrae.com/2' + f = StringIO('' % (ns, ns2)) + t = ElementTree(file=f) + + a = t.getroot() + self.assertEquals('{%s}a' % ns, + a.tag) + self.assertEquals('{%s}b' % ns2, + a[0].tag) + self.assertEquals('{%s}b' % ns, + a[1].tag) + def test_ns_attr(self): Element = self.etree.Element ns = 'http://xml.infrae.com/1' Modified: lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py Tue Mar 14 10:21:07 2006 @@ -65,6 +65,36 @@ etree.Namespace(u'ns11').clear() + def test_default_class(self): + bluff_dict = { + None : self.bluff_class, + 'maeh' : self.maeh_class + } + + ns = etree.Namespace("uri:nsDefClass") + ns.update(bluff_dict) + + tree = self.parse(u''' + + + + ''') + + el = tree.getroot() + self.assertFalse(isinstance(el, etree.ElementBase)) + for child in el[:-1]: + self.assert_(isinstance(child, etree.ElementBase), child.tag) + self.assertFalse(isinstance(el[-1], etree.ElementBase)) + + self.assert_(hasattr(el[0], 'bluff')) + self.assert_(hasattr(el[1], 'bluff')) + self.assert_(hasattr(el[2], 'maeh')) + self.assert_(hasattr(el[3], 'maeh')) + self.assertFalse(hasattr(el[4], 'maeh')) + del el + + ns.clear() + def test_create_element(self): bluff_dict = {u'bluff' : self.bluff_class} 
etree.Namespace(u'ns20').update(bluff_dict) @@ -74,10 +104,18 @@ el = etree.Element("{ns20}bluff") etree.SubElement(el, "{ns21}maeh") + etree.SubElement(el, "{ns20}bluff") + etree.SubElement(el, "{ns21}bluff") + self.assert_(hasattr(el, 'bluff')) self.assert_(hasattr(el[0], 'maeh')) + self.assert_(hasattr(el[1], 'bluff')) + self.assertFalse(hasattr(el[2], 'bluff')) + self.assertFalse(hasattr(el[2], 'maeh')) + self.assertEquals(el.bluff(), u'bluff') self.assertEquals(el[0].maeh(), u'maeh') + self.assertEquals(el[1].bluff(), u'bluff') etree.Namespace(u'ns20').clear() etree.Namespace(u'ns21').clear() From scoder at codespeak.net Tue Mar 14 10:34:10 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 14 10:34:11 2006 Subject: [Lxml-checkins] r24343 - lxml/trunk/src/lxml/tests Message-ID: <20060314093410.7C10610098@code0.codespeak.net> Author: scoder Date: Tue Mar 14 10:34:08 2006 New Revision: 24343 Modified: lxml/trunk/src/lxml/tests/test_elementtree.py Log: longer test case Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Tue Mar 14 10:34:08 2006 @@ -1434,14 +1434,19 @@ ns2 = 'http://xml.infrae.com/2' a = Element('{%s}a' % ns) b = SubElement(a, '{%s}b' % ns2) + c = SubElement(a, '{%s}c' % ns) self.assertEquals('{%s}a' % ns, a.tag) self.assertEquals('{%s}b' % ns2, b.tag) - self.assertEquals( - '{%s}a' % ns, a.tag) - self.assertEquals( - '{%s}b' % ns2, b.tag) + self.assertEquals('{%s}c' % ns, + c.tag) + self.assertEquals('{%s}a' % ns, + a.tag) + self.assertEquals('{%s}b' % ns2, + b.tag) + self.assertEquals('{%s}c' % ns, + c.tag) def test_ns_tag_parse(self): Element = self.etree.Element From scoder at codespeak.net Tue Mar 14 10:35:30 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 14 10:35:31 2006 Subject: [Lxml-checkins] r24344 - 
lxml/branch/scoder2/src/lxml/tests Message-ID: <20060314093530.656E210098@code0.codespeak.net> Author: scoder Date: Tue Mar 14 10:35:29 2006 New Revision: 24344 Modified: lxml/branch/scoder2/src/lxml/tests/test_elementtree.py Log: merges from trunk Modified: lxml/branch/scoder2/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_elementtree.py Tue Mar 14 10:35:29 2006 @@ -1434,14 +1434,19 @@ ns2 = 'http://xml.infrae.com/2' a = Element('{%s}a' % ns) b = SubElement(a, '{%s}b' % ns2) + c = SubElement(a, '{%s}c' % ns) self.assertEquals('{%s}a' % ns, a.tag) self.assertEquals('{%s}b' % ns2, b.tag) - self.assertEquals( - '{%s}a' % ns, a.tag) - self.assertEquals( - '{%s}b' % ns2, b.tag) + self.assertEquals('{%s}c' % ns, + c.tag) + self.assertEquals('{%s}a' % ns, + a.tag) + self.assertEquals('{%s}b' % ns2, + b.tag) + self.assertEquals('{%s}c' % ns, + c.tag) def test_ns_tag_parse(self): Element = self.etree.Element From scoder at codespeak.net Tue Mar 14 11:04:12 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 14 11:04:12 2006 Subject: [Lxml-checkins] r24350 - lxml/branch/scoder2/src/lxml/tests Message-ID: <20060314100412.45D291009D@code0.codespeak.net> Author: scoder Date: Tue Mar 14 11:04:06 2006 New Revision: 24350 Modified: lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py Log: extended test case to make it fail for current revision Modified: lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_nsclasses.py Tue Mar 14 11:04:06 2006 @@ -103,11 +103,16 @@ etree.Namespace(u'ns21').update(maeh_dict) el = etree.Element("{ns20}bluff") - etree.SubElement(el, "{ns21}maeh") - 
etree.SubElement(el, "{ns20}bluff") - etree.SubElement(el, "{ns21}bluff") - self.assert_(hasattr(el, 'bluff')) + + child = etree.SubElement(el, "{ns21}maeh") + self.assert_(hasattr(child, 'maeh')) + child = etree.SubElement(el, "{ns20}bluff") + self.assert_(hasattr(child, 'bluff')) + child = etree.SubElement(el, "{ns21}bluff") + self.assertFalse(hasattr(child, 'bluff')) + self.assertFalse(hasattr(child, 'maeh')) + self.assert_(hasattr(el[0], 'maeh')) self.assert_(hasattr(el[1], 'bluff')) self.assertFalse(hasattr(el[2], 'bluff')) From scoder at codespeak.net Tue Mar 14 11:07:11 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 14 11:07:17 2006 Subject: [Lxml-checkins] r24352 - lxml/branch/scoder2/src/lxml Message-ID: <20060314100711.5404710094@code0.codespeak.net> Author: scoder Date: Tue Mar 14 11:06:59 2006 New Revision: 24352 Modified: lxml/branch/scoder2/src/lxml/etree.pyx Log: bug fix to make test case from revision 24350 pass - bug was due to incomplete merge from trunk Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Tue Mar 14 11:06:59 2006 @@ -1055,7 +1055,6 @@ ns_utf, name_utf = _getNsTag(_tag) doc = _parent._doc c_node = _createElement(doc._c_doc, name_utf, attrib, _extra) - element = _elementFactory(doc, c_node) tree.xmlAddChild(_parent._c_node, c_node) # add namespaces to node if necessary doc._setNodeNamespaces(c_node, ns_utf, nsmap) From scoder at codespeak.net Tue Mar 14 11:08:08 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 14 11:08:09 2006 Subject: [Lxml-checkins] r24353 - lxml/trunk/src/lxml/tests Message-ID: <20060314100808.33B8010094@code0.codespeak.net> Author: scoder Date: Tue Mar 14 11:08:06 2006 New Revision: 24353 Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py Log: merged in test case from branch Modified: 
lxml/trunk/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_nsclasses.py (original) +++ lxml/trunk/src/lxml/tests/test_nsclasses.py Tue Mar 14 11:08:06 2006 @@ -103,11 +103,16 @@ etree.Namespace(u'ns21').update(maeh_dict) el = etree.Element("{ns20}bluff") - etree.SubElement(el, "{ns21}maeh") - etree.SubElement(el, "{ns20}bluff") - etree.SubElement(el, "{ns21}bluff") - self.assert_(hasattr(el, 'bluff')) + + child = etree.SubElement(el, "{ns21}maeh") + self.assert_(hasattr(child, 'maeh')) + child = etree.SubElement(el, "{ns20}bluff") + self.assert_(hasattr(child, 'bluff')) + child = etree.SubElement(el, "{ns21}bluff") + self.assertFalse(hasattr(child, 'bluff')) + self.assertFalse(hasattr(child, 'maeh')) + self.assert_(hasattr(el[0], 'maeh')) self.assert_(hasattr(el[1], 'bluff')) self.assertFalse(hasattr(el[2], 'bluff')) From scoder at codespeak.net Wed Mar 15 08:07:50 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 15 08:07:52 2006 Subject: [Lxml-checkins] r24374 - in lxml/pyrex: Demos/callback Demos/embed Tools Message-ID: <20060315070750.815011008E@code0.codespeak.net> Author: scoder Date: Wed Mar 15 08:07:39 2006 New Revision: 24374 Added: lxml/pyrex/Demos/callback/ lxml/pyrex/Demos/callback/Makefile lxml/pyrex/Demos/callback/Makefile.nodistutils lxml/pyrex/Demos/callback/README.txt lxml/pyrex/Demos/callback/Setup.py lxml/pyrex/Demos/callback/cheese.pyx lxml/pyrex/Demos/callback/cheesefinder.c lxml/pyrex/Demos/callback/cheesefinder.h lxml/pyrex/Demos/callback/run_cheese.py lxml/pyrex/Demos/embed/ lxml/pyrex/Demos/embed/Makefile (contents, props changed) lxml/pyrex/Demos/embed/Makefile.msc lxml/pyrex/Demos/embed/Makefile.msc.static lxml/pyrex/Demos/embed/Makefile.unix lxml/pyrex/Demos/embed/README lxml/pyrex/Demos/embed/embedded.pyx lxml/pyrex/Demos/embed/main.c lxml/pyrex/Tools/ lxml/pyrex/Tools/pyrex-mode.el lxml/pyrex/Tools/pyrex.st Log: 
added Demos and Tools Added: lxml/pyrex/Demos/callback/Makefile ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/callback/Makefile Wed Mar 15 08:07:39 2006 @@ -0,0 +1,10 @@ +all: + python Setup.py build_ext --inplace + +test: all + python run_cheese.py + +clean: + @echo Cleaning Demos/callback + @rm -f cheese.c *.o *.so *~ core + @rm -rf build Added: lxml/pyrex/Demos/callback/Makefile.nodistutils ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/callback/Makefile.nodistutils Wed Mar 15 08:07:39 2006 @@ -0,0 +1,19 @@ +PYHOME = $(HOME)/pkg/python/version +PYINCLUDE = \ + -I$(PYHOME)/include/python2.2 \ + -I$(PYHOME)/$(ARCH)/include/python2.2 + +%.c: %.pyx + ../../bin/pyrexc $< + +%.o: %.c + gcc -c -fPIC $(PYINCLUDE) $< + +%.so: %.o + gcc -shared $< -lm -o $@ + +all: cheese.so + +clean: + @echo Cleaning Demos/callback + @rm -f *.c *.o *.so *~ core core.* Added: lxml/pyrex/Demos/callback/README.txt ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/callback/README.txt Wed Mar 15 08:07:39 2006 @@ -0,0 +1 @@ +This example demonstrates how you can wrap a C API that has a callback interface, so that you can pass Python functions to it as callbacks. The files cheesefinder.h and cheesefinder.c represent the C library to be wrapped. The file cheese.pyx is the Pyrex module which wraps it. The file run_cheese.py demonstrates how to call the wrapper. 
\ No newline at end of file Added: lxml/pyrex/Demos/callback/Setup.py ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/callback/Setup.py Wed Mar 15 08:07:39 2006 @@ -0,0 +1,11 @@ +from distutils.core import setup +from distutils.extension import Extension +from Pyrex.Distutils import build_ext + +setup( + name = 'callback', + ext_modules=[ + Extension("cheese", ["cheese.pyx", "cheesefinder.c"]), + ], + cmdclass = {'build_ext': build_ext} +) Added: lxml/pyrex/Demos/callback/cheese.pyx ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/callback/cheese.pyx Wed Mar 15 08:07:39 2006 @@ -0,0 +1,13 @@ +# +# Pyrex wrapper for the cheesefinder API +# + +cdef extern from "cheesefinder.h": + ctypedef void (*cheesefunc)(char *name, void *user_data) + void find_cheeses(cheesefunc user_func, void *user_data) + +def find(f): + find_cheeses(callback, <void*>f) + +cdef void callback(char *name, void *f): + (<object>f)(name) Added: lxml/pyrex/Demos/callback/cheesefinder.c ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/callback/cheesefinder.c Wed Mar 15 08:07:39 2006 @@ -0,0 +1,21 @@ +/* + * An example of a C API that provides a callback mechanism. 
+ */ + +#include "cheesefinder.h" + +static char *cheeses[] = { + "cheddar", + "camembert", + "that runny one", + 0 +}; + +void find_cheeses(cheesefunc user_func, void *user_data) { + char **p = cheeses; + while (*p) { + user_func(*p, user_data); + ++p; + } +} + Added: lxml/pyrex/Demos/callback/cheesefinder.h ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/callback/cheesefinder.h Wed Mar 15 08:07:39 2006 @@ -0,0 +1 @@ +typedef void (*cheesefunc)(char *name, void *user_data); void find_cheeses(cheesefunc user_func, void *user_data); \ No newline at end of file Added: lxml/pyrex/Demos/callback/run_cheese.py ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/callback/run_cheese.py Wed Mar 15 08:07:39 2006 @@ -0,0 +1,7 @@ +import cheese + +def report_cheese(name): + print "Found cheese:", name + +cheese.find(report_cheese) + Added: lxml/pyrex/Demos/embed/Makefile ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/embed/Makefile Wed Mar 15 08:07:39 2006 @@ -0,0 +1 @@ +link Makefile.unix \ No newline at end of file Added: lxml/pyrex/Demos/embed/Makefile.msc ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/embed/Makefile.msc Wed Mar 15 08:07:39 2006 @@ -0,0 +1,35 @@ +# Makefile for Microsoft C Compiler, building a DLL +PYVERSION = 2.2 +PYHOME = \Python$(PYVERSION:.=) +PYINCLUDE = -I$(PYHOME)\include +PYLIB = /LIBPATH:$(PYHOME)\libs + +CFLAGS = $(PYINCLUDE) /Ox /W3 /GX -nologo +.SUFFIXES: .exe .dll .obj .c .cpp .pyx + +.pyx.c: + $(PYHOME)\Python.exe ../../pyrexc.py $< + +all: main.exe + +clean: + del /Q/F *.obj embedded.h embedded.c main.exe embedded.dll embedded.lib embedded.exp + +# When linking the DLL we must explicitly list all of the exports +# There doesn't seem to be an easy way to get DL_EXPORT to 
have the correct definition +# to do the export for us without breaking the importing of symbols from the core +# python library. +embedded.dll: embedded.obj + link /nologo /DLL /INCREMENTAL:NO $(PYLIB) $** /IMPLIB:$*.lib /DEF:<< /OUT:$*.dll +EXPORTS initembedded +EXPORTS spam +<< + +main.exe: main.obj embedded.lib + link /nologo $** $(PYLIB) /OUT:main.exe + +embedded.h: embedded.c +main.obj: embedded.h +embedded.obj: embedded.c + $(CC) /MD $(CFLAGS) -c $** +embedded.lib: embedded.dll Added: lxml/pyrex/Demos/embed/Makefile.msc.static ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/embed/Makefile.msc.static Wed Mar 15 08:07:39 2006 @@ -0,0 +1 @@ +# Makefile for Microsoft compiler statically linking PYVERSION = 2.2 PYHOME = \Python$(PYVERSION:.=) PYINCLUDE = -I$(PYHOME)\include PYLIB = /LIBPATH:$(PYHOME)\libs python22.lib CFLAGS = $(PYINCLUDE) /Ox /W3 /GX -nologo .SUFFIXES: .exe .dll .obj .c .cpp .pyx .pyx.c: $(PYHOME)\Python.exe ../../pyrexc.py $< all: main.exe clean: -del /Q/F *.obj embedded.h embedded.c main.exe main.exe: main.obj embedded.obj link /nologo $** $(PYLIB) /OUT:main.exe embedded.h: embedded.c main.obj: embedded.h \ No newline at end of file Added: lxml/pyrex/Demos/embed/Makefile.unix ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/embed/Makefile.unix Wed Mar 15 08:07:39 2006 @@ -0,0 +1,30 @@ +PYVERSION = 2.2 +PYHOME = $(HOME)/pkg/python/$(PYVERSION) +PYARCH = $(PYHOME)/$(ARCH) +PYINCLUDE = \ + -I$(PYHOME)/include/python$(PYVERSION) \ + -I$(PYARCH)/include/python$(PYVERSION) +PYLIB = -L$(PYARCH)/lib/python$(PYVERSION)/config \ + -lpython$(PYVERSION) \ + -ldl -lpthread -lutil -lm + +%.c: %.pyx + ../../bin/pyrexc $< + +%.o: %.c + gcc -c -fPIC $(PYINCLUDE) $< + +#%.so: %.o +# gcc -shared $< -lm -o $@ + +all: main + +main: main.o embedded.o + gcc main.o embedded.o $(PYLIB) -o main + +clean: + @echo Cleaning Demos/embed + 
@rm -f *~ *.o *.so core core.* embedded.h embedded.c main + +embedded.h: embedded.c +main.o: embedded.h Added: lxml/pyrex/Demos/embed/README ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/embed/README Wed Mar 15 08:07:39 2006 @@ -0,0 +1 @@ +This example demonstrates how Pyrex-generated code can be called directly from a main program written in C. In this example, the module's initialisation function (called "initembedded", since the module is called "embedded") is called explicitly. This is necessary because the module is not being imported using the normal Python import mechanism. The Windows makefiles were contributed by Duncan Booth . \ No newline at end of file Added: lxml/pyrex/Demos/embed/embedded.pyx ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/embed/embedded.pyx Wed Mar 15 08:07:39 2006 @@ -0,0 +1,5 @@ +cdef public void spam(): + praise() + +def praise(): + print "Spam, glorious spam!" Added: lxml/pyrex/Demos/embed/main.c ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/embed/main.c Wed Mar 15 08:07:39 2006 @@ -0,0 +1,9 @@ +#include "Python.h" +#include "embedded.h" + +int main(int argc, char *argv) { + Py_Initialize(); + initembedded(); + spam(); + Py_Finalize(); +} Added: lxml/pyrex/Tools/pyrex-mode.el ============================================================================== --- (empty file) +++ lxml/pyrex/Tools/pyrex-mode.el Wed Mar 15 08:07:39 2006 @@ -0,0 +1 @@ +;;;; `Pyrex' mode. (add-to-list 'auto-mode-alist '("\\.pyx\\'" . 
pyrex-mode)) (define-derived-mode pyrex-mode python-mode "Pyrex" (font-lock-add-keywords nil `((,(concat "\\<\\(NULL" "\\|c\\(def\\|har\\|typedef\\)" "\\|e\\(num\\|xtern\\)" "\\|float" "\\|in\\(clude\\|t\\)" "\\|object\\|public\\|struct\\|type\\|union\\|void" "\\)\\>") 1 font-lock-keyword-face t)))) \ No newline at end of file Added: lxml/pyrex/Tools/pyrex.st ============================================================================== --- (empty file) +++ lxml/pyrex/Tools/pyrex.st Wed Mar 15 08:07:39 2006 @@ -0,0 +1 @@ +/** * Name: pyrex * Description: Pyrex - a Language for Writing Python Extension Modules * Author: Markku Rossi */ state pyrex extends python { /* Additional keywords. (build-re '( NULL as cdef char ctypedef double enum extern float include int long private public short signed sizeof struct union unsigned void )) */ /\b(NULL|as|c(def|har|typedef)|double|e(num|xtern)|float|in(clude|t)\ |long|p(rivate|ublic)|s(hort|i(gned|zeof)|truct)|un(ion|signed)|void)\b/ { keyword_face(true); language_print($0); keyword_face(false); } } /* Local variables: mode: c End: */ \ No newline at end of file From scoder at codespeak.net Wed Mar 15 12:27:42 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 15 12:27:49 2006 Subject: [Lxml-checkins] r24380 - lxml/trunk/src/lxml Message-ID: <20060315112742.98AD010088@code0.codespeak.net> Author: scoder Date: Wed Mar 15 12:27:38 2006 New Revision: 24380 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tree.pxd Log: implement element.getiterator() as iterator - new class ElementDepthFirstIterator: * iterator returning all elements of a tree depth first - new class ElementTagFilter: * iterator that filters out elements that do not match the requested tag * used to wrap ElementDepthFirstIterator to return only matching elements - getiterator() now simply returns one of the two based on the requested tag Modified: lxml/trunk/src/lxml/etree.pyx 
============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Mar 15 12:27:38 2006 @@ -218,7 +218,7 @@ def getiterator(self, tag=None): root = self.getroot() if root is None: - return [] + return () return root.getiterator(tag) def find(self, path): @@ -718,17 +718,11 @@ return None def getiterator(self, tag=None): - result = [] - if tag == "*": - tag = None - if tag is None or self.tag == tag: - result.append(self) - for node in self: - result.extend(node.getiterator(tag)) - return result - - # XXX this doesn't work yet - # return _docOrderIteratorFactory(self._doc, self._c_node, tag) + iterator = ElementDepthFirstIterator(self) + if tag is None or tag == '*': + return iterator + else: + return ElementTagFilter(iterator, tag) def makeelement(self, tag, attrib): return Element(tag, attrib) @@ -976,32 +970,107 @@ cdef class ElementChildIterator: # we keep Python references here to control GC - cdef object _node - def __init__(self, node): # Python ref! + cdef _NodeBase _node + def __init__(self, _NodeBase node): # Python ref! cdef xmlNode* c_node - cdef _NodeBase base_node - base_node = <_NodeBase>node - c_node = _findChildForwards(base_node._c_node, 0) + c_node = _findChildForwards(node._c_node, 0) if c_node is NULL: self._node = None else: - self._node = _elementFactory(base_node._doc, c_node) + self._node = _elementFactory(node._doc, c_node) def __iter__(self): return self def __next__(self): cdef xmlNode* c_node - cdef _NodeBase base_node - current_node = self._node # Python ref! 
+ cdef _NodeBase current_node + # Python ref: + current_node = self._node if current_node is None: raise StopIteration - base_node = <_NodeBase>current_node - c_node = _nextElement(base_node._c_node) + c_node = _nextElement(current_node._c_node) if c_node is NULL: self._node = None else: - self._node = _elementFactory(base_node._doc, c_node) + # Python ref: + self._node = _elementFactory(current_node._doc, c_node) return current_node +cdef class ElementDepthFirstIterator: + # we keep Python references here to control GC + # keep next node to return and a stack of position state in the tree + cdef object _stack + cdef _NodeBase _next_node + def __init__(self, _NodeBase node): + cdef xmlNode* c_node + _raiseIfNone(node) + self._next_node = node + self._stack = [] + c_node = _findChildForwards(node._c_node, 0) + if c_node is not NULL: + python.PyList_Append( + self._stack, _elementFactory(node._doc, c_node)) + def __iter__(self): + return self + def __next__(self): + cdef xmlNode* c_node + cdef _NodeBase current_node + cdef _NodeBase next_node + current_node = self._next_node + if current_node is None: + raise StopIteration + + stack = self._stack + if not stack: + self._next_node = None + return current_node + + next_node = stack[-1] + self._next_node = next_node + + # take next child until we reach a leaf + c_node = _findChildForwards(next_node._c_node, 0) + if c_node is NULL: + pop = stack.pop + while c_node is NULL and stack: + # go up the stack until we find a sibling + next_node = pop() + c_node = _nextElement(next_node._c_node) + + if c_node is not NULL: + python.PyList_Append( + stack, _elementFactory(next_node._doc, c_node)) + return current_node + +cdef class ElementTagFilter: + cdef object _iterator + cdef object _pystrings + cdef char* _href + cdef char* _name + def __init__(self, element_iterator, tag): + self._iterator = element_iterator + ns_href, name = _getNsTag(tag) + self._pystrings = (ns_href, name) # keep Python references + self._name = name + if 
ns_href is None: + self._href = NULL + else: + self._href = ns_href + def __iter__(self): + return self + def __next__(self): + cdef _NodeBase node + cdef xmlNode* c_node + while 1: + node = self._iterator.next() + c_node = node._c_node + if tree.strcmp(c_node.name, self._name) == 0: + if c_node.ns == NULL or c_node.ns.href == NULL: + if self._href == NULL: + break + elif tree.strcmp(c_node.ns.href, self._href) == 0: + break + return node + cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf, object attrib, object extra) except NULL: cdef xmlNode* c_node Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Wed Mar 15 12:27:38 2006 @@ -3,6 +3,7 @@ cdef extern from "stdio.h": ctypedef struct FILE cdef int strlen(char* s) + cdef int strcmp(char* s1, char* s2) cdef extern from "libxml/encoding.h": ctypedef struct xmlCharEncodingHandler From scoder at codespeak.net Wed Mar 15 12:29:36 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 15 12:29:37 2006 Subject: [Lxml-checkins] r24381 - lxml/branch/scoder2/src/lxml Message-ID: <20060315112936.9561710088@code0.codespeak.net> Author: scoder Date: Wed Mar 15 12:29:34 2006 New Revision: 24381 Modified: lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/tree.pxd Log: merged in new getiterator() implementation from trunk Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Wed Mar 15 12:29:34 2006 @@ -218,7 +218,7 @@ def getiterator(self, tag=None): root = self.getroot() if root is None: - return [] + return () return root.getiterator(tag) def find(self, path): @@ -718,17 +718,11 @@ return None def getiterator(self, tag=None): - result = [] - if tag == "*": - tag = None - if 
tag is None or self.tag == tag: - result.append(self) - for node in self: - result.extend(node.getiterator(tag)) - return result - - # XXX this doesn't work yet - # return _docOrderIteratorFactory(self._doc, self._c_node, tag) + iterator = ElementDepthFirstIterator(self) + if tag is None or tag == '*': + return iterator + else: + return ElementTagFilter(iterator, tag) def makeelement(self, tag, attrib): return Element(tag, attrib) @@ -976,32 +970,107 @@ cdef class ElementChildIterator: # we keep Python references here to control GC - cdef object _node - def __init__(self, node): # Python ref! + cdef _NodeBase _node + def __init__(self, _NodeBase node): # Python ref! cdef xmlNode* c_node - cdef _NodeBase base_node - base_node = <_NodeBase>node - c_node = _findChildForwards(base_node._c_node, 0) + c_node = _findChildForwards(node._c_node, 0) if c_node is NULL: self._node = None else: - self._node = _elementFactory(base_node._doc, c_node) + self._node = _elementFactory(node._doc, c_node) def __iter__(self): return self def __next__(self): cdef xmlNode* c_node - cdef _NodeBase base_node - current_node = self._node # Python ref! 
+ cdef _NodeBase current_node + # Python ref: + current_node = self._node if current_node is None: raise StopIteration - base_node = <_NodeBase>current_node - c_node = _nextElement(base_node._c_node) + c_node = _nextElement(current_node._c_node) if c_node is NULL: self._node = None else: - self._node = _elementFactory(base_node._doc, c_node) + # Python ref: + self._node = _elementFactory(current_node._doc, c_node) return current_node +cdef class ElementDepthFirstIterator: + # we keep Python references here to control GC + # keep next node to return and a stack of position state in the tree + cdef object _stack + cdef _NodeBase _next_node + def __init__(self, _NodeBase node): + cdef xmlNode* c_node + _raiseIfNone(node) + self._next_node = node + self._stack = [] + c_node = _findChildForwards(node._c_node, 0) + if c_node is not NULL: + python.PyList_Append( + self._stack, _elementFactory(node._doc, c_node)) + def __iter__(self): + return self + def __next__(self): + cdef xmlNode* c_node + cdef _NodeBase current_node + cdef _NodeBase next_node + current_node = self._next_node + if current_node is None: + raise StopIteration + + stack = self._stack + if not stack: + self._next_node = None + return current_node + + next_node = stack[-1] + self._next_node = next_node + + # take next child until we reach a leaf + c_node = _findChildForwards(next_node._c_node, 0) + if c_node is NULL: + pop = stack.pop + while c_node is NULL and stack: + # go up the stack until we find a sibling + next_node = pop() + c_node = _nextElement(next_node._c_node) + + if c_node is not NULL: + python.PyList_Append( + stack, _elementFactory(next_node._doc, c_node)) + return current_node + +cdef class ElementTagFilter: + cdef object _iterator + cdef object _pystrings + cdef char* _href + cdef char* _name + def __init__(self, element_iterator, tag): + self._iterator = element_iterator + ns_href, name = _getNsTag(tag) + self._pystrings = (ns_href, name) # keep Python references + self._name = name + if 
ns_href is None: + self._href = NULL + else: + self._href = ns_href + def __iter__(self): + return self + def __next__(self): + cdef _NodeBase node + cdef xmlNode* c_node + while 1: + node = self._iterator.next() + c_node = node._c_node + if tree.strcmp(c_node.name, self._name) == 0: + if c_node.ns == NULL or c_node.ns.href == NULL: + if self._href == NULL: + break + elif tree.strcmp(c_node.ns.href, self._href) == 0: + break + return node + cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf, object attrib, object extra) except NULL: cdef xmlNode* c_node Modified: lxml/branch/scoder2/src/lxml/tree.pxd ============================================================================== --- lxml/branch/scoder2/src/lxml/tree.pxd (original) +++ lxml/branch/scoder2/src/lxml/tree.pxd Wed Mar 15 12:29:34 2006 @@ -3,6 +3,7 @@ cdef extern from "stdio.h": ctypedef struct FILE cdef int strlen(char* s) + cdef int strcmp(char* s1, char* s2) cdef extern from "libxml/encoding.h": ctypedef struct xmlCharEncodingHandler From scoder at codespeak.net Wed Mar 15 13:52:54 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 15 13:53:06 2006 Subject: [Lxml-checkins] r24383 - lxml/trunk Message-ID: <20060315125254.0320B10084@code0.codespeak.net> Author: scoder Date: Wed Mar 15 13:52:43 2006 New Revision: 24383 Modified: lxml/trunk/bench.py Log: new benchmarks for element.getiterator() Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Wed Mar 15 13:52:43 2006 @@ -137,12 +137,12 @@ SubElement = self.etree.SubElement current_time = time.time t = current_time() - root = self.etree.Element('{x}root') + root = self.etree.Element('{a}root') for ch1 in atoz: for i in range(20): - el = SubElement(root, "{y}%s%03d" % (ch1, i), attributes) + el = SubElement(root, "{b}"+ch1, attributes) for ch2 in atoz: - SubElement(el, "{z}"+ch2) + SubElement(el, "{c}%s%03d" % 
(ch2, i)) t = current_time() - t return (root, t) @@ -151,11 +151,11 @@ SubElement = self.etree.SubElement current_time = time.time t = current_time() - root = self.etree.Element('{x}root') + root = self.etree.Element('{a}root') children = [root] for i in range(7): tag_no = count().next - children = [ SubElement(c, "{y}z%d" % i, attributes) + children = [ SubElement(c, "{b}a%d" % i, attributes) for i,c in enumerate(chain(children, children, children)) ] t = current_time() - t return (root, t) @@ -166,7 +166,7 @@ SubElement = self.etree.SubElement current_time = time.time t = current_time() - root = self.etree.Element('{x}root') + root = self.etree.Element('{a}root') children = [root] for ch1 in atoz: el = SubElement(root, "{b}"+ch1, attributes) @@ -348,6 +348,15 @@ for child in root[-100:-5]: root.index(child, start=-100, stop=-5) + def bench_getiterator(self, root): + list(islice(root.getiterator(), 10, 110)) + + def bench_getiterator_tag(self, root): + list(islice(root.getiterator("{b}a"), 3, 10)) + + def bench_getiterator_tag_all(self, root): + list(islice(root.getiterator("{b}a"), 10, 150)) + ############################################################ # Main program ############################################################ From scoder at codespeak.net Wed Mar 15 13:54:04 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 15 13:54:05 2006 Subject: [Lxml-checkins] r24384 - lxml/trunk/src/lxml Message-ID: <20060315125404.2068210084@code0.codespeak.net> Author: scoder Date: Wed Mar 15 13:53:52 2006 New Revision: 24384 Modified: lxml/trunk/src/lxml/etree.pyx Log: clean up, new utility function _hasTag for C-node tag test against namespace and tag name Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Mar 15 13:53:52 2006 @@ -996,6 +996,8 @@ return current_node cdef class 
ElementDepthFirstIterator: + """Iterates over an element and its sub-elements in document order (depth + first).""" # we keep Python references here to control GC # keep next node to return and a stack of position state in the tree cdef object _stack @@ -1047,7 +1049,7 @@ cdef char* _href cdef char* _name def __init__(self, element_iterator, tag): - self._iterator = element_iterator + self._iterator = iter(element_iterator) ns_href, name = _getNsTag(tag) self._pystrings = (ns_href, name) # keep Python references self._name = name @@ -1062,14 +1064,8 @@ cdef xmlNode* c_node while 1: node = self._iterator.next() - c_node = node._c_node - if tree.strcmp(c_node.name, self._name) == 0: - if c_node.ns == NULL or c_node.ns.href == NULL: - if self._href == NULL: - break - elif tree.strcmp(c_node.ns.href, self._href) == 0: - break - return node + if _hasTag(node._c_node, self._name, self._href): + return node cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf, object attrib, object extra) except NULL: @@ -1536,6 +1532,14 @@ else: return s +cdef int _hasTag(xmlNode* c_node, char* name, char* ns_href): + if tree.strcmp(c_node.name, name) == 0: + if c_node.ns == NULL or c_node.ns.href == NULL: + return ns_href == NULL + elif tree.strcmp(c_node.ns.href, ns_href) == 0: + return 1 + return 0 + def _getFilenameForFile(source): """Given a Python File or Gzip object, give filename back. From scoder at codespeak.net Wed Mar 15 16:43:39 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 15 16:43:41 2006 Subject: [Lxml-checkins] r24388 - in lxml/trunk: . 
src/lxml Message-ID: <20060315154339.8451810086@code0.codespeak.net> Author: scoder Date: Wed Mar 15 16:43:37 2006 New Revision: 24388 Modified: lxml/trunk/bench.py lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/python.pxd Log: clean up, moved _hasTag function back into ElementTagFilter, factored out helper method from ElementDepthFirstIterator.__next__ Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Wed Mar 15 16:43:37 2006 @@ -355,7 +355,7 @@ list(islice(root.getiterator("{b}a"), 3, 10)) def bench_getiterator_tag_all(self, root): - list(islice(root.getiterator("{b}a"), 10, 150)) + list(root.getiterator("{b}a")) ############################################################ # Main program Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Mar 15 16:43:37 2006 @@ -997,7 +997,7 @@ cdef class ElementDepthFirstIterator: """Iterates over an element and its sub-elements in document order (depth - first).""" + first pre-order).""" # we keep Python references here to control GC # keep next node to return and a stack of position state in the tree cdef object _stack @@ -1007,41 +1007,38 @@ _raiseIfNone(node) self._next_node = node self._stack = [] - c_node = _findChildForwards(node._c_node, 0) - if c_node is not NULL: - python.PyList_Append( - self._stack, _elementFactory(node._doc, c_node)) + self._findAndPushNextNode(node) def __iter__(self): return self def __next__(self): cdef xmlNode* c_node - cdef _NodeBase current_node cdef _NodeBase next_node current_node = self._next_node if current_node is None: raise StopIteration - stack = self._stack - if not stack: + if python.PyList_GET_SIZE(stack) == 0: self._next_node = None return current_node - next_node = stack[-1] self._next_node = next_node + 
self._findAndPushNextNode(next_node) + return current_node - # take next child until we reach a leaf - c_node = _findChildForwards(next_node._c_node, 0) + cdef void _findAndPushNextNode(self, _NodeBase node): + cdef xmlNode* c_node + stack = self._stack + # try next child level until we hit a leaf + c_node = _findChildForwards(node._c_node, 0) if c_node is NULL: pop = stack.pop - while c_node is NULL and stack: - # go up the stack until we find a sibling - next_node = pop() - c_node = _nextElement(next_node._c_node) - + while c_node is NULL and python.PyList_GET_SIZE(stack): + # walk up the stack until we find a sibling + node = pop() + c_node = _nextElement(node._c_node) if c_node is not NULL: python.PyList_Append( - stack, _elementFactory(next_node._doc, c_node)) - return current_node + stack, _elementFactory(node._doc, c_node)) cdef class ElementTagFilter: cdef object _iterator @@ -1061,12 +1058,19 @@ return self def __next__(self): cdef _NodeBase node - cdef xmlNode* c_node while 1: node = self._iterator.next() - if _hasTag(node._c_node, self._name, self._href): + if self._tagMatches(node._c_node): return node + cdef int _tagMatches(self, xmlNode* c_node): + if tree.strcmp(c_node.name, self._name) == 0: + if c_node.ns == NULL or c_node.ns.href == NULL: + return self._href == NULL + else: + return tree.strcmp(c_node.ns.href, self._href) == 0 + return 0 + cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf, object attrib, object extra) except NULL: cdef xmlNode* c_node @@ -1532,14 +1536,6 @@ else: return s -cdef int _hasTag(xmlNode* c_node, char* name, char* ns_href): - if tree.strcmp(c_node.name, name) == 0: - if c_node.ns == NULL or c_node.ns.href == NULL: - return ns_href == NULL - elif tree.strcmp(c_node.ns.href, ns_href) == 0: - return 1 - return 0 - def _getFilenameForFile(source): """Given a Python File or Gzip object, give filename back. 
Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Wed Mar 15 16:43:37 2006 @@ -18,6 +18,7 @@ cdef object PyString_FromString(char* s) cdef object PyString_FromFormat(char* format, ...) + cdef int PyList_GET_SIZE(object l) cdef int PyList_Append(object l, object obj) cdef int PyDict_SetItemString(object d, char* key, object value) cdef PyObject* PyDict_GetItemString(object d, char* key) From scoder at codespeak.net Wed Mar 15 16:47:32 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 15 16:47:34 2006 Subject: [Lxml-checkins] r24389 - in lxml/branch/scoder2: . src/lxml Message-ID: <20060315154732.A505210086@code0.codespeak.net> Author: scoder Date: Wed Mar 15 16:47:21 2006 New Revision: 24389 Modified: lxml/branch/scoder2/bench.py lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/python.pxd Log: iterator merges from trunk Modified: lxml/branch/scoder2/bench.py ============================================================================== --- lxml/branch/scoder2/bench.py (original) +++ lxml/branch/scoder2/bench.py Wed Mar 15 16:47:21 2006 @@ -137,12 +137,12 @@ SubElement = self.etree.SubElement current_time = time.time t = current_time() - root = self.etree.Element('{x}root') + root = self.etree.Element('{a}root') for ch1 in atoz: for i in range(20): - el = SubElement(root, "{y}%s%03d" % (ch1, i), attributes) + el = SubElement(root, "{b}"+ch1, attributes) for ch2 in atoz: - SubElement(el, "{z}"+ch2) + SubElement(el, "{c}%s%03d" % (ch2, i)) t = current_time() - t return (root, t) @@ -151,11 +151,11 @@ SubElement = self.etree.SubElement current_time = time.time t = current_time() - root = self.etree.Element('{x}root') + root = self.etree.Element('{a}root') children = [root] for i in range(7): tag_no = count().next - children = [ SubElement(c, "{y}z%d" % i, attributes) + children = [ 
SubElement(c, "{b}a%d" % i, attributes) for i,c in enumerate(chain(children, children, children)) ] t = current_time() - t return (root, t) @@ -166,7 +166,7 @@ SubElement = self.etree.SubElement current_time = time.time t = current_time() - root = self.etree.Element('{x}root') + root = self.etree.Element('{a}root') children = [root] for ch1 in atoz: el = SubElement(root, "{b}"+ch1, attributes) @@ -348,6 +348,15 @@ for child in root[-100:-5]: root.index(child, start=-100, stop=-5) + def bench_getiterator(self, root): + list(islice(root.getiterator(), 10, 110)) + + def bench_getiterator_tag(self, root): + list(islice(root.getiterator("{b}a"), 3, 10)) + + def bench_getiterator_tag_all(self, root): + list(root.getiterator("{b}a")) + ############################################################ # Main program ############################################################ Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Wed Mar 15 16:47:21 2006 @@ -996,6 +996,8 @@ return current_node cdef class ElementDepthFirstIterator: + """Iterates over an element and its sub-elements in document order (depth + first pre-order).""" # we keep Python references here to control GC # keep next node to return and a stack of position state in the tree cdef object _stack @@ -1005,41 +1007,38 @@ _raiseIfNone(node) self._next_node = node self._stack = [] - c_node = _findChildForwards(node._c_node, 0) - if c_node is not NULL: - python.PyList_Append( - self._stack, _elementFactory(node._doc, c_node)) + self._findAndPushNextNode(node) def __iter__(self): return self def __next__(self): cdef xmlNode* c_node - cdef _NodeBase current_node cdef _NodeBase next_node current_node = self._next_node if current_node is None: raise StopIteration - stack = self._stack - if not stack: + if python.PyList_GET_SIZE(stack) == 0: 
self._next_node = None return current_node - next_node = stack[-1] self._next_node = next_node + self._findAndPushNextNode(next_node) + return current_node - # take next child until we reach a leaf - c_node = _findChildForwards(next_node._c_node, 0) + cdef void _findAndPushNextNode(self, _NodeBase node): + cdef xmlNode* c_node + stack = self._stack + # try next child level until we hit a leaf + c_node = _findChildForwards(node._c_node, 0) if c_node is NULL: pop = stack.pop - while c_node is NULL and stack: - # go up the stack until we find a sibling - next_node = pop() - c_node = _nextElement(next_node._c_node) - + while c_node is NULL and python.PyList_GET_SIZE(stack): + # walk up the stack until we find a sibling + node = pop() + c_node = _nextElement(node._c_node) if c_node is not NULL: python.PyList_Append( - stack, _elementFactory(next_node._doc, c_node)) - return current_node + stack, _elementFactory(node._doc, c_node)) cdef class ElementTagFilter: cdef object _iterator @@ -1047,7 +1046,7 @@ cdef char* _href cdef char* _name def __init__(self, element_iterator, tag): - self._iterator = element_iterator + self._iterator = iter(element_iterator) ns_href, name = _getNsTag(tag) self._pystrings = (ns_href, name) # keep Python references self._name = name @@ -1059,17 +1058,18 @@ return self def __next__(self): cdef _NodeBase node - cdef xmlNode* c_node while 1: node = self._iterator.next() - c_node = node._c_node - if tree.strcmp(c_node.name, self._name) == 0: - if c_node.ns == NULL or c_node.ns.href == NULL: - if self._href == NULL: - break - elif tree.strcmp(c_node.ns.href, self._href) == 0: - break - return node + if self._tagMatches(node._c_node): + return node + + cdef int _tagMatches(self, xmlNode* c_node): + if tree.strcmp(c_node.name, self._name) == 0: + if c_node.ns == NULL or c_node.ns.href == NULL: + return self._href == NULL + else: + return tree.strcmp(c_node.ns.href, self._href) == 0 + return 0 cdef xmlNode* _createElement(xmlDoc* c_doc, object 
name_utf, object attrib, object extra) except NULL: Modified: lxml/branch/scoder2/src/lxml/python.pxd ============================================================================== --- lxml/branch/scoder2/src/lxml/python.pxd (original) +++ lxml/branch/scoder2/src/lxml/python.pxd Wed Mar 15 16:47:21 2006 @@ -18,6 +18,7 @@ cdef object PyString_FromString(char* s) cdef object PyString_FromFormat(char* format, ...) + cdef int PyList_GET_SIZE(object l) cdef int PyList_Append(object l, object obj) cdef int PyDict_SetItem(object d, object key, object value) cdef int PyDict_SetItemString(object d, char* key, object value) From scoder at codespeak.net Wed Mar 15 17:19:55 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 15 17:19:58 2006 Subject: [Lxml-checkins] r24395 - in lxml/trunk/src/lxml: . tests Message-ID: <20060315161955.31CFD10088@code0.codespeak.net> Author: scoder Date: Wed Mar 15 17:19:54 2006 New Revision: 24395 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/tests/test_elementtree.py Log: added XMLID module function - simplistic, straight forward implementation using XPath Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Mar 15 17:19:54 2006 @@ -1163,6 +1163,13 @@ fromstring = XML +def XMLID(text): + root = XML(text) + dic = {} + for elem in root.xpath('//*[string(@id)]'): + python.PyDict_SetItem(dic, elem.get('id'), elem) + return (root, dic) + def iselement(element): return isinstance(element, _Element) Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Wed Mar 15 17:19:54 2006 @@ -21,6 +21,7 @@ cdef int PyList_GET_SIZE(object l) cdef int PyList_Append(object l, object obj) cdef int 
PyDict_SetItemString(object d, char* key, object value) + cdef int PyDict_SetItem(object d, object key, object value) cdef PyObject* PyDict_GetItemString(object d, char* key) cdef PyObject* PyDict_GetItem(object d, object key) Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Wed Mar 15 17:19:54 2006 @@ -334,6 +334,29 @@ self.assertEquals(0, len(root)) self.assertEquals('This is a text.', root.text) + def test_XMLID(self): + XMLID = self.etree.XMLID + XML = self.etree.XML + xml_text = ''' + +

...

+

...

+

Regular paragraph.

+

...

+
+ ''' + + root, dic = XMLID(xml_text) + root2 = XML(xml_text) + self.assertEquals(self._writeElement(root), + self._writeElement(root2)) + expected = { + "chapter1" : root[0], + "note1" : root[1], + "warn1" : root[3] + } + self.assertEquals(dic, expected) + def test_fromstring(self): fromstring = self.etree.fromstring From scoder at codespeak.net Wed Mar 15 17:20:16 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 15 17:20:17 2006 Subject: [Lxml-checkins] r24396 - in lxml/branch/scoder2/src/lxml: . tests Message-ID: <20060315162016.EE67010088@code0.codespeak.net> Author: scoder Date: Wed Mar 15 17:20:14 2006 New Revision: 24396 Modified: lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/python.pxd lxml/branch/scoder2/src/lxml/tests/test_elementtree.py Log: merges from trunk Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Wed Mar 15 17:20:14 2006 @@ -1163,6 +1163,13 @@ fromstring = XML +def XMLID(text): + root = XML(text) + dic = {} + for elem in root.xpath('//*[string(@id)]'): + python.PyDict_SetItem(dic, elem.get('id'), elem) + return (root, dic) + def iselement(element): return isinstance(element, _Element) Modified: lxml/branch/scoder2/src/lxml/python.pxd ============================================================================== --- lxml/branch/scoder2/src/lxml/python.pxd (original) +++ lxml/branch/scoder2/src/lxml/python.pxd Wed Mar 15 17:20:14 2006 @@ -22,6 +22,7 @@ cdef int PyList_Append(object l, object obj) cdef int PyDict_SetItem(object d, object key, object value) cdef int PyDict_SetItemString(object d, char* key, object value) + cdef int PyDict_SetItem(object d, object key, object value) cdef PyObject* PyDict_GetItemString(object d, char* key) cdef PyObject* PyDict_GetItem(object d, object key) cdef int PyDict_DelItem(object d, object 
key) Modified: lxml/branch/scoder2/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_elementtree.py Wed Mar 15 17:20:14 2006 @@ -334,6 +334,29 @@ self.assertEquals(0, len(root)) self.assertEquals('This is a text.', root.text) + def test_XMLID(self): + XMLID = self.etree.XMLID + XML = self.etree.XML + xml_text = ''' + +

<h1 id="chapter1">...</h1>
+ <p id="note1" class="note">...</p>
+ <p>Regular paragraph.</p>
+ <p id="warn1" class="warning">...</p>

+
+ ''' + + root, dic = XMLID(xml_text) + root2 = XML(xml_text) + self.assertEquals(self._writeElement(root), + self._writeElement(root2)) + expected = { + "chapter1" : root[0], + "note1" : root[1], + "warn1" : root[3] + } + self.assertEquals(dic, expected) + def test_fromstring(self): fromstring = self.etree.fromstring From scoder at codespeak.net Wed Mar 15 17:23:55 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 15 17:23:56 2006 Subject: [Lxml-checkins] r24398 - lxml/branch/scoder2/src/lxml Message-ID: <20060315162355.BE86910088@code0.codespeak.net> Author: scoder Date: Wed Mar 15 17:23:49 2006 New Revision: 24398 Modified: lxml/branch/scoder2/src/lxml/python.pxd Log: fixed 1-line left-over from merge Modified: lxml/branch/scoder2/src/lxml/python.pxd ============================================================================== --- lxml/branch/scoder2/src/lxml/python.pxd (original) +++ lxml/branch/scoder2/src/lxml/python.pxd Wed Mar 15 17:23:49 2006 @@ -20,7 +20,6 @@ cdef int PyList_GET_SIZE(object l) cdef int PyList_Append(object l, object obj) - cdef int PyDict_SetItem(object d, object key, object value) cdef int PyDict_SetItemString(object d, char* key, object value) cdef int PyDict_SetItem(object d, object key, object value) cdef PyObject* PyDict_GetItemString(object d, char* key) From scoder at codespeak.net Thu Mar 16 07:45:16 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 07:45:18 2006 Subject: [Lxml-checkins] r24424 - in lxml/branch/scoder2/src/lxml: . 
tests Message-ID: <20060316064516.9303C10082@code0.codespeak.net> Author: scoder Date: Thu Mar 16 07:45:12 2006 New Revision: 24424 Modified: lxml/branch/scoder2/src/lxml/tests/test_xpathevaluator.py lxml/branch/scoder2/src/lxml/xslt.pxi Log: moved {namespace} syntax support from XPath class into ETXPath class, some clean up, make XPath objects callable (== evaluate) Modified: lxml/branch/scoder2/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_xpathevaluator.py Thu Mar 16 07:45:12 2006 @@ -170,6 +170,9 @@ self.assertEquals(1, len(r)) self.assertEquals("true", r[0].get('attr')) + +class ETreeXPathClassTestCase(HelperTestCase): + "Tests for the XPath class" def test_xpath_compile_doc(self): x = self.parse('') @@ -208,22 +211,24 @@ r = expr.evaluate(x, aval=True) self.assertEquals(1, len(r)) + def test_xpath_compile_error(self): + self.assertRaises(SyntaxError, etree.XPath, '\\fad') + +class ETreeETXPathClassTestCase(HelperTestCase): + "Tests for the ETXPath class" def test_xpath_compile_ns(self): x = self.parse('') - expr = etree.XPath("/a/{nsa}b") + expr = etree.ETXPath("/a/{nsa}b") r = expr.evaluate(x) self.assertEquals(1, len(r)) self.assertEquals('{nsa}b', r[0].tag) - expr = etree.XPath("/a/{nsb}b") + expr = etree.ETXPath("/a/{nsb}b") r = expr.evaluate(x) self.assertEquals(1, len(r)) self.assertEquals('{nsb}b', r[0].tag) - def test_xpath_compile_error(self): - self.assertRaises(SyntaxError, etree.XPath, '\\fad') - SAMPLE_XML = etree.parse(StringIO(""" text @@ -318,6 +323,8 @@ def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeXPathTestCase)]) + suite.addTests([unittest.makeSuite(ETreeXPathClassTestCase)]) + suite.addTests([unittest.makeSuite(ETreeETXPathClassTestCase)]) suite.addTests([doctest.DocTestSuite()]) return suite Modified: 
lxml/branch/scoder2/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/xslt.pxi (original) +++ lxml/branch/scoder2/src/lxml/xslt.pxi Thu Mar 16 07:45:12 2006 @@ -27,11 +27,6 @@ class XPathSyntaxError(LxmlSyntaxError): pass -cdef object _RE_STRINGS -cdef object _RE_NAMESPACES -_RE_STRINGS = re.compile('("[^"]*")|(\'[^\']*\')') -_RE_NAMESPACES = re.compile('{([^}]+)}') - ################################################################################ # support for extension functions in XPath/XSLT @@ -470,23 +465,6 @@ def __init__(self, namespaces, extensions, variables=None): self._context = XPathContext(namespaces, extensions, variables) - cdef _nsextract_path(self, path_utf): - namespaces = {} - # replace {namespaces} by prefixes - stripped_path = _RE_STRINGS.sub('', path_utf) # remove string literals - namespace_uris = [] - i = 1 - for namespace in _RE_NAMESPACES.findall(stripped_path): - if namespace not in namespace_uris: - namespace_uris.append(namespace) - namespaces["p%02d" % i] = namespace - i = i+1 - for prefix, namespace in namespaces.items(): - ns_str = "{%s}" % namespace - prefix_str = prefix + ':' - path_utf = path_utf.replace(ns_str, prefix_str) - return path_utf, namespaces - cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, _Document doc): _exc_info = self._context._exc_info if _exc_info is not None: @@ -607,49 +585,76 @@ cdef readonly object path def __init__(self, path, namespaces=None, extensions=None): + XPathEvaluatorBase.__init__(self, namespaces, extensions, None) self.path = path path = _utf8(path) - if namespaces is None: - path, namespaces = self._nsextract_path(path) - XPathEvaluatorBase.__init__(self, namespaces, extensions, None) self._xpath = xpath.xmlXPathCompile(path) if self._xpath is NULL: raise XPathSyntaxError, "Error in xpath expression." 
- def evaluate(self, _etree_or_element, **_variables): + def __call__(self, _etree_or_element, **_variables): cdef xpath.xmlXPathContext* xpathCtxt cdef xpath.xmlXPathObject* xpathObj cdef _Document document cdef _NodeBase element + cdef XPathContext context - if isinstance(_etree_or_element, _ElementTree): - document = (<_ElementTree>_etree_or_element)._doc - element = <_NodeBase>(document.getroot()) - elif isinstance(_etree_or_element, _NodeBase): - element = <_NodeBase>_etree_or_element - document = element._doc - else: - raise ValueError, "Invalid argument, neither document nor element." + document = _documentOrRaise(_etree_or_element) + element = _rootNodeOf(_etree_or_element) xpathCtxt = xpath.xmlXPathNewContext(document._c_doc) xpathCtxt.node = element._c_node - self._context._release_temp_refs() - self._context.register_context(xpathCtxt, document) - self._context.registerVariables(_variables) + context = self._context + context._release_temp_refs() + context.register_context(xpathCtxt, document) + context.registerVariables(_variables) xpathObj = xpath.xmlXPathCompiledEval(self._xpath, xpathCtxt) - self._context.unregister_context() + context.unregister_context() xpath.xmlXPathFreeContext(xpathCtxt) return self._handle_result(xpathObj, document) + def evaluate(self, _tree, **variables): + return self(_tree, **variables) + def __dealloc__(self): if self._xpath is not NULL: xpath.xmlXPathFreeCompExpr(self._xpath) +cdef object _replace_strings +cdef object _find_namespaces +_replace_strings = re.compile('("[^"]*")|(\'[^\']*\')').sub +_find_namespaces = re.compile('{([^}]+)}').findall + +cdef class ETXPath(XPath): + """Special XPath class that supports the ElementTree {uri} notation for + namespaces.""" + def __init__(self, path, extensions=None): + path_utf, namespaces = self._nsextract_path(_utf8(path)) + XPath.__init__(self, funicode(path_utf), namespaces, extensions) + + cdef _nsextract_path(self, path_utf): + # replace {namespaces} by new prefixes + cdef 
int i + namespaces = {} + stripped_path = _replace_strings('', path_utf) # remove string literals + namespace_uris = [] + i = 1 + for namespace in _find_namespaces(stripped_path): + if namespace not in namespace_uris: + prefix = python.PyString_FromFormat("xpp%02d", i) + i = i+1 + python.PyList_Append(namespace_uris, namespace) + python.PyDict_SetItem(namespaces, prefix, namespace) + ns_str = "{%s}" % namespace + prefix_str = prefix + ':' + # FIXME: this also replaces {namespaces} within strings! + path_utf = path_utf.replace(ns_str, prefix_str) + return path_utf, namespaces ################################################################################ # helper functions From scoder at codespeak.net Thu Mar 16 07:46:21 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 07:46:23 2006 Subject: [Lxml-checkins] r24425 - lxml/branch/scoder2 Message-ID: <20060316064621.D7DC910082@code0.codespeak.net> Author: scoder Date: Thu Mar 16 07:46:14 2006 New Revision: 24425 Modified: lxml/branch/scoder2/bench.py Log: benchmarks for XPath evaluators Modified: lxml/branch/scoder2/bench.py ============================================================================== --- lxml/branch/scoder2/bench.py (original) +++ lxml/branch/scoder2/bench.py Thu Mar 16 07:46:14 2006 @@ -36,6 +36,13 @@ return function return set_value +def onlylib(*libs): + def set_libs(function): + if libs: + function.LIBS = libs + return function + return set_libs + class BenchMarkBase(object): atoz = string.ascii_lowercase @@ -187,6 +194,9 @@ if not name.startswith('bench_'): continue method = getattr(self, name) + if hasattr(method, 'LIBS'): + if self.lib_name not in method.LIBS: + continue if method.__doc__: tree_sets = method.__doc__.split() else: @@ -357,6 +367,18 @@ def bench_getiterator_tag_all(self, root): list(root.getiterator("{b}a")) + @onlylib('lxe') + def bench_xpath_class(self, root): + xpath = self.etree.XPath("./*[0]") + for child in root: + xpath(child) + + 
@onlylib('lxe') + def bench_xpath_element(self, root): + for child in root: + xpath = self.etree.XPathElementEvaluator(child) + xpath.evaluate("./*[0]") + ############################################################ # Main program ############################################################ From scoder at codespeak.net Thu Mar 16 07:47:49 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 07:47:50 2006 Subject: [Lxml-checkins] r24426 - lxml/branch/scoder2/src/lxml Message-ID: <20060316064749.AE0F010082@code0.codespeak.net> Author: scoder Date: Thu Mar 16 07:47:48 2006 New Revision: 24426 Modified: lxml/branch/scoder2/src/lxml/xslt.pxi Log: fix argument naming Modified: lxml/branch/scoder2/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/xslt.pxi (original) +++ lxml/branch/scoder2/src/lxml/xslt.pxi Thu Mar 16 07:47:48 2006 @@ -618,8 +618,8 @@ return self._handle_result(xpathObj, document) - def evaluate(self, _tree, **variables): - return self(_tree, **variables) + def evaluate(self, _tree, **_variables): + return self(_tree, **_variables) def __dealloc__(self): if self._xpath is not NULL: From scoder at codespeak.net Thu Mar 16 08:29:51 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 08:29:53 2006 Subject: [Lxml-checkins] r24427 - in lxml/trunk: . 
src/lxml Message-ID: <20060316072951.5B53B10082@code0.codespeak.net> Author: scoder Date: Thu Mar 16 08:29:39 2006 New Revision: 24427 Modified: lxml/trunk/bench.py lxml/trunk/src/lxml/etree.pyx Log: added __nonzero__ method to _Element to speed up truth test (== has children?), new benchmark has_children to show that it's much faster Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Thu Mar 16 08:29:39 2006 @@ -261,6 +261,11 @@ def bench_clear(self, root): root.clear() + def bench_has_children(self, root): + for child in root: + if child and child and child and child and child: + pass + def bench_create_subelements(self, root): SubElement = self.etree.SubElement for child in root: Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 16 08:29:39 2006 @@ -599,6 +599,11 @@ c_node = c_node.next return c + def __nonzero__(self): + cdef xmlNode* c_node + c_node = _findChildBackwards(self._c_node, 0) + return c_node != NULL + def __iter__(self): return ElementChildIterator(self) From scoder at codespeak.net Thu Mar 16 08:31:34 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 08:31:35 2006 Subject: [Lxml-checkins] r24428 - in lxml/branch/scoder2: . 
src/lxml Message-ID: <20060316073134.3F22310082@code0.codespeak.net> Author: scoder Date: Thu Mar 16 08:31:32 2006 New Revision: 24428 Modified: lxml/branch/scoder2/bench.py lxml/branch/scoder2/src/lxml/etree.pyx Log: merges from trunk Modified: lxml/branch/scoder2/bench.py ============================================================================== --- lxml/branch/scoder2/bench.py (original) +++ lxml/branch/scoder2/bench.py Thu Mar 16 08:31:32 2006 @@ -271,6 +271,11 @@ def bench_clear(self, root): root.clear() + def bench_has_children(self, root): + for child in root: + if child and child and child and child and child: + pass + def bench_create_subelements(self, root): SubElement = self.etree.SubElement for child in root: Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Thu Mar 16 08:31:32 2006 @@ -599,6 +599,11 @@ c_node = c_node.next return c + def __nonzero__(self): + cdef xmlNode* c_node + c_node = _findChildBackwards(self._c_node, 0) + return c_node != NULL + def __iter__(self): return ElementChildIterator(self) From scoder at codespeak.net Thu Mar 16 10:19:24 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 10:19:26 2006 Subject: [Lxml-checkins] r24430 - lxml/trunk Message-ID: <20060316091924.F126A10082@code0.codespeak.net> Author: scoder Date: Thu Mar 16 10:19:23 2006 New Revision: 24430 Modified: lxml/trunk/bench.py Log: benchmark for len(element) Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Thu Mar 16 10:19:23 2006 @@ -266,6 +266,10 @@ if child and child and child and child and child: pass + def bench_len(self, root): + for child in root: + map(len, repeat(child, 20)) + def bench_create_subelements(self, root): SubElement = 
self.etree.SubElement for child in root: From scoder at codespeak.net Thu Mar 16 10:19:44 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 10:19:45 2006 Subject: [Lxml-checkins] r24431 - lxml/branch/scoder2 Message-ID: <20060316091944.6E3D010082@code0.codespeak.net> Author: scoder Date: Thu Mar 16 10:19:43 2006 New Revision: 24431 Modified: lxml/branch/scoder2/bench.py Log: merges from trunk Modified: lxml/branch/scoder2/bench.py ============================================================================== --- lxml/branch/scoder2/bench.py (original) +++ lxml/branch/scoder2/bench.py Thu Mar 16 10:19:43 2006 @@ -276,6 +276,10 @@ if child and child and child and child and child: pass + def bench_len(self, root): + for child in root: + map(len, repeat(child, 20)) + def bench_create_subelements(self, root): SubElement = self.etree.SubElement for child in root: From scoder at codespeak.net Thu Mar 16 10:41:16 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 10:41:18 2006 Subject: [Lxml-checkins] r24434 - lxml/branch/scoder2/src/lxml Message-ID: <20060316094116.A3F241008F@code0.codespeak.net> Author: scoder Date: Thu Mar 16 10:41:15 2006 New Revision: 24434 Modified: lxml/branch/scoder2/src/lxml/nsclasses.pxi Log: some clean up Modified: lxml/branch/scoder2/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/nsclasses.pxi (original) +++ lxml/branch/scoder2/src/lxml/nsclasses.pxi Thu Mar 16 10:41:15 2006 @@ -153,7 +153,7 @@ "Internal lookup function to find all extension functions for XSLT/XPath." 
cdef _NamespaceRegistry registry ns_extensions = {} - for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.items(): + for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.iteritems(): if registry._extensions: ns_extensions[ns_utf] = registry._extensions return ns_extensions @@ -162,7 +162,7 @@ "Internal lookup function to find all function prefixes for XSLT/XPath." cdef _FunctionNamespaceRegistry registry ns_prefixes = {} - for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.items(): + for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.iteritems(): if registry._prefix_utf is not None: ns_prefixes[registry._prefix_utf] = ns_utf return ns_prefixes From scoder at codespeak.net Thu Mar 16 10:43:44 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 10:43:45 2006 Subject: [Lxml-checkins] r24435 - lxml/branch/scoder2/src/lxml Message-ID: <20060316094344.200771008F@code0.codespeak.net> Author: scoder Date: Thu Mar 16 10:43:42 2006 New Revision: 24435 Modified: lxml/branch/scoder2/src/lxml/python.pxd lxml/branch/scoder2/src/lxml/xslt.pxi Log: some clean up and refactoring of XPath context classes Modified: lxml/branch/scoder2/src/lxml/python.pxd ============================================================================== --- lxml/branch/scoder2/src/lxml/python.pxd (original) +++ lxml/branch/scoder2/src/lxml/python.pxd Thu Mar 16 10:43:42 2006 @@ -25,6 +25,7 @@ cdef PyObject* PyDict_GetItemString(object d, char* key) cdef PyObject* PyDict_GetItem(object d, object key) cdef int PyDict_DelItem(object d, object key) + cdef int PyDict_Clear(object d) cdef int PyNumber_Check(object instance) cdef int PyBool_Check(object instance) Modified: lxml/branch/scoder2/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/xslt.pxi (original) +++ lxml/branch/scoder2/src/lxml/xslt.pxi Thu Mar 16 10:43:42 2006 @@ -31,6 +31,7 @@ # support for extension functions in 
XPath/XSLT cdef class BaseContext: + cdef xpath.xmlXPathContext* _xpathCtxt cdef _Document _doc cdef object _extensions cdef object _namespaces @@ -44,6 +45,7 @@ cdef object _exc_info def __init__(self, namespaces, extensions): + self._xpathCtxt = NULL self._utf_refs = {} # fix old format extensions @@ -69,7 +71,7 @@ self._temp_elements = {} self._temp_docs = {} - cdef _to_utf(self, s): + cdef object _to_utf(self, s): "Convert to UTF-8 and keep a reference to the encoded string" cdef python.PyObject* dict_result if s is None: @@ -81,6 +83,10 @@ python.PyDict_SetItem(self._utf_refs, s, utf) return utf + cdef void _set_xpath_context(self, xpath.xmlXPathContext* xpathCtxt): + self._xpathCtxt = xpathCtxt + xpathCtxt.userData = self + cdef _register_context(self, _Document doc, int allow_none_namespace): self._doc = doc self._exc_info = None @@ -90,12 +96,13 @@ extensions = _find_extensions(namespaces.values()) else: extensions = _find_all_extensions() + if self._extensions is not None: + # add user provided extensions + extensions.update(self._extensions) if extensions: if not allow_none_namespace: python.PyDict_DelItem(extensions, None) self._registerExtensionFunctions(extensions) - if self._extensions is not None: - self.registerExtensionFunctions(self._extensions) cdef _unregister_context(self): self._unregisterExtensionFunctions() @@ -105,10 +112,13 @@ cdef _free_context(self): self._registered_namespaces = [] self._registered_extensions = [] - self._utf_refs.clear() + python.PyDict_Clear(self._utf_refs) self._doc = None + if self._xpathCtxt is not NULL: + self._xpathCtxt.userData = NULL + self._xpathCtxt = NULL - # namespaces (internal UTF-8 versions with leading '_') + # namespaces (internal UTF-8 methods with leading '_') def addNamespace(self, prefix, uri): if self._namespaces is None: @@ -123,14 +133,16 @@ def registerNamespace(self, prefix, ns_uri): prefix_utf = self._to_utf(prefix) ns_uri_utf = self._to_utf(ns_uri) - 
self._contextRegisterNamespace(prefix_utf, ns_uri_utf) + xpath.xmlXPathRegisterNs(self._xpathCtxt, prefix_utf, ns_uri_utf) self._registered_namespaces.append(prefix_utf) cdef _unregisterNamespaces(self): + cdef xpath.xmlXPathContext* xpathCtxt + xpathCtxt = self._xpathCtxt for prefix_utf in self._registered_namespaces: - self._contextUnregisterNamespace(prefix_utf) + xpath.xmlXPathRegisterNs(xpathCtxt, prefix_utf, NULL) - # extension functions (internal UTF-8 versions with leading '_') + # extension functions (internal UTF-8 methods with leading '_') def registerExtensionFunctions(self, extensions): for ns_uri, extension in extensions.items(): @@ -162,8 +174,8 @@ cdef _release_temp_refs(self): "Free temporarily referenced objects from this context." - self._temp_elements.clear() - self._temp_docs.clear() + python.PyDict_Clear(self._temp_elements) + python.PyDict_Clear(self._temp_docs) cdef _hold(self, obj): """A way to temporarily hold references to nodes in the evaluator. @@ -174,16 +186,16 @@ """ cdef _NodeBase element if isinstance(obj, _NodeBase): - obj = [obj] - if not type(obj) in (type([]), type(())): + obj = (obj,) + elif not python.PySequence_Check(obj): return for o in obj: if isinstance(o, _NodeBase): element = <_NodeBase>o #print "Holding element:", element._c_node - self._temp_elements[id(element)] = element + python.PyDict_SetItem(self._temp_elements, id(element), element) #print "Holding document:", element._doc._c_doc - self._temp_docs[id(element._doc)] = element._doc + python.PyDict_SetItem(self._temp_docs, id(element._doc), element._doc) ################################################################################ @@ -197,6 +209,7 @@ cdef register_context(self, xslt.xsltTransformContext* xsltCtxt, _Document doc): self._xsltCtxt = xsltCtxt + self._set_xpath_context(xsltCtxt.xpathCtxt) self._register_context(doc, 0) xsltCtxt.xpathCtxt.userData = self @@ -205,7 +218,6 @@ xsltCtxt = self._xsltCtxt if xsltCtxt is NULL: return - 
xsltCtxt.xpathCtxt.userData = NULL self._unregister_context() self._xsltCtxt = NULL @@ -218,14 +230,6 @@ self._xsltCtxt = NULL xslt.xsltFreeTransformContext(xsltCtxt) - def _contextRegisterNamespace(self, prefix_utf, uri_utf): - # ZZZ: don't know if this is the right thing to do for XSLT, but works - xpath.xmlXPathRegisterNs(self._xsltCtxt.xpathCtxt, prefix_utf, uri_utf) - - def _contextUnregisterNamespace(self, prefix_utf): - # ZZZ: don't know if this is the right thing to do for XSLT, but works - xpath.xmlXPathRegisterNs(self._xsltCtxt.xpathCtxt, prefix_utf, NULL) - def _contextRegisterExtensionFunction(self, ns_uri_utf, name_utf): if ns_uri_utf is None: raise XSLTExtensionError, "extensions must have non-empty namespaces" @@ -372,24 +376,21 @@ # XPath cdef class XPathContext(BaseContext): - cdef xpath.xmlXPathContext* _xpathCtxt cdef object _variables cdef object _registered_variables def __init__(self, namespaces, extensions, variables): - self._xpathCtxt = NULL BaseContext.__init__(self, namespaces, extensions) self._variables = variables self._registered_variables = [] cdef register_context(self, xpath.xmlXPathContext* xpathCtxt, _Document doc): - self._xpathCtxt = xpathCtxt + self._set_xpath_context(xpathCtxt) ns_prefixes = _find_all_extension_prefixes() if ns_prefixes: self.registerNamespaces(ns_prefixes) self._register_context(doc, 1) if self._variables is not None: self.registerVariables(self._variables) - xpathCtxt.userData = self cdef unregister_context(self): cdef xpath.xmlXPathContext* xpathCtxt @@ -409,7 +410,6 @@ return self._free_context() self._registered_variables = [] - self._xpathCtxt = NULL xpath.xmlXPathFreeContext(xpathCtxt) cdef registerVariables(self, variable_dict): @@ -437,12 +437,6 @@ xpath.xmlXPathRegisterVariable(xpathCtxt, name_utf, NULL) xpath.xmlXPathFreeObject(xpathVarValue) - def _contextRegisterNamespace(self, prefix_utf, uri_utf): - xpath.xmlXPathRegisterNs(self._xpathCtxt, prefix_utf, uri_utf) - - def 
_contextUnregisterNamespace(self, prefix_utf): - xpath.xmlXPathRegisterNs(self._xpathCtxt, prefix_utf, NULL) - def _contextRegisterExtensionFunction(self, ns_uri_utf, name_utf): if ns_uri_utf is not None: xpath.xmlXPathRegisterFuncNS(self._xpathCtxt, From scoder at codespeak.net Thu Mar 16 10:58:39 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 10:58:40 2006 Subject: [Lxml-checkins] r24436 - lxml/branch/scoder2/src/lxml Message-ID: <20060316095839.934351008F@code0.codespeak.net> Author: scoder Date: Thu Mar 16 10:58:38 2006 New Revision: 24436 Modified: lxml/branch/scoder2/src/lxml/xslt.pxi Log: clean up in ETXPath and XPathContext Modified: lxml/branch/scoder2/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/xslt.pxi (original) +++ lxml/branch/scoder2/src/lxml/xslt.pxi Thu Mar 16 10:58:38 2006 @@ -412,23 +412,23 @@ self._registered_variables = [] xpath.xmlXPathFreeContext(xpathCtxt) - cdef registerVariables(self, variable_dict): + def registerVariables(self, variable_dict): for name, value in variable_dict.items(): self.registerVariable(name, value) - cdef _unregisterVariables(self): + cdef void _unregisterVariables(self): for name in self._registered_variables: self._unregisterVariable(name) - cdef registerVariable(self, name, value): + def registerVariable(self, name, value): self._registerVariable(self._to_utf(name), value) self._registered_variables.append(name) - cdef _registerVariable(self, name_utf, value): + cdef void _registerVariable(self, name_utf, value): xpath.xmlXPathRegisterVariable( self._xpathCtxt, name_utf, _wrapXPathObject(value)) - cdef _unregisterVariable(self, name_utf): + cdef void _unregisterVariable(self, name_utf): cdef xpath.xmlXPathContext* xpathCtxt cdef xpath.xmlXPathObject* xpathVarValue xpathCtxt = self._xpathCtxt @@ -622,7 +622,7 @@ cdef object _replace_strings cdef object _find_namespaces _replace_strings = 
re.compile('("[^"]*")|(\'[^\']*\')').sub -_find_namespaces = re.compile('{([^}]+)}').findall +_find_namespaces = re.compile('({[^}]+})').findall cdef class ETXPath(XPath): """Special XPath class that supports the ElementTree {uri} notation for @@ -636,18 +636,18 @@ cdef int i namespaces = {} stripped_path = _replace_strings('', path_utf) # remove string literals - namespace_uris = [] + namespace_defs = [] i = 1 - for namespace in _find_namespaces(stripped_path): - if namespace not in namespace_uris: + for namespace_def in _find_namespaces(stripped_path): + if namespace_def not in namespace_defs: prefix = python.PyString_FromFormat("xpp%02d", i) i = i+1 - python.PyList_Append(namespace_uris, namespace) + python.PyList_Append(namespace_defs, namespace_def) + namespace = namespace_def[1:-1] # remove '{}' python.PyDict_SetItem(namespaces, prefix, namespace) - ns_str = "{%s}" % namespace prefix_str = prefix + ':' # FIXME: this also replaces {namespaces} within strings! - path_utf = path_utf.replace(ns_str, prefix_str) + path_utf = path_utf.replace(namespace_def, prefix_str) return path_utf, namespaces ################################################################################ From scoder at codespeak.net Thu Mar 16 11:09:12 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 11:09:14 2006 Subject: [Lxml-checkins] r24437 - lxml/branch/scoder2/src/lxml Message-ID: <20060316100912.EFDB01008F@code0.codespeak.net> Author: scoder Date: Thu Mar 16 11:09:11 2006 New Revision: 24437 Modified: lxml/branch/scoder2/src/lxml/xslt.pxi Log: make XPath result handling return comment nodes as _Comment elements Modified: lxml/branch/scoder2/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/xslt.pxi (original) +++ lxml/branch/scoder2/src/lxml/xslt.pxi Thu Mar 16 11:09:11 2006 @@ -666,7 +666,7 @@ if python.PyNumber_Check(obj): return xpath.xmlXPathNewFloat(obj) if isinstance(obj, 
_NodeBase): - obj = [obj] + obj = (obj,) if python.PySequence_Check(obj): resultSet = xpath.xmlXPathNodeSetCreate(NULL) for element in obj: @@ -715,7 +715,7 @@ return result for i from 0 <= i < xpathObj.nodesetval.nodeNr: c_node = xpathObj.nodesetval.nodeTab[i] - if c_node.type == tree.XML_ELEMENT_NODE: + if _isElement(c_node): element = _elementFactory(doc, c_node) result.append(element) elif c_node.type == tree.XML_TEXT_NODE: @@ -725,12 +725,6 @@ attr_value = funicode(s) tree.xmlFree(s) result.append(attr_value) - elif c_node.type == tree.XML_COMMENT_NODE: - s = tree.xmlNodeGetContent(c_node) - s2 = '' % s - comment_value = funicode(s2) - tree.xmlFree(s) - result.append(comment_value) else: print "Not yet implemented result node type:", c_node.type raise NotImplementedError From scoder at codespeak.net Thu Mar 16 11:11:30 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 11:11:30 2006 Subject: [Lxml-checkins] r24439 - lxml/trunk/src/lxml Message-ID: <20060316101130.2AC991008F@code0.codespeak.net> Author: scoder Date: Thu Mar 16 11:11:28 2006 New Revision: 24439 Modified: lxml/trunk/src/lxml/xslt.pxi Log: merge from scoder2 (R24437): make XPath result handling return comment nodes as _Comment elements Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Thu Mar 16 11:11:28 2006 @@ -318,7 +318,7 @@ if python.PyNumber_Check(obj): return xpath.xmlXPathNewFloat(obj) if isinstance(obj, _NodeBase): - obj = [obj] + obj = (obj,) if python.PySequence_Check(obj): resultSet = xpath.xmlXPathNodeSetCreate(NULL) for element in obj: @@ -367,7 +367,7 @@ return result for i from 0 <= i < xpathObj.nodesetval.nodeNr: c_node = xpathObj.nodesetval.nodeTab[i] - if c_node.type == tree.XML_ELEMENT_NODE: + if _isElement(c_node): element = _elementFactory(doc, c_node) result.append(element) elif c_node.type == 
tree.XML_TEXT_NODE: @@ -377,12 +377,6 @@ attr_value = funicode(s) tree.xmlFree(s) result.append(attr_value) - elif c_node.type == tree.XML_COMMENT_NODE: - s = tree.xmlNodeGetContent(c_node) - s2 = '' % s - comment_value = funicode(s2) - tree.xmlFree(s) - result.append(comment_value) else: print "Not yet implemented result node type:", c_node.type raise NotImplementedError From scoder at codespeak.net Thu Mar 16 11:31:44 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 11:31:46 2006 Subject: [Lxml-checkins] r24440 - in lxml/branch/scoder2/src/lxml: . tests Message-ID: <20060316103144.BA2DD1008B@code0.codespeak.net> Author: scoder Date: Thu Mar 16 11:31:33 2006 New Revision: 24440 Modified: lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/tests/test_xpathevaluator.py Log: fix test case for XPath comment result, make Comment.__repr__ display the comment Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Thu Mar 16 11:31:33 2006 @@ -794,7 +794,7 @@ # ACCESSORS def __repr__(self): - return "" % id(self) + return "" % self.text def __getitem__(self, n): raise IndexError Modified: lxml/branch/scoder2/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_xpathevaluator.py Thu Mar 16 11:31:33 2006 @@ -59,8 +59,8 @@ def test_xpath_list_comment(self): tree = self.parse('') - self.assertEquals([''], - tree.xpath('/a/node()')) + self.assertEquals([''], + map(repr, tree.xpath('/a/node()'))) def test_rel_xpath_boolean(self): root = etree.XML('') From scoder at codespeak.net Thu Mar 16 11:48:50 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 11:48:53 2006 
Subject: [Lxml-checkins] r24444 - lxml/trunk/src/lxml Message-ID: <20060316104850.EEF8B10094@code0.codespeak.net> Author: scoder Date: Thu Mar 16 11:48:49 2006 New Revision: 24444 Modified: lxml/trunk/src/lxml/etree.pyx Log: new error handling: store list with libxml2/xslt output strings in exceptions (if DEBUG==1) Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 16 11:48:49 2006 @@ -14,7 +14,10 @@ import sys # should libxml2/libxslt be allowed to shout? -DEBUG = False +# 0 : off +# 1 : append to exceptions +# 2 : to stderr +DEBUG = 1 ctypedef enum LXML_PROXY_TYPE: PROXY_ELEMENT @@ -33,7 +36,17 @@ # module level superclass for all exceptions class LxmlError(Error): - pass + def __init__(self, message): + Error.__init__(self, message) + self.error_log = __ERROR_LOG + _clear_error_log() + +# list to collect error output message from libxml2/libxslt +cdef object __ERROR_LOG +__ERROR_LOG = [] + +cdef void _clear_error_log(): + __ERROR_LOG = [] # superclass for all syntax errors class LxmlSyntaxError(SyntaxError, LxmlError): @@ -1614,6 +1627,13 @@ xmlerror.xmlError* error): pass +cdef void logGenericErrorFunc(void* ctxt, char* msg, ...): + python.PyList_Append(__ERROR_LOG, msg) + +cdef void logStructuredErrorFunc(void* userData, + xmlerror.xmlError* error): + python.PyList_Append(__ERROR_LOG, error.message) + cdef void _shutUpLibxmlErrors(): xmlerror.xmlSetGenericErrorFunc(NULL, nullGenericErrorFunc) xmlerror.xmlSetStructuredErrorFunc(NULL, nullStructuredErrorFunc) @@ -1622,8 +1642,18 @@ xslt.xsltSetGenericErrorFunc(NULL, nullGenericErrorFunc) # xslt.xsltSetTransformErrorFunc +cdef void _logLibxmlErrors(): + xmlerror.xmlSetGenericErrorFunc(NULL, logGenericErrorFunc) + xmlerror.xmlSetStructuredErrorFunc(NULL, logStructuredErrorFunc) + +cdef void _logLibxsltErrors(): + xslt.xsltSetGenericErrorFunc(NULL, 
logGenericErrorFunc) + # xslt.xsltSetTransformErrorFunc + # ugly global shutting up of all errors, but seems to work.. -if not DEBUG: +if DEBUG == 0: _shutUpLibxmlErrors() _shutUpLibxsltErrors() - +elif DEBUG == 1: + _logLibxmlErrors() + _logLibxsltErrors() From scoder at codespeak.net Thu Mar 16 11:51:57 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 11:51:59 2006 Subject: [Lxml-checkins] r24445 - in lxml/trunk/src/lxml: . tests Message-ID: <20060316105157.3F4E810094@code0.codespeak.net> Author: scoder Date: Thu Mar 16 11:51:45 2006 New Revision: 24445 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tests/test_xpathevaluator.py Log: merge from branch: fix test case for XPath comment result, make Comment.__repr__ display the comment Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 16 11:51:45 2006 @@ -807,7 +807,7 @@ # ACCESSORS def __repr__(self): - return "" % id(self) + return "" % self.text def __getitem__(self, n): raise IndexError Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Thu Mar 16 11:51:45 2006 @@ -59,8 +59,8 @@ def test_xpath_list_comment(self): tree = self.parse('') - self.assertEquals([''], - tree.xpath('/a/node()')) + self.assertEquals([''], + map(repr, tree.xpath('/a/node()'))) def test_rel_xpath_boolean(self): root = etree.XML('') From scoder at codespeak.net Thu Mar 16 11:54:12 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 11:54:23 2006 Subject: [Lxml-checkins] r24446 - lxml/branch/scoder2/src/lxml Message-ID: <20060316105412.4BD0E10094@code0.codespeak.net> Author: scoder Date: Thu Mar 16 11:54:01 2006 New Revision: 
24446 Modified: lxml/branch/scoder2/src/lxml/etree.pyx Log: merged in error handling from trunk Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Thu Mar 16 11:54:01 2006 @@ -14,7 +14,10 @@ import sys # should libxml2/libxslt be allowed to shout? -DEBUG = False +# 0 : off +# 1 : append to exceptions +# 2 : to stderr +DEBUG = 1 ctypedef enum LXML_PROXY_TYPE: PROXY_ELEMENT @@ -33,7 +36,17 @@ # module level superclass for all exceptions class LxmlError(Error): - pass + def __init__(self, message): + Error.__init__(self, message) + self.error_log = __ERROR_LOG + _clear_error_log() + +# list to collect error output message from libxml2/libxslt +cdef object __ERROR_LOG +__ERROR_LOG = [] + +cdef void _clear_error_log(): + __ERROR_LOG = [] # superclass for all syntax errors class LxmlSyntaxError(SyntaxError, LxmlError): @@ -1614,6 +1627,13 @@ xmlerror.xmlError* error): pass +cdef void logGenericErrorFunc(void* ctxt, char* msg, ...): + python.PyList_Append(__ERROR_LOG, msg) + +cdef void logStructuredErrorFunc(void* userData, + xmlerror.xmlError* error): + python.PyList_Append(__ERROR_LOG, error.message) + cdef void _shutUpLibxmlErrors(): xmlerror.xmlSetGenericErrorFunc(NULL, nullGenericErrorFunc) xmlerror.xmlSetStructuredErrorFunc(NULL, nullStructuredErrorFunc) @@ -1622,8 +1642,18 @@ xslt.xsltSetGenericErrorFunc(NULL, nullGenericErrorFunc) # xslt.xsltSetTransformErrorFunc +cdef void _logLibxmlErrors(): + xmlerror.xmlSetGenericErrorFunc(NULL, logGenericErrorFunc) + xmlerror.xmlSetStructuredErrorFunc(NULL, logStructuredErrorFunc) + +cdef void _logLibxsltErrors(): + xslt.xsltSetGenericErrorFunc(NULL, logGenericErrorFunc) + # xslt.xsltSetTransformErrorFunc + # ugly global shutting up of all errors, but seems to work.. 
-if not DEBUG: +if DEBUG == 0: _shutUpLibxmlErrors() _shutUpLibxsltErrors() - +elif DEBUG == 1: + _logLibxmlErrors() + _logLibxsltErrors() From scoder at codespeak.net Thu Mar 16 12:45:25 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 12:45:26 2006 Subject: [Lxml-checkins] r24451 - lxml/trunk/src/lxml Message-ID: <20060316114525.7A0C510098@code0.codespeak.net> Author: scoder Date: Thu Mar 16 12:45:23 2006 New Revision: 24451 Modified: lxml/trunk/src/lxml/etree.pyx Log: refactoring of debug log: accept multi-line log entries, rotate log entries after __MAX_LOG_SIZE lines Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 16 12:45:23 2006 @@ -17,13 +17,20 @@ # 0 : off # 1 : append to exceptions # 2 : to stderr -DEBUG = 1 +cdef int __DEBUG +__DEBUG = 0 + +DEBUG = __DEBUG + +# maximum number of lines in the libxml2/xslt log if __DEBUG == 1 +cdef int __MAX_LOG_SIZE +__MAX_LOG_SIZE = 100 + ctypedef enum LXML_PROXY_TYPE: PROXY_ELEMENT PROXY_ATTRIB - # the rules # any libxml C argument/variable is prefixed with c_ # any non-public function/class is prefixed with an underscore @@ -36,20 +43,16 @@ # module level superclass for all exceptions class LxmlError(Error): - def __init__(self, message): - Error.__init__(self, message) - self.error_log = __ERROR_LOG - _clear_error_log() - -# list to collect error output message from libxml2/libxslt -cdef object __ERROR_LOG -__ERROR_LOG = [] - -cdef void _clear_error_log(): - __ERROR_LOG = [] + def __init__(self, *args): + Error.__init__(self, *args) + if __DEBUG == 1 and python.PyList_GET_SIZE(__ERROR_LOG): + self.error_log = __ERROR_LOG + _clear_error_log() + else: + self.error_log = () # superclass for all syntax errors -class LxmlSyntaxError(SyntaxError, LxmlError): +class LxmlSyntaxError(LxmlError, SyntaxError): pass class XIncludeError(LxmlError): 
@@ -1620,27 +1623,37 @@ ################################################################################ # DEBUG setup -cdef void nullGenericErrorFunc(void* ctxt, char* msg, ...): - pass +# list to collect error output message from libxml2/libxslt +cdef object __ERROR_LOG +__ERROR_LOG = [] -cdef void nullStructuredErrorFunc(void* userData, - xmlerror.xmlError* error): - pass +cdef void _clear_error_log(): + __ERROR_LOG = [] + +cdef void _logLines(char* s): + cdef char* pos + cdef int l + while s is not NULL and s[0] != c'\0': + pos = tree.xmlStrchr(s, c'\n') + if pos is NULL: + py_string = python.PyString_FromString(s) + s = NULL + else: + l = pos - s + py_string = python.PyString_FromStringAndSize(s, l) + s = pos + 1 + python.PyList_Append(__ERROR_LOG, py_string) + + l = python.PyList_GET_SIZE(__ERROR_LOG) - __MAX_LOG_SIZE + if l > 0: + del __ERROR_LOG[:l] cdef void logGenericErrorFunc(void* ctxt, char* msg, ...): - python.PyList_Append(__ERROR_LOG, msg) + _logLines(msg) cdef void logStructuredErrorFunc(void* userData, xmlerror.xmlError* error): - python.PyList_Append(__ERROR_LOG, error.message) - -cdef void _shutUpLibxmlErrors(): - xmlerror.xmlSetGenericErrorFunc(NULL, nullGenericErrorFunc) - xmlerror.xmlSetStructuredErrorFunc(NULL, nullStructuredErrorFunc) - -cdef void _shutUpLibxsltErrors(): - xslt.xsltSetGenericErrorFunc(NULL, nullGenericErrorFunc) - # xslt.xsltSetTransformErrorFunc + _logLines(error.message) cdef void _logLibxmlErrors(): xmlerror.xmlSetGenericErrorFunc(NULL, logGenericErrorFunc) @@ -1650,10 +1663,27 @@ xslt.xsltSetGenericErrorFunc(NULL, logGenericErrorFunc) # xslt.xsltSetTransformErrorFunc + # ugly global shutting up of all errors, but seems to work.. 
-if DEBUG == 0: +cdef void nullGenericErrorFunc(void* ctxt, char* msg, ...): + pass + +cdef void nullStructuredErrorFunc(void* userData, + xmlerror.xmlError* error): + pass + +cdef void _shutUpLibxmlErrors(): + xmlerror.xmlSetGenericErrorFunc(NULL, nullGenericErrorFunc) + xmlerror.xmlSetStructuredErrorFunc(NULL, nullStructuredErrorFunc) + +cdef void _shutUpLibxsltErrors(): + xslt.xsltSetGenericErrorFunc(NULL, nullGenericErrorFunc) + # xslt.xsltSetTransformErrorFunc + + +if __DEBUG == 0: _shutUpLibxmlErrors() _shutUpLibxsltErrors() -elif DEBUG == 1: +elif __DEBUG == 1: _logLibxmlErrors() _logLibxsltErrors() From scoder at codespeak.net Thu Mar 16 12:46:52 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 12:46:53 2006 Subject: [Lxml-checkins] r24452 - lxml/branch/scoder2/src/lxml Message-ID: <20060316114652.226EE10098@code0.codespeak.net> Author: scoder Date: Thu Mar 16 12:46:46 2006 New Revision: 24452 Modified: lxml/branch/scoder2/src/lxml/etree.pyx Log: merged in updates for log handling from trunk Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Thu Mar 16 12:46:46 2006 @@ -17,13 +17,20 @@ # 0 : off # 1 : append to exceptions # 2 : to stderr -DEBUG = 1 +cdef int __DEBUG +__DEBUG = 0 + +DEBUG = __DEBUG + +# maximum number of lines in the libxml2/xslt log if __DEBUG == 1 +cdef int __MAX_LOG_SIZE +__MAX_LOG_SIZE = 100 + ctypedef enum LXML_PROXY_TYPE: PROXY_ELEMENT PROXY_ATTRIB - # the rules # any libxml C argument/variable is prefixed with c_ # any non-public function/class is prefixed with an underscore @@ -36,20 +43,16 @@ # module level superclass for all exceptions class LxmlError(Error): - def __init__(self, message): - Error.__init__(self, message) - self.error_log = __ERROR_LOG - _clear_error_log() - -# list to collect error output message from libxml2/libxslt 
-cdef object __ERROR_LOG -__ERROR_LOG = [] - -cdef void _clear_error_log(): - __ERROR_LOG = [] + def __init__(self, *args): + Error.__init__(self, *args) + if __DEBUG == 1 and python.PyList_GET_SIZE(__ERROR_LOG): + self.error_log = __ERROR_LOG + _clear_error_log() + else: + self.error_log = () # superclass for all syntax errors -class LxmlSyntaxError(SyntaxError, LxmlError): +class LxmlSyntaxError(LxmlError, SyntaxError): pass class XIncludeError(LxmlError): @@ -1620,27 +1623,37 @@ ################################################################################ # DEBUG setup -cdef void nullGenericErrorFunc(void* ctxt, char* msg, ...): - pass +# list to collect error output message from libxml2/libxslt +cdef object __ERROR_LOG +__ERROR_LOG = [] -cdef void nullStructuredErrorFunc(void* userData, - xmlerror.xmlError* error): - pass +cdef void _clear_error_log(): + __ERROR_LOG = [] + +cdef void _logLines(char* s): + cdef char* pos + cdef int l + while s is not NULL and s[0] != c'\0': + pos = tree.xmlStrchr(s, c'\n') + if pos is NULL: + py_string = python.PyString_FromString(s) + s = NULL + else: + l = pos - s + py_string = python.PyString_FromStringAndSize(s, l) + s = pos + 1 + python.PyList_Append(__ERROR_LOG, py_string) + + l = python.PyList_GET_SIZE(__ERROR_LOG) - __MAX_LOG_SIZE + if l > 0: + del __ERROR_LOG[:l] cdef void logGenericErrorFunc(void* ctxt, char* msg, ...): - python.PyList_Append(__ERROR_LOG, msg) + _logLines(msg) cdef void logStructuredErrorFunc(void* userData, xmlerror.xmlError* error): - python.PyList_Append(__ERROR_LOG, error.message) - -cdef void _shutUpLibxmlErrors(): - xmlerror.xmlSetGenericErrorFunc(NULL, nullGenericErrorFunc) - xmlerror.xmlSetStructuredErrorFunc(NULL, nullStructuredErrorFunc) - -cdef void _shutUpLibxsltErrors(): - xslt.xsltSetGenericErrorFunc(NULL, nullGenericErrorFunc) - # xslt.xsltSetTransformErrorFunc + _logLines(error.message) cdef void _logLibxmlErrors(): xmlerror.xmlSetGenericErrorFunc(NULL, logGenericErrorFunc) @@ 
-1650,10 +1663,27 @@ xslt.xsltSetGenericErrorFunc(NULL, logGenericErrorFunc) # xslt.xsltSetTransformErrorFunc + # ugly global shutting up of all errors, but seems to work.. -if DEBUG == 0: +cdef void nullGenericErrorFunc(void* ctxt, char* msg, ...): + pass + +cdef void nullStructuredErrorFunc(void* userData, + xmlerror.xmlError* error): + pass + +cdef void _shutUpLibxmlErrors(): + xmlerror.xmlSetGenericErrorFunc(NULL, nullGenericErrorFunc) + xmlerror.xmlSetStructuredErrorFunc(NULL, nullStructuredErrorFunc) + +cdef void _shutUpLibxsltErrors(): + xslt.xsltSetGenericErrorFunc(NULL, nullGenericErrorFunc) + # xslt.xsltSetTransformErrorFunc + + +if __DEBUG == 0: _shutUpLibxmlErrors() _shutUpLibxsltErrors() -elif DEBUG == 1: +elif __DEBUG == 1: _logLibxmlErrors() _logLibxsltErrors() From scoder at codespeak.net Thu Mar 16 12:50:45 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 12:50:46 2006 Subject: [Lxml-checkins] r24453 - lxml/trunk/src/lxml Message-ID: <20060316115045.E161E10098@code0.codespeak.net> Author: scoder Date: Thu Mar 16 12:50:44 2006 New Revision: 24453 Modified: lxml/trunk/src/lxml/etree.pyx Log: make log length default to 20 Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 16 12:50:44 2006 @@ -24,7 +24,7 @@ # maximum number of lines in the libxml2/xslt log if __DEBUG == 1 cdef int __MAX_LOG_SIZE -__MAX_LOG_SIZE = 100 +__MAX_LOG_SIZE = 20 ctypedef enum LXML_PROXY_TYPE: From scoder at codespeak.net Thu Mar 16 12:51:02 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 12:51:04 2006 Subject: [Lxml-checkins] r24454 - lxml/branch/scoder2/src/lxml Message-ID: <20060316115102.DC16E10098@code0.codespeak.net> Author: scoder Date: Thu Mar 16 12:51:01 2006 New Revision: 24454 Modified: lxml/branch/scoder2/src/lxml/etree.pyx Log: merged in updates for log 
handling from trunk Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Thu Mar 16 12:51:01 2006 @@ -24,7 +24,7 @@ # maximum number of lines in the libxml2/xslt log if __DEBUG == 1 cdef int __MAX_LOG_SIZE -__MAX_LOG_SIZE = 100 +__MAX_LOG_SIZE = 20 ctypedef enum LXML_PROXY_TYPE: From scoder at codespeak.net Thu Mar 16 14:38:34 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 14:38:46 2006 Subject: [Lxml-checkins] r24463 - in lxml/trunk/src/lxml: . tests Message-ID: <20060316133834.8EA5B10091@code0.codespeak.net> Author: scoder Date: Thu Mar 16 14:38:21 2006 New Revision: 24463 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/relaxng.pxi lxml/trunk/src/lxml/tests/test_etree.py lxml/trunk/src/lxml/xmlschema.pxi Log: make error_log a property in LxmlException, RelaxNG, XMLSchema and XMLParser that returns the current error log as a tuple; new module level API function clear_error_log(); test case to check log update on parser errors Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 16 14:38:21 2006 @@ -13,29 +13,33 @@ from StringIO import StringIO import sys +# the rules +# any libxml C argument/variable is prefixed with c_ +# any non-public function/class is prefixed with an underscore +# instance creation is always through factories + +ctypedef enum LXML_PROXY_TYPE: + PROXY_ELEMENT + PROXY_ATTRIB + # should libxml2/libxslt be allowed to shout? 
# 0 : off -# 1 : append to exceptions +# 1 : provide log via exception property # 2 : to stderr cdef int __DEBUG -__DEBUG = 0 +__DEBUG = 1 +# make the compiled-in debug state publicly available DEBUG = __DEBUG # maximum number of lines in the libxml2/xslt log if __DEBUG == 1 cdef int __MAX_LOG_SIZE __MAX_LOG_SIZE = 20 +cdef object __ERROR_LOG -ctypedef enum LXML_PROXY_TYPE: - PROXY_ELEMENT - PROXY_ATTRIB - -# the rules -# any libxml C argument/variable is prefixed with c_ -# any non-public function/class is prefixed with an underscore -# instance creation is always through factories - +def __build_error_log_tuple(_): + return python.PyList_AsTuple(__ERROR_LOG) # Error superclass for ElementTree compatibility class Error(Exception): @@ -43,16 +47,10 @@ # module level superclass for all exceptions class LxmlError(Error): - def __init__(self, *args): - Error.__init__(self, *args) - if __DEBUG == 1 and python.PyList_GET_SIZE(__ERROR_LOG): - self.error_log = __ERROR_LOG - _clear_error_log() - else: - self.error_log = () + error_log = property(__build_error_log_tuple) # superclass for all syntax errors -class LxmlSyntaxError(LxmlError, SyntaxError): +class LxmlSyntaxError(SyntaxError, LxmlError): pass class XIncludeError(LxmlError): @@ -1623,12 +1621,11 @@ ################################################################################ # DEBUG setup -# list to collect error output message from libxml2/libxslt -cdef object __ERROR_LOG +# list to collect error output messages from libxml2/libxslt __ERROR_LOG = [] -cdef void _clear_error_log(): - __ERROR_LOG = [] +def clear_error_log(): + del __ERROR_LOG[:] cdef void _logLines(char* s): cdef char* pos Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Thu Mar 16 14:38:21 2006 @@ -48,6 +48,10 @@ self._parse_options = parse_options + property error_log: + def __get__(self): + 
return __build_error_log_tuple(self) + ## def copy(self, attribute_defaults=None, dtd_validation=None, ## no_network=None, ns_clean=None): ## cdef int parse_options Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Thu Mar 16 14:38:21 2006 @@ -24,6 +24,7 @@ cdef int PyDict_SetItem(object d, object key, object value) cdef PyObject* PyDict_GetItemString(object d, char* key) cdef PyObject* PyDict_GetItem(object d, object key) + cdef object PyList_AsTuple(object o) cdef int PyNumber_Check(object instance) cdef int PyBool_Check(object instance) Modified: lxml/trunk/src/lxml/relaxng.pxi ============================================================================== --- lxml/trunk/src/lxml/relaxng.pxi (original) +++ lxml/trunk/src/lxml/relaxng.pxi Thu Mar 16 14:38:21 2006 @@ -60,3 +60,6 @@ raise RelaxNGValidateError, "Internal error in Relax NG validation" return ret == 0 + property error_log: + def __get__(self): + return __build_error_log_tuple(self) Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Thu Mar 16 14:38:21 2006 @@ -25,6 +25,19 @@ self.assertRaises(SyntaxError, parse, f) f.close() + def test_parse_error_logging(self): + parse = self.etree.parse + # from StringIO + f = StringIO('') + self.etree.clear_error_log() + try: + parse(f) + log = "" + except SyntaxError, e: + log = '\n'.join(e.error_log) + f.close() + self.assert_('mismatch' in log) + def test_parse_error_from_file(self): parse = self.etree.parse # from file Modified: lxml/trunk/src/lxml/xmlschema.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlschema.pxi (original) +++ lxml/trunk/src/lxml/xmlschema.pxi Thu Mar 16 
14:38:21 2006 @@ -52,3 +52,7 @@ if ret == -1: raise XMLSchemaValidateError, "Internal error in XML Schema validation." return ret == 0 + + property error_log: + def __get__(self): + return __build_error_log_tuple(self) From scoder at codespeak.net Thu Mar 16 14:42:22 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 14:42:24 2006 Subject: [Lxml-checkins] r24464 - in lxml/branch/scoder2/src/lxml: . tests Message-ID: <20060316134222.A18E510092@code0.codespeak.net> Author: scoder Date: Thu Mar 16 14:42:20 2006 New Revision: 24464 Modified: lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/parser.pxi lxml/branch/scoder2/src/lxml/python.pxd lxml/branch/scoder2/src/lxml/relaxng.pxi lxml/branch/scoder2/src/lxml/tests/test_etree.py lxml/branch/scoder2/src/lxml/xmlschema.pxi Log: merged in error log updates from trunk Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Thu Mar 16 14:42:20 2006 @@ -13,29 +13,33 @@ from StringIO import StringIO import sys +# the rules +# any libxml C argument/variable is prefixed with c_ +# any non-public function/class is prefixed with an underscore +# instance creation is always through factories + +ctypedef enum LXML_PROXY_TYPE: + PROXY_ELEMENT + PROXY_ATTRIB + # should libxml2/libxslt be allowed to shout? 
# 0 : off -# 1 : append to exceptions +# 1 : provide log via exception property # 2 : to stderr cdef int __DEBUG -__DEBUG = 0 +__DEBUG = 1 +# make the compiled-in debug state publicly available DEBUG = __DEBUG # maximum number of lines in the libxml2/xslt log if __DEBUG == 1 cdef int __MAX_LOG_SIZE __MAX_LOG_SIZE = 20 +cdef object __ERROR_LOG -ctypedef enum LXML_PROXY_TYPE: - PROXY_ELEMENT - PROXY_ATTRIB - -# the rules -# any libxml C argument/variable is prefixed with c_ -# any non-public function/class is prefixed with an underscore -# instance creation is always through factories - +def __build_error_log_tuple(_): + return python.PyList_AsTuple(__ERROR_LOG) # Error superclass for ElementTree compatibility class Error(Exception): @@ -43,16 +47,10 @@ # module level superclass for all exceptions class LxmlError(Error): - def __init__(self, *args): - Error.__init__(self, *args) - if __DEBUG == 1 and python.PyList_GET_SIZE(__ERROR_LOG): - self.error_log = __ERROR_LOG - _clear_error_log() - else: - self.error_log = () + error_log = property(__build_error_log_tuple) # superclass for all syntax errors -class LxmlSyntaxError(LxmlError, SyntaxError): +class LxmlSyntaxError(SyntaxError, LxmlError): pass class XIncludeError(LxmlError): @@ -1623,12 +1621,11 @@ ################################################################################ # DEBUG setup -# list to collect error output message from libxml2/libxslt -cdef object __ERROR_LOG +# list to collect error output messages from libxml2/libxslt __ERROR_LOG = [] -cdef void _clear_error_log(): - __ERROR_LOG = [] +def clear_error_log(): + del __ERROR_LOG[:] cdef void _logLines(char* s): cdef char* pos Modified: lxml/branch/scoder2/src/lxml/parser.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/parser.pxi (original) +++ lxml/branch/scoder2/src/lxml/parser.pxi Thu Mar 16 14:42:20 2006 @@ -48,6 +48,10 @@ self._parse_options = parse_options + property 
error_log: + def __get__(self): + return __build_error_log_tuple(self) + ## def copy(self, attribute_defaults=None, dtd_validation=None, ## no_network=None, ns_clean=None): ## cdef int parse_options Modified: lxml/branch/scoder2/src/lxml/python.pxd ============================================================================== --- lxml/branch/scoder2/src/lxml/python.pxd (original) +++ lxml/branch/scoder2/src/lxml/python.pxd Thu Mar 16 14:42:20 2006 @@ -26,6 +26,7 @@ cdef PyObject* PyDict_GetItem(object d, object key) cdef int PyDict_DelItem(object d, object key) cdef int PyDict_Clear(object d) + cdef object PyList_AsTuple(object o) cdef int PyNumber_Check(object instance) cdef int PyBool_Check(object instance) Modified: lxml/branch/scoder2/src/lxml/relaxng.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/relaxng.pxi (original) +++ lxml/branch/scoder2/src/lxml/relaxng.pxi Thu Mar 16 14:42:20 2006 @@ -60,6 +60,10 @@ raise RelaxNGValidateError, "Internal error in Relax NG validation" return ret == 0 + property error_log: + def __get__(self): + return __build_error_log_tuple(self) + class RelocatableRelaxNG: def __init__(self, tree, start=None): self._tree = tree Modified: lxml/branch/scoder2/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/scoder2/src/lxml/tests/test_etree.py (original) +++ lxml/branch/scoder2/src/lxml/tests/test_etree.py Thu Mar 16 14:42:20 2006 @@ -25,6 +25,19 @@ self.assertRaises(SyntaxError, parse, f) f.close() + def test_parse_error_logging(self): + parse = self.etree.parse + # from StringIO + f = StringIO('') + self.etree.clear_error_log() + try: + parse(f) + log = "" + except SyntaxError, e: + log = '\n'.join(e.error_log) + f.close() + self.assert_('mismatch' in log) + def test_parse_error_from_file(self): parse = self.etree.parse # from file Modified: lxml/branch/scoder2/src/lxml/xmlschema.pxi 
============================================================================== --- lxml/branch/scoder2/src/lxml/xmlschema.pxi (original) +++ lxml/branch/scoder2/src/lxml/xmlschema.pxi Thu Mar 16 14:42:20 2006 @@ -52,3 +52,7 @@ if ret == -1: raise XMLSchemaValidateError, "Internal error in XML Schema validation." return ret == 0 + + property error_log: + def __get__(self): + return __build_error_log_tuple(self) From scoder at codespeak.net Thu Mar 16 15:16:23 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 15:16:24 2006 Subject: [Lxml-checkins] r24467 - lxml/trunk/src/lxml Message-ID: <20060316141623.4DBF710091@code0.codespeak.net> Author: scoder Date: Thu Mar 16 15:16:21 2006 New Revision: 24467 Added: lxml/trunk/src/lxml/xmlerror.pxi Modified: lxml/trunk/src/lxml/etree.pyx Log: factored error log handling out into xmlerror.pxi Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 16 15:16:21 2006 @@ -22,13 +22,20 @@ PROXY_ELEMENT PROXY_ATTRIB -# should libxml2/libxslt be allowed to shout? -# 0 : off +# what to do with libxml2/libxslt error messages? 
+# 0 : drop # 1 : provide log via exception property -# 2 : to stderr +# 2 : write to stderr cdef int __DEBUG __DEBUG = 1 +if __DEBUG == 0: + _shutUpLibxmlErrors() + _shutUpLibxsltErrors() +elif __DEBUG == 1: + _logLibxmlErrors() + _logLibxsltErrors() + # make the compiled-in debug state publicly available DEBUG = __DEBUG @@ -36,11 +43,6 @@ cdef int __MAX_LOG_SIZE __MAX_LOG_SIZE = 20 -cdef object __ERROR_LOG - -def __build_error_log_tuple(_): - return python.PyList_AsTuple(__ERROR_LOG) - # Error superclass for ElementTree compatibility class Error(Exception): pass @@ -1236,6 +1238,7 @@ # include submodules +include "xmlerror.pxi" # error and log handling include "nsclasses.pxi" # Namespace implementation and registry include "xslt.pxi" # XPath and XSLT include "relaxng.pxi" # RelaxNG @@ -1617,70 +1620,3 @@ changeDocumentBelowHelper(c_current, doc) c_attr_current = c_attr_current.next - -################################################################################ -# DEBUG setup - -# list to collect error output messages from libxml2/libxslt -__ERROR_LOG = [] - -def clear_error_log(): - del __ERROR_LOG[:] - -cdef void _logLines(char* s): - cdef char* pos - cdef int l - while s is not NULL and s[0] != c'\0': - pos = tree.xmlStrchr(s, c'\n') - if pos is NULL: - py_string = python.PyString_FromString(s) - s = NULL - else: - l = pos - s - py_string = python.PyString_FromStringAndSize(s, l) - s = pos + 1 - python.PyList_Append(__ERROR_LOG, py_string) - - l = python.PyList_GET_SIZE(__ERROR_LOG) - __MAX_LOG_SIZE - if l > 0: - del __ERROR_LOG[:l] - -cdef void logGenericErrorFunc(void* ctxt, char* msg, ...): - _logLines(msg) - -cdef void logStructuredErrorFunc(void* userData, - xmlerror.xmlError* error): - _logLines(error.message) - -cdef void _logLibxmlErrors(): - xmlerror.xmlSetGenericErrorFunc(NULL, logGenericErrorFunc) - xmlerror.xmlSetStructuredErrorFunc(NULL, logStructuredErrorFunc) - -cdef void _logLibxsltErrors(): - xslt.xsltSetGenericErrorFunc(NULL, 
logGenericErrorFunc) - # xslt.xsltSetTransformErrorFunc - - -# ugly global shutting up of all errors, but seems to work.. -cdef void nullGenericErrorFunc(void* ctxt, char* msg, ...): - pass - -cdef void nullStructuredErrorFunc(void* userData, - xmlerror.xmlError* error): - pass - -cdef void _shutUpLibxmlErrors(): - xmlerror.xmlSetGenericErrorFunc(NULL, nullGenericErrorFunc) - xmlerror.xmlSetStructuredErrorFunc(NULL, nullStructuredErrorFunc) - -cdef void _shutUpLibxsltErrors(): - xslt.xsltSetGenericErrorFunc(NULL, nullGenericErrorFunc) - # xslt.xsltSetTransformErrorFunc - - -if __DEBUG == 0: - _shutUpLibxmlErrors() - _shutUpLibxsltErrors() -elif __DEBUG == 1: - _logLibxmlErrors() - _logLibxsltErrors() Added: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- (empty file) +++ lxml/trunk/src/lxml/xmlerror.pxi Thu Mar 16 15:16:21 2006 @@ -0,0 +1,61 @@ +################################################################################ +# DEBUG setup + +# list to collect error output messages from libxml2/libxslt +cdef object __ERROR_LOG +__ERROR_LOG = [] + +def __build_error_log_tuple(_): + return python.PyList_AsTuple(__ERROR_LOG) + +def clear_error_log(): + del __ERROR_LOG[:] + +cdef void _logLines(char* s): + cdef char* pos + cdef int l + while s is not NULL and s[0] != c'\0': + pos = tree.xmlStrchr(s, c'\n') + if pos is NULL: + py_string = python.PyString_FromString(s) + s = NULL + else: + l = pos - s + py_string = python.PyString_FromStringAndSize(s, l) + s = pos + 1 + python.PyList_Append(__ERROR_LOG, py_string) + + l = python.PyList_GET_SIZE(__ERROR_LOG) - __MAX_LOG_SIZE + if l > 0: + del __ERROR_LOG[:l] + +cdef void logStructuredErrorFunc(void* userData, + xmlerror.xmlError* error): + _logLines(error.message) + +cdef void logGenericErrorFunc(void* ctxt, char* msg, ...): + _logLines(msg) + +cdef void _logLibxmlErrors(): + xmlerror.xmlSetGenericErrorFunc(NULL, nullGenericErrorFunc) + 
xmlerror.xmlSetStructuredErrorFunc(NULL, logStructuredErrorFunc) + +cdef void _logLibxsltErrors(): + xslt.xsltSetGenericErrorFunc(NULL, logGenericErrorFunc) + # xslt.xsltSetTransformErrorFunc + +# ugly global shutting up of all errors, but seems to work.. +cdef void nullGenericErrorFunc(void* ctxt, char* msg, ...): + pass + +cdef void nullStructuredErrorFunc(void* userData, + xmlerror.xmlError* error): + pass + +cdef void _shutUpLibxmlErrors(): + xmlerror.xmlSetGenericErrorFunc(NULL, nullGenericErrorFunc) + xmlerror.xmlSetStructuredErrorFunc(NULL, nullStructuredErrorFunc) + +cdef void _shutUpLibxsltErrors(): + xslt.xsltSetGenericErrorFunc(NULL, nullGenericErrorFunc) + # xslt.xsltSetTransformErrorFunc From scoder at codespeak.net Thu Mar 16 15:17:24 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 15:17:24 2006 Subject: [Lxml-checkins] r24468 - lxml/branch/scoder2/src/lxml Message-ID: <20060316141724.0F0D410091@code0.codespeak.net> Author: scoder Date: Thu Mar 16 15:17:22 2006 New Revision: 24468 Added: lxml/branch/scoder2/src/lxml/xmlerror.pxi - copied unchanged from r24467, lxml/trunk/src/lxml/xmlerror.pxi Modified: lxml/branch/scoder2/src/lxml/etree.pyx Log: merged in error log updates from trunk Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Thu Mar 16 15:17:22 2006 @@ -22,13 +22,20 @@ PROXY_ELEMENT PROXY_ATTRIB -# should libxml2/libxslt be allowed to shout? -# 0 : off +# what to do with libxml2/libxslt error messages? 
+# 0 : drop # 1 : provide log via exception property -# 2 : to stderr +# 2 : write to stderr cdef int __DEBUG __DEBUG = 1 +if __DEBUG == 0: + _shutUpLibxmlErrors() + _shutUpLibxsltErrors() +elif __DEBUG == 1: + _logLibxmlErrors() + _logLibxsltErrors() + # make the compiled-in debug state publicly available DEBUG = __DEBUG @@ -36,11 +43,6 @@ cdef int __MAX_LOG_SIZE __MAX_LOG_SIZE = 20 -cdef object __ERROR_LOG - -def __build_error_log_tuple(_): - return python.PyList_AsTuple(__ERROR_LOG) - # Error superclass for ElementTree compatibility class Error(Exception): pass @@ -1236,6 +1238,7 @@ # include submodules +include "xmlerror.pxi" # error and log handling include "nsclasses.pxi" # Namespace implementation and registry include "xslt.pxi" # XPath and XSLT include "relaxng.pxi" # RelaxNG @@ -1617,70 +1620,3 @@ changeDocumentBelowHelper(c_current, doc) c_attr_current = c_attr_current.next - -################################################################################ -# DEBUG setup - -# list to collect error output messages from libxml2/libxslt -__ERROR_LOG = [] - -def clear_error_log(): - del __ERROR_LOG[:] - -cdef void _logLines(char* s): - cdef char* pos - cdef int l - while s is not NULL and s[0] != c'\0': - pos = tree.xmlStrchr(s, c'\n') - if pos is NULL: - py_string = python.PyString_FromString(s) - s = NULL - else: - l = pos - s - py_string = python.PyString_FromStringAndSize(s, l) - s = pos + 1 - python.PyList_Append(__ERROR_LOG, py_string) - - l = python.PyList_GET_SIZE(__ERROR_LOG) - __MAX_LOG_SIZE - if l > 0: - del __ERROR_LOG[:l] - -cdef void logGenericErrorFunc(void* ctxt, char* msg, ...): - _logLines(msg) - -cdef void logStructuredErrorFunc(void* userData, - xmlerror.xmlError* error): - _logLines(error.message) - -cdef void _logLibxmlErrors(): - xmlerror.xmlSetGenericErrorFunc(NULL, logGenericErrorFunc) - xmlerror.xmlSetStructuredErrorFunc(NULL, logStructuredErrorFunc) - -cdef void _logLibxsltErrors(): - xslt.xsltSetGenericErrorFunc(NULL, 
logGenericErrorFunc) - # xslt.xsltSetTransformErrorFunc - - -# ugly global shutting up of all errors, but seems to work.. -cdef void nullGenericErrorFunc(void* ctxt, char* msg, ...): - pass - -cdef void nullStructuredErrorFunc(void* userData, - xmlerror.xmlError* error): - pass - -cdef void _shutUpLibxmlErrors(): - xmlerror.xmlSetGenericErrorFunc(NULL, nullGenericErrorFunc) - xmlerror.xmlSetStructuredErrorFunc(NULL, nullStructuredErrorFunc) - -cdef void _shutUpLibxsltErrors(): - xslt.xsltSetGenericErrorFunc(NULL, nullGenericErrorFunc) - # xslt.xsltSetTransformErrorFunc - - -if __DEBUG == 0: - _shutUpLibxmlErrors() - _shutUpLibxsltErrors() -elif __DEBUG == 1: - _logLibxmlErrors() - _logLibxsltErrors() From scoder at codespeak.net Thu Mar 16 16:01:09 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 16:01:11 2006 Subject: [Lxml-checkins] r24471 - lxml/trunk/src/lxml Message-ID: <20060316150109.F2730100A6@code0.codespeak.net> Author: scoder Date: Thu Mar 16 16:01:08 2006 New Revision: 24471 Modified: lxml/trunk/src/lxml/etree.pyx Log: clean up Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 16 16:01:08 2006 @@ -29,6 +29,10 @@ cdef int __DEBUG __DEBUG = 1 +# maximum number of lines in the libxml2/xslt log if __DEBUG == 1 +cdef int __MAX_LOG_SIZE +__MAX_LOG_SIZE = 20 + if __DEBUG == 0: _shutUpLibxmlErrors() _shutUpLibxsltErrors() @@ -39,10 +43,6 @@ # make the compiled-in debug state publicly available DEBUG = __DEBUG -# maximum number of lines in the libxml2/xslt log if __DEBUG == 1 -cdef int __MAX_LOG_SIZE -__MAX_LOG_SIZE = 20 - # Error superclass for ElementTree compatibility class Error(Exception): pass From scoder at codespeak.net Thu Mar 16 16:03:11 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 16:03:13 2006 Subject: [Lxml-checkins] r24472 - 
lxml/trunk/src/lxml Message-ID: <20060316150311.D24F4100A6@code0.codespeak.net> Author: scoder Date: Thu Mar 16 16:03:10 2006 New Revision: 24472 Modified: lxml/trunk/src/lxml/etree.pyx Log: speed up exception creation (>60%) Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 16 16:03:10 2006 @@ -44,7 +44,7 @@ DEBUG = __DEBUG # Error superclass for ElementTree compatibility -class Error(Exception): +class Error(object, Exception): pass # module level superclass for all exceptions From scoder at codespeak.net Thu Mar 16 16:06:36 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 16:06:37 2006 Subject: [Lxml-checkins] r24474 - lxml/trunk/src/lxml Message-ID: <20060316150636.D9507100A6@code0.codespeak.net> Author: scoder Date: Thu Mar 16 16:06:35 2006 New Revision: 24474 Modified: lxml/trunk/src/lxml/etree.pyx Log: oh well ... reverted ... could someone please hit Pyrex for me? 
Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 16 16:06:35 2006 @@ -44,7 +44,7 @@ DEBUG = __DEBUG # Error superclass for ElementTree compatibility -class Error(object, Exception): +class Error(Exception): pass # module level superclass for all exceptions From scoder at codespeak.net Thu Mar 16 16:07:18 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 16:07:19 2006 Subject: [Lxml-checkins] r24475 - lxml/trunk/src/lxml Message-ID: <20060316150718.38C57100A6@code0.codespeak.net> Author: scoder Date: Thu Mar 16 16:07:17 2006 New Revision: 24475 Modified: lxml/trunk/src/lxml/nsclasses.pxi Log: more clean up Modified: lxml/trunk/src/lxml/nsclasses.pxi ============================================================================== --- lxml/trunk/src/lxml/nsclasses.pxi (original) +++ lxml/trunk/src/lxml/nsclasses.pxi Thu Mar 16 16:07:17 2006 @@ -3,7 +3,7 @@ class NamespaceRegistryError(LxmlError): pass -class ElementBase(_Element): +cdef class ElementBase(_Element): """All classes in namespace implementations must inherit from this one. 
Note that subclasses *must not* override __init__ or __new__ as there is absolutely undefined when these objects will be From scoder at codespeak.net Thu Mar 16 16:07:51 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 16:07:52 2006 Subject: [Lxml-checkins] r24476 - lxml/branch/scoder2/src/lxml Message-ID: <20060316150751.C14A2100A6@code0.codespeak.net> Author: scoder Date: Thu Mar 16 16:07:50 2006 New Revision: 24476 Modified: lxml/branch/scoder2/src/lxml/etree.pyx lxml/branch/scoder2/src/lxml/nsclasses.pxi Log: clean up merges from trunk Modified: lxml/branch/scoder2/src/lxml/etree.pyx ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.pyx (original) +++ lxml/branch/scoder2/src/lxml/etree.pyx Thu Mar 16 16:07:50 2006 @@ -29,6 +29,10 @@ cdef int __DEBUG __DEBUG = 1 +# maximum number of lines in the libxml2/xslt log if __DEBUG == 1 +cdef int __MAX_LOG_SIZE +__MAX_LOG_SIZE = 20 + if __DEBUG == 0: _shutUpLibxmlErrors() _shutUpLibxsltErrors() @@ -39,10 +43,6 @@ # make the compiled-in debug state publicly available DEBUG = __DEBUG -# maximum number of lines in the libxml2/xslt log if __DEBUG == 1 -cdef int __MAX_LOG_SIZE -__MAX_LOG_SIZE = 20 - # Error superclass for ElementTree compatibility class Error(Exception): pass Modified: lxml/branch/scoder2/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/scoder2/src/lxml/nsclasses.pxi (original) +++ lxml/branch/scoder2/src/lxml/nsclasses.pxi Thu Mar 16 16:07:50 2006 @@ -3,7 +3,7 @@ class NamespaceRegistryError(LxmlError): pass -class ElementBase(_Element): +cdef class ElementBase(_Element): """All classes in namespace implementations must inherit from this one. Note that subclasses *must not* override __init__ or __new__ as it is absolutely undefined when these objects will be created or destroyed. 
All From scoder at codespeak.net Thu Mar 16 19:06:05 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 19:06:06 2006 Subject: [Lxml-checkins] r24490 - lxml/branch/error-reporting Message-ID: <20060316180605.0D9D0100B8@code0.codespeak.net> Author: scoder Date: Thu Mar 16 19:06:03 2006 New Revision: 24490 Added: lxml/branch/error-reporting/ - copied from r24489, lxml/trunk/ Log: new branch for better error reporting From scoder at codespeak.net Thu Mar 16 21:44:01 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 21:44:02 2006 Subject: [Lxml-checkins] r24492 - in lxml/branch/error-reporting/src/lxml: . tests Message-ID: <20060316204401.AB9DA100AC@code0.codespeak.net> Author: scoder Date: Thu Mar 16 21:43:47 2006 New Revision: 24492 Modified: lxml/branch/error-reporting/src/lxml/etree.pyx lxml/branch/error-reporting/src/lxml/parser.pxi lxml/branch/error-reporting/src/lxml/python.pxd lxml/branch/error-reporting/src/lxml/relaxng.pxi lxml/branch/error-reporting/src/lxml/tests/test_etree.py lxml/branch/error-reporting/src/lxml/xmlerror.pxi lxml/branch/error-reporting/src/lxml/xmlschema.pxi Log: large rewrite of the error handling API - use named class attributes for error domain, type and level - _LogEntry class represents xmlError structure - _ErrorLog collects error entries - global log collects all errors, rotates at 100 entries - API functions can be wrapped in log.connect() and log.disconnect() to provide a local error log (exemplified in XMLSchema and RelaxNG - untested) Modified: lxml/branch/error-reporting/src/lxml/etree.pyx ============================================================================== --- lxml/branch/error-reporting/src/lxml/etree.pyx (original) +++ lxml/branch/error-reporting/src/lxml/etree.pyx Thu Mar 16 21:43:47 2006 @@ -24,21 +24,13 @@ # what to do with libxml2/libxslt error messages? 
# 0 : drop -# 1 : provide log via exception property -# 2 : write to stderr +# 1 : use log cdef int __DEBUG __DEBUG = 1 # maximum number of lines in the libxml2/xslt log if __DEBUG == 1 cdef int __MAX_LOG_SIZE -__MAX_LOG_SIZE = 20 - -if __DEBUG == 0: - _shutUpLibxmlErrors() - _shutUpLibxsltErrors() -elif __DEBUG == 1: - _logLibxmlErrors() - _logLibxsltErrors() +__MAX_LOG_SIZE = 100 # make the compiled-in debug state publicly available DEBUG = __DEBUG @@ -49,10 +41,12 @@ # module level superclass for all exceptions class LxmlError(Error): - error_log = property(__build_error_log_tuple) + def __init__(self, *args): + Error.__init__(self, *args) + self.error_log = __copyGlobalErrorLog() # superclass for all syntax errors -class LxmlSyntaxError(SyntaxError, LxmlError): +class LxmlSyntaxError(LxmlError, SyntaxError): pass class XIncludeError(LxmlError): Modified: lxml/branch/error-reporting/src/lxml/parser.pxi ============================================================================== --- lxml/branch/error-reporting/src/lxml/parser.pxi (original) +++ lxml/branch/error-reporting/src/lxml/parser.pxi Thu Mar 16 21:43:47 2006 @@ -30,6 +30,7 @@ attribute default values are requested. 
""" cdef int _parse_options + cdef _ErrorLog _error_log def __init__(self, attribute_defaults=False, dtd_validation=False, no_network=False, ns_clean=False): cdef int parse_options @@ -47,10 +48,14 @@ parse_options = parse_options | xmlparser.XML_PARSE_NSCLEAN self._parse_options = parse_options + self._error_log = _ErrorLog() property error_log: def __get__(self): - return __build_error_log_tuple(self) + return self._error_log.copy() + + cdef void _clear_log(self): + self._error_log.clear() ## def copy(self, attribute_defaults=None, dtd_validation=None, ## no_network=None, ns_clean=None): @@ -97,7 +102,7 @@ #print "freeing dictionary (cleanup parser)" xmlparser.xmlDictFree(self._c_dict) - cdef xmlDoc* parseDoc(self, text, parser) except NULL: + cdef xmlDoc* parseDoc(self, text, XMLParser parser) except NULL: """Parse document, share dictionary if possible. """ cdef xmlDoc* result @@ -105,7 +110,8 @@ cdef int parse_error if parser is not None: - parse_options = (parser)._parse_options + parser._clear_log() + parse_options = parser._parse_options else: parse_options = _DEFAULT_PARSE_OPTIONS @@ -131,13 +137,14 @@ xmlparser.xmlFreeParserCtxt(pctxt) return result - cdef xmlDoc* parseDocFromFile(self, char* filename, parser) except NULL: + cdef xmlDoc* parseDocFromFile(self, char* filename, XMLParser parser) except NULL: cdef int parse_options cdef xmlDoc* result cdef xmlParserCtxt* pctxt if parser is not None: - parse_options = (parser)._parse_options + parser._clear_log() + parse_options = parser._parse_options else: parse_options = _DEFAULT_PARSE_OPTIONS Modified: lxml/branch/error-reporting/src/lxml/python.pxd ============================================================================== --- lxml/branch/error-reporting/src/lxml/python.pxd (original) +++ lxml/branch/error-reporting/src/lxml/python.pxd Thu Mar 16 21:43:47 2006 @@ -25,6 +25,7 @@ cdef PyObject* PyDict_GetItemString(object d, char* key) cdef PyObject* PyDict_GetItem(object d, object key) cdef object 
PyList_AsTuple(object o) + cdef object PyObject_GetIter(object o) cdef int PyNumber_Check(object instance) cdef int PyBool_Check(object instance) Modified: lxml/branch/error-reporting/src/lxml/relaxng.pxi ============================================================================== --- lxml/branch/error-reporting/src/lxml/relaxng.pxi (original) +++ lxml/branch/error-reporting/src/lxml/relaxng.pxi Thu Mar 16 21:43:47 2006 @@ -18,6 +18,7 @@ Can also load from filesystem directly given file object or filename. """ cdef relaxng.xmlRelaxNG* _c_schema + cdef _ErrorLog _error_log def __init__(self, _ElementTree etree=None, file=None): cdef relaxng.xmlRelaxNGParserCtxt* parser_ctxt @@ -38,6 +39,8 @@ if self._c_schema is NULL: raise RelaxNGParseError, "Document is not valid Relax NG" relaxng.xmlRelaxNGFreeParserCtxt(parser_ctxt) + + self._error_log = _ErrorLog() def __dealloc__(self): relaxng.xmlRelaxNGFree(self._c_schema) @@ -49,6 +52,7 @@ cdef xmlDoc* c_doc cdef relaxng.xmlRelaxNGValidCtxt* valid_ctxt cdef int ret + self._error_log.connect() valid_ctxt = relaxng.xmlRelaxNGNewValidCtxt(self._c_schema) c_doc = _fakeRootDoc(etree._doc._c_doc, etree._context_node._c_node) @@ -56,10 +60,11 @@ _destroyFakeDoc(etree._doc._c_doc, c_doc) relaxng.xmlRelaxNGFreeValidCtxt(valid_ctxt) + self._error_log.disconnect() if ret == -1: raise RelaxNGValidateError, "Internal error in Relax NG validation" return ret == 0 property error_log: def __get__(self): - return __build_error_log_tuple(self) + return self._error_log.copy() Modified: lxml/branch/error-reporting/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/error-reporting/src/lxml/tests/test_etree.py (original) +++ lxml/branch/error-reporting/src/lxml/tests/test_etree.py Thu Mar 16 21:43:47 2006 @@ -29,14 +29,19 @@ parse = self.etree.parse # from StringIO f = StringIO('') - self.etree.clear_error_log() + self.etree.clearErrorLog() try: parse(f) - log = "" + logs 
= None except SyntaxError, e: - log = '\n'.join(e.error_log) + logs = e.error_log f.close() - self.assert_('mismatch' in log) + self.assert_([ log for log in logs + if 'mismatch' in log.message ]) + self.assert_([ log for log in logs + if 'PARSER' in log.domain_name]) + self.assert_([ log for log in logs + if 'TAG_NAME_MISMATCH' in log.type_name ]) def test_parse_error_from_file(self): parse = self.etree.parse Modified: lxml/branch/error-reporting/src/lxml/xmlerror.pxi ============================================================================== --- lxml/branch/error-reporting/src/lxml/xmlerror.pxi (original) +++ lxml/branch/error-reporting/src/lxml/xmlerror.pxi Thu Mar 16 21:43:47 2006 @@ -1,61 +1,960 @@ ################################################################################ # DEBUG setup -# list to collect error output messages from libxml2/libxslt -cdef object __ERROR_LOG -__ERROR_LOG = [] - -def __build_error_log_tuple(_): - return python.PyList_AsTuple(__ERROR_LOG) - -def clear_error_log(): - del __ERROR_LOG[:] - -cdef void _logLines(char* s): - cdef char* pos - cdef int l - while s is not NULL and s[0] != c'\0': - pos = tree.xmlStrchr(s, c'\n') - if pos is NULL: - py_string = python.PyString_FromString(s) - s = NULL +cdef class _LogEntry: + cdef int _domain + cdef int _type + cdef int _line + cdef xmlerror.xmlErrorLevel _level + cdef object _message + cdef object _filename + cdef _set(self, xmlerror.xmlError* error): + self._domain = error.domain + self._type = error.code + self._level = error.level + self._line = error.line + self._message = python.PyString_FromString(error.message) + if error.file is NULL: + self._filename = None else: - l = pos - s - py_string = python.PyString_FromStringAndSize(s, l) - s = pos + 1 - python.PyList_Append(__ERROR_LOG, py_string) - - l = python.PyList_GET_SIZE(__ERROR_LOG) - __MAX_LOG_SIZE - if l > 0: - del __ERROR_LOG[:l] - -cdef void logStructuredErrorFunc(void* userData, - xmlerror.xmlError* error): - 
_logLines(error.message) + self._filename = python.PyString_FromString(error.file) + + def __repr__(self): + if self._filename: + return "%s/%d[%s]%s/%s: %s" % ( + self.filename, self._line, self.level_name, + self.domain_name, self.type_name, self.message) + else: + return "[%s]%s/%s: %s" % ( + self.level_name, self.domain_name, + self.type_name, self.message) + + property filename: + def __get__(self): + return self._filename + + property message: + def __get__(self): + return self._message + + property line: + def __get__(self): + return self._line + + property domain: + def __get__(self): + return self._domain + + property domain_name: + def __get__(self): + return LxmlErrorDomains._names[self._domain] + + property type: + def __get__(self): + return self._domain + + property type_name: + def __get__(self): + return LxmlErrorTypes._names[self._type] + + property level: + def __get__(self): + return self._level + + property level_name: + def __get__(self): + return LxmlErrorLevels._names[self._level] + +cdef class _BaseErrorLog: + "Immutable base version of an error log." 
+ cdef object _entries + def __init__(self, entries): + self._entries = entries + + def copy(self): + return _BaseErrorLog(self._entries) + + def __iter__(self): + return python.PyObject_GetIter(self._entries) + + def __repr__(self): + return '\n'.join(map(repr, self._entries)) + + def filter_domain(self, domain): + cdef _LogEntry entry + cdef int c_domain + c_domain = domain + filtered = [] + for entry in self._entries: + if entry._domain == c_domain: + python.PyList_Append(filtered, entry) + return filtered + + def filter_type(self, type): + cdef _LogEntry entry + cdef int c_type + c_type = type + for entry in self._entries: + if entry._type == c_type: + python.PyList_Append(filtered, entry) + return filtered + + def filter_level(self, level): + cdef _LogEntry entry + cdef int c_level + c_level = level + for entry in self._entries: + if (entry._level) >= c_level: + python.PyList_Append(filtered, entry) + return filtered + +cdef class _ErrorLog(_BaseErrorLog): + cdef object _accepted_domains + def __init__(self): + _BaseErrorLog.__init__(self, []) + accepted_domains = None + + def clear(self): + del self._entries[:] + + def copy(self): + return _BaseErrorLog(self._entries[:]) + + def __iter__(self): + return python.PyObject_GetIter(self._entries[:]) + + cdef void connect(self): + del self._entries[:] + xmlerror.xmlSetStructuredErrorFunc(self, _localReceiveError) + + cdef void disconnect(self): + xmlerror.xmlSetStructuredErrorFunc(NULL, _globalReceiveError) + + cdef void _receive(self, xmlerror.xmlError* error): + cdef _LogEntry entry + if self._accepted_domains is not None: + if error.domain not in self._accepted_domains: + return + entry = _LogEntry() + entry._set(error) + self.receive(entry) + if __DEBUG != 0 and __GLOBAL_ERROR_LOG != self: + __GLOBAL_ERROR_LOG._receive(error) + + def receive(self, entry): + python.PyList_Append(self._entries, entry) + +cdef class _RotatingErrorLog(_ErrorLog): + cdef int _max_len + def __init__(self, max_len): + 
_ErrorLog.__init__(self) + self._max_len = max_len + def receive(self, entry): + entries = self._entries + if python.PyList_GET_SIZE(entries) > self._max_len: + del entries[0] + python.PyList_Append(entries, entry) + +# global list to collect error output messages from libxml2/libxslt +cdef _RotatingErrorLog __GLOBAL_ERROR_LOG +__GLOBAL_ERROR_LOG = _RotatingErrorLog(__MAX_LOG_SIZE) + +def clearErrorLog(): + __GLOBAL_ERROR_LOG.clear() + +def __copyGlobalErrorLog(): + "Helper function for properties in exceptions." + return __GLOBAL_ERROR_LOG.copy() + +# local log function: forward error to logger object +cdef void _localReceiveError(void* c_log_handler, xmlerror.xmlError* error): + cdef _ErrorLog log_handler + log_handler = <_ErrorLog>c_log_handler + log_handler._receive(error) + +# global log functions: overridden by local functions +cdef void _globalReceiveError(void* userData, xmlerror.xmlError* error): + if __DEBUG != 0: + __GLOBAL_ERROR_LOG._receive(error) + +# dummy function: no debug output at all +cdef void _nullStructuredErrorFunc(void* userData, + xmlerror.xmlError* error): + pass +cdef void _nullGenericErrorFunc(void* ctxt, char* msg, ...): + pass -cdef void logGenericErrorFunc(void* ctxt, char* msg, ...): - _logLines(msg) +# setup for global log: cdef void _logLibxmlErrors(): - xmlerror.xmlSetGenericErrorFunc(NULL, nullGenericErrorFunc) - xmlerror.xmlSetStructuredErrorFunc(NULL, logStructuredErrorFunc) + xmlerror.xmlSetGenericErrorFunc(NULL, _nullGenericErrorFunc) + xmlerror.xmlSetStructuredErrorFunc(NULL, _globalReceiveError) cdef void _logLibxsltErrors(): - xslt.xsltSetGenericErrorFunc(NULL, logGenericErrorFunc) + xslt.xsltSetGenericErrorFunc(NULL, _nullGenericErrorFunc) # xslt.xsltSetTransformErrorFunc -# ugly global shutting up of all errors, but seems to work.. 
-cdef void nullGenericErrorFunc(void* ctxt, char* msg, ...): - pass - -cdef void nullStructuredErrorFunc(void* userData, - xmlerror.xmlError* error): - pass - cdef void _shutUpLibxmlErrors(): - xmlerror.xmlSetGenericErrorFunc(NULL, nullGenericErrorFunc) - xmlerror.xmlSetStructuredErrorFunc(NULL, nullStructuredErrorFunc) + xmlerror.xmlSetGenericErrorFunc(NULL, _nullGenericErrorFunc) + xmlerror.xmlSetStructuredErrorFunc(NULL, _nullStructuredErrorFunc) cdef void _shutUpLibxsltErrors(): - xslt.xsltSetGenericErrorFunc(NULL, nullGenericErrorFunc) + xslt.xsltSetGenericErrorFunc(NULL, _nullGenericErrorFunc) # xslt.xsltSetTransformErrorFunc + +# init global logging +_logLibxmlErrors() +_logLibxsltErrors() + +################################################################################ +## CONSTANTS FROM "xmlerror.pxd" +################################################################################ + +class LxmlErrorLevels: + _names = {} + NONE = 0 + WARNING = 1 # A simple warning + ERROR = 2 # A recoverable error + FATAL = 3 # A fatal error + +class LxmlErrorDomains: + _names = {} + NONE = 0 + PARSER = 1 # The XML parser + TREE = 2 # The tree module + NAMESPACE = 3 # The XML Namespace module + DTD = 4 # The XML DTD validation with parser contex + HTML = 5 # The HTML parser + MEMORY = 6 # The memory allocator + OUTPUT = 7 # The serialization code + IO = 8 # The Input/Output stack + FTP = 9 # The FTP module + HTTP = 10 # The FTP module + XINCLUDE = 11 # The XInclude processing + XPATH = 12 # The XPath module + XPOINTER = 13 # The XPointer module + REGEXP = 14 # The regular expressions module + DATATYPE = 15 # The W3C XML Schemas Datatype module + SCHEMASP = 16 # The W3C XML Schemas parser module + SCHEMASV = 17 # The W3C XML Schemas validation module + RELAXNGP = 18 # The Relax-NG parser module + RELAXNGV = 19 # The Relax-NG validator module + CATALOG = 20 # The Catalog module + C14N = 21 # The Canonicalization module + XSLT = 22 # The XSLT engine from libxslt + VALID = 
23 # The XML DTD validation with valid context + CHECK = 24 # The error checking module + WRITER = 25 # The xmlwriter module + MODULE = 26 # The dynamically loaded module modu + +class LxmlErrorTypes: + _names = {} + ERR_OK = 0 + ERR_INTERNAL_ERROR = 1 + ERR_NO_MEMORY = 2 + ERR_DOCUMENT_START = 3 # 3 + ERR_DOCUMENT_EMPTY = 4 # 4 + ERR_DOCUMENT_END = 5 # 5 + ERR_INVALID_HEX_CHARREF = 6 # 6 + ERR_INVALID_DEC_CHARREF = 7 # 7 + ERR_INVALID_CHARREF = 8 # 8 + ERR_INVALID_CHAR = 9 # 9 + ERR_CHARREF_AT_EOF = 10 # 10 + ERR_CHARREF_IN_PROLOG = 11 # 11 + ERR_CHARREF_IN_EPILOG = 12 # 12 + ERR_CHARREF_IN_DTD = 13 # 13 + ERR_ENTITYREF_AT_EOF = 14 # 14 + ERR_ENTITYREF_IN_PROLOG = 15 # 15 + ERR_ENTITYREF_IN_EPILOG = 16 # 16 + ERR_ENTITYREF_IN_DTD = 17 # 17 + ERR_PEREF_AT_EOF = 18 # 18 + ERR_PEREF_IN_PROLOG = 19 # 19 + ERR_PEREF_IN_EPILOG = 20 # 20 + ERR_PEREF_IN_INT_SUBSET = 21 # 21 + ERR_ENTITYREF_NO_NAME = 22 # 22 + ERR_ENTITYREF_SEMICOL_MISSING = 23 # 23 + ERR_PEREF_NO_NAME = 24 # 24 + ERR_PEREF_SEMICOL_MISSING = 25 # 25 + ERR_UNDECLARED_ENTITY = 26 # 26 + WAR_UNDECLARED_ENTITY = 27 # 27 + ERR_UNPARSED_ENTITY = 28 # 28 + ERR_ENTITY_IS_EXTERNAL = 29 # 29 + ERR_ENTITY_IS_PARAMETER = 30 # 30 + ERR_UNKNOWN_ENCODING = 31 # 31 + ERR_UNSUPPORTED_ENCODING = 32 # 32 + ERR_STRING_NOT_STARTED = 33 # 33 + ERR_STRING_NOT_CLOSED = 34 # 34 + ERR_NS_DECL_ERROR = 35 # 35 + ERR_ENTITY_NOT_STARTED = 36 # 36 + ERR_ENTITY_NOT_FINISHED = 37 # 37 + ERR_LT_IN_ATTRIBUTE = 38 # 38 + ERR_ATTRIBUTE_NOT_STARTED = 39 # 39 + ERR_ATTRIBUTE_NOT_FINISHED = 40 # 40 + ERR_ATTRIBUTE_WITHOUT_VALUE = 41 # 41 + ERR_ATTRIBUTE_REDEFINED = 42 # 42 + ERR_LITERAL_NOT_STARTED = 43 # 43 + ERR_LITERAL_NOT_FINISHED = 44 # 44 + ERR_COMMENT_NOT_FINISHED = 45 # 45 + ERR_PI_NOT_STARTED = 46 # 46 + ERR_PI_NOT_FINISHED = 47 # 47 + ERR_NOTATION_NOT_STARTED = 48 # 48 + ERR_NOTATION_NOT_FINISHED = 49 # 49 + ERR_ATTLIST_NOT_STARTED = 50 # 50 + ERR_ATTLIST_NOT_FINISHED = 51 # 51 + ERR_MIXED_NOT_STARTED = 52 # 52 + ERR_MIXED_NOT_FINISHED 
= 53 # 53 + ERR_ELEMCONTENT_NOT_STARTED = 54 # 54 + ERR_ELEMCONTENT_NOT_FINISHED = 55 # 55 + ERR_XMLDECL_NOT_STARTED = 56 # 56 + ERR_XMLDECL_NOT_FINISHED = 57 # 57 + ERR_CONDSEC_NOT_STARTED = 58 # 58 + ERR_CONDSEC_NOT_FINISHED = 59 # 59 + ERR_EXT_SUBSET_NOT_FINISHED = 60 # 60 + ERR_DOCTYPE_NOT_FINISHED = 61 # 61 + ERR_MISPLACED_CDATA_END = 62 # 62 + ERR_CDATA_NOT_FINISHED = 63 # 63 + ERR_RESERVED_XML_NAME = 64 # 64 + ERR_SPACE_REQUIRED = 65 # 65 + ERR_SEPARATOR_REQUIRED = 66 # 66 + ERR_NMTOKEN_REQUIRED = 67 # 67 + ERR_NAME_REQUIRED = 68 # 68 + ERR_PCDATA_REQUIRED = 69 # 69 + ERR_URI_REQUIRED = 70 # 70 + ERR_PUBID_REQUIRED = 71 # 71 + ERR_LT_REQUIRED = 72 # 72 + ERR_GT_REQUIRED = 73 # 73 + ERR_LTSLASH_REQUIRED = 74 # 74 + ERR_EQUAL_REQUIRED = 75 # 75 + ERR_TAG_NAME_MISMATCH = 76 # 76 + ERR_TAG_NOT_FINISHED = 77 # 77 + ERR_STANDALONE_VALUE = 78 # 78 + ERR_ENCODING_NAME = 79 # 79 + ERR_HYPHEN_IN_COMMENT = 80 # 80 + ERR_INVALID_ENCODING = 81 # 81 + ERR_EXT_ENTITY_STANDALONE = 82 # 82 + ERR_CONDSEC_INVALID = 83 # 83 + ERR_VALUE_REQUIRED = 84 # 84 + ERR_NOT_WELL_BALANCED = 85 # 85 + ERR_EXTRA_CONTENT = 86 # 86 + ERR_ENTITY_CHAR_ERROR = 87 # 87 + ERR_ENTITY_PE_INTERNAL = 88 # 88 + ERR_ENTITY_LOOP = 89 # 89 + ERR_ENTITY_BOUNDARY = 90 # 90 + ERR_INVALID_URI = 91 # 91 + ERR_URI_FRAGMENT = 92 # 92 + WAR_CATALOG_PI = 93 # 93 + ERR_NO_DTD = 94 # 94 + ERR_CONDSEC_INVALID_KEYWORD = 95 # 95 + ERR_VERSION_MISSING = 96 # 96 + WAR_UNKNOWN_VERSION = 97 # 97 + WAR_LANG_VALUE = 98 # 98 + WAR_NS_URI = 99 # 99 + WAR_NS_URI_RELATIVE = 100 # 100 + ERR_MISSING_ENCODING = 101 # 101 + NS_ERR_XML_NAMESPACE = 200 + NS_ERR_UNDEFINED_NAMESPACE = 201 # 201 + NS_ERR_QNAME = 202 # 202 + NS_ERR_ATTRIBUTE_REDEFINED = 203 # 203 + DTD_ATTRIBUTE_DEFAULT = 500 + DTD_ATTRIBUTE_REDEFINED = 501 # 501 + DTD_ATTRIBUTE_VALUE = 502 # 502 + DTD_CONTENT_ERROR = 503 # 503 + DTD_CONTENT_MODEL = 504 # 504 + DTD_CONTENT_NOT_DETERMINIST = 505 # 505 + DTD_DIFFERENT_PREFIX = 506 # 506 + DTD_ELEM_DEFAULT_NAMESPACE = 507 # 
507 + DTD_ELEM_NAMESPACE = 508 # 508 + DTD_ELEM_REDEFINED = 509 # 509 + DTD_EMPTY_NOTATION = 510 # 510 + DTD_ENTITY_TYPE = 511 # 511 + DTD_ID_FIXED = 512 # 512 + DTD_ID_REDEFINED = 513 # 513 + DTD_ID_SUBSET = 514 # 514 + DTD_INVALID_CHILD = 515 # 515 + DTD_INVALID_DEFAULT = 516 # 516 + DTD_LOAD_ERROR = 517 # 517 + DTD_MISSING_ATTRIBUTE = 518 # 518 + DTD_MIXED_CORRUPT = 519 # 519 + DTD_MULTIPLE_ID = 520 # 520 + DTD_NO_DOC = 521 # 521 + DTD_NO_DTD = 522 # 522 + DTD_NO_ELEM_NAME = 523 # 523 + DTD_NO_PREFIX = 524 # 524 + DTD_NO_ROOT = 525 # 525 + DTD_NOTATION_REDEFINED = 526 # 526 + DTD_NOTATION_VALUE = 527 # 527 + DTD_NOT_EMPTY = 528 # 528 + DTD_NOT_PCDATA = 529 # 529 + DTD_NOT_STANDALONE = 530 # 530 + DTD_ROOT_NAME = 531 # 531 + DTD_STANDALONE_WHITE_SPACE = 532 # 532 + DTD_UNKNOWN_ATTRIBUTE = 533 # 533 + DTD_UNKNOWN_ELEM = 534 # 534 + DTD_UNKNOWN_ENTITY = 535 # 535 + DTD_UNKNOWN_ID = 536 # 536 + DTD_UNKNOWN_NOTATION = 537 # 537 + DTD_STANDALONE_DEFAULTED = 538 # 538 + DTD_XMLID_VALUE = 539 # 539 + DTD_XMLID_TYPE = 540 # 540 + HTML_STRUCURE_ERROR = 800 + HTML_UNKNOWN_TAG = 801 # 801 + RNGP_ANYNAME_ATTR_ANCESTOR = 1000 + RNGP_ATTR_CONFLICT = 1001 # 1001 + RNGP_ATTRIBUTE_CHILDREN = 1002 # 1002 + RNGP_ATTRIBUTE_CONTENT = 1003 # 1003 + RNGP_ATTRIBUTE_EMPTY = 1004 # 1004 + RNGP_ATTRIBUTE_NOOP = 1005 # 1005 + RNGP_CHOICE_CONTENT = 1006 # 1006 + RNGP_CHOICE_EMPTY = 1007 # 1007 + RNGP_CREATE_FAILURE = 1008 # 1008 + RNGP_DATA_CONTENT = 1009 # 1009 + RNGP_DEF_CHOICE_AND_INTERLEAVE = 1010 # 1010 + RNGP_DEFINE_CREATE_FAILED = 1011 # 1011 + RNGP_DEFINE_EMPTY = 1012 # 1012 + RNGP_DEFINE_MISSING = 1013 # 1013 + RNGP_DEFINE_NAME_MISSING = 1014 # 1014 + RNGP_ELEM_CONTENT_EMPTY = 1015 # 1015 + RNGP_ELEM_CONTENT_ERROR = 1016 # 1016 + RNGP_ELEMENT_EMPTY = 1017 # 1017 + RNGP_ELEMENT_CONTENT = 1018 # 1018 + RNGP_ELEMENT_NAME = 1019 # 1019 + RNGP_ELEMENT_NO_CONTENT = 1020 # 1020 + RNGP_ELEM_TEXT_CONFLICT = 1021 # 1021 + RNGP_EMPTY = 1022 # 1022 + RNGP_EMPTY_CONSTRUCT = 1023 # 1023 + 
RNGP_EMPTY_CONTENT = 1024 # 1024 + RNGP_EMPTY_NOT_EMPTY = 1025 # 1025 + RNGP_ERROR_TYPE_LIB = 1026 # 1026 + RNGP_EXCEPT_EMPTY = 1027 # 1027 + RNGP_EXCEPT_MISSING = 1028 # 1028 + RNGP_EXCEPT_MULTIPLE = 1029 # 1029 + RNGP_EXCEPT_NO_CONTENT = 1030 # 1030 + RNGP_EXTERNALREF_EMTPY = 1031 # 1031 + RNGP_EXTERNAL_REF_FAILURE = 1032 # 1032 + RNGP_EXTERNALREF_RECURSE = 1033 # 1033 + RNGP_FORBIDDEN_ATTRIBUTE = 1034 # 1034 + RNGP_FOREIGN_ELEMENT = 1035 # 1035 + RNGP_GRAMMAR_CONTENT = 1036 # 1036 + RNGP_GRAMMAR_EMPTY = 1037 # 1037 + RNGP_GRAMMAR_MISSING = 1038 # 1038 + RNGP_GRAMMAR_NO_START = 1039 # 1039 + RNGP_GROUP_ATTR_CONFLICT = 1040 # 1040 + RNGP_HREF_ERROR = 1041 # 1041 + RNGP_INCLUDE_EMPTY = 1042 # 1042 + RNGP_INCLUDE_FAILURE = 1043 # 1043 + RNGP_INCLUDE_RECURSE = 1044 # 1044 + RNGP_INTERLEAVE_ADD = 1045 # 1045 + RNGP_INTERLEAVE_CREATE_FAILED = 1046 # 1046 + RNGP_INTERLEAVE_EMPTY = 1047 # 1047 + RNGP_INTERLEAVE_NO_CONTENT = 1048 # 1048 + RNGP_INVALID_DEFINE_NAME = 1049 # 1049 + RNGP_INVALID_URI = 1050 # 1050 + RNGP_INVALID_VALUE = 1051 # 1051 + RNGP_MISSING_HREF = 1052 # 1052 + RNGP_NAME_MISSING = 1053 # 1053 + RNGP_NEED_COMBINE = 1054 # 1054 + RNGP_NOTALLOWED_NOT_EMPTY = 1055 # 1055 + RNGP_NSNAME_ATTR_ANCESTOR = 1056 # 1056 + RNGP_NSNAME_NO_NS = 1057 # 1057 + RNGP_PARAM_FORBIDDEN = 1058 # 1058 + RNGP_PARAM_NAME_MISSING = 1059 # 1059 + RNGP_PARENTREF_CREATE_FAILED = 1060 # 1060 + RNGP_PARENTREF_NAME_INVALID = 1061 # 1061 + RNGP_PARENTREF_NO_NAME = 1062 # 1062 + RNGP_PARENTREF_NO_PARENT = 1063 # 1063 + RNGP_PARENTREF_NOT_EMPTY = 1064 # 1064 + RNGP_PARSE_ERROR = 1065 # 1065 + RNGP_PAT_ANYNAME_EXCEPT_ANYNAME = 1066 # 1066 + RNGP_PAT_ATTR_ATTR = 1067 # 1067 + RNGP_PAT_ATTR_ELEM = 1068 # 1068 + RNGP_PAT_DATA_EXCEPT_ATTR = 1069 # 1069 + RNGP_PAT_DATA_EXCEPT_ELEM = 1070 # 1070 + RNGP_PAT_DATA_EXCEPT_EMPTY = 1071 # 1071 + RNGP_PAT_DATA_EXCEPT_GROUP = 1072 # 1072 + RNGP_PAT_DATA_EXCEPT_INTERLEAVE = 1073 # 1073 + RNGP_PAT_DATA_EXCEPT_LIST = 1074 # 1074 + 
RNGP_PAT_DATA_EXCEPT_ONEMORE = 1075 # 1075 + RNGP_PAT_DATA_EXCEPT_REF = 1076 # 1076 + RNGP_PAT_DATA_EXCEPT_TEXT = 1077 # 1077 + RNGP_PAT_LIST_ATTR = 1078 # 1078 + RNGP_PAT_LIST_ELEM = 1079 # 1079 + RNGP_PAT_LIST_INTERLEAVE = 1080 # 1080 + RNGP_PAT_LIST_LIST = 1081 # 1081 + RNGP_PAT_LIST_REF = 1082 # 1082 + RNGP_PAT_LIST_TEXT = 1083 # 1083 + RNGP_PAT_NSNAME_EXCEPT_ANYNAME = 1084 # 1084 + RNGP_PAT_NSNAME_EXCEPT_NSNAME = 1085 # 1085 + RNGP_PAT_ONEMORE_GROUP_ATTR = 1086 # 1086 + RNGP_PAT_ONEMORE_INTERLEAVE_ATTR = 1087 # 1087 + RNGP_PAT_START_ATTR = 1088 # 1088 + RNGP_PAT_START_DATA = 1089 # 1089 + RNGP_PAT_START_EMPTY = 1090 # 1090 + RNGP_PAT_START_GROUP = 1091 # 1091 + RNGP_PAT_START_INTERLEAVE = 1092 # 1092 + RNGP_PAT_START_LIST = 1093 # 1093 + RNGP_PAT_START_ONEMORE = 1094 # 1094 + RNGP_PAT_START_TEXT = 1095 # 1095 + RNGP_PAT_START_VALUE = 1096 # 1096 + RNGP_PREFIX_UNDEFINED = 1097 # 1097 + RNGP_REF_CREATE_FAILED = 1098 # 1098 + RNGP_REF_CYCLE = 1099 # 1099 + RNGP_REF_NAME_INVALID = 1100 # 1100 + RNGP_REF_NO_DEF = 1101 # 1101 + RNGP_REF_NO_NAME = 1102 # 1102 + RNGP_REF_NOT_EMPTY = 1103 # 1103 + RNGP_START_CHOICE_AND_INTERLEAVE = 1104 # 1104 + RNGP_START_CONTENT = 1105 # 1105 + RNGP_START_EMPTY = 1106 # 1106 + RNGP_START_MISSING = 1107 # 1107 + RNGP_TEXT_EXPECTED = 1108 # 1108 + RNGP_TEXT_HAS_CHILD = 1109 # 1109 + RNGP_TYPE_MISSING = 1110 # 1110 + RNGP_TYPE_NOT_FOUND = 1111 # 1111 + RNGP_TYPE_VALUE = 1112 # 1112 + RNGP_UNKNOWN_ATTRIBUTE = 1113 # 1113 + RNGP_UNKNOWN_COMBINE = 1114 # 1114 + RNGP_UNKNOWN_CONSTRUCT = 1115 # 1115 + RNGP_UNKNOWN_TYPE_LIB = 1116 # 1116 + RNGP_URI_FRAGMENT = 1117 # 1117 + RNGP_URI_NOT_ABSOLUTE = 1118 # 1118 + RNGP_VALUE_EMPTY = 1119 # 1119 + RNGP_VALUE_NO_CONTENT = 1120 # 1120 + RNGP_XMLNS_NAME = 1121 # 1121 + RNGP_XML_NS = 1122 # 1122 + XPATH_EXPRESSION_OK = 1200 + XPATH_NUMBER_ERROR = 1201 # 1201 + XPATH_UNFINISHED_LITERAL_ERROR = 1202 # 1202 + XPATH_START_LITERAL_ERROR = 1203 # 1203 + XPATH_VARIABLE_REF_ERROR = 1204 # 1204 + 
XPATH_UNDEF_VARIABLE_ERROR = 1205 # 1205 + XPATH_INVALID_PREDICATE_ERROR = 1206 # 1206 + XPATH_EXPR_ERROR = 1207 # 1207 + XPATH_UNCLOSED_ERROR = 1208 # 1208 + XPATH_UNKNOWN_FUNC_ERROR = 1209 # 1209 + XPATH_INVALID_OPERAND = 1210 # 1210 + XPATH_INVALID_TYPE = 1211 # 1211 + XPATH_INVALID_ARITY = 1212 # 1212 + XPATH_INVALID_CTXT_SIZE = 1213 # 1213 + XPATH_INVALID_CTXT_POSITION = 1214 # 1214 + XPATH_MEMORY_ERROR = 1215 # 1215 + XPTR_SYNTAX_ERROR = 1216 # 1216 + XPTR_RESOURCE_ERROR = 1217 # 1217 + XPTR_SUB_RESOURCE_ERROR = 1218 # 1218 + XPATH_UNDEF_PREFIX_ERROR = 1219 # 1219 + XPATH_ENCODING_ERROR = 1220 # 1220 + XPATH_INVALID_CHAR_ERROR = 1221 # 1221 + TREE_INVALID_HEX = 1300 + TREE_INVALID_DEC = 1301 # 1301 + TREE_UNTERMINATED_ENTITY = 1302 # 1302 + SAVE_NOT_UTF8 = 1400 + SAVE_CHAR_INVALID = 1401 # 1401 + SAVE_NO_DOCTYPE = 1402 # 1402 + SAVE_UNKNOWN_ENCODING = 1403 # 1403 + REGEXP_COMPILE_ERROR = 1450 + IO_UNKNOWN = 1500 + IO_EACCES = 1501 # 1501 + IO_EAGAIN = 1502 # 1502 + IO_EBADF = 1503 # 1503 + IO_EBADMSG = 1504 # 1504 + IO_EBUSY = 1505 # 1505 + IO_ECANCELED = 1506 # 1506 + IO_ECHILD = 1507 # 1507 + IO_EDEADLK = 1508 # 1508 + IO_EDOM = 1509 # 1509 + IO_EEXIST = 1510 # 1510 + IO_EFAULT = 1511 # 1511 + IO_EFBIG = 1512 # 1512 + IO_EINPROGRESS = 1513 # 1513 + IO_EINTR = 1514 # 1514 + IO_EINVAL = 1515 # 1515 + IO_EIO = 1516 # 1516 + IO_EISDIR = 1517 # 1517 + IO_EMFILE = 1518 # 1518 + IO_EMLINK = 1519 # 1519 + IO_EMSGSIZE = 1520 # 1520 + IO_ENAMETOOLONG = 1521 # 1521 + IO_ENFILE = 1522 # 1522 + IO_ENODEV = 1523 # 1523 + IO_ENOENT = 1524 # 1524 + IO_ENOEXEC = 1525 # 1525 + IO_ENOLCK = 1526 # 1526 + IO_ENOMEM = 1527 # 1527 + IO_ENOSPC = 1528 # 1528 + IO_ENOSYS = 1529 # 1529 + IO_ENOTDIR = 1530 # 1530 + IO_ENOTEMPTY = 1531 # 1531 + IO_ENOTSUP = 1532 # 1532 + IO_ENOTTY = 1533 # 1533 + IO_ENXIO = 1534 # 1534 + IO_EPERM = 1535 # 1535 + IO_EPIPE = 1536 # 1536 + IO_ERANGE = 1537 # 1537 + IO_EROFS = 1538 # 1538 + IO_ESPIPE = 1539 # 1539 + IO_ESRCH = 1540 # 1540 + IO_ETIMEDOUT = 
1541 # 1541 + IO_EXDEV = 1542 # 1542 + IO_NETWORK_ATTEMPT = 1543 # 1543 + IO_ENCODER = 1544 # 1544 + IO_FLUSH = 1545 # 1545 + IO_WRITE = 1546 # 1546 + IO_NO_INPUT = 1547 # 1547 + IO_BUFFER_FULL = 1548 # 1548 + IO_LOAD_ERROR = 1549 # 1549 + IO_ENOTSOCK = 1550 # 1550 + IO_EISCONN = 1551 # 1551 + IO_ECONNREFUSED = 1552 # 1552 + IO_ENETUNREACH = 1553 # 1553 + IO_EADDRINUSE = 1554 # 1554 + IO_EALREADY = 1555 # 1555 + IO_EAFNOSUPPORT = 1556 # 1556 + XINCLUDE_RECURSION = 1600 + XINCLUDE_PARSE_VALUE = 1601 # 1601 + XINCLUDE_ENTITY_DEF_MISMATCH = 1602 # 1602 + XINCLUDE_NO_HREF = 1603 # 1603 + XINCLUDE_NO_FALLBACK = 1604 # 1604 + XINCLUDE_HREF_URI = 1605 # 1605 + XINCLUDE_TEXT_FRAGMENT = 1606 # 1606 + XINCLUDE_TEXT_DOCUMENT = 1607 # 1607 + XINCLUDE_INVALID_CHAR = 1608 # 1608 + XINCLUDE_BUILD_FAILED = 1609 # 1609 + XINCLUDE_UNKNOWN_ENCODING = 1610 # 1610 + XINCLUDE_MULTIPLE_ROOT = 1611 # 1611 + XINCLUDE_XPTR_FAILED = 1612 # 1612 + XINCLUDE_XPTR_RESULT = 1613 # 1613 + XINCLUDE_INCLUDE_IN_INCLUDE = 1614 # 1614 + XINCLUDE_FALLBACKS_IN_INCLUDE = 1615 # 1615 + XINCLUDE_FALLBACK_NOT_IN_INCLUDE = 1616 # 1616 + XINCLUDE_DEPRECATED_NS = 1617 # 1617 + XINCLUDE_FRAGMENT_ID = 1618 # 1618 + CATALOG_MISSING_ATTR = 1650 + CATALOG_ENTRY_BROKEN = 1651 # 1651 + CATALOG_PREFER_VALUE = 1652 # 1652 + CATALOG_NOT_CATALOG = 1653 # 1653 + CATALOG_RECURSION = 1654 # 1654 + SCHEMAP_PREFIX_UNDEFINED = 1700 + SCHEMAP_ATTRFORMDEFAULT_VALUE = 1701 # 1701 + SCHEMAP_ATTRGRP_NONAME_NOREF = 1702 # 1702 + SCHEMAP_ATTR_NONAME_NOREF = 1703 # 1703 + SCHEMAP_COMPLEXTYPE_NONAME_NOREF = 1704 # 1704 + SCHEMAP_ELEMFORMDEFAULT_VALUE = 1705 # 1705 + SCHEMAP_ELEM_NONAME_NOREF = 1706 # 1706 + SCHEMAP_EXTENSION_NO_BASE = 1707 # 1707 + SCHEMAP_FACET_NO_VALUE = 1708 # 1708 + SCHEMAP_FAILED_BUILD_IMPORT = 1709 # 1709 + SCHEMAP_GROUP_NONAME_NOREF = 1710 # 1710 + SCHEMAP_IMPORT_NAMESPACE_NOT_URI = 1711 # 1711 + SCHEMAP_IMPORT_REDEFINE_NSNAME = 1712 # 1712 + SCHEMAP_IMPORT_SCHEMA_NOT_URI = 1713 # 1713 + SCHEMAP_INVALID_BOOLEAN = 
1714 # 1714 + SCHEMAP_INVALID_ENUM = 1715 # 1715 + SCHEMAP_INVALID_FACET = 1716 # 1716 + SCHEMAP_INVALID_FACET_VALUE = 1717 # 1717 + SCHEMAP_INVALID_MAXOCCURS = 1718 # 1718 + SCHEMAP_INVALID_MINOCCURS = 1719 # 1719 + SCHEMAP_INVALID_REF_AND_SUBTYPE = 1720 # 1720 + SCHEMAP_INVALID_WHITE_SPACE = 1721 # 1721 + SCHEMAP_NOATTR_NOREF = 1722 # 1722 + SCHEMAP_NOTATION_NO_NAME = 1723 # 1723 + SCHEMAP_NOTYPE_NOREF = 1724 # 1724 + SCHEMAP_REF_AND_SUBTYPE = 1725 # 1725 + SCHEMAP_RESTRICTION_NONAME_NOREF = 1726 # 1726 + SCHEMAP_SIMPLETYPE_NONAME = 1727 # 1727 + SCHEMAP_TYPE_AND_SUBTYPE = 1728 # 1728 + SCHEMAP_UNKNOWN_ALL_CHILD = 1729 # 1729 + SCHEMAP_UNKNOWN_ANYATTRIBUTE_CHILD = 1730 # 1730 + SCHEMAP_UNKNOWN_ATTR_CHILD = 1731 # 1731 + SCHEMAP_UNKNOWN_ATTRGRP_CHILD = 1732 # 1732 + SCHEMAP_UNKNOWN_ATTRIBUTE_GROUP = 1733 # 1733 + SCHEMAP_UNKNOWN_BASE_TYPE = 1734 # 1734 + SCHEMAP_UNKNOWN_CHOICE_CHILD = 1735 # 1735 + SCHEMAP_UNKNOWN_COMPLEXCONTENT_CHILD = 1736 # 1736 + SCHEMAP_UNKNOWN_COMPLEXTYPE_CHILD = 1737 # 1737 + SCHEMAP_UNKNOWN_ELEM_CHILD = 1738 # 1738 + SCHEMAP_UNKNOWN_EXTENSION_CHILD = 1739 # 1739 + SCHEMAP_UNKNOWN_FACET_CHILD = 1740 # 1740 + SCHEMAP_UNKNOWN_FACET_TYPE = 1741 # 1741 + SCHEMAP_UNKNOWN_GROUP_CHILD = 1742 # 1742 + SCHEMAP_UNKNOWN_IMPORT_CHILD = 1743 # 1743 + SCHEMAP_UNKNOWN_LIST_CHILD = 1744 # 1744 + SCHEMAP_UNKNOWN_NOTATION_CHILD = 1745 # 1745 + SCHEMAP_UNKNOWN_PROCESSCONTENT_CHILD = 1746 # 1746 + SCHEMAP_UNKNOWN_REF = 1747 # 1747 + SCHEMAP_UNKNOWN_RESTRICTION_CHILD = 1748 # 1748 + SCHEMAP_UNKNOWN_SCHEMAS_CHILD = 1749 # 1749 + SCHEMAP_UNKNOWN_SEQUENCE_CHILD = 1750 # 1750 + SCHEMAP_UNKNOWN_SIMPLECONTENT_CHILD = 1751 # 1751 + SCHEMAP_UNKNOWN_SIMPLETYPE_CHILD = 1752 # 1752 + SCHEMAP_UNKNOWN_TYPE = 1753 # 1753 + SCHEMAP_UNKNOWN_UNION_CHILD = 1754 # 1754 + SCHEMAP_ELEM_DEFAULT_FIXED = 1755 # 1755 + SCHEMAP_REGEXP_INVALID = 1756 # 1756 + SCHEMAP_FAILED_LOAD = 1757 # 1757 + SCHEMAP_NOTHING_TO_PARSE = 1758 # 1758 + SCHEMAP_NOROOT = 1759 # 1759 + 
SCHEMAP_REDEFINED_GROUP = 1760 # 1760 + SCHEMAP_REDEFINED_TYPE = 1761 # 1761 + SCHEMAP_REDEFINED_ELEMENT = 1762 # 1762 + SCHEMAP_REDEFINED_ATTRGROUP = 1763 # 1763 + SCHEMAP_REDEFINED_ATTR = 1764 # 1764 + SCHEMAP_REDEFINED_NOTATION = 1765 # 1765 + SCHEMAP_FAILED_PARSE = 1766 # 1766 + SCHEMAP_UNKNOWN_PREFIX = 1767 # 1767 + SCHEMAP_DEF_AND_PREFIX = 1768 # 1768 + SCHEMAP_UNKNOWN_INCLUDE_CHILD = 1769 # 1769 + SCHEMAP_INCLUDE_SCHEMA_NOT_URI = 1770 # 1770 + SCHEMAP_INCLUDE_SCHEMA_NO_URI = 1771 # 1771 + SCHEMAP_NOT_SCHEMA = 1772 # 1772 + SCHEMAP_UNKNOWN_MEMBER_TYPE = 1773 # 1773 + SCHEMAP_INVALID_ATTR_USE = 1774 # 1774 + SCHEMAP_RECURSIVE = 1775 # 1775 + SCHEMAP_SUPERNUMEROUS_LIST_ITEM_TYPE = 1776 # 1776 + SCHEMAP_INVALID_ATTR_COMBINATION = 1777 # 1777 + SCHEMAP_INVALID_ATTR_INLINE_COMBINATION = 1778 # 1778 + SCHEMAP_MISSING_SIMPLETYPE_CHILD = 1779 # 1779 + SCHEMAP_INVALID_ATTR_NAME = 1780 # 1780 + SCHEMAP_REF_AND_CONTENT = 1781 # 1781 + SCHEMAP_CT_PROPS_CORRECT_1 = 1782 # 1782 + SCHEMAP_CT_PROPS_CORRECT_2 = 1783 # 1783 + SCHEMAP_CT_PROPS_CORRECT_3 = 1784 # 1784 + SCHEMAP_CT_PROPS_CORRECT_4 = 1785 # 1785 + SCHEMAP_CT_PROPS_CORRECT_5 = 1786 # 1786 + SCHEMAP_DERIVATION_OK_RESTRICTION_1 = 1787 # 1787 + SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_1 = 1788 # 1788 + SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_2 = 1789 # 1789 + SCHEMAP_DERIVATION_OK_RESTRICTION_2_2 = 1790 # 1790 + SCHEMAP_DERIVATION_OK_RESTRICTION_3 = 1791 # 1791 + SCHEMAP_WILDCARD_INVALID_NS_MEMBER = 1792 # 1792 + SCHEMAP_INTERSECTION_NOT_EXPRESSIBLE = 1793 # 1793 + SCHEMAP_UNION_NOT_EXPRESSIBLE = 1794 # 1794 + SCHEMAP_SRC_IMPORT_3_1 = 1795 # 1795 + SCHEMAP_SRC_IMPORT_3_2 = 1796 # 1796 + SCHEMAP_DERIVATION_OK_RESTRICTION_4_1 = 1797 # 1797 + SCHEMAP_DERIVATION_OK_RESTRICTION_4_2 = 1798 # 1798 + SCHEMAP_DERIVATION_OK_RESTRICTION_4_3 = 1799 # 1799 + SCHEMAP_COS_CT_EXTENDS_1_3 = 1800 # 1800 + SCHEMAV_NOROOT = 1801 + SCHEMAV_UNDECLAREDELEM = 1802 # 1802 + SCHEMAV_NOTTOPLEVEL = 1803 # 1803 + SCHEMAV_MISSING = 1804 # 1804 + 
SCHEMAV_WRONGELEM = 1805 # 1805 + SCHEMAV_NOTYPE = 1806 # 1806 + SCHEMAV_NOROLLBACK = 1807 # 1807 + SCHEMAV_ISABSTRACT = 1808 # 1808 + SCHEMAV_NOTEMPTY = 1809 # 1809 + SCHEMAV_ELEMCONT = 1810 # 1810 + SCHEMAV_HAVEDEFAULT = 1811 # 1811 + SCHEMAV_NOTNILLABLE = 1812 # 1812 + SCHEMAV_EXTRACONTENT = 1813 # 1813 + SCHEMAV_INVALIDATTR = 1814 # 1814 + SCHEMAV_INVALIDELEM = 1815 # 1815 + SCHEMAV_NOTDETERMINIST = 1816 # 1816 + SCHEMAV_CONSTRUCT = 1817 # 1817 + SCHEMAV_INTERNAL = 1818 # 1818 + SCHEMAV_NOTSIMPLE = 1819 # 1819 + SCHEMAV_ATTRUNKNOWN = 1820 # 1820 + SCHEMAV_ATTRINVALID = 1821 # 1821 + SCHEMAV_VALUE = 1822 # 1822 + SCHEMAV_FACET = 1823 # 1823 + SCHEMAV_CVC_DATATYPE_VALID_1_2_1 = 1824 # 1824 + SCHEMAV_CVC_DATATYPE_VALID_1_2_2 = 1825 # 1825 + SCHEMAV_CVC_DATATYPE_VALID_1_2_3 = 1826 # 1826 + SCHEMAV_CVC_TYPE_3_1_1 = 1827 # 1827 + SCHEMAV_CVC_TYPE_3_1_2 = 1828 # 1828 + SCHEMAV_CVC_FACET_VALID = 1829 # 1829 + SCHEMAV_CVC_LENGTH_VALID = 1830 # 1830 + SCHEMAV_CVC_MINLENGTH_VALID = 1831 # 1831 + SCHEMAV_CVC_MAXLENGTH_VALID = 1832 # 1832 + SCHEMAV_CVC_MININCLUSIVE_VALID = 1833 # 1833 + SCHEMAV_CVC_MAXINCLUSIVE_VALID = 1834 # 1834 + SCHEMAV_CVC_MINEXCLUSIVE_VALID = 1835 # 1835 + SCHEMAV_CVC_MAXEXCLUSIVE_VALID = 1836 # 1836 + SCHEMAV_CVC_TOTALDIGITS_VALID = 1837 # 1837 + SCHEMAV_CVC_FRACTIONDIGITS_VALID = 1838 # 1838 + SCHEMAV_CVC_PATTERN_VALID = 1839 # 1839 + SCHEMAV_CVC_ENUMERATION_VALID = 1840 # 1840 + SCHEMAV_CVC_COMPLEX_TYPE_2_1 = 1841 # 1841 + SCHEMAV_CVC_COMPLEX_TYPE_2_2 = 1842 # 1842 + SCHEMAV_CVC_COMPLEX_TYPE_2_3 = 1843 # 1843 + SCHEMAV_CVC_COMPLEX_TYPE_2_4 = 1844 # 1844 + SCHEMAV_CVC_ELT_1 = 1845 # 1845 + SCHEMAV_CVC_ELT_2 = 1846 # 1846 + SCHEMAV_CVC_ELT_3_1 = 1847 # 1847 + SCHEMAV_CVC_ELT_3_2_1 = 1848 # 1848 + SCHEMAV_CVC_ELT_3_2_2 = 1849 # 1849 + SCHEMAV_CVC_ELT_4_1 = 1850 # 1850 + SCHEMAV_CVC_ELT_4_2 = 1851 # 1851 + SCHEMAV_CVC_ELT_4_3 = 1852 # 1852 + SCHEMAV_CVC_ELT_5_1_1 = 1853 # 1853 + SCHEMAV_CVC_ELT_5_1_2 = 1854 # 1854 + SCHEMAV_CVC_ELT_5_2_1 = 1855 # 1855 
+ SCHEMAV_CVC_ELT_5_2_2_1 = 1856 # 1856 + SCHEMAV_CVC_ELT_5_2_2_2_1 = 1857 # 1857 + SCHEMAV_CVC_ELT_5_2_2_2_2 = 1858 # 1858 + SCHEMAV_CVC_ELT_6 = 1859 # 1859 + SCHEMAV_CVC_ELT_7 = 1860 # 1860 + SCHEMAV_CVC_ATTRIBUTE_1 = 1861 # 1861 + SCHEMAV_CVC_ATTRIBUTE_2 = 1862 # 1862 + SCHEMAV_CVC_ATTRIBUTE_3 = 1863 # 1863 + SCHEMAV_CVC_ATTRIBUTE_4 = 1864 # 1864 + SCHEMAV_CVC_COMPLEX_TYPE_3_1 = 1865 # 1865 + SCHEMAV_CVC_COMPLEX_TYPE_3_2_1 = 1866 # 1866 + SCHEMAV_CVC_COMPLEX_TYPE_3_2_2 = 1867 # 1867 + SCHEMAV_CVC_COMPLEX_TYPE_4 = 1868 # 1868 + SCHEMAV_CVC_COMPLEX_TYPE_5_1 = 1869 # 1869 + SCHEMAV_CVC_COMPLEX_TYPE_5_2 = 1870 # 1870 + SCHEMAV_ELEMENT_CONTENT = 1871 # 1871 + SCHEMAV_DOCUMENT_ELEMENT_MISSING = 1872 # 1872 + SCHEMAV_CVC_COMPLEX_TYPE_1 = 1873 # 1873 + SCHEMAV_CVC_AU = 1874 # 1874 + SCHEMAV_CVC_TYPE_1 = 1875 # 1875 + SCHEMAV_CVC_TYPE_2 = 1876 # 1876 + XPTR_UNKNOWN_SCHEME = 1900 + XPTR_CHILDSEQ_START = 1901 # 1901 + XPTR_EVAL_FAILED = 1902 # 1902 + XPTR_EXTRA_OBJECTS = 1903 # 1903 + C14N_CREATE_CTXT = 1950 + C14N_REQUIRES_UTF8 = 1951 # 1951 + C14N_CREATE_STACK = 1952 # 1952 + C14N_INVALID_NODE = 1953 # 1953 + FTP_PASV_ANSWER = 2000 + FTP_EPSV_ANSWER = 2001 # 2001 + FTP_ACCNT = 2002 # 2002 + HTTP_URL_SYNTAX = 2020 + HTTP_USE_IP = 2021 # 2021 + HTTP_UNKNOWN_HOST = 2022 # 2022 + SCHEMAP_SRC_SIMPLE_TYPE_1 = 3000 + SCHEMAP_SRC_SIMPLE_TYPE_2 = 3001 # 3001 + SCHEMAP_SRC_SIMPLE_TYPE_3 = 3002 # 3002 + SCHEMAP_SRC_SIMPLE_TYPE_4 = 3003 # 3003 + SCHEMAP_SRC_RESOLVE = 3004 # 3004 + SCHEMAP_SRC_RESTRICTION_BASE_OR_SIMPLETYPE = 3005 # 3005 + SCHEMAP_SRC_LIST_ITEMTYPE_OR_SIMPLETYPE = 3006 # 3006 + SCHEMAP_SRC_UNION_MEMBERTYPES_OR_SIMPLETYPES = 3007 # 3007 + SCHEMAP_ST_PROPS_CORRECT_1 = 3008 # 3008 + SCHEMAP_ST_PROPS_CORRECT_2 = 3009 # 3009 + SCHEMAP_ST_PROPS_CORRECT_3 = 3010 # 3010 + SCHEMAP_COS_ST_RESTRICTS_1_1 = 3011 # 3011 + SCHEMAP_COS_ST_RESTRICTS_1_2 = 3012 # 3012 + SCHEMAP_COS_ST_RESTRICTS_1_3_1 = 3013 # 3013 + SCHEMAP_COS_ST_RESTRICTS_1_3_2 = 3014 # 3014 + 
SCHEMAP_COS_ST_RESTRICTS_2_1 = 3015 # 3015 + SCHEMAP_COS_ST_RESTRICTS_2_3_1_1 = 3016 # 3016 + SCHEMAP_COS_ST_RESTRICTS_2_3_1_2 = 3017 # 3017 + SCHEMAP_COS_ST_RESTRICTS_2_3_2_1 = 3018 # 3018 + SCHEMAP_COS_ST_RESTRICTS_2_3_2_2 = 3019 # 3019 + SCHEMAP_COS_ST_RESTRICTS_2_3_2_3 = 3020 # 3020 + SCHEMAP_COS_ST_RESTRICTS_2_3_2_4 = 3021 # 3021 + SCHEMAP_COS_ST_RESTRICTS_2_3_2_5 = 3022 # 3022 + SCHEMAP_COS_ST_RESTRICTS_3_1 = 3023 # 3023 + SCHEMAP_COS_ST_RESTRICTS_3_3_1 = 3024 # 3024 + SCHEMAP_COS_ST_RESTRICTS_3_3_1_2 = 3025 # 3025 + SCHEMAP_COS_ST_RESTRICTS_3_3_2_2 = 3026 # 3026 + SCHEMAP_COS_ST_RESTRICTS_3_3_2_1 = 3027 # 3027 + SCHEMAP_COS_ST_RESTRICTS_3_3_2_3 = 3028 # 3028 + SCHEMAP_COS_ST_RESTRICTS_3_3_2_4 = 3029 # 3029 + SCHEMAP_COS_ST_RESTRICTS_3_3_2_5 = 3030 # 3030 + SCHEMAP_COS_ST_DERIVED_OK_2_1 = 3031 # 3031 + SCHEMAP_COS_ST_DERIVED_OK_2_2 = 3032 # 3032 + SCHEMAP_S4S_ELEM_NOT_ALLOWED = 3033 # 3033 + SCHEMAP_S4S_ELEM_MISSING = 3034 # 3034 + SCHEMAP_S4S_ATTR_NOT_ALLOWED = 3035 # 3035 + SCHEMAP_S4S_ATTR_MISSING = 3036 # 3036 + SCHEMAP_S4S_ATTR_INVALID_VALUE = 3037 # 3037 + SCHEMAP_SRC_ELEMENT_1 = 3038 # 3038 + SCHEMAP_SRC_ELEMENT_2_1 = 3039 # 3039 + SCHEMAP_SRC_ELEMENT_2_2 = 3040 # 3040 + SCHEMAP_SRC_ELEMENT_3 = 3041 # 3041 + SCHEMAP_P_PROPS_CORRECT_1 = 3042 # 3042 + SCHEMAP_P_PROPS_CORRECT_2_1 = 3043 # 3043 + SCHEMAP_P_PROPS_CORRECT_2_2 = 3044 # 3044 + SCHEMAP_E_PROPS_CORRECT_2 = 3045 # 3045 + SCHEMAP_E_PROPS_CORRECT_3 = 3046 # 3046 + SCHEMAP_E_PROPS_CORRECT_4 = 3047 # 3047 + SCHEMAP_E_PROPS_CORRECT_5 = 3048 # 3048 + SCHEMAP_E_PROPS_CORRECT_6 = 3049 # 3049 + SCHEMAP_SRC_INCLUDE = 3050 # 3050 + SCHEMAP_SRC_ATTRIBUTE_1 = 3051 # 3051 + SCHEMAP_SRC_ATTRIBUTE_2 = 3052 # 3052 + SCHEMAP_SRC_ATTRIBUTE_3_1 = 3053 # 3053 + SCHEMAP_SRC_ATTRIBUTE_3_2 = 3054 # 3054 + SCHEMAP_SRC_ATTRIBUTE_4 = 3055 # 3055 + SCHEMAP_NO_XMLNS = 3056 # 3056 + SCHEMAP_NO_XSI = 3057 # 3057 + SCHEMAP_COS_VALID_DEFAULT_1 = 3058 # 3058 + SCHEMAP_COS_VALID_DEFAULT_2_1 = 3059 # 3059 + 
SCHEMAP_COS_VALID_DEFAULT_2_2_1 = 3060 # 3060 + SCHEMAP_COS_VALID_DEFAULT_2_2_2 = 3061 # 3061 + SCHEMAP_CVC_SIMPLE_TYPE = 3062 # 3062 + SCHEMAP_COS_CT_EXTENDS_1_1 = 3063 # 3063 + SCHEMAP_SRC_IMPORT_1_1 = 3064 # 3064 + SCHEMAP_SRC_IMPORT_1_2 = 3065 # 3065 + SCHEMAP_SRC_IMPORT_2 = 3066 # 3066 + SCHEMAP_SRC_IMPORT_2_1 = 3067 # 3067 + SCHEMAP_SRC_IMPORT_2_2 = 3068 # 3068 + SCHEMAP_INTERNAL = 3069 # 3069 non-W3C + SCHEMAP_NOT_DETERMINISTIC = 3070 # 3070 non-W3C + SCHEMAP_SRC_ATTRIBUTE_GROUP_1 = 3071 # 3071 + SCHEMAP_SRC_ATTRIBUTE_GROUP_2 = 3072 # 3072 + SCHEMAP_SRC_ATTRIBUTE_GROUP_3 = 3073 # 3073 + SCHEMAP_MG_PROPS_CORRECT_1 = 3074 # 3074 + SCHEMAP_MG_PROPS_CORRECT_2 = 3075 # 3075 + SCHEMAP_SRC_CT_1 = 3076 # 3076 + SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_3 = 3077 # 3077 + SCHEMAP_AU_PROPS_CORRECT_2 = 3078 # 3078 + SCHEMAP_A_PROPS_CORRECT_2 = 3079 # 3079 + MODULE_OPEN = 4900 # 4900 + MODULE_CLOSE = 4901 # 4901 + CHECK_FOUND_ELEMENT = 5000 + CHECK_FOUND_ATTRIBUTE = 5001 # 5001 + CHECK_FOUND_TEXT = 5002 # 5002 + CHECK_FOUND_CDATA = 5003 # 5003 + CHECK_FOUND_ENTITYREF = 5004 # 5004 + CHECK_FOUND_ENTITY = 5005 # 5005 + CHECK_FOUND_PI = 5006 # 5006 + CHECK_FOUND_COMMENT = 5007 # 5007 + CHECK_FOUND_DOCTYPE = 5008 # 5008 + CHECK_FOUND_FRAGMENT = 5009 # 5009 + CHECK_FOUND_NOTATION = 5010 # 5010 + CHECK_UNKNOWN_NODE = 5011 # 5011 + CHECK_ENTITY_TYPE = 5012 # 5012 + CHECK_NO_PARENT = 5013 # 5013 + CHECK_NO_DOC = 5014 # 5014 + CHECK_NO_NAME = 5015 # 5015 + CHECK_NO_ELEM = 5016 # 5016 + CHECK_WRONG_DOC = 5017 # 5017 + CHECK_NO_PREV = 5018 # 5018 + CHECK_WRONG_PREV = 5019 # 5019 + CHECK_NO_NEXT = 5020 # 5020 + CHECK_WRONG_NEXT = 5021 # 5021 + CHECK_NOT_DTD = 5022 # 5022 + CHECK_NOT_ATTR = 5023 # 5023 + CHECK_NOT_ATTR_DECL = 5024 # 5024 + CHECK_NOT_ELEM_DECL = 5025 # 5025 + CHECK_NOT_ENTITY_DECL = 5026 # 5026 + CHECK_NOT_NS_DECL = 5027 # 5027 + CHECK_NO_HREF = 5028 # 5028 + CHECK_WRONG_PARENT = 5029 # 5029 + CHECK_NS_SCOPE = 5030 # 5030 + CHECK_NS_ANCESTOR = 5031 # 5031 + CHECK_NOT_UTF8 
= 5032 # 5032 + CHECK_NO_DICT = 5033 # 5033 + CHECK_NOT_NCNAME = 5034 # 5034 + CHECK_OUTSIDE_DICT = 5035 # 5035 + CHECK_WRONG_NAME = 5036 # 5036 + CHECK_NAME_NOT_NULL = 5037 # 5037 + CHECK_ = 5038 # 5033 + CHECK_X = 5039 # 503 + +cdef object __names +__names = LxmlErrorLevels._names +for name, value in vars(LxmlErrorLevels).iteritems(): + python.PyDict_SetItem(__names, value, name) + +__names = LxmlErrorDomains._names +for name, value in vars(LxmlErrorDomains).iteritems(): + python.PyDict_SetItem(__names, value, name) + +__names = LxmlErrorTypes._names +for name, value in vars(LxmlErrorTypes).iteritems(): + python.PyDict_SetItem(__names, value, name) Modified: lxml/branch/error-reporting/src/lxml/xmlschema.pxi ============================================================================== --- lxml/branch/error-reporting/src/lxml/xmlschema.pxi (original) +++ lxml/branch/error-reporting/src/lxml/xmlschema.pxi Thu Mar 16 21:43:47 2006 @@ -17,6 +17,7 @@ """Turn a document into an XML Schema validator. """ cdef xmlschema.xmlSchema* _c_schema + cdef _ErrorLog _error_log def __init__(self, _ElementTree etree): cdef _Document doc @@ -30,6 +31,7 @@ xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt) raise XMLSchemaParseError, "Document is not valid XML Schema" xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt) + self._error_log = _ErrorLog() def __dealloc__(self): xmlschema.xmlSchemaFree(self._c_schema) @@ -42,6 +44,7 @@ cdef xmlschema.xmlSchemaValidCtxt* valid_ctxt cdef xmlDoc* c_doc cdef int ret + self._error_log.connect() valid_ctxt = xmlschema.xmlSchemaNewValidCtxt(self._c_schema) c_doc = _fakeRootDoc(etree._doc._c_doc, etree._context_node._c_node) @@ -49,10 +52,11 @@ _destroyFakeDoc(etree._doc._c_doc, c_doc) xmlschema.xmlSchemaFreeValidCtxt(valid_ctxt) + self._error_log.disconnect() if ret == -1: raise XMLSchemaValidateError, "Internal error in XML Schema validation." 
return ret == 0 property error_log: def __get__(self): - return __build_error_log_tuple(self) + return self._error_log.copy() From scoder at codespeak.net Thu Mar 16 22:07:32 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 16 22:07:34 2006 Subject: [Lxml-checkins] r24495 - lxml/branch/error-reporting/src/lxml/tests Message-ID: <20060316210732.CD23F100B7@code0.codespeak.net> Author: scoder Date: Thu Mar 16 22:07:31 2006 New Revision: 24495 Modified: lxml/branch/error-reporting/src/lxml/tests/test_relaxng.py Log: test case for error reporting in RelaxNG validation Modified: lxml/branch/error-reporting/src/lxml/tests/test_relaxng.py ============================================================================== --- lxml/branch/error-reporting/src/lxml/tests/test_relaxng.py (original) +++ lxml/branch/error-reporting/src/lxml/tests/test_relaxng.py Thu Mar 16 22:07:31 2006 @@ -25,6 +25,25 @@ self.assert_(schema.validate(tree_valid)) self.assert_(not schema.validate(tree_invalid)) + def test_relaxng_error(self): + tree_invalid = self.parse('') + schema = self.parse('''\ + + + + + + + +''') + schema = etree.RelaxNG(schema) + self.assert_(not schema.validate(tree_invalid)) + errors = schema.error_log + self.assert_([ log for log in errors + if log.level_name == "ERROR" ]) + self.assert_([ log for log in errors + if "not expect" in log.message ]) + def test_relaxng_invalid_schema(self): schema = self.parse('''\ From scoder at codespeak.net Fri Mar 17 07:43:37 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 17 07:43:39 2006 Subject: [Lxml-checkins] r24505 - lxml/branch/error-reporting/src/lxml Message-ID: <20060317064337.DB4B2100D3@code0.codespeak.net> Author: scoder Date: Fri Mar 17 07:43:36 2006 New Revision: 24505 Modified: lxml/branch/error-reporting/src/lxml/xmlerror.pxi Log: some clean up, allow filtering multiple types/domains/levels at once Modified: lxml/branch/error-reporting/src/lxml/xmlerror.pxi 
============================================================================== --- lxml/branch/error-reporting/src/lxml/xmlerror.pxi (original) +++ lxml/branch/error-reporting/src/lxml/xmlerror.pxi Fri Mar 17 07:43:36 2006 @@ -2,68 +2,44 @@ # DEBUG setup cdef class _LogEntry: - cdef int _domain - cdef int _type - cdef int _line - cdef xmlerror.xmlErrorLevel _level - cdef object _message - cdef object _filename + cdef readonly object domain + cdef readonly object type + cdef readonly object line + cdef readonly object level + cdef readonly object message + cdef readonly object filename cdef _set(self, xmlerror.xmlError* error): - self._domain = error.domain - self._type = error.code - self._level = error.level - self._line = error.line - self._message = python.PyString_FromString(error.message) + self.domain = error.domain + self.type = error.code + self.level = error.level + self.line = error.line + self.message = python.PyString_FromString(error.message) if error.file is NULL: - self._filename = None + self.filename = None else: - self._filename = python.PyString_FromString(error.file) + self.filename = python.PyString_FromString(error.file) def __repr__(self): if self._filename: return "%s/%d[%s]%s/%s: %s" % ( - self.filename, self._line, self.level_name, + self.filename, self.line, self.level_name, self.domain_name, self.type_name, self.message) else: return "[%s]%s/%s: %s" % ( self.level_name, self.domain_name, self.type_name, self.message) - property filename: - def __get__(self): - return self._filename - - property message: - def __get__(self): - return self._message - - property line: - def __get__(self): - return self._line - - property domain: - def __get__(self): - return self._domain - property domain_name: def __get__(self): - return LxmlErrorDomains._names[self._domain] - - property type: - def __get__(self): - return self._domain + return LxmlErrorDomains._names[self.domain] property type_name: def __get__(self): - return 
LxmlErrorTypes._names[self._type] - - property level: - def __get__(self): - return self._level + return LxmlErrorTypes._names[self.type] property level_name: def __get__(self): - return LxmlErrorLevels._names[self._level] + return LxmlErrorLevels._names[self.level] cdef class _BaseErrorLog: "Immutable base version of an error log." @@ -80,33 +56,40 @@ def __repr__(self): return '\n'.join(map(repr, self._entries)) - def filter_domain(self, domain): + def filter_domains(self, domains): cdef _LogEntry entry - cdef int c_domain - c_domain = domain filtered = [] + if not python.PySequence_Check(domains): + domains = (domains,) + for entry in self._entries: + if entry.domain in domains: + python.PyList_Append(filtered, entry) + return _BaseErrorLog(filtered) + + def filter_types(self, types): + cdef _LogEntry entry + if not python.PySequence_Check(types): + types = (types,) for entry in self._entries: - if entry._domain == c_domain: + if entry.type in types: python.PyList_Append(filtered, entry) - return filtered + return _BaseErrorLog(filtered) - def filter_type(self, type): + def filter_levels(self, levels): cdef _LogEntry entry - cdef int c_type - c_type = type + if not python.PySequence_Check(levels): + levels = (levels,) for entry in self._entries: - if entry._type == c_type: + if entry.level in levels: python.PyList_Append(filtered, entry) - return filtered + return _BaseErrorLog(filtered) - def filter_level(self, level): + def filter_from_level(self, level): cdef _LogEntry entry - cdef int c_level - c_level = level for entry in self._entries: - if (entry._level) >= c_level: + if entry.level >= level: python.PyList_Append(filtered, entry) - return filtered + return _BaseErrorLog(filtered) cdef class _ErrorLog(_BaseErrorLog): cdef object _accepted_domains From scoder at codespeak.net Fri Mar 17 07:44:26 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 17 07:44:27 2006 Subject: [Lxml-checkins] r24506 - lxml/trunk/src/lxml Message-ID: 
<20060317064426.968C0100D3@code0.codespeak.net> Author: scoder Date: Fri Mar 17 07:44:25 2006 New Revision: 24506 Modified: lxml/trunk/src/lxml/etree.h lxml/trunk/src/lxml/python.pxd Log: make callable() a macro Modified: lxml/trunk/src/lxml/etree.h ============================================================================== --- lxml/trunk/src/lxml/etree.h (original) +++ lxml/trunk/src/lxml/etree.h Fri Mar 17 07:44:25 2006 @@ -3,6 +3,7 @@ #define isinstance(a,b) PyObject_IsInstance(a,b) #define hasattr(a,b) PyObject_HasAttr(a,b) +#define callable(a) PyCallable_Check(a) #define _isElement(c_node) \ ((c_node)->type == XML_ELEMENT_NODE || \ (c_node)->type == XML_COMMENT_NODE) Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Fri Mar 17 07:44:25 2006 @@ -30,6 +30,7 @@ cdef int PyBool_Check(object instance) cdef int PySequence_Check(object instance) cdef int PyType_Check(object instance) + cdef int PyCallable_Check(object instance) cdef int PyObject_IsInstance(object instance, object classes) cdef int PyObject_HasAttr(object obj, object attr) From scoder at codespeak.net Fri Mar 17 07:45:28 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 17 07:45:29 2006 Subject: [Lxml-checkins] r24507 - lxml/trunk/src/lxml Message-ID: <20060317064528.5AE73100D3@code0.codespeak.net> Author: scoder Date: Fri Mar 17 07:45:22 2006 New Revision: 24507 Modified: lxml/trunk/src/lxml/python.pxd Log: uhm, well, use callable() as macro Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Fri Mar 17 07:45:22 2006 @@ -37,3 +37,4 @@ cdef extern from "etree.h": # redefines some functions as macros cdef int isinstance(object instance, object classes) cdef int hasattr(object obj, 
object attr) + cdef int callable(object obj) From scoder at codespeak.net Fri Mar 17 07:53:00 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 17 07:53:02 2006 Subject: [Lxml-checkins] r24508 - lxml/branch/error-reporting/src/lxml Message-ID: <20060317065300.5B35B100D3@code0.codespeak.net> Author: scoder Date: Fri Mar 17 07:52:58 2006 New Revision: 24508 Modified: lxml/branch/error-reporting/src/lxml/xmlerror.pxi Log: new function initThreadLogging() to initialize logging for the running thread Modified: lxml/branch/error-reporting/src/lxml/xmlerror.pxi ============================================================================== --- lxml/branch/error-reporting/src/lxml/xmlerror.pxi (original) +++ lxml/branch/error-reporting/src/lxml/xmlerror.pxi Fri Mar 17 07:52:58 2006 @@ -143,8 +143,15 @@ __GLOBAL_ERROR_LOG = _RotatingErrorLog(__MAX_LOG_SIZE) def clearErrorLog(): + """Clear the global error log. + Note that this log is already bounded to a fixed size.""" __GLOBAL_ERROR_LOG.clear() +def initThreadLogging(): + "Setup logging for the current thread." + _logLibxmlErrors() + _logLibxsltErrors() + def __copyGlobalErrorLog(): "Helper function for properties in exceptions." 
return __GLOBAL_ERROR_LOG.copy() @@ -186,8 +193,7 @@ # xslt.xsltSetTransformErrorFunc # init global logging -_logLibxmlErrors() -_logLibxsltErrors() +initThreadLogging() ################################################################################ ## CONSTANTS FROM "xmlerror.pxd" From scoder at codespeak.net Fri Mar 17 08:36:14 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 17 08:36:16 2006 Subject: [Lxml-checkins] r24509 - lxml/pyrex/dist Message-ID: <20060317073614.DAA73100D3@code0.codespeak.net> Author: scoder Date: Fri Mar 17 08:35:47 2006 New Revision: 24509 Modified: lxml/pyrex/dist/Pyrex-0.9.3.1-1.noarch.rpm lxml/pyrex/dist/Pyrex-0.9.3.1-1.src.rpm lxml/pyrex/dist/Pyrex-0.9.3.1.tar.gz Log: updated pyrex versions Modified: lxml/pyrex/dist/Pyrex-0.9.3.1-1.noarch.rpm ============================================================================== Binary files. No diff available. Modified: lxml/pyrex/dist/Pyrex-0.9.3.1-1.src.rpm ============================================================================== Binary files. No diff available. Modified: lxml/pyrex/dist/Pyrex-0.9.3.1.tar.gz ============================================================================== Binary files. No diff available. From scoder at codespeak.net Fri Mar 17 08:42:29 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 17 08:42:31 2006 Subject: [Lxml-checkins] r24510 - lxml/trunk Message-ID: <20060317074229.B5129100D3@code0.codespeak.net> Author: scoder Date: Fri Mar 17 08:42:23 2006 New Revision: 24510 Modified: lxml/trunk/INSTALL.txt Log: updated note on Pyrex SVN Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Fri Mar 17 08:42:23 2006 @@ -45,9 +45,14 @@ Building lxml with gcc 4.0 -------------------------- -Pyrex 0.9.3 generates C code that gcc 4.0 does not accept. 
Pending an official -release of a version of Pyrex that does work with gcc 4.0, the lxml project -currently provides an updated version of Pyrex: +Pyrex 0.9.3.1 generates C code that gcc 4.0 does not accept. Pending an +official release of a version of Pyrex that does work with gcc 4.0, the lxml +project currently provides an updated version of Pyrex in its Subversion +repository: + +http://codespeak.net/svn/lxml/pyrex/ + +To install it, you can just download one of the following files: http://codespeak.net/svn/lxml/pyrex/dist/Pyrex-0.9.3.1.tar.gz From scoder at codespeak.net Fri Mar 17 08:45:33 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 17 08:45:39 2006 Subject: [Lxml-checkins] r24511 - lxml/trunk Message-ID: <20060317074533.AFCC6100D3@code0.codespeak.net> Author: scoder Date: Fri Mar 17 08:45:31 2006 New Revision: 24511 Modified: lxml/trunk/INSTALL.txt Log: replaced calls of python2.3 by plain python Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Fri Mar 17 08:45:31 2006 @@ -20,7 +20,7 @@ However, see below for an updated version if you have any trouble using it, especially with GCC 4.x. -You also need Python 2.3 (Python 2.4 also ought to work). +You also need Python 2.3 or later. Installation ------------ @@ -33,7 +33,7 @@ It's also possible to do this:: - python2.3 setup.py build_ext -i + python setup.py build_ext -i or just:: @@ -127,11 +127,11 @@ To run the ElementTree and cElementTree compatibility tests, make sure you have lxml on your PYTHONPATH first, then run:: - python2.3 selftest.py + python selftest.py and:: - python2.3 selftest2.py + python selftest2.py If the tests give failures, errors, or worse, segmentation faults, we'd really like to know. 
Please contact us on the `mailing list`_, From scoder at codespeak.net Fri Mar 17 09:34:31 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 17 09:34:32 2006 Subject: [Lxml-checkins] r24512 - lxml/branch/error-reporting/src/lxml Message-ID: <20060317083431.D285C100D6@code0.codespeak.net> Author: scoder Date: Fri Mar 17 09:34:20 2006 New Revision: 24512 Modified: lxml/branch/error-reporting/src/lxml/xmlerror.pxi Log: clean up Modified: lxml/branch/error-reporting/src/lxml/xmlerror.pxi ============================================================================== --- lxml/branch/error-reporting/src/lxml/xmlerror.pxi (original) +++ lxml/branch/error-reporting/src/lxml/xmlerror.pxi Fri Mar 17 09:34:20 2006 @@ -168,13 +168,9 @@ __GLOBAL_ERROR_LOG._receive(error) # dummy function: no debug output at all -cdef void _nullStructuredErrorFunc(void* userData, - xmlerror.xmlError* error): - pass cdef void _nullGenericErrorFunc(void* ctxt, char* msg, ...): pass - # setup for global log: cdef void _logLibxmlErrors(): xmlerror.xmlSetGenericErrorFunc(NULL, _nullGenericErrorFunc) @@ -184,14 +180,6 @@ xslt.xsltSetGenericErrorFunc(NULL, _nullGenericErrorFunc) # xslt.xsltSetTransformErrorFunc -cdef void _shutUpLibxmlErrors(): - xmlerror.xmlSetGenericErrorFunc(NULL, _nullGenericErrorFunc) - xmlerror.xmlSetStructuredErrorFunc(NULL, _nullStructuredErrorFunc) - -cdef void _shutUpLibxsltErrors(): - xslt.xsltSetGenericErrorFunc(NULL, _nullGenericErrorFunc) - # xslt.xsltSetTransformErrorFunc - # init global logging initThreadLogging() From scoder at codespeak.net Mon Mar 20 08:14:55 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 08:15:02 2006 Subject: [Lxml-checkins] r24570 - lxml/branch/error-reporting/src/lxml Message-ID: <20060320071455.0AEDB100C6@code0.codespeak.net> Author: scoder Date: Mon Mar 20 08:14:54 2006 New Revision: 24570 Modified: lxml/branch/error-reporting/src/lxml/xmlerror.pxi Log: some clean up; currently unused/untested: 
PyErrorLog as potential integration with python logging, _DomainErrorLog to provide domain filtering Modified: lxml/branch/error-reporting/src/lxml/xmlerror.pxi ============================================================================== --- lxml/branch/error-reporting/src/lxml/xmlerror.pxi (original) +++ lxml/branch/error-reporting/src/lxml/xmlerror.pxi Mon Mar 20 08:14:54 2006 @@ -1,6 +1,30 @@ ################################################################################ # DEBUG setup +# global list to collect error output messages from libxml2/libxslt +cdef _RotatingErrorLog __GLOBAL_ERROR_LOG +__GLOBAL_ERROR_LOG = _RotatingErrorLog(__MAX_LOG_SIZE) + +def __copyGlobalErrorLog(): + "Helper function for properties in exceptions." + return __GLOBAL_ERROR_LOG.copy() + + +# module level API functions + +def clearErrorLog(): + """Clear the global error log. + Note that this log is already bounded to a fixed size.""" + __GLOBAL_ERROR_LOG.clear() + +def initThreadLogging(): + "Setup logging for the current thread." 
+ _logLibxmlErrors() + _logLibxsltErrors() + + +# Logging classes + cdef class _LogEntry: cdef readonly object domain cdef readonly object type @@ -92,10 +116,8 @@ return _BaseErrorLog(filtered) cdef class _ErrorLog(_BaseErrorLog): - cdef object _accepted_domains def __init__(self): _BaseErrorLog.__init__(self, []) - accepted_domains = None def clear(self): del self._entries[:] @@ -115,18 +137,23 @@ cdef void _receive(self, xmlerror.xmlError* error): cdef _LogEntry entry - if self._accepted_domains is not None: - if error.domain not in self._accepted_domains: - return entry = _LogEntry() entry._set(error) + if __GLOBAL_ERROR_LOG is not self: + __GLOBAL_ERROR_LOG.receive(entry) self.receive(entry) - if __DEBUG != 0 and __GLOBAL_ERROR_LOG != self: - __GLOBAL_ERROR_LOG._receive(error) def receive(self, entry): python.PyList_Append(self._entries, entry) +cdef class _DomainErrorLog(_ErrorLog): + def receive(self, entry): + if entry.domain in self._accepted_domains: + _ErrorLog.receive(self, entry) + def __init__(self, domains): + _ErrorLog.__init__(self) + self._accepted_domains = tuple(domains) + cdef class _RotatingErrorLog(_ErrorLog): cdef int _max_len def __init__(self, max_len): @@ -138,29 +165,42 @@ del entries[0] python.PyList_Append(entries, entry) -# global list to collect error output messages from libxml2/libxslt -cdef _RotatingErrorLog __GLOBAL_ERROR_LOG -__GLOBAL_ERROR_LOG = _RotatingErrorLog(__MAX_LOG_SIZE) - -def clearErrorLog(): - """Clear the global error log. 
- Note that this log is already bounded to a fixed size.""" - __GLOBAL_ERROR_LOG.clear() +cdef class PyErrorLog(_ErrorLog): + cdef object _log + cdef object _level_map + cdef object _varsOf + def __init__(self, logger_name=None): + _ErrorLog.__init__(self) + import logging + self._level_map = { + LxmlErrorLevels.WARNING : logging.WARNING, + LxmlErrorLevels.ERROR : logging.ERROR, + LxmlErrorLevels.FATAL : logging.CRITICAL + } + self._varsOf = vars + if logger_name: + logger = logging.getLogger(name) + else: + logger = logging.getLogger() + self._log = logger.log -def initThreadLogging(): - "Setup logging for the current thread." - _logLibxmlErrors() - _logLibxsltErrors() + def copy(self): + return self -def __copyGlobalErrorLog(): - "Helper function for properties in exceptions." - return __GLOBAL_ERROR_LOG.copy() + def receive(self, entry): + py_level = python.PyDict_GetItem(self._level_map, entry.level) + self._log( + py_level, + "%(asctime)s %(levelname)s %(domain_name)s %(message)s", + self._varsOf(entry) + ) # local log function: forward error to logger object cdef void _localReceiveError(void* c_log_handler, xmlerror.xmlError* error): cdef _ErrorLog log_handler - log_handler = <_ErrorLog>c_log_handler - log_handler._receive(error) + if __DEBUG != 0: + log_handler = <_ErrorLog>c_log_handler + log_handler._receive(error) # global log functions: overridden by local functions cdef void _globalReceiveError(void* userData, xmlerror.xmlError* error): From scoder at codespeak.net Mon Mar 20 08:41:18 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 08:41:19 2006 Subject: [Lxml-checkins] r24571 - in lxml/branch/scoder2: . 
src/lxml Message-ID: <20060320074118.3B320100CB@code0.codespeak.net> Author: scoder Date: Mon Mar 20 08:41:15 2006 New Revision: 24571 Modified: lxml/branch/scoder2/INSTALL.txt lxml/branch/scoder2/src/lxml/etree.h lxml/branch/scoder2/src/lxml/python.pxd Log: merges from trunk Modified: lxml/branch/scoder2/INSTALL.txt ============================================================================== --- lxml/branch/scoder2/INSTALL.txt (original) +++ lxml/branch/scoder2/INSTALL.txt Mon Mar 20 08:41:15 2006 @@ -20,7 +20,7 @@ However, see below for an updated version if you have any trouble using it, especially with GCC 4.x. -You also need Python 2.3 (Python 2.4 also ought to work). +You also need Python 2.3 or later. Installation ------------ @@ -33,7 +33,7 @@ It's also possible to do this:: - python2.3 setup.py build_ext -i + python setup.py build_ext -i or just:: @@ -45,9 +45,14 @@ Building lxml with gcc 4.0 -------------------------- -Pyrex 0.9.3 generates C code that gcc 4.0 does not accept. Pending an official -release of a version of Pyrex that does work with gcc 4.0, the lxml project -currently provides an updated version of Pyrex: +Pyrex 0.9.3.1 generates C code that gcc 4.0 does not accept. Pending an +official release of a version of Pyrex that does work with gcc 4.0, the lxml +project currently provides an updated version of Pyrex in its Subversion +repository: + +http://codespeak.net/svn/lxml/pyrex/ + +To install it, you can just download one of the following files: http://codespeak.net/svn/lxml/pyrex/dist/Pyrex-0.9.3.1.tar.gz @@ -122,11 +127,11 @@ To run the ElementTree and cElementTree compatibility tests, make sure you have lxml on your PYTHONPATH first, then run:: - python2.3 selftest.py + python selftest.py and:: - python2.3 selftest2.py + python selftest2.py If the tests give failures, errors, or worse, segmentation faults, we'd really like to know. 
Please contact us on the `mailing list`_, Modified: lxml/branch/scoder2/src/lxml/etree.h ============================================================================== --- lxml/branch/scoder2/src/lxml/etree.h (original) +++ lxml/branch/scoder2/src/lxml/etree.h Mon Mar 20 08:41:15 2006 @@ -3,6 +3,7 @@ #define isinstance(a,b) PyObject_IsInstance(a,b) #define hasattr(a,b) PyObject_HasAttr(a,b) +#define callable(a) PyCallable_Check(a) #define _isElement(c_node) \ ((c_node)->type == XML_ELEMENT_NODE || \ (c_node)->type == XML_COMMENT_NODE) Modified: lxml/branch/scoder2/src/lxml/python.pxd ============================================================================== --- lxml/branch/scoder2/src/lxml/python.pxd (original) +++ lxml/branch/scoder2/src/lxml/python.pxd Mon Mar 20 08:41:15 2006 @@ -32,9 +32,11 @@ cdef int PyBool_Check(object instance) cdef int PySequence_Check(object instance) cdef int PyType_Check(object instance) + cdef int PyCallable_Check(object instance) cdef int PyObject_IsInstance(object instance, object classes) cdef int PyObject_HasAttr(object obj, object attr) cdef extern from "etree.h": # redefines some functions as macros cdef int isinstance(object instance, object classes) cdef int hasattr(object obj, object attr) + cdef int callable(object obj) From scoder at codespeak.net Mon Mar 20 09:08:36 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 09:08:37 2006 Subject: [Lxml-checkins] r24573 - lxml/branch/scoder2 Message-ID: <20060320080836.8BC7F100C6@code0.codespeak.net> Author: scoder Date: Mon Mar 20 09:08:35 2006 New Revision: 24573 Modified: lxml/branch/scoder2/bench.py Log: fix benchmark order when running lib specific benchmarks on all libs Modified: lxml/branch/scoder2/bench.py ============================================================================== --- lxml/branch/scoder2/bench.py (original) +++ lxml/branch/scoder2/bench.py Mon Mar 20 09:08:35 2006 @@ -194,9 +194,9 @@ if not name.startswith('bench_'): continue 
method = getattr(self, name) - if hasattr(method, 'LIBS'): - if self.lib_name not in method.LIBS: - continue + if hasattr(method, 'LIBS') and self.lib_name not in method.LIBS: + benchmarks.append((name, None, (), 0, 0)) + continue if method.__doc__: tree_sets = method.__doc__.split() else: @@ -496,9 +496,14 @@ for bench_calls in izip(*benchmarks): for lib, (bench, benchmark_setup) in enumerate(izip(benchmark_suites, bench_calls)): - bench_name = benchmark_setup[0] + bench_name, method_call = benchmark_setup[:2] tree_set_name = build_treeset_name(*benchmark_setup[-3:]) - print "%-3s: %-23s (%-10s)" % (bench.lib_name, bench_name[6:29], tree_set_name), + print "%-3s: %-23s" % (bench.lib_name, bench_name[6:29]), + if method_call is None: + print "skipped" + continue + + print "(%-10s)" % tree_set_name, sys.stdout.flush() result = run_bench(bench, *benchmark_setup) From faassen at codespeak.net Mon Mar 20 12:53:31 2006 From: faassen at codespeak.net (faassen@codespeak.net) Date: Mon Mar 20 12:53:33 2006 Subject: [Lxml-checkins] r24580 - lxml/trunk Message-ID: <20060320115331.39BE7100DA@code0.codespeak.net> Author: faassen Date: Mon Mar 20 12:53:27 2006 New Revision: 24580 Modified: lxml/trunk/CHANGES.txt lxml/trunk/version.txt Log: Preparing for release. 
Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Mar 20 12:53:27 2006 @@ -1,8 +1,8 @@ lxml changelog ============== -Under development -================= +0.9 (2006-03-20) +================ Features added -------------- Modified: lxml/trunk/version.txt ============================================================================== --- lxml/trunk/version.txt (original) +++ lxml/trunk/version.txt Mon Mar 20 12:53:27 2006 @@ -1 +1 @@ -0.8 +0.9 From scoder at codespeak.net Mon Mar 20 13:18:29 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 13:18:40 2006 Subject: [Lxml-checkins] r24581 - lxml/trunk/src/lxml/tests Message-ID: <20060320121829.E3F7E100DB@code0.codespeak.net> Author: scoder Date: Mon Mar 20 13:18:28 2006 New Revision: 24581 Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py lxml/trunk/src/lxml/tests/test_xslt.py Log: merged in test cases for XPath and XSLT from scoder2 branch Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Mon Mar 20 13:18:28 2006 @@ -154,6 +154,81 @@ self.assertEquals('Hoi', r[0].text) self.assertEquals('Dag', r[1].text) + def test_xpath_variables(self): + x = self.parse('') + e = etree.XPathEvaluator(x) + + expr = "/a[@attr=$aval]" + r = e.evaluate(expr, aval=1) + self.assertEquals(0, len(r)) + + r = e.evaluate(expr, aval="true") + self.assertEquals(1, len(r)) + self.assertEquals("true", r[0].get('attr')) + + r = e.evaluate(expr, aval=True) + self.assertEquals(1, len(r)) + self.assertEquals("true", r[0].get('attr')) + + +class ETreeXPathClassTestCase(HelperTestCase): + "Tests for the XPath class" + def test_xpath_compile_doc(self): + x = self.parse('') + + expr = 
etree.XPath("/a[@attr != 'true']") + r = expr.evaluate(x) + self.assertEquals(0, len(r)) + + expr = etree.XPath("/a[@attr = 'true']") + r = expr.evaluate(x) + self.assertEquals(1, len(r)) + + expr = etree.XPath( expr.path ) + r = expr.evaluate(x) + self.assertEquals(1, len(r)) + + def test_xpath_compile_element(self): + x = self.parse('') + root = x.getroot() + + expr = etree.XPath("./b") + r = expr.evaluate(root) + self.assertEquals(1, len(r)) + self.assertEquals('b', r[0].tag) + + expr = etree.XPath("./*") + r = expr.evaluate(root) + self.assertEquals(2, len(r)) + + def test_xpath_compile_vars(self): + x = self.parse('') + + expr = etree.XPath("/a[@attr=$aval]") + r = expr.evaluate(x, aval=False) + self.assertEquals(0, len(r)) + + r = expr.evaluate(x, aval=True) + self.assertEquals(1, len(r)) + + def test_xpath_compile_error(self): + self.assertRaises(SyntaxError, etree.XPath, '\\fad') + +class ETreeETXPathClassTestCase(HelperTestCase): + "Tests for the ETXPath class" + def test_xpath_compile_ns(self): + x = self.parse('') + + expr = etree.ETXPath("/a/{nsa}b") + r = expr.evaluate(x) + self.assertEquals(1, len(r)) + self.assertEquals('{nsa}b', r[0].tag) + + expr = etree.ETXPath("/a/{nsb}b") + r = expr.evaluate(x) + self.assertEquals(1, len(r)) + self.assertEquals('{nsb}b', r[0].tag) + SAMPLE_XML = etree.parse(StringIO(""" text @@ -248,6 +323,8 @@ def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeXPathTestCase)]) + suite.addTests([unittest.makeSuite(ETreeXPathClassTestCase)]) + suite.addTests([unittest.makeSuite(ETreeETXPathClassTestCase)]) suite.addTests([doctest.DocTestSuite()]) return suite Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Mon Mar 20 13:18:28 2006 @@ -29,6 +29,39 @@ B ''', st.tostring(res)) + def test_xslt_input(self): + tree = 
self.parse('BC') + style = self.parse('''\ + + + + + +''') + + st = etree.XSLT(style) + st = etree.XSLT(style.getroot()) + self.assertRaises(TypeError, etree.XSLT, None) + + def test_xslt_input_partial_doc(self): + tree = self.parse('BC') + style = self.parse('''\ + + + + + + + +''') + + self.assertRaises(etree.XSLTParseError, etree.XSLT, style) + root_node = style.getroot() + self.assertRaises(etree.XSLTParseError, etree.XSLT, root_node) + st = etree.XSLT(root_node[0]) + def test_xslt_broken(self): tree = self.parse('') style = self.parse('''\ @@ -248,6 +281,43 @@ self.assertEquals(self._rootstring(result), 'C') + def test_extensions1(self): + tree = self.parse('B') + style = self.parse('''\ + + +''') + + def mytext(ctxt, values): + return 'X' * len(values) + + result = tree.xslt(style, {'testns' : {'mytext' : mytext}}) + self.assertEquals(self._rootstring(result), + 'X') + + def test_extensions2(self): + tree = self.parse('B') + style = self.parse('''\ + + +''') + + def mytext(ctxt, values): + return 'X' * len(values) + + namespace = etree.FunctionNamespace('testns') + namespace['mytext'] = mytext + + result = tree.xslt(style) + self.assertEquals(self._rootstring(result), + 'X') + def test_xslt_document_parse(self): # make sure document('') works from loaded files xslt = etree.XSLT(etree.parse(fileInTestDir("test-document.xslt"))) @@ -273,6 +343,8 @@ suite.addTests([unittest.makeSuite(ETreeXSLTTestCase)]) suite.addTests( [doctest.DocFileSuite('../../../doc/xpath.txt')]) + suite.addTests( + [doctest.DocFileSuite('../../../doc/extensions.txt')]) return suite if __name__ == '__main__': From scoder at codespeak.net Mon Mar 20 14:04:55 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 14:04:57 2006 Subject: [Lxml-checkins] r24583 - lxml/trunk/doc Message-ID: <20060320130455.2C22B100E1@code0.codespeak.net> Author: scoder Date: Mon Mar 20 14:04:49 2006 New Revision: 24583 Added: lxml/trunk/doc/extensions.txt - copied unchanged from r24582, 
lxml/branch/scoder2/doc/extensions.txt Log: merged in extension doctests from scoder2 branch From scoder at codespeak.net Mon Mar 20 14:15:46 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 14:15:48 2006 Subject: [Lxml-checkins] r24585 - in lxml/trunk: . doc src/lxml Message-ID: <20060320131546.E21DD100DC@code0.codespeak.net> Author: scoder Date: Mon Mar 20 14:15:43 2006 New Revision: 24585 Modified: lxml/trunk/CHANGES.txt lxml/trunk/doc/main.txt lxml/trunk/doc/xpath.txt lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/nsclasses.pxi lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/xslt.pxi Log: big merge from scoder2 branch: extension functions for XPath/XSLT, XPath variable support, XPath class Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Mar 20 14:15:43 2006 @@ -7,6 +7,30 @@ Features added -------------- +* Various performance improvements + +* Benchmark script for lxml, ElementTree and cElementTree + +* Support for registering extension functions through new FunctionNamespace + class (see doc/extensions.txt) + +* Support for variables in XPath expressions (also in XPath class) + +* XPath class for compiled XPath expressions + +* XMLID module level function + +* XMLParser API for customized libxml2 parser configuration + +* Support for custom Element classes through new Namespace API (see + doc/namespace_extensions.txt) + +* Common exception base class LxmlError for module exceptions + +* real iterator support in iter(Element), Element.getiterator() + +* XSLT objects are callable, result trees support str() + * Added MANIFEST.in for easier creation of RPM files. * 'getparent' method on elements allows navigation to an element's @@ -15,6 +39,19 @@ * Python core compatible SAX tree builder and SAX event generator. See doc/sax.txt for more information. 
+Bugs fixed +---------- + +* Segfaults and memory leaks in various API functions of Element + +* Segfault in XSLT.tostring() + +* ElementTree object no longer interfere, Elements can be root of different + ElementTrees at the same time + +* document('') now works in XSLT documents read from files (in-memory + documents cannot support this due to libxslt deficiencies) + 0.8 (2005-11-03) ================ Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Mon Mar 20 14:15:43 2006 @@ -53,8 +53,12 @@ lxml also `extends this API`_ to expose libxml2 and libxslt specific functionality, such as XPath_, `Relax NG`_, `XML Schema`_, `XSLT`_, and -`c14n`_. There is also more `detailed information`_ about what's -possible with XPath. +`c14n`_. Python code can be called from XPath expressions and XSLT stylesheets +through the use of `extension functions`_. + +In addition to the ElementTree API, lxml also features an API for +`implementing namespaces`_ using tag specific element classes. This is a +simple way to write arbitrary XML driven APIs on top of lxml. .. _`ElementTree API`: http://effbot.org/zone/element-index.htm @@ -62,7 +66,7 @@ .. _`extends this API`: api.html -.. _`detailed information`: xpath.html +.. _`extension functions`_: extensions.html .. _XPath: http://www.w3.org/TR/xpath @@ -74,6 +78,8 @@ .. _`c14n`: http://www.w3.org/TR/2001/REC-xml-c14n-20010315 +.. _`implementing namespaces`: namespace_extensions.html + Mailing list ------------ Modified: lxml/trunk/doc/xpath.txt ============================================================================== --- lxml/trunk/doc/xpath.txt (original) +++ lxml/trunk/doc/xpath.txt Mon Mar 20 14:15:43 2006 @@ -1,13 +1,13 @@ XPath extension functions ========================= -This document describes how to deal with XPath extension -functions. 
This documentation is preliminary as the API is still in -flux. - -An extension function is defined in Python. In order to use it in -XPath, it needs to have a name by which it can be called in XPath, and -an optional namespace URI. +This document describes the OLD DEPRICATED way of dealing with XPath extension +functions. For updated documentation, please see the new Namespace API +described in nsclasses.txt and extensions.txt. + +Extension functions are defined in Python. In order to use such a function, it +must have a name by which it can be called in XPath, and an optional namespace +URI. As the first argument a function will always receive the XPathEvaluator object that is currently in the process of evaluating Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Mon Mar 20 14:15:43 2006 @@ -259,7 +259,7 @@ return root.findall(path) # extensions to ElementTree API - def xpath(self, path, namespaces=None): + def xpath(self, _path, namespaces=None, **_variables): """XPath evaluate in context of document. namespaces is an optional dictionary with prefix to namespace URI @@ -274,9 +274,9 @@ against the same document, it is more efficient to use XPathEvaluator directly. """ - return XPathDocumentEvaluator(self, namespaces).evaluate(path) + return XPathDocumentEvaluator(self._doc, namespaces).evaluate(_path, **_variables) - def xslt(self, xslt, **kw): + def xslt(self, _xslt, extensions=None, **_kw): """Transform this document using other document. xslt is a tree that should be XSLT @@ -288,8 +288,8 @@ multiple documents, it is more efficient to use the XSLT class directly. """ - style = XSLT(xslt) - return style(self, **kw) + style = XSLT(_xslt, extensions) + return style(self, **_kw) def relaxng(self, relaxng): """Validate this document using other document. 
@@ -757,8 +757,8 @@ def findall(self, path): return _elementpath.findall(self, path) - def xpath(self, path, namespaces=None): - return XPathElementEvaluator(self, namespaces).evaluate(path) + def xpath(self, _path, namespaces=None, **_variables): + return XPathElementEvaluator(self, namespaces).evaluate(_path, **_variables) cdef _Element _elementFactory(_Document doc, xmlNode* c_node): cdef _Element result @@ -1113,13 +1113,13 @@ # module-level API for ElementTree -def Element(tag, attrib=None, nsmap=None, **extra): +def Element(_tag, attrib=None, nsmap=None, **_extra): cdef xmlNode* c_node cdef xmlDoc* c_doc cdef _Document doc - ns_utf, name_utf = _getNsTag(tag) + ns_utf, name_utf = _getNsTag(_tag) c_doc = theParser.newDoc() - c_node = _createElement(c_doc, name_utf, attrib, extra) + c_node = _createElement(c_doc, name_utf, attrib, _extra) tree.xmlDocSetRootElement(c_doc, c_node) doc = _documentFactory(c_doc) # add namespaces to node if necessary @@ -1138,14 +1138,14 @@ tree.xmlAddChild(doc._c_doc, c_node) return _commentFactory(doc, c_node) -def SubElement(_Element parent, tag, attrib=None, nsmap=None, **extra): +def SubElement(_Element _parent, _tag, attrib=None, nsmap=None, **_extra): cdef xmlNode* c_node cdef _Document doc - _raiseIfNone(parent) - ns_utf, name_utf = _getNsTag(tag) - doc = parent._doc - c_node = _createElement(doc._c_doc, name_utf, attrib, extra) - tree.xmlAddChild(parent._c_node, c_node) + _raiseIfNone(_parent) + ns_utf, name_utf = _getNsTag(_tag) + doc = _parent._doc + c_node = _createElement(doc._c_doc, name_utf, attrib, _extra) + tree.xmlAddChild(_parent._c_node, c_node) # add namespaces to node if necessary doc._setNodeNamespaces(c_node, ns_utf, nsmap) return _elementFactory(doc, c_node) Modified: lxml/trunk/src/lxml/nsclasses.pxi ============================================================================== --- lxml/trunk/src/lxml/nsclasses.pxi (original) +++ lxml/trunk/src/lxml/nsclasses.pxi Mon Mar 20 14:15:43 2006 @@ -4,31 +4,52 @@ 
pass cdef class ElementBase(_Element): - """All classes in namespace implementations must inherit from this - one. Note that subclasses *must not* override __init__ or __new__ - as there is absolutely undefined when these objects will be - created or destroyed. All state must be kept in the underlying - XML.""" + """All classes in namespace implementations must inherit from this one. + Note that subclasses *must not* override __init__ or __new__ as it is + absolutely undefined when these objects will be created or destroyed. All + persistent state of elements must be stored in the underlying XML.""" pass class XSLTElement(object): "NOT IMPLEMENTED YET!" pass -cdef object __NAMESPACE_CLASSES -__NAMESPACE_CLASSES = {} +cdef object __NAMESPACE_REGISTRIES +__NAMESPACE_REGISTRIES = {} + +cdef object __FUNCTION_NAMESPACE_REGISTRIES +__FUNCTION_NAMESPACE_REGISTRIES = {} def Namespace(ns_uri): + """Retrieve the namespace object associated with the given URI. Creates a + new one if it does not yet exist.""" + if ns_uri: + ns_utf = _utf8(ns_uri) + else: + ns_utf = None + try: + return __NAMESPACE_REGISTRIES[ns_utf] + except KeyError: + registry = __NAMESPACE_REGISTRIES[ns_utf] = \ + _NamespaceRegistry(ns_uri) + return registry + +def FunctionNamespace(ns_uri): + """Retrieve the function namespace object associated with the given + URI. Creates a new one if it does not yet exist. 
A function namespace can + only be used to register extension functions.""" if ns_uri: ns_utf = _utf8(ns_uri) else: ns_utf = None try: - return __NAMESPACE_CLASSES[ns_utf] + return __FUNCTION_NAMESPACE_REGISTRIES[ns_utf] except KeyError: - registry = __NAMESPACE_CLASSES[ns_utf] = _NamespaceRegistry(ns_uri) + registry = __FUNCTION_NAMESPACE_REGISTRIES[ns_utf] = \ + _FunctionNamespaceRegistry(ns_uri) return registry + cdef class _NamespaceRegistry: "Dictionary-like registry for namespace implementations" cdef object _ns_uri @@ -94,16 +115,88 @@ def __repr__(self): return "Namespace(%r)" % self._ns_uri +cdef class _FunctionNamespaceRegistry(_NamespaceRegistry): + cdef object _prefix + cdef object _prefix_utf + property prefix: + "Namespace prefix for extension functions." + def __del__(self): + self._prefix = None # no prefix configured + def __get__(self): + return self._prefix + def __set__(self, prefix): + if prefix is None: + prefix = '' # empty prefix + self._prefix_utf = _utf8(prefix) + self._prefix = prefix + + def __setitem__(self, name, item): + if not callable(item): + raise NamespaceRegistryError, "Registered function must be callable." + if name is None: + name_utf = None + else: + name_utf = _utf8(name) + self._extensions[name_utf] = item + + cdef object _get(self, object name): + cdef python.PyObject* dict_result + dict_result = python.PyDict_GetItem(self._extensions, name) + if dict_result is NULL: + raise KeyError, "Name not registered." + return dict_result + + def __repr__(self): + return "FunctionNamespace(%r)" % self._ns_uri + +cdef object _find_all_extensions(): + "Internal lookup function to find all extension functions for XSLT/XPath." 
+ cdef _NamespaceRegistry registry + ns_extensions = {} + for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.iteritems(): + if registry._extensions: + ns_extensions[ns_utf] = registry._extensions + return ns_extensions + +cdef object _find_all_extension_prefixes(): + "Internal lookup function to find all function prefixes for XSLT/XPath." + cdef _FunctionNamespaceRegistry registry + ns_prefixes = {} + for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.iteritems(): + if registry._prefix_utf is not None: + ns_prefixes[registry._prefix_utf] = ns_utf + return ns_prefixes + +cdef _find_extensions(namespaces): + """Returns a dictionary that maps each namespace in the provided list to a + dictionary of name-function mappings defined under that namespace.""" + cdef python.PyObject* dict_result + cdef char* c_ns_utf + extension_dict = {} + for ns_uri in namespaces: + if ns_uri is None: + ns_utf = None + else: + ns_utf = _utf8(ns_uri) + dict_result = python.PyDict_GetItem( + __FUNCTION_NAMESPACE_REGISTRIES, ns_utf) + if dict_result is NULL: + continue + extensions = (<_NamespaceRegistry>dict_result)._extensions + if extensions: + python.PyDict_SetItem(extension_dict, ns_utf, extensions) + return extension_dict + cdef object _find_element_class(char* c_namespace_utf, char* c_element_name_utf): cdef python.PyObject* dict_result cdef _NamespaceRegistry registry if c_namespace_utf is not NULL: dict_result = python.PyDict_GetItemString( - __NAMESPACE_CLASSES, c_namespace_utf) + __NAMESPACE_REGISTRIES, c_namespace_utf) else: dict_result = python.PyDict_GetItem( - __NAMESPACE_CLASSES, None) + __NAMESPACE_REGISTRIES, None) if dict_result is NULL: return _Element Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Mon Mar 20 14:15:43 2006 @@ -24,6 +24,8 @@ cdef int PyDict_SetItem(object d, object key, object value) cdef 
PyObject* PyDict_GetItemString(object d, char* key) cdef PyObject* PyDict_GetItem(object d, object key) + cdef int PyDict_DelItem(object d, object key) + cdef int PyDict_Clear(object d) cdef object PyList_AsTuple(object o) cdef int PyNumber_Check(object instance) Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Mon Mar 20 14:15:43 2006 @@ -12,13 +12,13 @@ class XSLTSaveError(XSLTError): pass -class XPathError(LxmlError): +class XSLTExtensionError(XSLTError): pass -class XPathContextError(XPathError): +class XPathError(LxmlError): pass -class XPathNamespaceError(XPathError): +class XPathContextError(XPathError): pass class XPathResultError(XPathError): @@ -27,27 +27,243 @@ class XPathSyntaxError(LxmlSyntaxError): pass -cdef object _RE_STRINGS -cdef object _RE_NAMESPACES -_RE_STRINGS = re.compile('("[^"]*")|(\'[^\']*\')') -_RE_NAMESPACES = re.compile('{([^}]+)}') +################################################################################ +# support for extension functions in XPath/XSLT + +cdef class BaseContext: + cdef xpath.xmlXPathContext* _xpathCtxt + cdef _Document _doc + cdef object _extensions + cdef object _namespaces + cdef object _registered_namespaces + cdef object _registered_extensions + cdef object _extension_functions + cdef object _utf_refs + # for exception handling and temporary reference keeping: + cdef object _temp_elements + cdef object _temp_docs + cdef object _exc_info + + def __init__(self, namespaces, extensions): + self._xpathCtxt = NULL + self._utf_refs = {} + + # fix old format extensions + if isinstance(extensions, (list, tuple)): + new_extensions = {} + for extension in extensions: + for (ns_uri, name), function in extension.items(): + ns_utf = self._to_utf(ns_uri) + name_utf = self._to_utf(name) + try: + new_extensions[ns_utf][name_utf] = function + except KeyError: + 
new_extensions[ns_utf] = {name_utf : function} + extensions = new_extensions or None + + self._doc = None + self._exc_info = None + self._extensions = extensions + self._namespaces = namespaces + self._registered_namespaces = [] + self._registered_extensions = [] + self._extension_functions = {} + self._temp_elements = {} + self._temp_docs = {} + + cdef object _to_utf(self, s): + "Convert to UTF-8 and keep a reference to the encoded string" + cdef python.PyObject* dict_result + if s is None: + return None + dict_result = python.PyDict_GetItem(self._utf_refs, s) + if dict_result is not NULL: + return dict_result + utf = _utf8(s) + python.PyDict_SetItem(self._utf_refs, s, utf) + return utf + + cdef void _set_xpath_context(self, xpath.xmlXPathContext* xpathCtxt): + self._xpathCtxt = xpathCtxt + xpathCtxt.userData = self + + cdef _register_context(self, _Document doc, int allow_none_namespace): + self._doc = doc + self._exc_info = None + namespaces = self._namespaces + if namespaces is not None: + self.registerNamespaces(namespaces) + extensions = _find_extensions(namespaces.values()) + else: + extensions = _find_all_extensions() + if self._extensions is not None: + # add user provided extensions + extensions.update(self._extensions) + if extensions: + if not allow_none_namespace: + python.PyDict_DelItem(extensions, None) + self._registerExtensionFunctions(extensions) + + cdef _unregister_context(self): + self._unregisterExtensionFunctions() + self._unregisterNamespaces() + self._free_context() + + cdef _free_context(self): + self._registered_namespaces = [] + self._registered_extensions = [] + python.PyDict_Clear(self._utf_refs) + self._doc = None + if self._xpathCtxt is not NULL: + self._xpathCtxt.userData = NULL + self._xpathCtxt = NULL + + # namespaces (internal UTF-8 methods with leading '_') + + def addNamespace(self, prefix, uri): + if self._namespaces is None: + self._namespaces = {prefix : uri} + else: + self._namespaces[prefix] = uri + + def 
registerNamespaces(self, namespaces): + for prefix, uri in namespaces.items(): + self.registerNamespace(prefix, uri) + + def registerNamespace(self, prefix, ns_uri): + prefix_utf = self._to_utf(prefix) + ns_uri_utf = self._to_utf(ns_uri) + xpath.xmlXPathRegisterNs(self._xpathCtxt, prefix_utf, ns_uri_utf) + self._registered_namespaces.append(prefix_utf) + + cdef _unregisterNamespaces(self): + cdef xpath.xmlXPathContext* xpathCtxt + xpathCtxt = self._xpathCtxt + for prefix_utf in self._registered_namespaces: + xpath.xmlXPathRegisterNs(xpathCtxt, prefix_utf, NULL) + + # extension functions (internal UTF-8 methods with leading '_') + + def registerExtensionFunctions(self, extensions): + for ns_uri, extension in extensions.items(): + for name, function in extension.items(): + self.registerExtensionFunction(ns_uri, name, function) + + def registerExtensionFunction(self, ns_uri, name, function): + self._registerExtensionFunction( + self._to_utf(ns_uri), self._to_utf(name), function) + + cdef _registerExtensionFunctions(self, extensions_utf): + for ns_uri_utf, extension in extensions_utf.items(): + for name_utf, function in extension.items(): + self._registerExtensionFunction(ns_uri_utf, name_utf, function) + + cdef _registerExtensionFunction(self, ns_uri_utf, name_utf, function): + self._contextRegisterExtensionFunction(ns_uri_utf, name_utf) + self._extension_functions[(ns_uri_utf, name_utf)] = function + self._registered_extensions.append((ns_uri_utf, name_utf)) + + cdef _unregisterExtensionFunctions(self): + for ns_uri_utf, name_utf in self._registered_extensions: + self._contextUnregisterExtensionFunction(ns_uri_utf, name_utf) + + def find_extension(self, ns_uri_utf, name_utf): + return self._extension_functions[(ns_uri_utf, name_utf)] + + # Python reference keeping during XPath function evaluation + + cdef _release_temp_refs(self): + "Free temporarily referenced objects from this context." 
+ python.PyDict_Clear(self._temp_elements) + python.PyDict_Clear(self._temp_docs) + + cdef _hold(self, obj): + """A way to temporarily hold references to nodes in the evaluator. + + This is needed because otherwise nodes created in XPath extension + functions would be reference counted too soon, during the XPath + evaluation. This is most important in the case of exceptions. + """ + cdef _NodeBase element + if isinstance(obj, _NodeBase): + obj = (obj,) + elif not python.PySequence_Check(obj): + return + for o in obj: + if isinstance(o, _NodeBase): + element = <_NodeBase>o + #print "Holding element:", element._c_node + python.PyDict_SetItem(self._temp_elements, id(element), element) + #print "Holding document:", element._doc._c_doc + python.PyDict_SetItem(self._temp_docs, id(element._doc), element._doc) + ################################################################################ # XSLT +cdef class XSLTContext(BaseContext): + cdef xslt.xsltTransformContext* _xsltCtxt + def __init__(self, namespaces, extensions): + self._xsltCtxt = NULL + BaseContext.__init__(self, namespaces, extensions) + + cdef register_context(self, xslt.xsltTransformContext* xsltCtxt, _Document doc): + self._xsltCtxt = xsltCtxt + self._set_xpath_context(xsltCtxt.xpathCtxt) + self._register_context(doc, 0) + xsltCtxt.xpathCtxt.userData = self + + cdef unregister_context(self): + cdef xslt.xsltTransformContext* xsltCtxt + xsltCtxt = self._xsltCtxt + if xsltCtxt is NULL: + return + self._unregister_context() + self._xsltCtxt = NULL + + cdef free_context(self): + cdef xslt.xsltTransformContext* xsltCtxt + xsltCtxt = self._xsltCtxt + if xsltCtxt is NULL: + return + self._free_context() + self._xsltCtxt = NULL + xslt.xsltFreeTransformContext(xsltCtxt) + + def _contextRegisterExtensionFunction(self, ns_uri_utf, name_utf): + if ns_uri_utf is None: + raise XSLTExtensionError, "extensions must have non-empty namespaces" + xslt.xsltRegisterExtFunction(self._xsltCtxt, + name_utf, ns_uri_utf, 
_xpathCallback) + + def _contextUnregisterExtensionFunction(self, ns_uri_utf, name_utf): + if ns_uri_utf is not None: + xslt.xsltRegisterExtFunction(self._xsltCtxt, + name_utf, ns_uri_utf, _xpathCallback) + + cdef class XSLT: """Turn a document into an XSLT object. """ + cdef XSLTContext _context cdef xslt.xsltStylesheet* _c_style - def __init__(self, xslt_input): + def __init__(self, xslt_input, extensions=None): # make a copy of the document as stylesheet needs to assume it # doesn't change cdef xslt.xsltStylesheet* c_style cdef xmlDoc* c_doc + cdef xmlDoc* fake_c_doc cdef _Document doc + cdef _NodeBase root_node + doc = _documentOrRaise(xslt_input) - c_doc = tree.xmlCopyDoc(doc._c_doc, 1) + root_node = _rootNodeOf(xslt_input) + + fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) + c_doc = tree.xmlCopyDoc(fake_c_doc, 1) + _destroyFakeDoc(doc._c_doc, fake_c_doc) + # XXX work around bug in xmlCopyDoc (fix is upcoming in new release # of libxml2) if doc._c_doc.URL is not NULL: @@ -57,16 +273,19 @@ if c_style is NULL: raise XSLTParseError, "Cannot parse style sheet" self._c_style = c_style + + self._context = XSLTContext(None, extensions) # XXX is it worthwile to use xsltPrecomputeStylesheet here? 
def __dealloc__(self): # this cleans up copy of doc as well xslt.xsltFreeStylesheet(self._c_style) - + def __call__(self, _input, **_kw): cdef _Document input_doc cdef _NodeBase root_node cdef _Document result_doc + cdef xslt.xsltTransformContext* transform_ctxt cdef xmlDoc* c_result cdef xmlDoc* c_doc cdef char** params @@ -76,6 +295,13 @@ input_doc = _documentOrRaise(_input) root_node = _rootNodeOf(_input) + c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node) + + transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc) + if transform_ctxt is NULL: + _destroyFakeDoc(input_doc._c_doc, c_doc) + raise XSLTApplyError, "Error preparing stylesheet run" + if _kw: # allocate space for parameters # * 2 as we want an entry for both key and value, @@ -96,14 +322,19 @@ else: params = NULL - c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node) - c_result = xslt.xsltApplyStylesheet(self._c_style, c_doc, params) - _destroyFakeDoc(input_doc._c_doc, c_doc) + self._context._release_temp_refs() + self._context.register_context(transform_ctxt, input_doc) + + c_result = xslt.xsltApplyStylesheetUser(self._c_style, c_doc, params, + NULL, NULL, transform_ctxt) if params is not NULL: - # deallocate space for parameters again + # deallocate space for parameters cstd.free(params) + self._context.free_context() + _destroyFakeDoc(input_doc._c_doc, c_doc) + if c_result is NULL: raise XSLTApplyError, "Error applying stylesheet" @@ -144,71 +375,139 @@ ################################################################################ # XPath -cdef class XPathDocumentEvaluator: +cdef class XPathContext(BaseContext): + cdef object _variables + cdef object _registered_variables + def __init__(self, namespaces, extensions, variables): + BaseContext.__init__(self, namespaces, extensions) + self._variables = variables + self._registered_variables = [] + + cdef register_context(self, xpath.xmlXPathContext* xpathCtxt, _Document doc): + self._set_xpath_context(xpathCtxt) + ns_prefixes 
= _find_all_extension_prefixes() + if ns_prefixes: + self.registerNamespaces(ns_prefixes) + self._register_context(doc, 1) + if self._variables is not None: + self.registerVariables(self._variables) + + cdef unregister_context(self): + cdef xpath.xmlXPathContext* xpathCtxt + xpathCtxt = self._xpathCtxt + if xpathCtxt is NULL: + return + xpathCtxt.userData = NULL + self._unregister_context() + self._unregisterVariables() + self._registered_variables = [] + self._xpathCtxt = NULL + + cdef free_context(self): + cdef xpath.xmlXPathContext* xpathCtxt + xpathCtxt = self._xpathCtxt + if xpathCtxt is NULL: + return + self._free_context() + self._registered_variables = [] + xpath.xmlXPathFreeContext(xpathCtxt) + + def registerVariables(self, variable_dict): + for name, value in variable_dict.items(): + self.registerVariable(name, value) + + cdef void _unregisterVariables(self): + for name in self._registered_variables: + self._unregisterVariable(name) + + def registerVariable(self, name, value): + self._registerVariable(self._to_utf(name), value) + self._registered_variables.append(name) + + cdef void _registerVariable(self, name_utf, value): + xpath.xmlXPathRegisterVariable( + self._xpathCtxt, name_utf, _wrapXPathObject(value)) + + cdef void _unregisterVariable(self, name_utf): + cdef xpath.xmlXPathContext* xpathCtxt + cdef xpath.xmlXPathObject* xpathVarValue + xpathCtxt = self._xpathCtxt + xpathVarValue = xpath.xmlXPathVariableLookup(xpathCtxt, name_utf) + if xpathVarValue is not NULL: + xpath.xmlXPathRegisterVariable(xpathCtxt, name_utf, NULL) + xpath.xmlXPathFreeObject(xpathVarValue) + + def _contextRegisterExtensionFunction(self, ns_uri_utf, name_utf): + if ns_uri_utf is not None: + xpath.xmlXPathRegisterFuncNS(self._xpathCtxt, + name_utf, ns_uri_utf, _xpathCallback) + else: + xpath.xmlXPathRegisterFunc(self._xpathCtxt, name_utf, + _xpathCallback) + + def _contextUnregisterExtensionFunction(self, ns_uri_utf, name_utf): + if ns_uri_utf is not None: + 
xpath.xmlXPathRegisterFuncNS(self._xpathCtxt, + name_utf, ns_uri_utf, NULL) + else: + xpath.xmlXPathRegisterFunc(self._xpathCtxt, name_utf, NULL) + + +cdef class XPathEvaluatorBase: + cdef XPathContext _context + + def __init__(self, namespaces, extensions, variables=None): + self._context = XPathContext(namespaces, extensions, variables) + + cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, _Document doc): + _exc_info = self._context._exc_info + if _exc_info is not None: + type, value, traceback = _exc_info + raise type, value, traceback + if xpathObj is NULL: + raise XPathSyntaxError, "Error in xpath expression." + try: + result = _unwrapXPathObject(xpathObj, doc) + except XPathResultError: + xpath.xmlXPathFreeObject(xpathObj) + raise + xpath.xmlXPathFreeObject(xpathObj) + return result + + +cdef class XPathDocumentEvaluator(XPathEvaluatorBase): """Create an XPath evaluator for a document. """ cdef xpath.xmlXPathContext* _c_ctxt - cdef _NodeBase _root_node cdef _Document _doc - cdef object _extension_functions - cdef object _exc_info - cdef object _namespaces - cdef object _extensions - cdef object _temp_elements - cdef object _temp_docs - def __init__(self, input, namespaces=None, extensions=None): + def __init__(self, etree, namespaces=None, extensions=None): cdef xpath.xmlXPathContext* xpathCtxt cdef int ns_register_status - cdef _Document input_doc - cdef _NodeBase root_node - - input_doc = _documentOrRaise(input) - root_node = _rootNodeOf(input) + cdef _Document doc - xpathCtxt = xpath.xmlXPathNewContext(input_doc._c_doc) + if isinstance(etree, _Document): + doc = <_Document>etree # for internal use only! + else: + doc = (<_ElementTree>etree)._doc + + xpathCtxt = xpath.xmlXPathNewContext(doc._c_doc) if xpathCtxt is NULL: # XXX what triggers this exception? 
raise XPathContextError, "Unable to create new XPath context" - self._doc = input_doc - self._root_node = root_node + self._doc = doc self._c_ctxt = xpathCtxt - self._c_ctxt.userData = self - self._namespaces = namespaces - self._extensions = extensions - if namespaces is not None: - self.registerNamespaces(namespaces) - self._extension_functions = {} - if extensions is not None: - for extension in extensions: - self._extension_functions.update(extension) - for (ns_uri, name), function in extension.items(): - if ns_uri is not None: - xpath.xmlXPathRegisterFuncNS( - xpathCtxt, name, ns_uri, _xpathCallback) - else: - xpath.xmlXPathRegisterFunc( - xpathCtxt, name, _xpathCallback) - + XPathEvaluatorBase.__init__(self, namespaces, extensions) + def __dealloc__(self): xpath.xmlXPathFreeContext(self._c_ctxt) def registerNamespace(self, prefix, uri): """Register a namespace with the XPath context. """ - s_prefix = _utf8(prefix) - s_uri = _utf8(uri) - # XXX should check be done to verify namespace doesn't already exist? - ns_register_status = xpath.xmlXPathRegisterNs( - self._c_ctxt, s_prefix, s_uri) - if ns_register_status != 0: - # XXX doesn't seem to be possible to trigger this - # from Python - raise XPathNamespaceError, ( - "Unable to register namespaces with prefix " - "%s and uri %s" % (prefix, uri)) + self._context.addNamespace(prefix, uri) def registerNamespaces(self, namespaces): """Register a prefix -> uri dict. @@ -216,91 +515,140 @@ for prefix, uri in namespaces.items(): self.registerNamespace(prefix, uri) - def evaluate(self, path): - return self._evaluate(path, self._root_node._c_node) + def evaluate(self, _path, **_variables): + """Evaluate an XPath expression on the document. Variables + may be given as keyword arguments. 
Note that namespaces are + currently not supported for variables.""" + return self._evaluate(_path, NULL, _variables) - cdef object _evaluate(self, path, xmlNode* c_ctxt_node): - cdef xpath.xmlXPathObject* xpathObj + cdef object _evaluate(self, path, xmlNode* c_ctxt_node, variable_dict): + cdef xpath.xmlXPathContext* xpathCtxt + cdef xpath.xmlXPathObject* xpathObj cdef xmlNode* c_node + xpathCtxt = self._c_ctxt # if element context is requested; unfortunately need to modify ctxt - self._c_ctxt.node = c_ctxt_node + xpathCtxt.node = c_ctxt_node + + self._context._release_temp_refs() + self._context.register_context(xpathCtxt, self._doc) + self._context.registerVariables(variable_dict) path = _utf8(path) - self._exc_info = None - self._release() - xpathObj = xpath.xmlXPathEvalExpression(path, self._c_ctxt) - if self._exc_info is not None: - type, value, traceback = self._exc_info - self._exc_info = None - raise type, value, traceback - if xpathObj is NULL: - raise XPathSyntaxError, "Error in xpath expression." - try: - result = _unwrapXPathObject(xpathObj, self._doc) - except XPathResultError: - #self._release() - xpath.xmlXPathFreeObject(xpathObj) - raise - xpath.xmlXPathFreeObject(xpathObj) - # release temporarily held python stuff - #self._release() - return result - + xpathObj = xpath.xmlXPathEvalExpression(path, xpathCtxt) + self._context.unregister_context() + + return self._handle_result(xpathObj, self._doc) + #def clone(self): # # XXX pretty expensive so calling this from callback is probably # # not desirable # return XPathEvaluator(self._doc, self._namespaces, self._extensions) - def _release(self): - self._temp_elements = {} - self._temp_docs = {} - - def _hold(self, obj): - """A way to temporarily hold references to nodes in the evaluator. - - This is needed because otherwise nodes created in XPath extension - functions would be reference counted too soon, during the - XPath evaluation. 
- """ - cdef _NodeBase element - if isinstance(obj, _NodeBase): - obj = [obj] - if not type(obj) in (type([]), type(())): - return - for o in obj: - if isinstance(o, _NodeBase): - element = <_NodeBase>o - #print "Holding element:", element._c_node - self._temp_elements[id(element)] = element - #print "Holding document:", element._doc._c_doc - self._temp_docs[id(element._doc)] = element._doc - cdef class XPathElementEvaluator(XPathDocumentEvaluator): """Create an XPath evaluator for an element. """ cdef _Element _element - + def __init__(self, _Element element, namespaces=None, extensions=None): XPathDocumentEvaluator.__init__( self, element._doc, namespaces, extensions) self._element = element - - def evaluate(self, path): - return self._evaluate(path, self._element._c_node) -def XPathEvaluator(doc_or_element, namespaces=None, extensions=None): - if isinstance(doc_or_element, _ElementTree) or isinstance(doc_or_element, _Document): - return XPathDocumentEvaluator(doc_or_element, namespaces, extensions) + def evaluate(self, _path, **_variables): + """Evaluate an XPath expression on the element. Variables may + be given as keyword arguments. 
Note that namespaces are + currently not supported for variables.""" + return self._evaluate(_path, self._element._c_node, _variables) + +def XPathEvaluator(etree_or_element, namespaces=None, extensions=None): + if isinstance(etree_or_element, _ElementTree): + return XPathDocumentEvaluator(etree_or_element, namespaces, extensions) else: - return XPathElementEvaluator(doc_or_element, namespaces, extensions) - + return XPathElementEvaluator(etree_or_element, namespaces, extensions) + def Extension(module, function_mapping, ns_uri=None): - result = {} + functions = [] for function_name, xpath_name in function_mapping.items(): - result[(ns_uri, xpath_name)] = getattr(module, function_name) - return result + functions[xpath_name] = getattr(module, function_name) + return {ns_uri : functions} +cdef class XPath(XPathEvaluatorBase): + cdef xpath.xmlXPathCompExpr* _xpath + cdef object _prefix_map + cdef readonly object path + + def __init__(self, path, namespaces=None, extensions=None): + XPathEvaluatorBase.__init__(self, namespaces, extensions, None) + self.path = path + path = _utf8(path) + self._xpath = xpath.xmlXPathCompile(path) + if self._xpath is NULL: + raise XPathSyntaxError, "Error in xpath expression." 
+ + def __call__(self, _etree_or_element, **_variables): + cdef xpath.xmlXPathContext* xpathCtxt + cdef xpath.xmlXPathObject* xpathObj + cdef _Document document + cdef _NodeBase element + cdef XPathContext context + + document = _documentOrRaise(_etree_or_element) + element = _rootNodeOf(_etree_or_element) + + xpathCtxt = xpath.xmlXPathNewContext(document._c_doc) + xpathCtxt.node = element._c_node + + context = self._context + context._release_temp_refs() + context.register_context(xpathCtxt, document) + context.registerVariables(_variables) + + xpathObj = xpath.xmlXPathCompiledEval(self._xpath, xpathCtxt) + + context.unregister_context() + + xpath.xmlXPathFreeContext(xpathCtxt) + + return self._handle_result(xpathObj, document) + + def evaluate(self, _tree, **_variables): + return self(_tree, **_variables) + + def __dealloc__(self): + if self._xpath is not NULL: + xpath.xmlXPathFreeCompExpr(self._xpath) + +cdef object _replace_strings +cdef object _find_namespaces +_replace_strings = re.compile('("[^"]*")|(\'[^\']*\')').sub +_find_namespaces = re.compile('({[^}]+})').findall + +cdef class ETXPath(XPath): + """Special XPath class that supports the ElementTree {uri} notation for + namespaces.""" + def __init__(self, path, extensions=None): + path_utf, namespaces = self._nsextract_path(_utf8(path)) + XPath.__init__(self, funicode(path_utf), namespaces, extensions) + + cdef _nsextract_path(self, path_utf): + # replace {namespaces} by new prefixes + cdef int i + namespaces = {} + stripped_path = _replace_strings('', path_utf) # remove string literals + namespace_defs = [] + i = 1 + for namespace_def in _find_namespaces(stripped_path): + if namespace_def not in namespace_defs: + prefix = python.PyString_FromFormat("xpp%02d", i) + i = i+1 + python.PyList_Append(namespace_defs, namespace_def) + namespace = namespace_def[1:-1] # remove '{}' + python.PyDict_SetItem(namespaces, prefix, namespace) + prefix_str = prefix + ':' + # FIXME: this also replaces {namespaces} within 
strings! + path_utf = path_utf.replace(namespace_def, prefix_str) + return path_utf, namespaces ################################################################################ # helper functions @@ -386,10 +734,10 @@ cdef xpath.xmlXPathContext* rctxt cdef _Document doc cdef xpath.xmlXPathObject* obj - cdef XPathDocumentEvaluator evaluator - + cdef BaseContext extensions + rctxt = ctxt.context - + # get information on what function is called name = rctxt.function if rctxt.functionURI is not NULL: @@ -398,29 +746,29 @@ uri = None # get our evaluator - evaluator = (rctxt.userData) + extensions = (rctxt.userData) + + # lookup up the extension function in the context + f = extensions.find_extension(uri, name) - # lookup up the extension function in the evaluator - f = evaluator._extension_functions[(uri, name)] - args = [] - doc = evaluator._doc + doc = extensions._doc for i from 0 <= i < nargs: args.append(_unwrapXPathObject(xpath.valuePop(ctxt), doc)) args.reverse() try: # call the function - res = f(evaluator, *args) + res = f(None, *args) # hold python objects temporarily so that they won't get deallocated # during processing - evaluator._hold(res) + extensions._hold(res) # now wrap for XPath consumption obj = _wrapXPathObject(res) except: xpath.xmlXPathErr( ctxt, xmlerror.XML_XPATH_EXPR_ERROR - xmlerror.XML_XPATH_EXPRESSION_OK) - evaluator._exc_info = sys.exc_info() + extensions._exc_info = sys.exc_info() return xpath.valuePush(ctxt, obj) From scoder at codespeak.net Mon Mar 20 14:18:53 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 14:18:55 2006 Subject: [Lxml-checkins] r24586 - lxml/trunk/doc Message-ID: <20060320131853.BB9C4100DB@code0.codespeak.net> Author: scoder Date: Mon Mar 20 14:18:47 2006 New Revision: 24586 Modified: lxml/trunk/doc/namespace_extensions.txt Log: doc updates in doc/namespace_extensions.txt, refer to extensions.txt Modified: lxml/trunk/doc/namespace_extensions.txt 
============================================================================== --- lxml/trunk/doc/namespace_extensions.txt (original) +++ lxml/trunk/doc/namespace_extensions.txt Mon Mar 20 14:18:47 2006 @@ -2,6 +2,8 @@ Implementing namespaces with the Namespace class ================================================ +(Also see extensions.txt) + Imagine, you have a namespace called 'http://hui.de/honk' and have to treat all of its elements in a specific way, say, to find out if they are really honking. You could provide a function called 'is_honking' @@ -114,7 +116,7 @@ True >>> print honk_element[0].honking Traceback (most recent call last): - File "/var/tmp/python-2.4.2-root/usr/lib/python2.4/doctest.py", line 1243, in __run + File "/usr/lib/python2.4/doctest.py", line 1243, in __run compileflags, 1) in test.globs File "", line 1, in ? print honk_element[0].honking @@ -154,29 +156,8 @@ HONK >>> print honk_element[0].honking Traceback (most recent call last): - File "/var/tmp/python-2.4.2-root/usr/lib/python2.4/doctest.py", line 1243, in __run + File "/usr/lib/python2.4/doctest.py", line 1243, in __run compileflags, 1) in test.globs File "", line 1, in ? print honk_element[0].honking AttributeError: 'HonkNSElement' object has no attribute 'honking' - - -XPath extension functions -========================= - -The same API is used for extension functions in XPath. If you -associate a name in the Namespace with a callable object (that is not -a subclass of ElementBase), it will be used as extension function in -XPath evaluations. - ->>> from lxml.etree import Namespace ->>> def tag_of(context, elem): -... 
return elem[0].tag ->>> namespace = Namespace('myfunctions') ->>> namespace['tagname'] = tag_of - -You can then use your new function in XPath expressions: - ->>> element = XML('') ->>> [el.tag for el in element.xpath('f:tagname(//honk)', {'f' : 'myfunctions'})] -['honk'] From scoder at codespeak.net Mon Mar 20 14:20:01 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 14:20:03 2006 Subject: [Lxml-checkins] r24587 - lxml/trunk Message-ID: <20060320132001.EFE74100DB@code0.codespeak.net> Author: scoder Date: Mon Mar 20 14:19:51 2006 New Revision: 24587 Modified: lxml/trunk/bench.py Log: benchmark for XPath evaluation (merge from scoder2) Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Mon Mar 20 14:19:51 2006 @@ -36,6 +36,13 @@ return function return set_value +def onlylib(*libs): + def set_libs(function): + if libs: + function.LIBS = libs + return function + return set_libs + class BenchMarkBase(object): atoz = string.ascii_lowercase @@ -187,6 +194,9 @@ if not name.startswith('bench_'): continue method = getattr(self, name) + if hasattr(method, 'LIBS') and self.lib_name not in method.LIBS: + benchmarks.append((name, None, (), 0, 0)) + continue if method.__doc__: tree_sets = method.__doc__.split() else: @@ -366,6 +376,18 @@ def bench_getiterator_tag_all(self, root): list(root.getiterator("{b}a")) + @onlylib('lxe') + def bench_xpath_class(self, root): + xpath = self.etree.XPath("./*[0]") + for child in root: + xpath(child) + + @onlylib('lxe') + def bench_xpath_element(self, root): + for child in root: + xpath = self.etree.XPathElementEvaluator(child) + xpath.evaluate("./*[0]") + ############################################################ # Main program ############################################################ @@ -474,9 +496,14 @@ for bench_calls in izip(*benchmarks): for lib, (bench, benchmark_setup) in 
enumerate(izip(benchmark_suites, bench_calls)): - bench_name = benchmark_setup[0] + bench_name, method_call = benchmark_setup[:2] tree_set_name = build_treeset_name(*benchmark_setup[-3:]) - print "%-3s: %-23s (%-10s)" % (bench.lib_name, bench_name[6:29], tree_set_name), + print "%-3s: %-23s" % (bench.lib_name, bench_name[6:29]), + if method_call is None: + print "skipped" + continue + + print "(%-10s)" % tree_set_name, sys.stdout.flush() result = run_bench(bench, *benchmark_setup) From faassen at codespeak.net Mon Mar 20 14:30:13 2006 From: faassen at codespeak.net (faassen@codespeak.net) Date: Mon Mar 20 14:30:14 2006 Subject: [Lxml-checkins] r24589 - lxml/trunk/doc Message-ID: <20060320133013.29EF0100EB@code0.codespeak.net> Author: faassen Date: Mon Mar 20 14:30:12 2006 New Revision: 24589 Modified: lxml/trunk/doc/xpath.txt Log: Some text tweaks. Modified: lxml/trunk/doc/xpath.txt ============================================================================== --- lxml/trunk/doc/xpath.txt (original) +++ lxml/trunk/doc/xpath.txt Mon Mar 20 14:30:12 2006 @@ -1,9 +1,9 @@ XPath extension functions ========================= -This document describes the OLD DEPRICATED way of dealing with XPath extension -functions. For updated documentation, please see the new Namespace API -described in nsclasses.txt and extensions.txt. +Note: this document describes an API that is now DEPRECATED. If you +want to configure XPath extension functions, please read +namespace_extensions.txt and extensions.txt. Extension functions are defined in Python. In order to use such a function, it must have a name by which it can be called in XPath, and an optional namespace From scoder at codespeak.net Mon Mar 20 14:49:36 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 14:49:38 2006 Subject: [Lxml-checkins] r24595 - in lxml/trunk/src/lxml: . 
tests Message-ID: <20060320134936.B2C12100D3@code0.codespeak.net> Author: scoder Date: Mon Mar 20 14:49:22 2006 New Revision: 24595 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/relaxng.pxi lxml/trunk/src/lxml/tests/test_etree.py lxml/trunk/src/lxml/tests/test_relaxng.py lxml/trunk/src/lxml/xmlerror.pxi lxml/trunk/src/lxml/xmlschema.pxi Log: merged in changes from error-reporting branch Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Mon Mar 20 14:49:22 2006 @@ -24,21 +24,13 @@ # what to do with libxml2/libxslt error messages? # 0 : drop -# 1 : provide log via exception property -# 2 : write to stderr +# 1 : use log cdef int __DEBUG __DEBUG = 1 # maximum number of lines in the libxml2/xslt log if __DEBUG == 1 cdef int __MAX_LOG_SIZE -__MAX_LOG_SIZE = 20 - -if __DEBUG == 0: - _shutUpLibxmlErrors() - _shutUpLibxsltErrors() -elif __DEBUG == 1: - _logLibxmlErrors() - _logLibxsltErrors() +__MAX_LOG_SIZE = 100 # make the compiled-in debug state publicly available DEBUG = __DEBUG @@ -49,10 +41,12 @@ # module level superclass for all exceptions class LxmlError(Error): - error_log = property(__build_error_log_tuple) + def __init__(self, *args): + Error.__init__(self, *args) + self.error_log = __copyGlobalErrorLog() # superclass for all syntax errors -class LxmlSyntaxError(SyntaxError, LxmlError): +class LxmlSyntaxError(LxmlError, SyntaxError): pass class XIncludeError(LxmlError): Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Mon Mar 20 14:49:22 2006 @@ -27,6 +27,7 @@ cdef int PyDict_DelItem(object d, object key) cdef int PyDict_Clear(object d) cdef object PyList_AsTuple(object o) + cdef object PyObject_GetIter(object o) cdef int 
PyNumber_Check(object instance) cdef int PyBool_Check(object instance) Modified: lxml/trunk/src/lxml/relaxng.pxi ============================================================================== --- lxml/trunk/src/lxml/relaxng.pxi (original) +++ lxml/trunk/src/lxml/relaxng.pxi Mon Mar 20 14:49:22 2006 @@ -18,6 +18,7 @@ Can also load from filesystem directly given file object or filename. """ cdef relaxng.xmlRelaxNG* _c_schema + cdef _ErrorLog _error_log def __init__(self, _ElementTree etree=None, file=None): cdef relaxng.xmlRelaxNGParserCtxt* parser_ctxt @@ -38,6 +39,8 @@ if self._c_schema is NULL: raise RelaxNGParseError, "Document is not valid Relax NG" relaxng.xmlRelaxNGFreeParserCtxt(parser_ctxt) + + self._error_log = _ErrorLog() def __dealloc__(self): relaxng.xmlRelaxNGFree(self._c_schema) @@ -49,6 +52,7 @@ cdef xmlDoc* c_doc cdef relaxng.xmlRelaxNGValidCtxt* valid_ctxt cdef int ret + self._error_log.connect() valid_ctxt = relaxng.xmlRelaxNGNewValidCtxt(self._c_schema) c_doc = _fakeRootDoc(etree._doc._c_doc, etree._context_node._c_node) @@ -56,10 +60,11 @@ _destroyFakeDoc(etree._doc._c_doc, c_doc) relaxng.xmlRelaxNGFreeValidCtxt(valid_ctxt) + self._error_log.disconnect() if ret == -1: raise RelaxNGValidateError, "Internal error in Relax NG validation" return ret == 0 property error_log: def __get__(self): - return __build_error_log_tuple(self) + return self._error_log.copy() Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Mon Mar 20 14:49:22 2006 @@ -29,14 +29,19 @@ parse = self.etree.parse # from StringIO f = StringIO('') - self.etree.clear_error_log() + self.etree.clearErrorLog() try: parse(f) - log = "" + logs = None except SyntaxError, e: - log = '\n'.join(e.error_log) + logs = e.error_log f.close() - self.assert_('mismatch' in log) + self.assert_([ log for log in logs + if 
'mismatch' in log.message ]) + self.assert_([ log for log in logs + if 'PARSER' in log.domain_name]) + self.assert_([ log for log in logs + if 'TAG_NAME_MISMATCH' in log.type_name ]) def test_parse_error_from_file(self): parse = self.etree.parse Modified: lxml/trunk/src/lxml/tests/test_relaxng.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_relaxng.py (original) +++ lxml/trunk/src/lxml/tests/test_relaxng.py Mon Mar 20 14:49:22 2006 @@ -25,6 +25,25 @@ self.assert_(schema.validate(tree_valid)) self.assert_(not schema.validate(tree_invalid)) + def test_relaxng_error(self): + tree_invalid = self.parse('') + schema = self.parse('''\ + + + + + + + +''') + schema = etree.RelaxNG(schema) + self.assert_(not schema.validate(tree_invalid)) + errors = schema.error_log + self.assert_([ log for log in errors + if log.level_name == "ERROR" ]) + self.assert_([ log for log in errors + if "not expect" in log.message ]) + def test_relaxng_invalid_schema(self): schema = self.parse('''\ Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Mon Mar 20 14:49:22 2006 @@ -1,61 +1,976 @@ ################################################################################ # DEBUG setup -# list to collect error output messages from libxml2/libxslt -cdef object __ERROR_LOG -__ERROR_LOG = [] - -def __build_error_log_tuple(_): - return python.PyList_AsTuple(__ERROR_LOG) - -def clear_error_log(): - del __ERROR_LOG[:] - -cdef void _logLines(char* s): - cdef char* pos - cdef int l - while s is not NULL and s[0] != c'\0': - pos = tree.xmlStrchr(s, c'\n') - if pos is NULL: - py_string = python.PyString_FromString(s) - s = NULL +# module level API functions + +def clearErrorLog(): + """Clear the global error log. 
+ Note that this log is already bounded to a fixed size.""" + __GLOBAL_ERROR_LOG.clear() + +def initThreadLogging(): + "Setup logging for the current thread." + _logLibxmlErrors() + _logLibxsltErrors() + + +# Logging classes + +cdef class _LogEntry: + cdef readonly object domain + cdef readonly object type + cdef readonly object line + cdef readonly object level + cdef readonly object message + cdef readonly object filename + cdef _set(self, xmlerror.xmlError* error): + self.domain = error.domain + self.type = error.code + self.level = error.level + self.line = error.line + self.message = python.PyString_FromString(error.message) + if error.file is NULL: + self.filename = None + else: + self.filename = python.PyString_FromString(error.file) + + def __repr__(self): + if self._filename: + return "%s/%d[%s]%s/%s: %s" % ( + self.filename, self.line, self.level_name, + self.domain_name, self.type_name, self.message) + else: + return "[%s]%s/%s: %s" % ( + self.level_name, self.domain_name, + self.type_name, self.message) + + property domain_name: + def __get__(self): + return LxmlErrorDomains._names[self.domain] + + property type_name: + def __get__(self): + return LxmlErrorTypes._names[self.type] + + property level_name: + def __get__(self): + return LxmlErrorLevels._names[self.level] + +cdef class _BaseErrorLog: + "Immutable base version of an error log." 
+ cdef object _entries + def __init__(self, entries): + self._entries = entries + + def copy(self): + return _BaseErrorLog(self._entries) + + def __iter__(self): + return python.PyObject_GetIter(self._entries) + + def __repr__(self): + return '\n'.join(map(repr, self._entries)) + + def filter_domains(self, domains): + cdef _LogEntry entry + filtered = [] + if not python.PySequence_Check(domains): + domains = (domains,) + for entry in self._entries: + if entry.domain in domains: + python.PyList_Append(filtered, entry) + return _BaseErrorLog(filtered) + + def filter_types(self, types): + cdef _LogEntry entry + if not python.PySequence_Check(types): + types = (types,) + for entry in self._entries: + if entry.type in types: + python.PyList_Append(filtered, entry) + return _BaseErrorLog(filtered) + + def filter_levels(self, levels): + cdef _LogEntry entry + if not python.PySequence_Check(levels): + levels = (levels,) + for entry in self._entries: + if entry.level in levels: + python.PyList_Append(filtered, entry) + return _BaseErrorLog(filtered) + + def filter_from_level(self, level): + cdef _LogEntry entry + for entry in self._entries: + if entry.level >= level: + python.PyList_Append(filtered, entry) + return _BaseErrorLog(filtered) + +cdef class _ErrorLog(_BaseErrorLog): + def __init__(self): + _BaseErrorLog.__init__(self, []) + + def clear(self): + del self._entries[:] + + def copy(self): + return _BaseErrorLog(self._entries[:]) + + def __iter__(self): + return python.PyObject_GetIter(self._entries[:]) + + cdef void connect(self): + del self._entries[:] + xmlerror.xmlSetStructuredErrorFunc(self, _localReceiveError) + + cdef void disconnect(self): + xmlerror.xmlSetStructuredErrorFunc(NULL, _globalReceiveError) + + cdef void _receive(self, xmlerror.xmlError* error): + cdef _LogEntry entry + entry = _LogEntry() + entry._set(error) + if __GLOBAL_ERROR_LOG is not self: + __GLOBAL_ERROR_LOG.receive(entry) + self.receive(entry) + + def receive(self, entry): + 
python.PyList_Append(self._entries, entry) + +cdef class _DomainErrorLog(_ErrorLog): + def receive(self, entry): + if entry.domain in self._accepted_domains: + _ErrorLog.receive(self, entry) + def __init__(self, domains): + _ErrorLog.__init__(self) + self._accepted_domains = tuple(domains) + +cdef class _RotatingErrorLog(_ErrorLog): + cdef int _max_len + def __init__(self, max_len): + _ErrorLog.__init__(self) + self._max_len = max_len + def receive(self, entry): + entries = self._entries + if python.PyList_GET_SIZE(entries) > self._max_len: + del entries[0] + python.PyList_Append(entries, entry) + +cdef class PyErrorLog(_ErrorLog): + cdef object _log + cdef object _level_map + cdef object _varsOf + def __init__(self, logger_name=None): + _ErrorLog.__init__(self) + import logging + self._level_map = { + LxmlErrorLevels.WARNING : logging.WARNING, + LxmlErrorLevels.ERROR : logging.ERROR, + LxmlErrorLevels.FATAL : logging.CRITICAL + } + self._varsOf = vars + if logger_name: + logger = logging.getLogger(name) else: - l = pos - s - py_string = python.PyString_FromStringAndSize(s, l) - s = pos + 1 - python.PyList_Append(__ERROR_LOG, py_string) - - l = python.PyList_GET_SIZE(__ERROR_LOG) - __MAX_LOG_SIZE - if l > 0: - del __ERROR_LOG[:l] - -cdef void logStructuredErrorFunc(void* userData, - xmlerror.xmlError* error): - _logLines(error.message) + logger = logging.getLogger() + self._log = logger.log -cdef void logGenericErrorFunc(void* ctxt, char* msg, ...): - _logLines(msg) + def copy(self): + return self + + def receive(self, entry): + py_level = self._level_map[entry.level] + self._log( + py_level, + "%(asctime)s %(levelname)s %(domain_name)s %(message)s", + self._varsOf(entry) + ) + +# global list to collect error output messages from libxml2/libxslt +cdef _RotatingErrorLog __GLOBAL_ERROR_LOG +__GLOBAL_ERROR_LOG = _RotatingErrorLog(__MAX_LOG_SIZE) + +def __copyGlobalErrorLog(): + "Helper function for properties in exceptions." 
+ return __GLOBAL_ERROR_LOG.copy() + +# local log function: forward error to logger object +cdef void _localReceiveError(void* c_log_handler, xmlerror.xmlError* error): + cdef _ErrorLog log_handler + if __DEBUG != 0: + log_handler = <_ErrorLog>c_log_handler + log_handler._receive(error) + +# global log functions: overridden by local functions +cdef void _globalReceiveError(void* userData, xmlerror.xmlError* error): + if __DEBUG != 0: + __GLOBAL_ERROR_LOG._receive(error) + +# dummy function: no debug output at all +cdef void _nullGenericErrorFunc(void* ctxt, char* msg, ...): + pass +# setup for global log: cdef void _logLibxmlErrors(): - xmlerror.xmlSetGenericErrorFunc(NULL, nullGenericErrorFunc) - xmlerror.xmlSetStructuredErrorFunc(NULL, logStructuredErrorFunc) + xmlerror.xmlSetGenericErrorFunc(NULL, _nullGenericErrorFunc) + xmlerror.xmlSetStructuredErrorFunc(NULL, _globalReceiveError) cdef void _logLibxsltErrors(): - xslt.xsltSetGenericErrorFunc(NULL, logGenericErrorFunc) + xslt.xsltSetGenericErrorFunc(NULL, _nullGenericErrorFunc) # xslt.xsltSetTransformErrorFunc -# ugly global shutting up of all errors, but seems to work.. 
-cdef void nullGenericErrorFunc(void* ctxt, char* msg, ...): - pass - -cdef void nullStructuredErrorFunc(void* userData, - xmlerror.xmlError* error): - pass +# init global logging +initThreadLogging() -cdef void _shutUpLibxmlErrors(): - xmlerror.xmlSetGenericErrorFunc(NULL, nullGenericErrorFunc) - xmlerror.xmlSetStructuredErrorFunc(NULL, nullStructuredErrorFunc) +################################################################################ +## CONSTANTS FROM "xmlerror.pxd" +################################################################################ -cdef void _shutUpLibxsltErrors(): - xslt.xsltSetGenericErrorFunc(NULL, nullGenericErrorFunc) - # xslt.xsltSetTransformErrorFunc +class LxmlErrorLevels: + _names = {} + NONE = 0 + WARNING = 1 # A simple warning + ERROR = 2 # A recoverable error + FATAL = 3 # A fatal error + +class LxmlErrorDomains: + _names = {} + NONE = 0 + PARSER = 1 # The XML parser + TREE = 2 # The tree module + NAMESPACE = 3 # The XML Namespace module + DTD = 4 # The XML DTD validation with parser contex + HTML = 5 # The HTML parser + MEMORY = 6 # The memory allocator + OUTPUT = 7 # The serialization code + IO = 8 # The Input/Output stack + FTP = 9 # The FTP module + HTTP = 10 # The FTP module + XINCLUDE = 11 # The XInclude processing + XPATH = 12 # The XPath module + XPOINTER = 13 # The XPointer module + REGEXP = 14 # The regular expressions module + DATATYPE = 15 # The W3C XML Schemas Datatype module + SCHEMASP = 16 # The W3C XML Schemas parser module + SCHEMASV = 17 # The W3C XML Schemas validation module + RELAXNGP = 18 # The Relax-NG parser module + RELAXNGV = 19 # The Relax-NG validator module + CATALOG = 20 # The Catalog module + C14N = 21 # The Canonicalization module + XSLT = 22 # The XSLT engine from libxslt + VALID = 23 # The XML DTD validation with valid context + CHECK = 24 # The error checking module + WRITER = 25 # The xmlwriter module + MODULE = 26 # The dynamically loaded module modu + +class LxmlErrorTypes: + _names = {} + 
ERR_OK = 0 + ERR_INTERNAL_ERROR = 1 + ERR_NO_MEMORY = 2 + ERR_DOCUMENT_START = 3 # 3 + ERR_DOCUMENT_EMPTY = 4 # 4 + ERR_DOCUMENT_END = 5 # 5 + ERR_INVALID_HEX_CHARREF = 6 # 6 + ERR_INVALID_DEC_CHARREF = 7 # 7 + ERR_INVALID_CHARREF = 8 # 8 + ERR_INVALID_CHAR = 9 # 9 + ERR_CHARREF_AT_EOF = 10 # 10 + ERR_CHARREF_IN_PROLOG = 11 # 11 + ERR_CHARREF_IN_EPILOG = 12 # 12 + ERR_CHARREF_IN_DTD = 13 # 13 + ERR_ENTITYREF_AT_EOF = 14 # 14 + ERR_ENTITYREF_IN_PROLOG = 15 # 15 + ERR_ENTITYREF_IN_EPILOG = 16 # 16 + ERR_ENTITYREF_IN_DTD = 17 # 17 + ERR_PEREF_AT_EOF = 18 # 18 + ERR_PEREF_IN_PROLOG = 19 # 19 + ERR_PEREF_IN_EPILOG = 20 # 20 + ERR_PEREF_IN_INT_SUBSET = 21 # 21 + ERR_ENTITYREF_NO_NAME = 22 # 22 + ERR_ENTITYREF_SEMICOL_MISSING = 23 # 23 + ERR_PEREF_NO_NAME = 24 # 24 + ERR_PEREF_SEMICOL_MISSING = 25 # 25 + ERR_UNDECLARED_ENTITY = 26 # 26 + WAR_UNDECLARED_ENTITY = 27 # 27 + ERR_UNPARSED_ENTITY = 28 # 28 + ERR_ENTITY_IS_EXTERNAL = 29 # 29 + ERR_ENTITY_IS_PARAMETER = 30 # 30 + ERR_UNKNOWN_ENCODING = 31 # 31 + ERR_UNSUPPORTED_ENCODING = 32 # 32 + ERR_STRING_NOT_STARTED = 33 # 33 + ERR_STRING_NOT_CLOSED = 34 # 34 + ERR_NS_DECL_ERROR = 35 # 35 + ERR_ENTITY_NOT_STARTED = 36 # 36 + ERR_ENTITY_NOT_FINISHED = 37 # 37 + ERR_LT_IN_ATTRIBUTE = 38 # 38 + ERR_ATTRIBUTE_NOT_STARTED = 39 # 39 + ERR_ATTRIBUTE_NOT_FINISHED = 40 # 40 + ERR_ATTRIBUTE_WITHOUT_VALUE = 41 # 41 + ERR_ATTRIBUTE_REDEFINED = 42 # 42 + ERR_LITERAL_NOT_STARTED = 43 # 43 + ERR_LITERAL_NOT_FINISHED = 44 # 44 + ERR_COMMENT_NOT_FINISHED = 45 # 45 + ERR_PI_NOT_STARTED = 46 # 46 + ERR_PI_NOT_FINISHED = 47 # 47 + ERR_NOTATION_NOT_STARTED = 48 # 48 + ERR_NOTATION_NOT_FINISHED = 49 # 49 + ERR_ATTLIST_NOT_STARTED = 50 # 50 + ERR_ATTLIST_NOT_FINISHED = 51 # 51 + ERR_MIXED_NOT_STARTED = 52 # 52 + ERR_MIXED_NOT_FINISHED = 53 # 53 + ERR_ELEMCONTENT_NOT_STARTED = 54 # 54 + ERR_ELEMCONTENT_NOT_FINISHED = 55 # 55 + ERR_XMLDECL_NOT_STARTED = 56 # 56 + ERR_XMLDECL_NOT_FINISHED = 57 # 57 + ERR_CONDSEC_NOT_STARTED = 58 # 58 + 
ERR_CONDSEC_NOT_FINISHED = 59 # 59 + ERR_EXT_SUBSET_NOT_FINISHED = 60 # 60 + ERR_DOCTYPE_NOT_FINISHED = 61 # 61 + ERR_MISPLACED_CDATA_END = 62 # 62 + ERR_CDATA_NOT_FINISHED = 63 # 63 + ERR_RESERVED_XML_NAME = 64 # 64 + ERR_SPACE_REQUIRED = 65 # 65 + ERR_SEPARATOR_REQUIRED = 66 # 66 + ERR_NMTOKEN_REQUIRED = 67 # 67 + ERR_NAME_REQUIRED = 68 # 68 + ERR_PCDATA_REQUIRED = 69 # 69 + ERR_URI_REQUIRED = 70 # 70 + ERR_PUBID_REQUIRED = 71 # 71 + ERR_LT_REQUIRED = 72 # 72 + ERR_GT_REQUIRED = 73 # 73 + ERR_LTSLASH_REQUIRED = 74 # 74 + ERR_EQUAL_REQUIRED = 75 # 75 + ERR_TAG_NAME_MISMATCH = 76 # 76 + ERR_TAG_NOT_FINISHED = 77 # 77 + ERR_STANDALONE_VALUE = 78 # 78 + ERR_ENCODING_NAME = 79 # 79 + ERR_HYPHEN_IN_COMMENT = 80 # 80 + ERR_INVALID_ENCODING = 81 # 81 + ERR_EXT_ENTITY_STANDALONE = 82 # 82 + ERR_CONDSEC_INVALID = 83 # 83 + ERR_VALUE_REQUIRED = 84 # 84 + ERR_NOT_WELL_BALANCED = 85 # 85 + ERR_EXTRA_CONTENT = 86 # 86 + ERR_ENTITY_CHAR_ERROR = 87 # 87 + ERR_ENTITY_PE_INTERNAL = 88 # 88 + ERR_ENTITY_LOOP = 89 # 89 + ERR_ENTITY_BOUNDARY = 90 # 90 + ERR_INVALID_URI = 91 # 91 + ERR_URI_FRAGMENT = 92 # 92 + WAR_CATALOG_PI = 93 # 93 + ERR_NO_DTD = 94 # 94 + ERR_CONDSEC_INVALID_KEYWORD = 95 # 95 + ERR_VERSION_MISSING = 96 # 96 + WAR_UNKNOWN_VERSION = 97 # 97 + WAR_LANG_VALUE = 98 # 98 + WAR_NS_URI = 99 # 99 + WAR_NS_URI_RELATIVE = 100 # 100 + ERR_MISSING_ENCODING = 101 # 101 + NS_ERR_XML_NAMESPACE = 200 + NS_ERR_UNDEFINED_NAMESPACE = 201 # 201 + NS_ERR_QNAME = 202 # 202 + NS_ERR_ATTRIBUTE_REDEFINED = 203 # 203 + DTD_ATTRIBUTE_DEFAULT = 500 + DTD_ATTRIBUTE_REDEFINED = 501 # 501 + DTD_ATTRIBUTE_VALUE = 502 # 502 + DTD_CONTENT_ERROR = 503 # 503 + DTD_CONTENT_MODEL = 504 # 504 + DTD_CONTENT_NOT_DETERMINIST = 505 # 505 + DTD_DIFFERENT_PREFIX = 506 # 506 + DTD_ELEM_DEFAULT_NAMESPACE = 507 # 507 + DTD_ELEM_NAMESPACE = 508 # 508 + DTD_ELEM_REDEFINED = 509 # 509 + DTD_EMPTY_NOTATION = 510 # 510 + DTD_ENTITY_TYPE = 511 # 511 + DTD_ID_FIXED = 512 # 512 + DTD_ID_REDEFINED = 513 # 513 + 
DTD_ID_SUBSET = 514 # 514 + DTD_INVALID_CHILD = 515 # 515 + DTD_INVALID_DEFAULT = 516 # 516 + DTD_LOAD_ERROR = 517 # 517 + DTD_MISSING_ATTRIBUTE = 518 # 518 + DTD_MIXED_CORRUPT = 519 # 519 + DTD_MULTIPLE_ID = 520 # 520 + DTD_NO_DOC = 521 # 521 + DTD_NO_DTD = 522 # 522 + DTD_NO_ELEM_NAME = 523 # 523 + DTD_NO_PREFIX = 524 # 524 + DTD_NO_ROOT = 525 # 525 + DTD_NOTATION_REDEFINED = 526 # 526 + DTD_NOTATION_VALUE = 527 # 527 + DTD_NOT_EMPTY = 528 # 528 + DTD_NOT_PCDATA = 529 # 529 + DTD_NOT_STANDALONE = 530 # 530 + DTD_ROOT_NAME = 531 # 531 + DTD_STANDALONE_WHITE_SPACE = 532 # 532 + DTD_UNKNOWN_ATTRIBUTE = 533 # 533 + DTD_UNKNOWN_ELEM = 534 # 534 + DTD_UNKNOWN_ENTITY = 535 # 535 + DTD_UNKNOWN_ID = 536 # 536 + DTD_UNKNOWN_NOTATION = 537 # 537 + DTD_STANDALONE_DEFAULTED = 538 # 538 + DTD_XMLID_VALUE = 539 # 539 + DTD_XMLID_TYPE = 540 # 540 + HTML_STRUCURE_ERROR = 800 + HTML_UNKNOWN_TAG = 801 # 801 + RNGP_ANYNAME_ATTR_ANCESTOR = 1000 + RNGP_ATTR_CONFLICT = 1001 # 1001 + RNGP_ATTRIBUTE_CHILDREN = 1002 # 1002 + RNGP_ATTRIBUTE_CONTENT = 1003 # 1003 + RNGP_ATTRIBUTE_EMPTY = 1004 # 1004 + RNGP_ATTRIBUTE_NOOP = 1005 # 1005 + RNGP_CHOICE_CONTENT = 1006 # 1006 + RNGP_CHOICE_EMPTY = 1007 # 1007 + RNGP_CREATE_FAILURE = 1008 # 1008 + RNGP_DATA_CONTENT = 1009 # 1009 + RNGP_DEF_CHOICE_AND_INTERLEAVE = 1010 # 1010 + RNGP_DEFINE_CREATE_FAILED = 1011 # 1011 + RNGP_DEFINE_EMPTY = 1012 # 1012 + RNGP_DEFINE_MISSING = 1013 # 1013 + RNGP_DEFINE_NAME_MISSING = 1014 # 1014 + RNGP_ELEM_CONTENT_EMPTY = 1015 # 1015 + RNGP_ELEM_CONTENT_ERROR = 1016 # 1016 + RNGP_ELEMENT_EMPTY = 1017 # 1017 + RNGP_ELEMENT_CONTENT = 1018 # 1018 + RNGP_ELEMENT_NAME = 1019 # 1019 + RNGP_ELEMENT_NO_CONTENT = 1020 # 1020 + RNGP_ELEM_TEXT_CONFLICT = 1021 # 1021 + RNGP_EMPTY = 1022 # 1022 + RNGP_EMPTY_CONSTRUCT = 1023 # 1023 + RNGP_EMPTY_CONTENT = 1024 # 1024 + RNGP_EMPTY_NOT_EMPTY = 1025 # 1025 + RNGP_ERROR_TYPE_LIB = 1026 # 1026 + RNGP_EXCEPT_EMPTY = 1027 # 1027 + RNGP_EXCEPT_MISSING = 1028 # 1028 + RNGP_EXCEPT_MULTIPLE = 
1029 # 1029 + RNGP_EXCEPT_NO_CONTENT = 1030 # 1030 + RNGP_EXTERNALREF_EMTPY = 1031 # 1031 + RNGP_EXTERNAL_REF_FAILURE = 1032 # 1032 + RNGP_EXTERNALREF_RECURSE = 1033 # 1033 + RNGP_FORBIDDEN_ATTRIBUTE = 1034 # 1034 + RNGP_FOREIGN_ELEMENT = 1035 # 1035 + RNGP_GRAMMAR_CONTENT = 1036 # 1036 + RNGP_GRAMMAR_EMPTY = 1037 # 1037 + RNGP_GRAMMAR_MISSING = 1038 # 1038 + RNGP_GRAMMAR_NO_START = 1039 # 1039 + RNGP_GROUP_ATTR_CONFLICT = 1040 # 1040 + RNGP_HREF_ERROR = 1041 # 1041 + RNGP_INCLUDE_EMPTY = 1042 # 1042 + RNGP_INCLUDE_FAILURE = 1043 # 1043 + RNGP_INCLUDE_RECURSE = 1044 # 1044 + RNGP_INTERLEAVE_ADD = 1045 # 1045 + RNGP_INTERLEAVE_CREATE_FAILED = 1046 # 1046 + RNGP_INTERLEAVE_EMPTY = 1047 # 1047 + RNGP_INTERLEAVE_NO_CONTENT = 1048 # 1048 + RNGP_INVALID_DEFINE_NAME = 1049 # 1049 + RNGP_INVALID_URI = 1050 # 1050 + RNGP_INVALID_VALUE = 1051 # 1051 + RNGP_MISSING_HREF = 1052 # 1052 + RNGP_NAME_MISSING = 1053 # 1053 + RNGP_NEED_COMBINE = 1054 # 1054 + RNGP_NOTALLOWED_NOT_EMPTY = 1055 # 1055 + RNGP_NSNAME_ATTR_ANCESTOR = 1056 # 1056 + RNGP_NSNAME_NO_NS = 1057 # 1057 + RNGP_PARAM_FORBIDDEN = 1058 # 1058 + RNGP_PARAM_NAME_MISSING = 1059 # 1059 + RNGP_PARENTREF_CREATE_FAILED = 1060 # 1060 + RNGP_PARENTREF_NAME_INVALID = 1061 # 1061 + RNGP_PARENTREF_NO_NAME = 1062 # 1062 + RNGP_PARENTREF_NO_PARENT = 1063 # 1063 + RNGP_PARENTREF_NOT_EMPTY = 1064 # 1064 + RNGP_PARSE_ERROR = 1065 # 1065 + RNGP_PAT_ANYNAME_EXCEPT_ANYNAME = 1066 # 1066 + RNGP_PAT_ATTR_ATTR = 1067 # 1067 + RNGP_PAT_ATTR_ELEM = 1068 # 1068 + RNGP_PAT_DATA_EXCEPT_ATTR = 1069 # 1069 + RNGP_PAT_DATA_EXCEPT_ELEM = 1070 # 1070 + RNGP_PAT_DATA_EXCEPT_EMPTY = 1071 # 1071 + RNGP_PAT_DATA_EXCEPT_GROUP = 1072 # 1072 + RNGP_PAT_DATA_EXCEPT_INTERLEAVE = 1073 # 1073 + RNGP_PAT_DATA_EXCEPT_LIST = 1074 # 1074 + RNGP_PAT_DATA_EXCEPT_ONEMORE = 1075 # 1075 + RNGP_PAT_DATA_EXCEPT_REF = 1076 # 1076 + RNGP_PAT_DATA_EXCEPT_TEXT = 1077 # 1077 + RNGP_PAT_LIST_ATTR = 1078 # 1078 + RNGP_PAT_LIST_ELEM = 1079 # 1079 + RNGP_PAT_LIST_INTERLEAVE = 
1080 # 1080 + RNGP_PAT_LIST_LIST = 1081 # 1081 + RNGP_PAT_LIST_REF = 1082 # 1082 + RNGP_PAT_LIST_TEXT = 1083 # 1083 + RNGP_PAT_NSNAME_EXCEPT_ANYNAME = 1084 # 1084 + RNGP_PAT_NSNAME_EXCEPT_NSNAME = 1085 # 1085 + RNGP_PAT_ONEMORE_GROUP_ATTR = 1086 # 1086 + RNGP_PAT_ONEMORE_INTERLEAVE_ATTR = 1087 # 1087 + RNGP_PAT_START_ATTR = 1088 # 1088 + RNGP_PAT_START_DATA = 1089 # 1089 + RNGP_PAT_START_EMPTY = 1090 # 1090 + RNGP_PAT_START_GROUP = 1091 # 1091 + RNGP_PAT_START_INTERLEAVE = 1092 # 1092 + RNGP_PAT_START_LIST = 1093 # 1093 + RNGP_PAT_START_ONEMORE = 1094 # 1094 + RNGP_PAT_START_TEXT = 1095 # 1095 + RNGP_PAT_START_VALUE = 1096 # 1096 + RNGP_PREFIX_UNDEFINED = 1097 # 1097 + RNGP_REF_CREATE_FAILED = 1098 # 1098 + RNGP_REF_CYCLE = 1099 # 1099 + RNGP_REF_NAME_INVALID = 1100 # 1100 + RNGP_REF_NO_DEF = 1101 # 1101 + RNGP_REF_NO_NAME = 1102 # 1102 + RNGP_REF_NOT_EMPTY = 1103 # 1103 + RNGP_START_CHOICE_AND_INTERLEAVE = 1104 # 1104 + RNGP_START_CONTENT = 1105 # 1105 + RNGP_START_EMPTY = 1106 # 1106 + RNGP_START_MISSING = 1107 # 1107 + RNGP_TEXT_EXPECTED = 1108 # 1108 + RNGP_TEXT_HAS_CHILD = 1109 # 1109 + RNGP_TYPE_MISSING = 1110 # 1110 + RNGP_TYPE_NOT_FOUND = 1111 # 1111 + RNGP_TYPE_VALUE = 1112 # 1112 + RNGP_UNKNOWN_ATTRIBUTE = 1113 # 1113 + RNGP_UNKNOWN_COMBINE = 1114 # 1114 + RNGP_UNKNOWN_CONSTRUCT = 1115 # 1115 + RNGP_UNKNOWN_TYPE_LIB = 1116 # 1116 + RNGP_URI_FRAGMENT = 1117 # 1117 + RNGP_URI_NOT_ABSOLUTE = 1118 # 1118 + RNGP_VALUE_EMPTY = 1119 # 1119 + RNGP_VALUE_NO_CONTENT = 1120 # 1120 + RNGP_XMLNS_NAME = 1121 # 1121 + RNGP_XML_NS = 1122 # 1122 + XPATH_EXPRESSION_OK = 1200 + XPATH_NUMBER_ERROR = 1201 # 1201 + XPATH_UNFINISHED_LITERAL_ERROR = 1202 # 1202 + XPATH_START_LITERAL_ERROR = 1203 # 1203 + XPATH_VARIABLE_REF_ERROR = 1204 # 1204 + XPATH_UNDEF_VARIABLE_ERROR = 1205 # 1205 + XPATH_INVALID_PREDICATE_ERROR = 1206 # 1206 + XPATH_EXPR_ERROR = 1207 # 1207 + XPATH_UNCLOSED_ERROR = 1208 # 1208 + XPATH_UNKNOWN_FUNC_ERROR = 1209 # 1209 + XPATH_INVALID_OPERAND = 1210 # 1210 + 
XPATH_INVALID_TYPE = 1211 # 1211 + XPATH_INVALID_ARITY = 1212 # 1212 + XPATH_INVALID_CTXT_SIZE = 1213 # 1213 + XPATH_INVALID_CTXT_POSITION = 1214 # 1214 + XPATH_MEMORY_ERROR = 1215 # 1215 + XPTR_SYNTAX_ERROR = 1216 # 1216 + XPTR_RESOURCE_ERROR = 1217 # 1217 + XPTR_SUB_RESOURCE_ERROR = 1218 # 1218 + XPATH_UNDEF_PREFIX_ERROR = 1219 # 1219 + XPATH_ENCODING_ERROR = 1220 # 1220 + XPATH_INVALID_CHAR_ERROR = 1221 # 1221 + TREE_INVALID_HEX = 1300 + TREE_INVALID_DEC = 1301 # 1301 + TREE_UNTERMINATED_ENTITY = 1302 # 1302 + SAVE_NOT_UTF8 = 1400 + SAVE_CHAR_INVALID = 1401 # 1401 + SAVE_NO_DOCTYPE = 1402 # 1402 + SAVE_UNKNOWN_ENCODING = 1403 # 1403 + REGEXP_COMPILE_ERROR = 1450 + IO_UNKNOWN = 1500 + IO_EACCES = 1501 # 1501 + IO_EAGAIN = 1502 # 1502 + IO_EBADF = 1503 # 1503 + IO_EBADMSG = 1504 # 1504 + IO_EBUSY = 1505 # 1505 + IO_ECANCELED = 1506 # 1506 + IO_ECHILD = 1507 # 1507 + IO_EDEADLK = 1508 # 1508 + IO_EDOM = 1509 # 1509 + IO_EEXIST = 1510 # 1510 + IO_EFAULT = 1511 # 1511 + IO_EFBIG = 1512 # 1512 + IO_EINPROGRESS = 1513 # 1513 + IO_EINTR = 1514 # 1514 + IO_EINVAL = 1515 # 1515 + IO_EIO = 1516 # 1516 + IO_EISDIR = 1517 # 1517 + IO_EMFILE = 1518 # 1518 + IO_EMLINK = 1519 # 1519 + IO_EMSGSIZE = 1520 # 1520 + IO_ENAMETOOLONG = 1521 # 1521 + IO_ENFILE = 1522 # 1522 + IO_ENODEV = 1523 # 1523 + IO_ENOENT = 1524 # 1524 + IO_ENOEXEC = 1525 # 1525 + IO_ENOLCK = 1526 # 1526 + IO_ENOMEM = 1527 # 1527 + IO_ENOSPC = 1528 # 1528 + IO_ENOSYS = 1529 # 1529 + IO_ENOTDIR = 1530 # 1530 + IO_ENOTEMPTY = 1531 # 1531 + IO_ENOTSUP = 1532 # 1532 + IO_ENOTTY = 1533 # 1533 + IO_ENXIO = 1534 # 1534 + IO_EPERM = 1535 # 1535 + IO_EPIPE = 1536 # 1536 + IO_ERANGE = 1537 # 1537 + IO_EROFS = 1538 # 1538 + IO_ESPIPE = 1539 # 1539 + IO_ESRCH = 1540 # 1540 + IO_ETIMEDOUT = 1541 # 1541 + IO_EXDEV = 1542 # 1542 + IO_NETWORK_ATTEMPT = 1543 # 1543 + IO_ENCODER = 1544 # 1544 + IO_FLUSH = 1545 # 1545 + IO_WRITE = 1546 # 1546 + IO_NO_INPUT = 1547 # 1547 + IO_BUFFER_FULL = 1548 # 1548 + IO_LOAD_ERROR = 1549 # 1549 
+ IO_ENOTSOCK = 1550 # 1550 + IO_EISCONN = 1551 # 1551 + IO_ECONNREFUSED = 1552 # 1552 + IO_ENETUNREACH = 1553 # 1553 + IO_EADDRINUSE = 1554 # 1554 + IO_EALREADY = 1555 # 1555 + IO_EAFNOSUPPORT = 1556 # 1556 + XINCLUDE_RECURSION = 1600 + XINCLUDE_PARSE_VALUE = 1601 # 1601 + XINCLUDE_ENTITY_DEF_MISMATCH = 1602 # 1602 + XINCLUDE_NO_HREF = 1603 # 1603 + XINCLUDE_NO_FALLBACK = 1604 # 1604 + XINCLUDE_HREF_URI = 1605 # 1605 + XINCLUDE_TEXT_FRAGMENT = 1606 # 1606 + XINCLUDE_TEXT_DOCUMENT = 1607 # 1607 + XINCLUDE_INVALID_CHAR = 1608 # 1608 + XINCLUDE_BUILD_FAILED = 1609 # 1609 + XINCLUDE_UNKNOWN_ENCODING = 1610 # 1610 + XINCLUDE_MULTIPLE_ROOT = 1611 # 1611 + XINCLUDE_XPTR_FAILED = 1612 # 1612 + XINCLUDE_XPTR_RESULT = 1613 # 1613 + XINCLUDE_INCLUDE_IN_INCLUDE = 1614 # 1614 + XINCLUDE_FALLBACKS_IN_INCLUDE = 1615 # 1615 + XINCLUDE_FALLBACK_NOT_IN_INCLUDE = 1616 # 1616 + XINCLUDE_DEPRECATED_NS = 1617 # 1617 + XINCLUDE_FRAGMENT_ID = 1618 # 1618 + CATALOG_MISSING_ATTR = 1650 + CATALOG_ENTRY_BROKEN = 1651 # 1651 + CATALOG_PREFER_VALUE = 1652 # 1652 + CATALOG_NOT_CATALOG = 1653 # 1653 + CATALOG_RECURSION = 1654 # 1654 + SCHEMAP_PREFIX_UNDEFINED = 1700 + SCHEMAP_ATTRFORMDEFAULT_VALUE = 1701 # 1701 + SCHEMAP_ATTRGRP_NONAME_NOREF = 1702 # 1702 + SCHEMAP_ATTR_NONAME_NOREF = 1703 # 1703 + SCHEMAP_COMPLEXTYPE_NONAME_NOREF = 1704 # 1704 + SCHEMAP_ELEMFORMDEFAULT_VALUE = 1705 # 1705 + SCHEMAP_ELEM_NONAME_NOREF = 1706 # 1706 + SCHEMAP_EXTENSION_NO_BASE = 1707 # 1707 + SCHEMAP_FACET_NO_VALUE = 1708 # 1708 + SCHEMAP_FAILED_BUILD_IMPORT = 1709 # 1709 + SCHEMAP_GROUP_NONAME_NOREF = 1710 # 1710 + SCHEMAP_IMPORT_NAMESPACE_NOT_URI = 1711 # 1711 + SCHEMAP_IMPORT_REDEFINE_NSNAME = 1712 # 1712 + SCHEMAP_IMPORT_SCHEMA_NOT_URI = 1713 # 1713 + SCHEMAP_INVALID_BOOLEAN = 1714 # 1714 + SCHEMAP_INVALID_ENUM = 1715 # 1715 + SCHEMAP_INVALID_FACET = 1716 # 1716 + SCHEMAP_INVALID_FACET_VALUE = 1717 # 1717 + SCHEMAP_INVALID_MAXOCCURS = 1718 # 1718 + SCHEMAP_INVALID_MINOCCURS = 1719 # 1719 + 
SCHEMAP_INVALID_REF_AND_SUBTYPE = 1720 # 1720 + SCHEMAP_INVALID_WHITE_SPACE = 1721 # 1721 + SCHEMAP_NOATTR_NOREF = 1722 # 1722 + SCHEMAP_NOTATION_NO_NAME = 1723 # 1723 + SCHEMAP_NOTYPE_NOREF = 1724 # 1724 + SCHEMAP_REF_AND_SUBTYPE = 1725 # 1725 + SCHEMAP_RESTRICTION_NONAME_NOREF = 1726 # 1726 + SCHEMAP_SIMPLETYPE_NONAME = 1727 # 1727 + SCHEMAP_TYPE_AND_SUBTYPE = 1728 # 1728 + SCHEMAP_UNKNOWN_ALL_CHILD = 1729 # 1729 + SCHEMAP_UNKNOWN_ANYATTRIBUTE_CHILD = 1730 # 1730 + SCHEMAP_UNKNOWN_ATTR_CHILD = 1731 # 1731 + SCHEMAP_UNKNOWN_ATTRGRP_CHILD = 1732 # 1732 + SCHEMAP_UNKNOWN_ATTRIBUTE_GROUP = 1733 # 1733 + SCHEMAP_UNKNOWN_BASE_TYPE = 1734 # 1734 + SCHEMAP_UNKNOWN_CHOICE_CHILD = 1735 # 1735 + SCHEMAP_UNKNOWN_COMPLEXCONTENT_CHILD = 1736 # 1736 + SCHEMAP_UNKNOWN_COMPLEXTYPE_CHILD = 1737 # 1737 + SCHEMAP_UNKNOWN_ELEM_CHILD = 1738 # 1738 + SCHEMAP_UNKNOWN_EXTENSION_CHILD = 1739 # 1739 + SCHEMAP_UNKNOWN_FACET_CHILD = 1740 # 1740 + SCHEMAP_UNKNOWN_FACET_TYPE = 1741 # 1741 + SCHEMAP_UNKNOWN_GROUP_CHILD = 1742 # 1742 + SCHEMAP_UNKNOWN_IMPORT_CHILD = 1743 # 1743 + SCHEMAP_UNKNOWN_LIST_CHILD = 1744 # 1744 + SCHEMAP_UNKNOWN_NOTATION_CHILD = 1745 # 1745 + SCHEMAP_UNKNOWN_PROCESSCONTENT_CHILD = 1746 # 1746 + SCHEMAP_UNKNOWN_REF = 1747 # 1747 + SCHEMAP_UNKNOWN_RESTRICTION_CHILD = 1748 # 1748 + SCHEMAP_UNKNOWN_SCHEMAS_CHILD = 1749 # 1749 + SCHEMAP_UNKNOWN_SEQUENCE_CHILD = 1750 # 1750 + SCHEMAP_UNKNOWN_SIMPLECONTENT_CHILD = 1751 # 1751 + SCHEMAP_UNKNOWN_SIMPLETYPE_CHILD = 1752 # 1752 + SCHEMAP_UNKNOWN_TYPE = 1753 # 1753 + SCHEMAP_UNKNOWN_UNION_CHILD = 1754 # 1754 + SCHEMAP_ELEM_DEFAULT_FIXED = 1755 # 1755 + SCHEMAP_REGEXP_INVALID = 1756 # 1756 + SCHEMAP_FAILED_LOAD = 1757 # 1757 + SCHEMAP_NOTHING_TO_PARSE = 1758 # 1758 + SCHEMAP_NOROOT = 1759 # 1759 + SCHEMAP_REDEFINED_GROUP = 1760 # 1760 + SCHEMAP_REDEFINED_TYPE = 1761 # 1761 + SCHEMAP_REDEFINED_ELEMENT = 1762 # 1762 + SCHEMAP_REDEFINED_ATTRGROUP = 1763 # 1763 + SCHEMAP_REDEFINED_ATTR = 1764 # 1764 + SCHEMAP_REDEFINED_NOTATION = 1765 # 
1765 + SCHEMAP_FAILED_PARSE = 1766 # 1766 + SCHEMAP_UNKNOWN_PREFIX = 1767 # 1767 + SCHEMAP_DEF_AND_PREFIX = 1768 # 1768 + SCHEMAP_UNKNOWN_INCLUDE_CHILD = 1769 # 1769 + SCHEMAP_INCLUDE_SCHEMA_NOT_URI = 1770 # 1770 + SCHEMAP_INCLUDE_SCHEMA_NO_URI = 1771 # 1771 + SCHEMAP_NOT_SCHEMA = 1772 # 1772 + SCHEMAP_UNKNOWN_MEMBER_TYPE = 1773 # 1773 + SCHEMAP_INVALID_ATTR_USE = 1774 # 1774 + SCHEMAP_RECURSIVE = 1775 # 1775 + SCHEMAP_SUPERNUMEROUS_LIST_ITEM_TYPE = 1776 # 1776 + SCHEMAP_INVALID_ATTR_COMBINATION = 1777 # 1777 + SCHEMAP_INVALID_ATTR_INLINE_COMBINATION = 1778 # 1778 + SCHEMAP_MISSING_SIMPLETYPE_CHILD = 1779 # 1779 + SCHEMAP_INVALID_ATTR_NAME = 1780 # 1780 + SCHEMAP_REF_AND_CONTENT = 1781 # 1781 + SCHEMAP_CT_PROPS_CORRECT_1 = 1782 # 1782 + SCHEMAP_CT_PROPS_CORRECT_2 = 1783 # 1783 + SCHEMAP_CT_PROPS_CORRECT_3 = 1784 # 1784 + SCHEMAP_CT_PROPS_CORRECT_4 = 1785 # 1785 + SCHEMAP_CT_PROPS_CORRECT_5 = 1786 # 1786 + SCHEMAP_DERIVATION_OK_RESTRICTION_1 = 1787 # 1787 + SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_1 = 1788 # 1788 + SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_2 = 1789 # 1789 + SCHEMAP_DERIVATION_OK_RESTRICTION_2_2 = 1790 # 1790 + SCHEMAP_DERIVATION_OK_RESTRICTION_3 = 1791 # 1791 + SCHEMAP_WILDCARD_INVALID_NS_MEMBER = 1792 # 1792 + SCHEMAP_INTERSECTION_NOT_EXPRESSIBLE = 1793 # 1793 + SCHEMAP_UNION_NOT_EXPRESSIBLE = 1794 # 1794 + SCHEMAP_SRC_IMPORT_3_1 = 1795 # 1795 + SCHEMAP_SRC_IMPORT_3_2 = 1796 # 1796 + SCHEMAP_DERIVATION_OK_RESTRICTION_4_1 = 1797 # 1797 + SCHEMAP_DERIVATION_OK_RESTRICTION_4_2 = 1798 # 1798 + SCHEMAP_DERIVATION_OK_RESTRICTION_4_3 = 1799 # 1799 + SCHEMAP_COS_CT_EXTENDS_1_3 = 1800 # 1800 + SCHEMAV_NOROOT = 1801 + SCHEMAV_UNDECLAREDELEM = 1802 # 1802 + SCHEMAV_NOTTOPLEVEL = 1803 # 1803 + SCHEMAV_MISSING = 1804 # 1804 + SCHEMAV_WRONGELEM = 1805 # 1805 + SCHEMAV_NOTYPE = 1806 # 1806 + SCHEMAV_NOROLLBACK = 1807 # 1807 + SCHEMAV_ISABSTRACT = 1808 # 1808 + SCHEMAV_NOTEMPTY = 1809 # 1809 + SCHEMAV_ELEMCONT = 1810 # 1810 + SCHEMAV_HAVEDEFAULT = 1811 # 1811 + 
SCHEMAV_NOTNILLABLE = 1812 # 1812 + SCHEMAV_EXTRACONTENT = 1813 # 1813 + SCHEMAV_INVALIDATTR = 1814 # 1814 + SCHEMAV_INVALIDELEM = 1815 # 1815 + SCHEMAV_NOTDETERMINIST = 1816 # 1816 + SCHEMAV_CONSTRUCT = 1817 # 1817 + SCHEMAV_INTERNAL = 1818 # 1818 + SCHEMAV_NOTSIMPLE = 1819 # 1819 + SCHEMAV_ATTRUNKNOWN = 1820 # 1820 + SCHEMAV_ATTRINVALID = 1821 # 1821 + SCHEMAV_VALUE = 1822 # 1822 + SCHEMAV_FACET = 1823 # 1823 + SCHEMAV_CVC_DATATYPE_VALID_1_2_1 = 1824 # 1824 + SCHEMAV_CVC_DATATYPE_VALID_1_2_2 = 1825 # 1825 + SCHEMAV_CVC_DATATYPE_VALID_1_2_3 = 1826 # 1826 + SCHEMAV_CVC_TYPE_3_1_1 = 1827 # 1827 + SCHEMAV_CVC_TYPE_3_1_2 = 1828 # 1828 + SCHEMAV_CVC_FACET_VALID = 1829 # 1829 + SCHEMAV_CVC_LENGTH_VALID = 1830 # 1830 + SCHEMAV_CVC_MINLENGTH_VALID = 1831 # 1831 + SCHEMAV_CVC_MAXLENGTH_VALID = 1832 # 1832 + SCHEMAV_CVC_MININCLUSIVE_VALID = 1833 # 1833 + SCHEMAV_CVC_MAXINCLUSIVE_VALID = 1834 # 1834 + SCHEMAV_CVC_MINEXCLUSIVE_VALID = 1835 # 1835 + SCHEMAV_CVC_MAXEXCLUSIVE_VALID = 1836 # 1836 + SCHEMAV_CVC_TOTALDIGITS_VALID = 1837 # 1837 + SCHEMAV_CVC_FRACTIONDIGITS_VALID = 1838 # 1838 + SCHEMAV_CVC_PATTERN_VALID = 1839 # 1839 + SCHEMAV_CVC_ENUMERATION_VALID = 1840 # 1840 + SCHEMAV_CVC_COMPLEX_TYPE_2_1 = 1841 # 1841 + SCHEMAV_CVC_COMPLEX_TYPE_2_2 = 1842 # 1842 + SCHEMAV_CVC_COMPLEX_TYPE_2_3 = 1843 # 1843 + SCHEMAV_CVC_COMPLEX_TYPE_2_4 = 1844 # 1844 + SCHEMAV_CVC_ELT_1 = 1845 # 1845 + SCHEMAV_CVC_ELT_2 = 1846 # 1846 + SCHEMAV_CVC_ELT_3_1 = 1847 # 1847 + SCHEMAV_CVC_ELT_3_2_1 = 1848 # 1848 + SCHEMAV_CVC_ELT_3_2_2 = 1849 # 1849 + SCHEMAV_CVC_ELT_4_1 = 1850 # 1850 + SCHEMAV_CVC_ELT_4_2 = 1851 # 1851 + SCHEMAV_CVC_ELT_4_3 = 1852 # 1852 + SCHEMAV_CVC_ELT_5_1_1 = 1853 # 1853 + SCHEMAV_CVC_ELT_5_1_2 = 1854 # 1854 + SCHEMAV_CVC_ELT_5_2_1 = 1855 # 1855 + SCHEMAV_CVC_ELT_5_2_2_1 = 1856 # 1856 + SCHEMAV_CVC_ELT_5_2_2_2_1 = 1857 # 1857 + SCHEMAV_CVC_ELT_5_2_2_2_2 = 1858 # 1858 + SCHEMAV_CVC_ELT_6 = 1859 # 1859 + SCHEMAV_CVC_ELT_7 = 1860 # 1860 + SCHEMAV_CVC_ATTRIBUTE_1 = 1861 # 1861 + 
SCHEMAV_CVC_ATTRIBUTE_2 = 1862 # 1862 + SCHEMAV_CVC_ATTRIBUTE_3 = 1863 # 1863 + SCHEMAV_CVC_ATTRIBUTE_4 = 1864 # 1864 + SCHEMAV_CVC_COMPLEX_TYPE_3_1 = 1865 # 1865 + SCHEMAV_CVC_COMPLEX_TYPE_3_2_1 = 1866 # 1866 + SCHEMAV_CVC_COMPLEX_TYPE_3_2_2 = 1867 # 1867 + SCHEMAV_CVC_COMPLEX_TYPE_4 = 1868 # 1868 + SCHEMAV_CVC_COMPLEX_TYPE_5_1 = 1869 # 1869 + SCHEMAV_CVC_COMPLEX_TYPE_5_2 = 1870 # 1870 + SCHEMAV_ELEMENT_CONTENT = 1871 # 1871 + SCHEMAV_DOCUMENT_ELEMENT_MISSING = 1872 # 1872 + SCHEMAV_CVC_COMPLEX_TYPE_1 = 1873 # 1873 + SCHEMAV_CVC_AU = 1874 # 1874 + SCHEMAV_CVC_TYPE_1 = 1875 # 1875 + SCHEMAV_CVC_TYPE_2 = 1876 # 1876 + XPTR_UNKNOWN_SCHEME = 1900 + XPTR_CHILDSEQ_START = 1901 # 1901 + XPTR_EVAL_FAILED = 1902 # 1902 + XPTR_EXTRA_OBJECTS = 1903 # 1903 + C14N_CREATE_CTXT = 1950 + C14N_REQUIRES_UTF8 = 1951 # 1951 + C14N_CREATE_STACK = 1952 # 1952 + C14N_INVALID_NODE = 1953 # 1953 + FTP_PASV_ANSWER = 2000 + FTP_EPSV_ANSWER = 2001 # 2001 + FTP_ACCNT = 2002 # 2002 + HTTP_URL_SYNTAX = 2020 + HTTP_USE_IP = 2021 # 2021 + HTTP_UNKNOWN_HOST = 2022 # 2022 + SCHEMAP_SRC_SIMPLE_TYPE_1 = 3000 + SCHEMAP_SRC_SIMPLE_TYPE_2 = 3001 # 3001 + SCHEMAP_SRC_SIMPLE_TYPE_3 = 3002 # 3002 + SCHEMAP_SRC_SIMPLE_TYPE_4 = 3003 # 3003 + SCHEMAP_SRC_RESOLVE = 3004 # 3004 + SCHEMAP_SRC_RESTRICTION_BASE_OR_SIMPLETYPE = 3005 # 3005 + SCHEMAP_SRC_LIST_ITEMTYPE_OR_SIMPLETYPE = 3006 # 3006 + SCHEMAP_SRC_UNION_MEMBERTYPES_OR_SIMPLETYPES = 3007 # 3007 + SCHEMAP_ST_PROPS_CORRECT_1 = 3008 # 3008 + SCHEMAP_ST_PROPS_CORRECT_2 = 3009 # 3009 + SCHEMAP_ST_PROPS_CORRECT_3 = 3010 # 3010 + SCHEMAP_COS_ST_RESTRICTS_1_1 = 3011 # 3011 + SCHEMAP_COS_ST_RESTRICTS_1_2 = 3012 # 3012 + SCHEMAP_COS_ST_RESTRICTS_1_3_1 = 3013 # 3013 + SCHEMAP_COS_ST_RESTRICTS_1_3_2 = 3014 # 3014 + SCHEMAP_COS_ST_RESTRICTS_2_1 = 3015 # 3015 + SCHEMAP_COS_ST_RESTRICTS_2_3_1_1 = 3016 # 3016 + SCHEMAP_COS_ST_RESTRICTS_2_3_1_2 = 3017 # 3017 + SCHEMAP_COS_ST_RESTRICTS_2_3_2_1 = 3018 # 3018 + SCHEMAP_COS_ST_RESTRICTS_2_3_2_2 = 3019 # 3019 + 
SCHEMAP_COS_ST_RESTRICTS_2_3_2_3 = 3020 # 3020 + SCHEMAP_COS_ST_RESTRICTS_2_3_2_4 = 3021 # 3021 + SCHEMAP_COS_ST_RESTRICTS_2_3_2_5 = 3022 # 3022 + SCHEMAP_COS_ST_RESTRICTS_3_1 = 3023 # 3023 + SCHEMAP_COS_ST_RESTRICTS_3_3_1 = 3024 # 3024 + SCHEMAP_COS_ST_RESTRICTS_3_3_1_2 = 3025 # 3025 + SCHEMAP_COS_ST_RESTRICTS_3_3_2_2 = 3026 # 3026 + SCHEMAP_COS_ST_RESTRICTS_3_3_2_1 = 3027 # 3027 + SCHEMAP_COS_ST_RESTRICTS_3_3_2_3 = 3028 # 3028 + SCHEMAP_COS_ST_RESTRICTS_3_3_2_4 = 3029 # 3029 + SCHEMAP_COS_ST_RESTRICTS_3_3_2_5 = 3030 # 3030 + SCHEMAP_COS_ST_DERIVED_OK_2_1 = 3031 # 3031 + SCHEMAP_COS_ST_DERIVED_OK_2_2 = 3032 # 3032 + SCHEMAP_S4S_ELEM_NOT_ALLOWED = 3033 # 3033 + SCHEMAP_S4S_ELEM_MISSING = 3034 # 3034 + SCHEMAP_S4S_ATTR_NOT_ALLOWED = 3035 # 3035 + SCHEMAP_S4S_ATTR_MISSING = 3036 # 3036 + SCHEMAP_S4S_ATTR_INVALID_VALUE = 3037 # 3037 + SCHEMAP_SRC_ELEMENT_1 = 3038 # 3038 + SCHEMAP_SRC_ELEMENT_2_1 = 3039 # 3039 + SCHEMAP_SRC_ELEMENT_2_2 = 3040 # 3040 + SCHEMAP_SRC_ELEMENT_3 = 3041 # 3041 + SCHEMAP_P_PROPS_CORRECT_1 = 3042 # 3042 + SCHEMAP_P_PROPS_CORRECT_2_1 = 3043 # 3043 + SCHEMAP_P_PROPS_CORRECT_2_2 = 3044 # 3044 + SCHEMAP_E_PROPS_CORRECT_2 = 3045 # 3045 + SCHEMAP_E_PROPS_CORRECT_3 = 3046 # 3046 + SCHEMAP_E_PROPS_CORRECT_4 = 3047 # 3047 + SCHEMAP_E_PROPS_CORRECT_5 = 3048 # 3048 + SCHEMAP_E_PROPS_CORRECT_6 = 3049 # 3049 + SCHEMAP_SRC_INCLUDE = 3050 # 3050 + SCHEMAP_SRC_ATTRIBUTE_1 = 3051 # 3051 + SCHEMAP_SRC_ATTRIBUTE_2 = 3052 # 3052 + SCHEMAP_SRC_ATTRIBUTE_3_1 = 3053 # 3053 + SCHEMAP_SRC_ATTRIBUTE_3_2 = 3054 # 3054 + SCHEMAP_SRC_ATTRIBUTE_4 = 3055 # 3055 + SCHEMAP_NO_XMLNS = 3056 # 3056 + SCHEMAP_NO_XSI = 3057 # 3057 + SCHEMAP_COS_VALID_DEFAULT_1 = 3058 # 3058 + SCHEMAP_COS_VALID_DEFAULT_2_1 = 3059 # 3059 + SCHEMAP_COS_VALID_DEFAULT_2_2_1 = 3060 # 3060 + SCHEMAP_COS_VALID_DEFAULT_2_2_2 = 3061 # 3061 + SCHEMAP_CVC_SIMPLE_TYPE = 3062 # 3062 + SCHEMAP_COS_CT_EXTENDS_1_1 = 3063 # 3063 + SCHEMAP_SRC_IMPORT_1_1 = 3064 # 3064 + SCHEMAP_SRC_IMPORT_1_2 = 3065 # 3065 + 
SCHEMAP_SRC_IMPORT_2 = 3066 # 3066 + SCHEMAP_SRC_IMPORT_2_1 = 3067 # 3067 + SCHEMAP_SRC_IMPORT_2_2 = 3068 # 3068 + SCHEMAP_INTERNAL = 3069 # 3069 non-W3C + SCHEMAP_NOT_DETERMINISTIC = 3070 # 3070 non-W3C + SCHEMAP_SRC_ATTRIBUTE_GROUP_1 = 3071 # 3071 + SCHEMAP_SRC_ATTRIBUTE_GROUP_2 = 3072 # 3072 + SCHEMAP_SRC_ATTRIBUTE_GROUP_3 = 3073 # 3073 + SCHEMAP_MG_PROPS_CORRECT_1 = 3074 # 3074 + SCHEMAP_MG_PROPS_CORRECT_2 = 3075 # 3075 + SCHEMAP_SRC_CT_1 = 3076 # 3076 + SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_3 = 3077 # 3077 + SCHEMAP_AU_PROPS_CORRECT_2 = 3078 # 3078 + SCHEMAP_A_PROPS_CORRECT_2 = 3079 # 3079 + MODULE_OPEN = 4900 # 4900 + MODULE_CLOSE = 4901 # 4901 + CHECK_FOUND_ELEMENT = 5000 + CHECK_FOUND_ATTRIBUTE = 5001 # 5001 + CHECK_FOUND_TEXT = 5002 # 5002 + CHECK_FOUND_CDATA = 5003 # 5003 + CHECK_FOUND_ENTITYREF = 5004 # 5004 + CHECK_FOUND_ENTITY = 5005 # 5005 + CHECK_FOUND_PI = 5006 # 5006 + CHECK_FOUND_COMMENT = 5007 # 5007 + CHECK_FOUND_DOCTYPE = 5008 # 5008 + CHECK_FOUND_FRAGMENT = 5009 # 5009 + CHECK_FOUND_NOTATION = 5010 # 5010 + CHECK_UNKNOWN_NODE = 5011 # 5011 + CHECK_ENTITY_TYPE = 5012 # 5012 + CHECK_NO_PARENT = 5013 # 5013 + CHECK_NO_DOC = 5014 # 5014 + CHECK_NO_NAME = 5015 # 5015 + CHECK_NO_ELEM = 5016 # 5016 + CHECK_WRONG_DOC = 5017 # 5017 + CHECK_NO_PREV = 5018 # 5018 + CHECK_WRONG_PREV = 5019 # 5019 + CHECK_NO_NEXT = 5020 # 5020 + CHECK_WRONG_NEXT = 5021 # 5021 + CHECK_NOT_DTD = 5022 # 5022 + CHECK_NOT_ATTR = 5023 # 5023 + CHECK_NOT_ATTR_DECL = 5024 # 5024 + CHECK_NOT_ELEM_DECL = 5025 # 5025 + CHECK_NOT_ENTITY_DECL = 5026 # 5026 + CHECK_NOT_NS_DECL = 5027 # 5027 + CHECK_NO_HREF = 5028 # 5028 + CHECK_WRONG_PARENT = 5029 # 5029 + CHECK_NS_SCOPE = 5030 # 5030 + CHECK_NS_ANCESTOR = 5031 # 5031 + CHECK_NOT_UTF8 = 5032 # 5032 + CHECK_NO_DICT = 5033 # 5033 + CHECK_NOT_NCNAME = 5034 # 5034 + CHECK_OUTSIDE_DICT = 5035 # 5035 + CHECK_WRONG_NAME = 5036 # 5036 + CHECK_NAME_NOT_NULL = 5037 # 5037 + CHECK_ = 5038 # 5033 + CHECK_X = 5039 # 503 + +cdef object __names 
+__names = LxmlErrorLevels._names +for name, value in vars(LxmlErrorLevels).iteritems(): + python.PyDict_SetItem(__names, value, name) + +__names = LxmlErrorDomains._names +for name, value in vars(LxmlErrorDomains).iteritems(): + python.PyDict_SetItem(__names, value, name) + +__names = LxmlErrorTypes._names +for name, value in vars(LxmlErrorTypes).iteritems(): + python.PyDict_SetItem(__names, value, name) Modified: lxml/trunk/src/lxml/xmlschema.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlschema.pxi (original) +++ lxml/trunk/src/lxml/xmlschema.pxi Mon Mar 20 14:49:22 2006 @@ -17,6 +17,7 @@ """Turn a document into an XML Schema validator. """ cdef xmlschema.xmlSchema* _c_schema + cdef _ErrorLog _error_log def __init__(self, _ElementTree etree): cdef _Document doc @@ -30,6 +31,7 @@ xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt) raise XMLSchemaParseError, "Document is not valid XML Schema" xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt) + self._error_log = _ErrorLog() def __dealloc__(self): xmlschema.xmlSchemaFree(self._c_schema) @@ -42,6 +44,7 @@ cdef xmlschema.xmlSchemaValidCtxt* valid_ctxt cdef xmlDoc* c_doc cdef int ret + self._error_log.connect() valid_ctxt = xmlschema.xmlSchemaNewValidCtxt(self._c_schema) c_doc = _fakeRootDoc(etree._doc._c_doc, etree._context_node._c_node) @@ -49,10 +52,11 @@ _destroyFakeDoc(etree._doc._c_doc, c_doc) xmlschema.xmlSchemaFreeValidCtxt(valid_ctxt) + self._error_log.disconnect() if ret == -1: raise XMLSchemaValidateError, "Internal error in XML Schema validation." 
return ret == 0 property error_log: def __get__(self): - return __build_error_log_tuple(self) + return self._error_log.copy() From scoder at codespeak.net Mon Mar 20 14:56:08 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 14:56:09 2006 Subject: [Lxml-checkins] r24597 - lxml/trunk/doc Message-ID: <20060320135608.5DCF5100EB@code0.codespeak.net> Author: scoder Date: Mon Mar 20 14:56:07 2006 New Revision: 24597 Modified: lxml/trunk/doc/main.txt Log: reference lxml 0.9 release Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Mon Mar 20 14:56:07 2006 @@ -16,6 +16,8 @@ News ---- +* 2006-03-20: `lxml 0.9`_ released (`changes for 0.9`_) + * 2005-11-03: `lxml 0.8`_ released (`changes for 0.8`_) * 2005-06-15: `lxml 0.7`_ released (`changes for 0.7`_) @@ -26,6 +28,8 @@ * 2005-04-08: `lxml 0.5`_ released! +.. _`lxml 0.9`: lxml-0.9.tgz + .. _`lxml 0.8`: lxml-0.8.tgz .. _`lxml 0.7`: lxml-0.7.tgz @@ -36,6 +40,8 @@ .. _`lxml 0.5`: lxml-0.5.tgz +.. _`CHANGES for 0.9`: changes-0.9.html + .. _`CHANGES for 0.8`: changes-0.8.html .. _`CHANGES for 0.7`: changes-0.7.html From faassen at codespeak.net Mon Mar 20 15:06:10 2006 From: faassen at codespeak.net (faassen@codespeak.net) Date: Mon Mar 20 15:06:12 2006 Subject: [Lxml-checkins] r24598 - lxml/trunk/doc Message-ID: <20060320140610.A4210100EB@code0.codespeak.net> Author: faassen Date: Mon Mar 20 15:06:05 2006 New Revision: 24598 Modified: lxml/trunk/doc/main.txt Log: Add in another 0.9 link. 
Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Mon Mar 20 15:06:05 2006 @@ -96,6 +96,8 @@ Download -------- +* `lxml 0.9`_ (2006-03-20) + * `lxml 0.8`_ (2005-11-03) * `lxml 0.7`_ (2005-06-15) From faassen at codespeak.net Mon Mar 20 15:36:28 2006 From: faassen at codespeak.net (faassen@codespeak.net) Date: Mon Mar 20 15:36:29 2006 Subject: [Lxml-checkins] r24603 - lxml/trunk Message-ID: <20060320143628.D098B10080@code0.codespeak.net> Author: faassen Date: Mon Mar 20 15:36:27 2006 New Revision: 24603 Modified: lxml/trunk/setup.py Log: A bit more metadata for setup. Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Mon Mar 20 15:36:27 2006 @@ -16,8 +16,19 @@ setup( name = "lxml", version = open('version.txt').read().strip(), - maintainer = 'Infrae', - maintainer_email="faassen@infrae.com", + author="lxml dev team", + author_email="lxml-dev@codespeak.net", + maintainer="lxml dev team", + maintainer_email="lxml-dev@codespeak.net", + url="http://codespeak.net/lxml", + description="Powerful and Pythonic XML processing library based on libxml2/libxslt with an ElementTree API", + long_description="""\ +lxml is a Pythonic binding for the libxml2 and libxslt libraries. It provides +safe and convenient access to these libraries using the ElementTree API. +It extends the ElementTree API significantly to offer support for +XPath, Relax NG, XML Schema, XSLT, c14n and much more. 
+""", + package_dir = {'': 'src'}, packages = ['lxml', 'lxml.tests'], ext_modules = [ From faassen at codespeak.net Mon Mar 20 15:37:56 2006 From: faassen at codespeak.net (faassen@codespeak.net) Date: Mon Mar 20 15:37:56 2006 Subject: [Lxml-checkins] r24604 - lxml/trunk/src/lxml/tests Message-ID: <20060320143756.4C12710080@code0.codespeak.net> Author: faassen Date: Mon Mar 20 15:37:55 2006 New Revision: 24604 Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py Log: Compatibility for Python 2.3, which doesn't have an assertFalse. Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_nsclasses.py (original) +++ lxml/trunk/src/lxml/tests/test_nsclasses.py Mon Mar 20 15:37:55 2006 @@ -10,6 +10,8 @@ from common_imports import etree, HelperTestCase class ETreeNamespaceClassesTestCase(HelperTestCase): + assertFalse = HelperTestCase.failIf + class default_class(etree.ElementBase): pass class maeh_class(etree.ElementBase): From scoder at codespeak.net Mon Mar 20 17:32:21 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 17:32:23 2006 Subject: [Lxml-checkins] r24619 - in lxml/trunk: doc src/lxml Message-ID: <20060320163221.E0472100DC@code0.codespeak.net> Author: scoder Date: Mon Mar 20 17:32:19 2006 New Revision: 24619 Modified: lxml/trunk/doc/api.txt lxml/trunk/src/lxml/xmlerror.pxi Log: docs and tests for error handling and resulting bug fixes for xmlerror.pxi Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Mon Mar 20 17:32:19 2006 @@ -39,6 +39,35 @@ +Error handling on exceptions +---------------------------- + +Libxml2 provides error messages for failures, be it during parsing, schema +validation or XSL transformation. 
Whenever an exception is raised, you can +retrieve the errors that occurred and "might have" led to the problem:: + + >>> lxml.etree.clearErrorLog() + >>> broken_xml = '' + >>> try: + ... lxml.etree.parse(StringIO(broken_xml)) + ... except lxml.etree.XMLSyntaxError, e: + ... pass # just put the exception into e + >>> log = e.error_log.filter_levels(lxml.etree.ErrorLevels.FATAL) + >>> print log + :1:FATAL:PARSER:ERR_TAG_NOT_FINISHED: Premature end of data in tag a line 1 + +This might look a little cryptic at first, but it is the information that +libxml2 gives you. At least the message at the end should give you a hint +what went wrong and you can see that the fatal error (FATAL) happened during +parsing (PARSER) line 1 of a string (). Here, PARSER is the so-called +error domain, see lxml.etree.ErrorDomains for that. You can get it from a log +entry like this:: + + >>> entry = log[0] + >>> print entry.domain_name, entry.type_name, entry.filename + PARSER ERR_TAG_NOT_FINISHED + + xpath method on ElementTree, Element ------------------------------------ @@ -97,6 +126,7 @@ >>> r[0].text 'Text' + XSLT ---- @@ -170,6 +200,7 @@ >>> str(result) '\nA\n' + RelaxNG ------- @@ -202,14 +233,29 @@ >>> relaxng.validate(doc2) 0 -Similar to XSLT, there's also a less efficient but easier shortcut -method to do RelaxNG validation:: +Starting with version 0.9, lxml now has a simple API to report the errors +generated by libxml2. If you want to find out why the validation failed in the +second case, you can look up the error log of the validation process and check +it for relevant messages:: + + >>> log = relaxng.error_log + >>> print log.filter_from_errors() + :1:ERROR:RELAXNGV:ERR_LT_IN_ATTRIBUTE: Did not expect element c there + +You can see that the error (ERROR) happened during RelaxNG validation +(RELAXNGV). The message then tells you what went wrong. Note that this error +is local to the RelaxNG object. It will only contain log entries that +appeared during the validation. 
+ +Similar to XSLT, there's also a less efficient but easier shortcut method to +do RelaxNG validation:: >>> doc.relaxng(relaxng_doc) 1 >>> doc2.relaxng(relaxng_doc) 0 + XMLSchema --------- @@ -245,14 +291,21 @@ >>> xmlschema.validate(doc2) 0 -Similar to XSLT and RelaxNG, there's also a less efficient but easier -shortcut method to do XML Schema validation:: +Error reporting works like for the RelaxNG class:: + + >>> log = xmlschema.error_log + >>> print log.filter_from_errors() + :1:ERROR:SCHEMASV:SCHEMAV_ELEMENT_CONTENT: Element 'c': This element is not expected. Expected is ( b ). + +Similar to XSLT and RelaxNG, there's also a less efficient but easier shortcut +method to do XML Schema validation:: >>> doc.xmlschema(xmlschema_doc) 1 >>> doc2.xmlschema(xmlschema_doc) 0 + xinclude -------- @@ -270,6 +323,7 @@ >>> lxml.etree.tostring(tree.getroot()) '\n\n\n' + write_c14n on ElementTree ------------------------- Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Mon Mar 20 17:32:19 2006 @@ -28,33 +28,34 @@ self.type = error.code self.level = error.level self.line = error.line - self.message = python.PyString_FromString(error.message) + self.message = python.PyString_FromStringAndSize( + error.message, tree.strlen(error.message) - 1) # strip EOL if error.file is NULL: - self.filename = None + self.filename = '' else: self.filename = python.PyString_FromString(error.file) def __repr__(self): - if self._filename: - return "%s/%d[%s]%s/%s: %s" % ( + if self.filename: + return "%s:%d:%s:%s:%s: %s" % ( self.filename, self.line, self.level_name, self.domain_name, self.type_name, self.message) else: - return "[%s]%s/%s: %s" % ( + return "[]:%s:%s:%s: %s" % ( self.level_name, self.domain_name, self.type_name, self.message) property domain_name: def __get__(self): - return LxmlErrorDomains._names[self.domain] + return 
ErrorDomains._names[self.domain] property type_name: def __get__(self): - return LxmlErrorTypes._names[self.type] + return ErrorTypes._names[self.type] property level_name: def __get__(self): - return LxmlErrorLevels._names[self.level] + return ErrorLevels._names[self.level] cdef class _BaseErrorLog: "Immutable base version of an error log." @@ -71,6 +72,12 @@ def __repr__(self): return '\n'.join(map(repr, self._entries)) + def __getitem__(self, index): + return self._entries[index] + + def __len__(self): + return len(self._entries) + def filter_domains(self, domains): cdef _LogEntry entry filtered = [] @@ -85,27 +92,41 @@ cdef _LogEntry entry if not python.PySequence_Check(types): types = (types,) + filtered = [] for entry in self._entries: if entry.type in types: python.PyList_Append(filtered, entry) return _BaseErrorLog(filtered) def filter_levels(self, levels): + """Return a log with all messages of the requested level(s). Takes a + single log level or a sequence.""" cdef _LogEntry entry if not python.PySequence_Check(levels): levels = (levels,) + filtered = [] for entry in self._entries: if entry.level in levels: python.PyList_Append(filtered, entry) return _BaseErrorLog(filtered) def filter_from_level(self, level): + "Return a log with all messages of the requested level of worse." cdef _LogEntry entry + filtered = [] for entry in self._entries: if entry.level >= level: python.PyList_Append(filtered, entry) return _BaseErrorLog(filtered) + def filter_from_errors(self): + "Convenience method to get all error messages." + return self.filter_from_level(ErrorLevels.ERROR) + + def filter_from_warnings(self): + "Convenience method to get all warnings or worse." 
+ return self.filter_from_level(ErrorLevels.WARNING) + cdef class _ErrorLog(_BaseErrorLog): def __init__(self): _BaseErrorLog.__init__(self, []) @@ -164,9 +185,9 @@ _ErrorLog.__init__(self) import logging self._level_map = { - LxmlErrorLevels.WARNING : logging.WARNING, - LxmlErrorLevels.ERROR : logging.ERROR, - LxmlErrorLevels.FATAL : logging.CRITICAL + ErrorLevels.WARNING : logging.WARNING, + ErrorLevels.ERROR : logging.ERROR, + ErrorLevels.FATAL : logging.CRITICAL } self._varsOf = vars if logger_name: @@ -226,14 +247,14 @@ ## CONSTANTS FROM "xmlerror.pxd" ################################################################################ -class LxmlErrorLevels: +class ErrorLevels: _names = {} NONE = 0 WARNING = 1 # A simple warning ERROR = 2 # A recoverable error FATAL = 3 # A fatal error -class LxmlErrorDomains: +class ErrorDomains: _names = {} NONE = 0 PARSER = 1 # The XML parser @@ -263,7 +284,7 @@ WRITER = 25 # The xmlwriter module MODULE = 26 # The dynamically loaded module modu -class LxmlErrorTypes: +class ErrorTypes: _names = {} ERR_OK = 0 ERR_INTERNAL_ERROR = 1 @@ -963,14 +984,14 @@ CHECK_X = 5039 # 503 cdef object __names -__names = LxmlErrorLevels._names -for name, value in vars(LxmlErrorLevels).iteritems(): +__names = ErrorLevels._names +for name, value in vars(ErrorLevels).iteritems(): python.PyDict_SetItem(__names, value, name) -__names = LxmlErrorDomains._names -for name, value in vars(LxmlErrorDomains).iteritems(): +__names = ErrorDomains._names +for name, value in vars(ErrorDomains).iteritems(): python.PyDict_SetItem(__names, value, name) -__names = LxmlErrorTypes._names -for name, value in vars(LxmlErrorTypes).iteritems(): +__names = ErrorTypes._names +for name, value in vars(ErrorTypes).iteritems(): python.PyDict_SetItem(__names, value, name) From scoder at codespeak.net Mon Mar 20 17:38:47 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 17:38:48 2006 Subject: [Lxml-checkins] r24620 - in lxml/trunk: doc src/lxml 
Message-ID: <20060320163847.499D6100DC@code0.codespeak.net> Author: scoder Date: Mon Mar 20 17:38:46 2006 New Revision: 24620 Modified: lxml/trunk/doc/api.txt lxml/trunk/src/lxml/xmlerror.pxi Log: doc updates Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Mon Mar 20 17:38:46 2006 @@ -59,9 +59,9 @@ This might look a little cryptic at first, but it is the information that libxml2 gives you. At least the message at the end should give you a hint what went wrong and you can see that the fatal error (FATAL) happened during -parsing (PARSER) line 1 of a string (). Here, PARSER is the so-called -error domain, see lxml.etree.ErrorDomains for that. You can get it from a log -entry like this:: +parsing (PARSER) line 1 of a string (, or filename if available). +Here, PARSER is the so-called error domain, see lxml.etree.ErrorDomains for +that. You can get it from a log entry like this:: >>> entry = log[0] >>> print entry.domain_name, entry.type_name, entry.filename Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Mon Mar 20 17:38:46 2006 @@ -248,6 +248,7 @@ ################################################################################ class ErrorLevels: + "Libxml2 error levels" _names = {} NONE = 0 WARNING = 1 # A simple warning @@ -255,6 +256,7 @@ FATAL = 3 # A fatal error class ErrorDomains: + "Libxml2 error domains" _names = {} NONE = 0 PARSER = 1 # The XML parser @@ -285,6 +287,7 @@ MODULE = 26 # The dynamically loaded module modu class ErrorTypes: + "Libxml2 error types" _names = {} ERR_OK = 0 ERR_INTERNAL_ERROR = 1 From scoder at codespeak.net Mon Mar 20 17:44:02 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 17:44:09 2006 Subject: [Lxml-checkins] r24621 - 
lxml/trunk/src/lxml Message-ID: <20060320164402.A378C100ED@code0.codespeak.net> Author: scoder Date: Mon Mar 20 17:44:01 2006 New Revision: 24621 Modified: lxml/trunk/src/lxml/xmlerror.pxi Log: doc updates, added filter_from_fatals Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Mon Mar 20 17:44:01 2006 @@ -119,8 +119,12 @@ python.PyList_Append(filtered, entry) return _BaseErrorLog(filtered) + def filter_from_fatals(self): + "Convenience method to get all fatal error messages." + return self.filter_from_level(ErrorLevels.FATAL) + def filter_from_errors(self): - "Convenience method to get all error messages." + "Convenience method to get all error messages or worse." return self.filter_from_level(ErrorLevels.ERROR) def filter_from_warnings(self): From scoder at codespeak.net Mon Mar 20 17:45:54 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 17:45:56 2006 Subject: [Lxml-checkins] r24622 - lxml/trunk/doc Message-ID: <20060320164554.A23CD100ED@code0.codespeak.net> Author: scoder Date: Mon Mar 20 17:45:53 2006 New Revision: 24622 Modified: lxml/trunk/doc/api.txt Log: doc updates Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Mon Mar 20 17:45:53 2006 @@ -42,8 +42,8 @@ Error handling on exceptions ---------------------------- -Libxml2 provides error messages for failures, be it during parsing, schema -validation or XSL transformation. Whenever an exception is raised, you can +Libxml2 provides error messages for failures, be it during parsing, XPath +evaluation or schema validation. 
Whenever an exception is raised, you can retrieve the errors that occurred and "might have" led to the problem:: >>> lxml.etree.clearErrorLog() @@ -67,6 +67,8 @@ >>> print entry.domain_name, entry.type_name, entry.filename PARSER ERR_TAG_NOT_FINISHED +XSLT error messages are not currently available through the lxml API. + xpath method on ElementTree, Element ------------------------------------ From scoder at codespeak.net Mon Mar 20 18:50:24 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 18:50:25 2006 Subject: [Lxml-checkins] r24625 - lxml/trunk Message-ID: <20060320175024.0997A100D3@code0.codespeak.net> Author: scoder Date: Mon Mar 20 18:50:23 2006 New Revision: 24625 Modified: lxml/trunk/CHANGES.txt Log: hint on error logging API Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Mar 20 18:50:23 2006 @@ -7,6 +7,8 @@ Features added -------------- +* Error logging API for libxml2 error messages + * Various performance improvements * Benchmark script for lxml, ElementTree and cElementTree From faassen at codespeak.net Mon Mar 20 18:50:41 2006 From: faassen at codespeak.net (faassen@codespeak.net) Date: Mon Mar 20 18:50:42 2006 Subject: [Lxml-checkins] r24626 - in lxml/trunk: doc src/lxml/tests Message-ID: <20060320175041.0B2FB100D3@code0.codespeak.net> Author: faassen Date: Mon Mar 20 18:50:39 2006 New Revision: 24626 Modified: lxml/trunk/doc/extensions.txt lxml/trunk/doc/main.txt lxml/trunk/doc/namespace_extensions.txt lxml/trunk/src/lxml/tests/test_nsclasses.py Log: Fixes to doctests. In particular, modified test_nsclasses.py so we can use ellipsis feature of doctests (...) to suppress most of tracebacks, which otherwise might differ between Python versions and such. 
Modified: lxml/trunk/doc/extensions.txt ============================================================================== --- lxml/trunk/doc/extensions.txt (original) +++ lxml/trunk/doc/extensions.txt Mon Mar 20 18:50:39 2006 @@ -10,7 +10,7 @@ receives a dummy object. It is currently None, but do not rely on this as it may become meaningful in later versions of lxml. The other arguments are provided by the respective call in the XPath expression. Any number of -arguments is allowed. +arguments is allowed:: >>> def hello(dummy, a): ... return "Hello %s" % a @@ -89,7 +89,7 @@ Extension functions can return any data type for which there is an XPath equivalent. This includes numbers, boolean values, elements and lists of -elements. +elements:: >>> def returnsFloat(_): ... return 1.7 Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Mon Mar 20 18:50:39 2006 @@ -72,7 +72,7 @@ .. _`extends this API`: api.html -.. _`extension functions`_: extensions.html +.. _`extension functions`: extensions.html .. _XPath: http://www.w3.org/TR/xpath Modified: lxml/trunk/doc/namespace_extensions.txt ============================================================================== --- lxml/trunk/doc/namespace_extensions.txt (original) +++ lxml/trunk/doc/namespace_extensions.txt Mon Mar 20 18:50:39 2006 @@ -2,22 +2,24 @@ Implementing namespaces with the Namespace class ================================================ -(Also see extensions.txt) +Also see `extensions`_. + +.. _`extensions`: extensions.html Imagine, you have a namespace called 'http://hui.de/honk' and have to treat all of its elements in a specific way, say, to find out if they are really honking. You could provide a function called 'is_honking' that handles that:: ->>> def is_honking(honk_element): -... return honk_element.get('honking') == 'true' + >>> def is_honking(honk_element): + ... 
return honk_element.get('honking') == 'true' Then you can use it:: ->>> from lxml.etree import XML ->>> honk_element = XML('') ->>> print is_honking(honk_element) -True + >>> from lxml.etree import XML + >>> honk_element = XML('') + >>> print is_honking(honk_element) + True Not too bad, right? Now, imagine, you only want to do that to certain elements from that namespace and prevent others from being passed to @@ -33,34 +35,34 @@ =================== lxml allows you to implement namespaces, in a rather literal -sense. You can do the above like this: +sense. You can do the above like this:: ->>> from lxml.etree import Namespace, ElementBase ->>> class HonkElement(ElementBase): -... def honking(self): -... return self.get('honking') == 'true' -... honking = property(honking) + >>> from lxml.etree import Namespace, ElementBase + >>> class HonkElement(ElementBase): + ... def honking(self): + ... return self.get('honking') == 'true' + ... honking = property(honking) Now you can build the new namespace by calling the Namespace class:: ->>> namespace = Namespace('http://hui.de/honk') + >>> namespace = Namespace('http://hui.de/honk') and then register the new element type with that namespace:: ->>> namespace['honk'] = HonkElement + >>> namespace['honk'] = HonkElement After this, you create and use your XML elements:: ->>> honk_element = XML('') ->>> print honk_element.honking -True + >>> honk_element = XML('') + >>> print honk_element.honking + True The same works when creating elements by hand:: ->>> from lxml.etree import Element ->>> honk_element = Element('{http://hui.de/honk}honk', honking='true') ->>> print honk_element.honking -True + >>> from lxml.etree import Element + >>> honk_element = Element('{http://hui.de/honk}honk', honking='true') + >>> print honk_element.honking + True Essentially, what this allows you to do, is giving elements a specific API based on their namespace and element name. 
@@ -73,17 +75,17 @@ constructor, neither must there be any internal state (except for their XML representation). Element instances are created and garbage collected at need, so there is no way to predict when and how often a -constructor would be called. Even worse, when the __init__ method is -called, the object may not even be initialized yet to represent the -XML tag, so there is not much use in providing an __init__ method in -subclasses. +constructor would be called. Even worse, when the ``__init__`` method +is called, the object may not even be initialized yet to represent the +XML tag, so there is not much use in providing an ``__init__`` method +in subclasses. However, there is one possible way to do things on element -initialization. Element classes have an _init() method that can be +initialization. Element classes have an ``_init()`` method that can be overridden. It can be used to modify the XML tree, e.g. to construct special children or verify and update attributes. -The semantics of _init() are as follows:: +The semantics of ``_init()`` are as follows: * It is called at least once on element instantiation time. That is, when a Python representation of the element is created. At that @@ -111,16 +113,13 @@ Example:: ->>> honk_element = XML('') ->>> print honk_element.honking -True ->>> print honk_element[0].honking -Traceback (most recent call last): - File "/usr/lib/python2.4/doctest.py", line 1243, in __run - compileflags, 1) in test.globs - File "", line 1, in ? - print honk_element[0].honking -AttributeError: 'etree._Element' object has no attribute 'honking' + >>> honk_element = XML('') + >>> print honk_element.honking + True + >>> print honk_element[0].honking + Traceback (most recent call last): + ... + AttributeError: 'etree._Element' object has no attribute 'honking' You can therefore provide one implementation per element name in each namespace and have lxml select the right one on the fly. 
If you want @@ -134,30 +133,27 @@ class for a namespace, that is used if no specific element class is provided. Again, you only have to pass None as an element name:: ->>> class HonkNSElement(ElementBase): -... def honk(self): -... return "HONK" ->>> namespace[None] = HonkNSElement - ->>> class HonkElement(HonkNSElement): -... def honking(self): -... return self.get('honking') == 'true' -... honking = property(honking) ->>> namespace['honk'] = HonkElement + >>> class HonkNSElement(ElementBase): + ... def honk(self): + ... return "HONK" + >>> namespace[None] = HonkNSElement + + >>> class HonkElement(HonkNSElement): + ... def honking(self): + ... return self.get('honking') == 'true' + ... honking = property(honking) + >>> namespace['honk'] = HonkElement Now you can use your new namespace:: ->>> honk_element = XML('') ->>> print honk_element.honking -True ->>> print honk_element.honk() -HONK ->>> print honk_element[0].honk() -HONK ->>> print honk_element[0].honking -Traceback (most recent call last): - File "/usr/lib/python2.4/doctest.py", line 1243, in __run - compileflags, 1) in test.globs - File "", line 1, in ? - print honk_element[0].honking -AttributeError: 'HonkNSElement' object has no attribute 'honking' + >>> honk_element = XML('') + >>> print honk_element.honking + True + >>> print honk_element.honk() + HONK + >>> print honk_element[0].honk() + HONK + >>> print honk_element[0].honking + Traceback (most recent call last): + ... 
+ AttributeError: 'HonkNSElement' object has no attribute 'honking' Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_nsclasses.py (original) +++ lxml/trunk/src/lxml/tests/test_nsclasses.py Mon Mar 20 18:50:39 2006 @@ -147,8 +147,11 @@ def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeNamespaceClassesTestCase)]) + optionflags = doctest.NORMALIZE_WHITESPACE|doctest.ELLIPSIS suite.addTests( - [doctest.DocFileSuite('../../../doc/namespace_extensions.txt')]) + [doctest.DocFileSuite('../../../doc/namespace_extensions.txt', + optionflags=optionflags)], + ) return suite if __name__ == '__main__': From faassen at codespeak.net Mon Mar 20 18:59:15 2006 From: faassen at codespeak.net (faassen@codespeak.net) Date: Mon Mar 20 18:59:17 2006 Subject: [Lxml-checkins] r24627 - lxml/trunk/doc Message-ID: <20060320175915.39AC0100EF@code0.codespeak.net> Author: faassen Date: Mon Mar 20 18:59:14 2006 New Revision: 24627 Removed: lxml/trunk/doc/xpath.txt Log: Deprecated API. Deleted: /lxml/trunk/doc/xpath.txt ============================================================================== --- /lxml/trunk/doc/xpath.txt Mon Mar 20 18:59:14 2006 +++ (empty file) @@ -1,149 +0,0 @@ -XPath extension functions -========================= - -Note: this document describes an API that is now DEPRECATED. If you -want to configure XPath extension functions, please read -namespace_extensions.txt and extensions.txt. - -Extension functions are defined in Python. In order to use such a function, it -must have a name by which it can be called in XPath, and an optional namespace -URI. - -As the first argument a function will always receive the -XPathEvaluator object that is currently in the process of evaluating -the XPath expression. - -First, let's create a simple XPath function:: - - >>> def foo(evaluator, a): - ... 
return "Hello %s" % a - -Now we need to register it as part of an extension. An extension is a -simple dictionary with tuple keys and function values. The tuple keys -are composed of a namespace URI (or `None`), and the name of the -function in XPath. We'll use the namespace URI `None` for now, to -indicate the function isn't in any particular namespace:: - - >>> extension = { (None, 'foo') : foo } - -Now we're going to create an XPath evaluator. To do that, we first need a -document that the evaluator is evaluating against:: - - >>> from lxml import etree - >>> from StringIO import StringIO - >>> f = StringIO('') - >>> doc = etree.parse(f) - -The XPathEvaluator takes the document, an optional dictionary of -namespace prefix to namespace URI mappings, and an optional list of -extensions. We'll just pass in extensions for now:: - - >>> e = etree.XPathEvaluator(doc, extensions=[extension]) - -Now we can use the evaluator to make XPath queries against the document:: - - >>> r = e.evaluate('/a') - >>> r[0].tag - 'a' - -This is not using the extension function. We'll try a very simple -XPath query that does now. It doesn't really use the document at all:: - - >>> e.evaluate("foo('world')") - 'Hello world' - -Let's create a slightly more complicated extension now, one that uses -a namespaced function. We'll just reuse the function foo, but register -it under a different name, and a namespace:: - - >>> extension2 = { ('http://codespeak.net/ns/test', 'different-name') : foo } - -Now let's set up an evaluator to use it. We'll also register our -original extension. As we want to use a namespace function, we first -need to register a namespace prefix we can use in the XPath -expression, so that we can access the namespace. This just like when -you'd want to access a namespaced XML element or attribute:: - - >>> e = etree.XPathEvaluator(doc, - ... namespaces={'test': 'http://codespeak.net/ns/test'}, - ... 
extensions=[extension, extension2]) - -Since we registered the original extension too for this evaluator, our -`foo` extension function still works:: - - >>> e.evaluate("foo('world')") - 'Hello world' - -But now, we also have access to our namespaced `different-name` -extension function:: - - >>> e.evaluate("test:different-name('there')") - 'Hello there' - -Besides strings is possible to return a number of different objects -from extension functions, such as numbers (floats) and booleans:: - - >>> def returnsFloat(evaluator): - ... return 1.7 - >>> def returnsBool(evaluator): - ... return True - >>> extension3 = { (None, 'returnsFloat') : returnsFloat, - ... (None, 'returnsBool') : returnsBool } - >>> e = etree.XPathEvaluator(doc, None, extensions=[extension3]) - >>> e.evaluate("returnsFloat()") - 1.7 - >>> e.evaluate("returnsBool()") - True - -It's also possible to register namespaces with a evaluator later on:: - - >>> f = StringIO('') - >>> ns_doc = etree.parse(f) - >>> e = etree.XPathEvaluator(ns_doc) - >>> e.registerNamespace('foo', 'http://codespeak.net/ns/test') - >>> e.evaluate('/foo:a')[0].tag - '{http://codespeak.net/ns/test}a' - -Note: the following is rather shaky and like won't work yet in the real world. - -It is also possible to return lists of nodes, and this way it is possible -to return XML structures:: - - >>> def returnsNodeSet(evaluator): - ... results = etree.Element('results') - ... result = etree.SubElement(results, 'result') - ... result.text = "Alpha" - ... result2 = etree.SubElement(results, 'result') - ... result2.text = "Beta" - ... result3 = etree.SubElement(results, 'result') - ... result3.text = "Gamma" - ... 
return [results] - >>> extension4 = { (None, 'returnsNodeSet') : returnsNodeSet } - >>> e = etree.XPathEvaluator(doc, None, extensions=[extension4]) - >>> r = e.evaluate("returnsNodeSet()") - >>> len(r) - 1 - >>> t = r[0] - >>> t.tag - 'results' - >>> len(t) - 3 - >>> t[0].tag - 'result' - >>> t[0].text - 'Alpha' - >>> t[1].text - 'Beta' - -It's even possible to filter that result set with another XPath -expression:: - - >>> r = e.evaluate("returnsNodeSet()/result") - >>> len(r) - 3 - >>> r[0].tag - 'result' - >>> r[1].tag - 'result' - >>> r[0].text - 'Alpha' From scoder at codespeak.net Mon Mar 20 19:03:14 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 19:03:15 2006 Subject: [Lxml-checkins] r24628 - lxml/trunk Message-ID: <20060320180314.201BC100EE@code0.codespeak.net> Author: scoder Date: Mon Mar 20 19:03:13 2006 New Revision: 24628 Modified: lxml/trunk/INSTALL.txt Log: state that Pyrex is only needed for generating the C source, not for compilation Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Mon Mar 20 19:03:13 2006 @@ -12,7 +12,11 @@ * libxslt 1.1.12 (newer versions should work). It can be found here: http://xmlsoft.org/XSLT/downloads.html -You also need Pyrex (0.9.3) to compile the software. The official +The lxml wrapper around libxml2 and libxslt is written in Pyrex_. If you are +using a release version of lxml, it should come with the generated C file in +the source distribution. There is no need to regenerate it using Pyrex. +However, if you want to use more recent SVN versions of lxml or want to work +on the code, you will need Pyrex to regenerate the C-code. 
The official homepage can be found here: * http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ From scoder at codespeak.net Mon Mar 20 19:04:53 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 19:04:54 2006 Subject: [Lxml-checkins] r24629 - lxml/trunk Message-ID: <20060320180453.66FA5100EE@code0.codespeak.net> Author: scoder Date: Mon Mar 20 19:04:52 2006 New Revision: 24629 Modified: lxml/trunk/INSTALL.txt Log: missing link Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Mon Mar 20 19:04:52 2006 @@ -21,6 +21,8 @@ * http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ +.. Pyrex_: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ + However, see below for an updated version if you have any trouble using it, especially with GCC 4.x. From faassen at codespeak.net Mon Mar 20 19:07:53 2006 From: faassen at codespeak.net (faassen@codespeak.net) Date: Mon Mar 20 19:07:54 2006 Subject: [Lxml-checkins] r24630 - lxml/trunk/doc Message-ID: <20060320180753.27BAF100EE@code0.codespeak.net> Author: faassen Date: Mon Mar 20 19:07:52 2006 New Revision: 24630 Modified: lxml/trunk/doc/main.txt Log: Reference to SAX API. Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Mon Mar 20 19:07:52 2006 @@ -66,6 +66,9 @@ `implementing namespaces`_ using tag specific element classes. This is a simple way to write arbitrary XML driven APIs on top of lxml. +lxml also offers a `SAX compliant API`_, that works with the SAX support +in the standard library. + .. _`ElementTree API`: http://effbot.org/zone/element-index.htm .. _`ElementTree compatibility overview`: compatibility.html @@ -86,6 +89,8 @@ .. _`implementing namespaces`: namespace_extensions.html +.. 
_`SAX compliant API`: sax.html + Mailing list ------------ From faassen at codespeak.net Mon Mar 20 19:08:00 2006 From: faassen at codespeak.net (faassen@codespeak.net) Date: Mon Mar 20 19:08:01 2006 Subject: [Lxml-checkins] r24631 - lxml/trunk Message-ID: <20060320180800.3D561100EE@code0.codespeak.net> Author: faassen Date: Mon Mar 20 19:07:59 2006 New Revision: 24631 Modified: lxml/trunk/INSTALL.txt Log: Minor tweaks. Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Mon Mar 20 19:07:59 2006 @@ -13,7 +13,7 @@ http://xmlsoft.org/XSLT/downloads.html The lxml wrapper around libxml2 and libxslt is written in Pyrex_. If you are -using a release version of lxml, it should come with the generated C file in +using a released version of lxml, it should come with the generated C file in the source distribution. There is no need to regenerate it using Pyrex. However, if you want to use more recent SVN versions of lxml or want to work on the code, you will need Pyrex to regenerate the C-code. The official @@ -21,7 +21,7 @@ * http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ -.. Pyrex_: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ +.. _Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ However, see below for an updated version if you have any trouble using it, especially with GCC 4.x. From faassen at codespeak.net Mon Mar 20 19:12:20 2006 From: faassen at codespeak.net (faassen@codespeak.net) Date: Mon Mar 20 19:12:22 2006 Subject: [Lxml-checkins] r24632 - lxml/trunk Message-ID: <20060320181220.E5673100EF@code0.codespeak.net> Author: faassen Date: Mon Mar 20 19:12:20 2006 New Revision: 24632 Modified: lxml/trunk/INSTALL.txt Log: Further text tweaks. 
Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Mon Mar 20 19:12:20 2006 @@ -12,21 +12,17 @@ * libxslt 1.1.12 (newer versions should work). It can be found here: http://xmlsoft.org/XSLT/downloads.html -The lxml wrapper around libxml2 and libxslt is written in Pyrex_. If you are -using a released version of lxml, it should come with the generated C file in -the source distribution. There is no need to regenerate it using Pyrex. -However, if you want to use more recent SVN versions of lxml or want to work -on the code, you will need Pyrex to regenerate the C-code. The official -homepage can be found here: +You also need Python 2.3 or later. -* http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ +If you are not using a released version of lxml and want to build lxml +from SVN, you also need Pyrex_. If you are using a released version of +lxml, it should come with the generated C file in the source +distribution, so no Pyrex is needed then. .. _Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ -However, see below for an updated version if you have any trouble -using it, especially with GCC 4.x. - -You also need Python 2.3 or later. +See also the notes on building gcc 4.0 below if you are having trouble +with Pyrex. 
Installation ------------ From scoder at codespeak.net Mon Mar 20 19:13:01 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 19:13:02 2006 Subject: [Lxml-checkins] r24633 - lxml/trunk Message-ID: <20060320181301.9F19C100EF@code0.codespeak.net> Author: scoder Date: Mon Mar 20 19:13:00 2006 New Revision: 24633 Modified: lxml/trunk/MANIFEST.in Log: exclude pyrex.txt from dist Modified: lxml/trunk/MANIFEST.in ============================================================================== --- lxml/trunk/MANIFEST.in (original) +++ lxml/trunk/MANIFEST.in Mon Mar 20 19:13:00 2006 @@ -2,3 +2,4 @@ recursive-include src *.pyx *.pxd *.pxi *.py etree.c etree.h recursive-include src/lxml/tests *.rng *.xslt *.xml recursive-include doc *.txt *.xml *.mgp +exclude doc/pyrex.txt From faassen at codespeak.net Mon Mar 20 19:13:20 2006 From: faassen at codespeak.net (faassen@codespeak.net) Date: Mon Mar 20 19:13:20 2006 Subject: [Lxml-checkins] r24634 - lxml/trunk Message-ID: <20060320181320.0F7F7100EF@code0.codespeak.net> Author: faassen Date: Mon Mar 20 19:13:19 2006 New Revision: 24634 Modified: lxml/trunk/INSTALL.txt Log: One more text tweak. Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Mon Mar 20 19:13:19 2006 @@ -14,10 +14,9 @@ You also need Python 2.3 or later. -If you are not using a released version of lxml and want to build lxml -from SVN, you also need Pyrex_. If you are using a released version of -lxml, it should come with the generated C file in the source -distribution, so no Pyrex is needed then. +If you want to build lxml from SVN, you also need Pyrex_. If you are using +a released version of lxml, it should come with the generated C file +in the source distribution, so no Pyrex is needed in that case. .. 
_Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ From scoder at codespeak.net Mon Mar 20 19:21:42 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 19:21:43 2006 Subject: [Lxml-checkins] r24636 - lxml/trunk/src/lxml/tests Message-ID: <20060320182142.48BD2100F0@code0.codespeak.net> Author: scoder Date: Mon Mar 20 19:21:41 2006 New Revision: 24636 Modified: lxml/trunk/src/lxml/tests/test_xslt.py Log: removed xpath.txt doctests from test_xslt.py Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Mon Mar 20 19:21:41 2006 @@ -342,8 +342,6 @@ suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeXSLTTestCase)]) suite.addTests( - [doctest.DocFileSuite('../../../doc/xpath.txt')]) - suite.addTests( [doctest.DocFileSuite('../../../doc/extensions.txt')]) return suite From scoder at codespeak.net Mon Mar 20 19:51:14 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 19:51:15 2006 Subject: [Lxml-checkins] r24638 - lxml/trunk Message-ID: <20060320185114.56ADB100F3@code0.codespeak.net> Author: scoder Date: Mon Mar 20 19:51:12 2006 New Revision: 24638 Modified: lxml/trunk/setup.py Log: make it compile without Pyrex and generate the source with Pyrex Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Mon Mar 20 19:51:12 2006 @@ -1,5 +1,9 @@ import os +def flags(cmd): + wf, rf, ef = os.popen3(cmd) + return rf.read().strip().split(' ') + try: from setuptools import setup from setuptools.extension import Extension @@ -7,11 +11,27 @@ from distutils.core import setup from distutils.extension import Extension -from Pyrex.Distutils import build_ext as build_pyx - -def flags(cmd): - wf, rf, ef = os.popen3(cmd) - return 
rf.read().strip().split(' ') +try: + from Pyrex.Distutils import build_ext as build_pyx + etree_extension = { + 'cmdclass' : {'build_ext': build_pyx}, + 'ext_modules' : [ Extension( + "lxml.etree", + sources = ["src/lxml/etree.pyx"], + extra_compile_args = ['-w'] + flags('xslt-config --cflags'), + extra_link_args = flags('xslt-config --libs') + )] + } +except ImportError: + print "NOTE: Trying to build without Pyrex, needs generated etree.c" + etree_extension = { + 'ext_modules' : [ Extension( + "lxml.etree", + sources = ["src/lxml/etree.c"], + extra_compile_args = ['-w'] + flags('xslt-config --cflags'), + extra_link_args = flags('xslt-config --libs') + )] + } setup( name = "lxml", @@ -31,11 +51,5 @@ package_dir = {'': 'src'}, packages = ['lxml', 'lxml.tests'], - ext_modules = [ - Extension( - "lxml.etree", - sources = ["src/lxml/etree.pyx"], - extra_compile_args = ['-w'] + flags('xslt-config --cflags'), - extra_link_args = flags('xslt-config --libs'))], - cmdclass = {'build_ext': build_pyx} + **etree_extension ) From scoder at codespeak.net Mon Mar 20 20:03:41 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 20:03:42 2006 Subject: [Lxml-checkins] r24639 - lxml/tag/lxml-0.9 Message-ID: <20060320190341.E13BC100F2@code0.codespeak.net> Author: scoder Date: Mon Mar 20 20:03:30 2006 New Revision: 24639 Added: lxml/tag/lxml-0.9/ - copied from r24638, lxml/trunk/ Log: [TAG] lxml 0.9 From scoder at codespeak.net Mon Mar 20 20:24:58 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 20:25:00 2006 Subject: [Lxml-checkins] r24641 - lxml/trunk Message-ID: <20060320192458.A0DE8100F3@code0.codespeak.net> Author: scoder Date: Mon Mar 20 20:24:57 2006 New Revision: 24641 Modified: lxml/trunk/setup.py Log: clean up of setup.py Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Mon Mar 20 20:24:57 2006 @@ -11,27 +11,14 
@@ from distutils.core import setup from distutils.extension import Extension +setup_args = {} try: from Pyrex.Distutils import build_ext as build_pyx - etree_extension = { - 'cmdclass' : {'build_ext': build_pyx}, - 'ext_modules' : [ Extension( - "lxml.etree", - sources = ["src/lxml/etree.pyx"], - extra_compile_args = ['-w'] + flags('xslt-config --cflags'), - extra_link_args = flags('xslt-config --libs') - )] - } + sources = ["src/lxml/etree.pyx"] + setup_args['cmdclass'] = {'build_ext' : build_pyx} except ImportError: - print "NOTE: Trying to build without Pyrex, needs generated etree.c" - etree_extension = { - 'ext_modules' : [ Extension( - "lxml.etree", - sources = ["src/lxml/etree.c"], - extra_compile_args = ['-w'] + flags('xslt-config --cflags'), - extra_link_args = flags('xslt-config --libs') - )] - } + print "*NOTE*: Trying to build without Pyrex, needs pre-generated 'src/lxml/etree.c' !" + sources = ["src/lxml/etree.c"] setup( name = "lxml", @@ -41,15 +28,24 @@ maintainer="lxml dev team", maintainer_email="lxml-dev@codespeak.net", url="http://codespeak.net/lxml", - description="Powerful and Pythonic XML processing library based on libxml2/libxslt with an ElementTree API", + + description="Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API.", + long_description="""\ -lxml is a Pythonic binding for the libxml2 and libxslt libraries. It provides +lxml is a Pythonic binding for the libxml2 and libxslt libraries. It provides safe and convenient access to these libraries using the ElementTree API. -It extends the ElementTree API significantly to offer support for -XPath, Relax NG, XML Schema, XSLT, c14n and much more. + +It extends the ElementTree API significantly to offer support for XPath, +RelaxNG, XML Schema, XSLT, C14N and much more. 
""", package_dir = {'': 'src'}, packages = ['lxml', 'lxml.tests'], - **etree_extension + ext_modules = [ Extension( + "lxml.etree", + sources = sources, + extra_compile_args = ['-w'] + flags('xslt-config --cflags'), + extra_link_args = flags('xslt-config --libs') + )], + **setup_args ) From scoder at codespeak.net Mon Mar 20 20:30:14 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 20:30:14 2006 Subject: [Lxml-checkins] r24642 - lxml/trunk Message-ID: <20060320193014.66FB0100EE@code0.codespeak.net> Author: scoder Date: Mon Mar 20 20:30:08 2006 New Revision: 24642 Modified: lxml/trunk/CHANGES.txt Log: typo Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Mar 20 20:30:08 2006 @@ -48,7 +48,7 @@ * Segfault in XSLT.tostring() -* ElementTree object no longer interfere, Elements can be root of different +* ElementTree objects no longer interfere, Elements can be root of different ElementTrees at the same time * document('') now works in XSLT documents read from files (in-memory From scoder at codespeak.net Mon Mar 20 20:36:50 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 20 20:36:52 2006 Subject: [Lxml-checkins] r24643 - lxml/trunk/doc Message-ID: <20060320193650.AB797100F3@code0.codespeak.net> Author: scoder Date: Mon Mar 20 20:36:39 2006 New Revision: 24643 Modified: lxml/trunk/doc/sax.txt Log: typo Modified: lxml/trunk/doc/sax.txt ============================================================================== --- lxml/trunk/doc/sax.txt (original) +++ lxml/trunk/doc/sax.txt Mon Mar 20 20:36:39 2006 @@ -7,7 +7,7 @@ compatible with that in the Python core (xml.sax), so is useful for interfacing lxml with code that uses the Python core SAX facilities. 
-Producing SAX events for an ElementTree or Element +Producing SAX events from an ElementTree or Element -------------------------------------------------- Let's make a tree we can generate SAX events for:: From ogrisel at codespeak.net Mon Mar 20 21:08:59 2006 From: ogrisel at codespeak.net (ogrisel@codespeak.net) Date: Mon Mar 20 21:09:01 2006 Subject: [Lxml-checkins] r24644 - lxml/trunk Message-ID: <20060320200859.8ED20100F4@code0.codespeak.net> Author: ogrisel Date: Mon Mar 20 21:08:53 2006 New Revision: 24644 Modified: lxml/trunk/INSTALL.txt Log: added a reference to the easy_install installation procedure Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Mon Mar 20 21:08:53 2006 @@ -26,7 +26,13 @@ Installation ------------ -Type:: +If you have easy_install_, you can use:: + + sudo easy_install lxml + +.. _easy_install: http://peak.telecommunity.com/DevCenter/EasyInstall + +Otherwise type:: python setup.py install @@ -103,7 +109,7 @@ include_dirs = guess_include_dirs() Into something like this:: - + include_dirs = ['/usr/include/libxml2'] If that still doesn't work, try registering the extension in a From scoder at codespeak.net Tue Mar 21 10:09:11 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 21 10:09:13 2006 Subject: [Lxml-checkins] r24666 - lxml/trunk Message-ID: <20060321090911.A3167100F9@code0.codespeak.net> Author: scoder Date: Tue Mar 21 10:09:09 2006 New Revision: 24666 Modified: lxml/trunk/INSTALL.txt Log: some clean up, section on binary lxml 0.9 for Windows Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Tue Mar 21 10:09:09 2006 @@ -12,6 +12,8 @@ * libxslt 1.1.12 (newer versions should work). 
It can be found here: http://xmlsoft.org/XSLT/downloads.html +See below for instructions how to get those for Windows. + You also need Python 2.3 or later. If want to build lxml from SVN, you also need Pyrex_. If you are using @@ -23,6 +25,7 @@ See also the notes on building gcc 4.0 below if you are having trouble with Pyrex. + Installation ------------ @@ -49,8 +52,28 @@ This will not install lxml, but if you place lxml's "src" on your PYTHONPATH somehow, you can import it and play with it. -Building lxml with gcc 4.0 --------------------------- + +Installation on Windows +----------------------- + +As always, installation on Windows is different. If you do not want to go +through the hassle of compiling everything by hand, you can use the binary +distribution of libxml2 and libxslt. It is available here: + +http://www.zlatkovic.com/libxml.en.html + +Note that you need both libxml2 and libxslt, as well as iconv and zlib. You +can then download a binary version of lxml 0.9 for Python 2.4 from the +following address: + +http://carcass.dhs.org/lxml-0.9.win32-py2.4.exe + +It was kindly contributed by Steve Howe. If this doesn't work for you, feel +free to report to the mailing list. + + +Building lxml with gcc 4.0 or Python 2.4 +---------------------------------------- Pyrex 0.9.3.1 generates C code that gcc 4.0 does not accept. Pending an official release of a version of Pyrex that does work with gcc 4.0, the lxml @@ -78,9 +101,9 @@ http://codespeak.net/lxml/Pyrex-0.9.3-gcc4-small.patch -It may however actually be that at the time you read this, this extra -patch has been applied by the distributions as well. You may still -encounter the following problem when building the extension:: +It may however actually be that at the time you read this, this extra patch +has been applied by the distributions as well. 
You may still encounter the +following problem when building the extension on Python 2.4:: TypeError: swig_sources() takes exactly 2 arguments (3 given) @@ -93,7 +116,9 @@ def swig_sources (self, sources, *otherargs): -The above install files have these three changes applied. +The above install files have these changes applied. It should do no harm if +you install them instead of the official Pyrex version. + Troubleshooting --------------- @@ -120,6 +145,7 @@ .. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev + Running the tests ----------------- From scoder at codespeak.net Tue Mar 21 10:54:02 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 21 10:54:03 2006 Subject: [Lxml-checkins] r24669 - lxml/pyrex/Pyrex/Compiler Message-ID: <20060321095402.4FB43100FE@code0.codespeak.net> Author: scoder Date: Tue Mar 21 10:54:01 2006 New Revision: 24669 Modified: lxml/pyrex/Pyrex/Compiler/Code.py Log: store target Python version in code writer (default: 2.1.0) Modified: lxml/pyrex/Pyrex/Compiler/Code.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Code.py (original) +++ lxml/pyrex/Pyrex/Compiler/Code.py Tue Mar 21 10:54:01 2006 @@ -21,7 +21,7 @@ in_try_finally = 0 - def __init__(self, outfile_name): + def __init__(self, outfile_name, python_version=(2,1,0)): self.f = open_new_file(outfile_name) self.level = 0 self.bol = 1 @@ -30,6 +30,7 @@ self.error_label = None self.filename_table = {} self.filename_list = [] + self.python_version = python_version def putln(self, code = ""): if self.marker and self.bol: From scoder at codespeak.net Tue Mar 21 10:54:44 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 21 10:54:46 2006 Subject: [Lxml-checkins] r24670 - lxml/pyrex/Pyrex/Compiler Message-ID: <20060321095444.058E1100FE@code0.codespeak.net> Author: scoder Date: Tue Mar 21 10:54:44 2006 New Revision: 24670 Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py 
Log: write better code for tuple setup on Python 2.4 (if selected by code writer target) Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/ExprNodes.py (original) +++ lxml/pyrex/Pyrex/Compiler/ExprNodes.py Tue Mar 21 10:54:44 2006 @@ -1759,21 +1759,38 @@ # Tuple constructor. def generate_operation_code(self, code): - code.putln( - "%s = PyTuple_New(%s); if (!%s) %s" % ( - self.result, - len(self.args), - self.result, - code.error_goto(self.pos))) - for i in range(len(self.args)): - arg = self.args[i] - if not arg.result_in_temp(): - code.put_incref(arg.result, arg.type) + if code.python_version >= (2,4): + args = [''] + for arg in self.args: + result = arg.result + args.append(result) + if not arg.result_in_temp(): + code.put_incref(result, arg.type) + arg_string = ', '.join(args) code.putln( - "PyTuple_SET_ITEM(%s, %s, %s);" % ( + "%s = PyTuple_Pack(%s%s); if (!%s) %s" % ( self.result, - i, - arg.result)) + len(self.args), + arg_string, + self.result, + code.error_goto(self.pos))) + else: # Python version < 2.4 + code.putln( + "%s = PyTuple_New(%s); if (!%s) %s" % ( + self.result, + len(self.args), + self.result, + code.error_goto(self.pos))) + for i in range(len(self.args)): + arg = self.args[i] + result = arg.result + if not arg.result_in_temp(): + code.put_incref(result, arg.type) + code.putln( + "PyTuple_SET_ITEM(%s, %s, %s);" % ( + self.result, + i, + result)) def generate_subexpr_disposal_code(self, code): # We call generate_post_assignment_code here instead From scoder at codespeak.net Tue Mar 21 11:20:02 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 21 11:20:04 2006 Subject: [Lxml-checkins] r24671 - lxml/pyrex/Pyrex/Compiler Message-ID: <20060321102002.DBCF110105@code0.codespeak.net> Author: scoder Date: Tue Mar 21 11:20:01 2006 New Revision: 24671 Modified: lxml/pyrex/Pyrex/Compiler/Main.py 
lxml/pyrex/Pyrex/Compiler/Nodes.py Log: allow supplying Python target version at command line Modified: lxml/pyrex/Pyrex/Compiler/Main.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Main.py (original) +++ lxml/pyrex/Pyrex/Compiler/Main.py Tue Mar 21 11:20:01 2006 @@ -30,9 +30,10 @@ # modules {string : ModuleScope} # include_directories [string] - def __init__(self, include_directories): + def __init__(self, include_directories, target_version): self.modules = {"__builtin__" : BuiltinScope()} self.include_directories = include_directories + self.target_version = target_version def find_module(self, module_name, relative_to = None, pos = None, need_pxd = 1): @@ -230,6 +231,12 @@ self.object_file = None self.extension_file = None +def python_target_version(options): + if options.python_version: + return tuple(map(int, options.python_version.split('.'))) + else: + return (2,1,0) + def compile(source, options = None, c_compile = 0, c_link = 0): """ @@ -245,7 +252,8 @@ options.c_only = 0 if c_link: options.obj_only = 0 - context = Context(options.include_path) + context = Context(options.include_path, + python_target_version(options)) return context.compile(source, options) #------------------------------------------------------------------------ @@ -265,7 +273,8 @@ sources = args if options.show_version: print >>sys.stderr, "Pyrex version %s" % Version.version - context = Context(options.include_path) + context = Context(options.include_path, + python_target_version(options)) for source in sources: try: result = context.compile(source, options) @@ -286,6 +295,7 @@ default_options = CompilationOptions( show_version = 0, use_listing_file = 0, + python_version = "2.1.0", errors_to_stderr = 1, c_only = 1, obj_only = 1, Modified: lxml/pyrex/Pyrex/Compiler/Nodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Nodes.py (original) +++ 
lxml/pyrex/Pyrex/Compiler/Nodes.py Tue Mar 21 11:20:01 2006 @@ -127,14 +127,15 @@ if entry.visibility == 'public': public_extension_types.append(entry) if public_vars_and_funcs or public_extension_types: + python_version = env.context.target_version #import os #outname_base, _ = os.path.splitext(result.c_file) #result.h_file = outname_base + ".h" #result.i_file = outname_base + ".pxi" result.h_file = replace_suffix(result.c_file, ".h") result.i_file = replace_suffix(result.c_file, ".pxi") - h_code = Code.CCodeWriter(result.h_file) - i_code = Code.PyrexCodeWriter(result.i_file) + h_code = Code.CCodeWriter(result.h_file, python_version) + i_code = Code.PyrexCodeWriter(result.i_file, python_version) for entry in public_vars_and_funcs: h_code.putln("extern %s;" % entry.type.declaration_code( @@ -162,9 +163,10 @@ i_code.dedent() def generate_c_code(self, env, result): + python_version = env.context.target_version modules = [] self.find_referenced_modules(env, modules, {}) - code = Code.CCodeWriter(result.c_file) + code = Code.CCodeWriter(result.c_file, python_version) code.init_labels() self.generate_module_preamble(env, modules, code) for module in modules: From scoder at codespeak.net Tue Mar 21 11:31:33 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 21 11:31:36 2006 Subject: [Lxml-checkins] r24672 - lxml/trunk Message-ID: <20060321103133.2B2DE1010A@code0.codespeak.net> Author: scoder Date: Tue Mar 21 11:31:25 2006 New Revision: 24672 Modified: lxml/trunk/INSTALL.txt Log: typo Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Tue Mar 21 11:31:25 2006 @@ -22,8 +22,8 @@ .. _Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ -See also the notes on building gcc 4.0 below if you are having trouble -with Pyrex. +See also the notes on building with gcc 4.0 below if you are having +trouble with Pyrex. 
Installation From scoder at codespeak.net Tue Mar 21 15:57:49 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 21 15:57:51 2006 Subject: [Lxml-checkins] r24690 - lxml/pyrex/Pyrex/Distutils Message-ID: <20060321145749.1E2B6100EC@code0.codespeak.net> Author: scoder Date: Tue Mar 21 15:57:47 2006 New Revision: 24690 Modified: lxml/pyrex/Pyrex/Distutils/build_ext.py Log: make unused swig keyword argument a tuple argument Modified: lxml/pyrex/Pyrex/Distutils/build_ext.py ============================================================================== --- lxml/pyrex/Pyrex/Distutils/build_ext.py (original) +++ lxml/pyrex/Pyrex/Distutils/build_ext.py Tue Mar 21 15:57:47 2006 @@ -32,7 +32,7 @@ if self.compiler == 'mingw32': self.swig_cpp = 1 - def swig_sources (self, sources, extension=None): + def swig_sources (self, sources, *otherargs): if not self.extensions: return From scoder at codespeak.net Tue Mar 21 16:46:40 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 21 16:46:42 2006 Subject: [Lxml-checkins] r24695 - lxml/trunk/doc Message-ID: <20060321154640.CF31F10113@code0.codespeak.net> Author: scoder Date: Tue Mar 21 16:46:39 2006 New Revision: 24695 Modified: lxml/trunk/doc/api.txt Log: do not check libxml2 error message in doctest Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Tue Mar 21 16:46:39 2006 @@ -296,8 +296,15 @@ Error reporting works like for the RelaxNG class:: >>> log = xmlschema.error_log - >>> print log.filter_from_errors() - :1:ERROR:SCHEMASV:SCHEMAV_ELEMENT_CONTENT: Element 'c': This element is not expected. Expected is ( b ). 
+ >>> errors = log.filter_from_errors() + >>> print errors[0].domain_name + SCHEMASV + >>> print errors[0].type_name + SCHEMAV_ELEMENT_CONTENT + +If you were to print this log entry, you would get something like the following:: + + :1:ERROR::SCHEMAV_ELEMENT_CONTENT: Element 'c': This element is not expected. Expected is ( b ). Similar to XSLT and RelaxNG, there's also a less efficient but easier shortcut method to do XML Schema validation:: From scoder at codespeak.net Wed Mar 22 10:40:21 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 22 10:40:23 2006 Subject: [Lxml-checkins] r24773 - lxml/trunk Message-ID: <20060322094021.7CEB2100DA@code0.codespeak.net> Author: scoder Date: Wed Mar 22 10:40:20 2006 New Revision: 24773 Modified: lxml/trunk/setup.py Log: added Trove classifiers for cheeseshop Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Wed Mar 22 10:40:20 2006 @@ -39,6 +39,18 @@ RelaxNG, XML Schema, XSLT, C14N and much more. 
""", + classifiers = [ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'Intended Audience :: Information Technology', + 'License :: OSI Approved :: BSD License', + 'Programming Language :: Python', + 'Programming Language :: C', + 'Operating System :: OS Independent', + 'Topic :: Text Processing :: Markup :: XML', + 'Topic :: Software Development :: Libraries :: Python Modules' + ], + package_dir = {'': 'src'}, packages = ['lxml', 'lxml.tests'], ext_modules = [ Extension( From scoder at codespeak.net Wed Mar 22 12:17:30 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 22 12:17:31 2006 Subject: [Lxml-checkins] r24782 - lxml/trunk Message-ID: <20060322111730.BC4A41011A@code0.codespeak.net> Author: scoder Date: Wed Mar 22 12:17:23 2006 New Revision: 24782 Modified: lxml/trunk/INSTALL.txt Log: make install notes more readable, put in remarks on MacOS-X Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Wed Mar 22 12:17:23 2006 @@ -4,6 +4,8 @@ Requirements ------------ +You need Python 2.3 or later. + You need libxml2 and libxslt, in particular: * libxml 2.6.16 (newer versions should work). It can be found here: @@ -12,13 +14,13 @@ * libxslt 1.1.12 (newer versions should work). It can be found here: http://xmlsoft.org/XSLT/downloads.html -See below for instructions how to get those for Windows. - -You also need Python 2.3 or later. - -If want to build lxml from SVN, you also need Pyrex_. If you are using -a released version of lxml, it should come with the generated C file -in the source distribution, so no Pyrex is needed in that case. +See below for instructions how to get these for Windows. On MacOS-X 10.4, you +can use the installed system libraries and the binary egg distribution of +lxml. + +If you want to build lxml from SVN, you also need Pyrex_. 
If you are using a +released version of lxml, it should come with the generated C file in the +source distribution, so no Pyrex is needed in that case. .. _Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ @@ -29,19 +31,21 @@ Installation ------------ -If you have easy_install_, you can use:: +If you have easy_install_, you can run the following as super-user:: - sudo easy_install lxml + easy_install lxml .. _easy_install: http://peak.telecommunity.com/DevCenter/EasyInstall -Otherwise type:: +This has been reported to work on both Linux and MacOS-X 10.4. Otherwise +type:: python setup.py install to compile and install the library. -It's also possible to do this:: +If you do not want to install lxml right away, but first test it from the +source directory, you can do this:: python setup.py build_ext -i @@ -49,8 +53,8 @@ make -This will not install lxml, but if you place lxml's "src" on your -PYTHONPATH somehow, you can import it and play with it. +If you then place lxml's "src" directory on your PYTHONPATH somehow, you can +import lxml.etree and play with it. Installation on Windows @@ -68,8 +72,13 @@ http://carcass.dhs.org/lxml-0.9.win32-py2.4.exe -It was kindly contributed by Steve Howe. If this doesn't work for you, feel -free to report to the mailing list. +or the egg distribution from + +http://cheeseshop.python.org/pypi/lxml + +The egg can directly be installed using easy_install_. Both builds were kindly +contributed by Steve Howe. If they do not work for you, feel free to report to +the mailing list. 
Building lxml with gcc 4.0 or Python 2.4 From scoder at codespeak.net Wed Mar 22 12:24:49 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 22 12:24:50 2006 Subject: [Lxml-checkins] r24784 - lxml/trunk Message-ID: <20060322112449.9F7F21011A@code0.codespeak.net> Author: scoder Date: Wed Mar 22 12:24:43 2006 New Revision: 24784 Modified: lxml/trunk/INSTALL.txt Log: small clarification Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Wed Mar 22 12:24:43 2006 @@ -37,13 +37,12 @@ .. _easy_install: http://peak.telecommunity.com/DevCenter/EasyInstall -This has been reported to work on both Linux and MacOS-X 10.4. Otherwise -type:: +This has been reported to work on Linux, MacOS-X 10.4 and Windows, as long as +libxml2 and libxslt are installed. To compile and install lxml without +easy_install, type:: python setup.py install -to compile and install the library. - If you do not want to install lxml right away, but first test it from the source directory, you can do this:: From scoder at codespeak.net Wed Mar 22 12:28:33 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 22 12:28:34 2006 Subject: [Lxml-checkins] r24786 - lxml/trunk Message-ID: <20060322112833.442121011C@code0.codespeak.net> Author: scoder Date: Wed Mar 22 12:28:31 2006 New Revision: 24786 Modified: lxml/trunk/INSTALL.txt Log: small clarification Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Wed Mar 22 12:28:31 2006 @@ -39,7 +39,7 @@ This has been reported to work on Linux, MacOS-X 10.4 and Windows, as long as libxml2 and libxslt are installed. 
To compile and install lxml without -easy_install, type:: +easy_install, download the source tar-ball, unpack it and type:: python setup.py install From scoder at codespeak.net Wed Mar 22 17:03:30 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 22 17:03:31 2006 Subject: [Lxml-checkins] r24807 - lxml/trunk Message-ID: <20060322160330.2B3B010036@code0.codespeak.net> Author: scoder Date: Wed Mar 22 17:03:29 2006 New Revision: 24807 Modified: lxml/trunk/INSTALL.txt Log: removed obsolete section on troubleshooting with old setup.py script, added paragraph on contacting the list if things really don't work Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Wed Mar 22 17:03:29 2006 @@ -27,6 +27,12 @@ See also the notes on building with gcc 4.0 below if you are having trouble with Pyrex. +If you have read these instructions and still cannot manage to install lxml, +you can check the archives of the `mailing list`_ to see if your problem is +known or otherwise send a mail to the list. + + .. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev + Installation ------------ @@ -128,30 +134,6 @@ you install them instead of the official Pyrex version. -Troubleshooting ---------------- - -lxml's setup.py tries to be smart and uses libxml2's xml2-config to -find the installation path of libxml2. If this cannot be found or -doesn't work for some reason or another, try editing the setup.py, -by changing this:: - - # if you want to configure include dir manually, you can do so here, - # for instance: - # include_dirs = ['/usr/include/libxml2'] - include_dirs = guess_include_dirs() - -Into something like this:: - - include_dirs = ['/usr/include/libxml2'] - -If that still doesn't work, try registering the extension in a -different way entirely; there's a commented block of code at the -bottom of setup.py with an example. 
- -If you still have trouble, contact us on the `mailing list`_. - -.. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev Running the tests From scoder at codespeak.net Wed Mar 22 19:15:57 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 22 19:15:58 2006 Subject: [Lxml-checkins] r24812 - lxml/branch/lxml-0.9.x Message-ID: <20060322181557.07A181014B@code0.codespeak.net> Author: scoder Date: Wed Mar 22 19:15:56 2006 New Revision: 24812 Added: lxml/branch/lxml-0.9.x/ - copied from r24811, lxml/trunk/ Log: new branch for maintenance on lxml 0.9.x From ogrisel at codespeak.net Wed Mar 22 22:33:01 2006 From: ogrisel at codespeak.net (ogrisel@codespeak.net) Date: Wed Mar 22 22:33:03 2006 Subject: [Lxml-checkins] r24824 - lxml/trunk Message-ID: <20060322213301.F364F10145@code0.codespeak.net> Author: ogrisel Date: Wed Mar 22 22:33:01 2006 New Revision: 24824 Modified: lxml/trunk/setup.py Log: adding .xml .rng .xslt file for the lxml.tests package Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Wed Mar 22 22:33:01 2006 @@ -53,6 +53,7 @@ package_dir = {'': 'src'}, packages = ['lxml', 'lxml.tests'], + package_data = {'lxml.tests': ['*.xml', '*.rng', '*.xslt']}, ext_modules = [ Extension( "lxml.etree", sources = sources, From ogrisel at codespeak.net Wed Mar 22 22:48:02 2006 From: ogrisel at codespeak.net (ogrisel@codespeak.net) Date: Wed Mar 22 22:48:04 2006 Subject: [Lxml-checkins] r24828 - lxml/trunk Message-ID: <20060322214802.70BF510145@code0.codespeak.net> Author: ogrisel Date: Wed Mar 22 22:48:01 2006 New Revision: 24828 Modified: lxml/trunk/CHANGES.txt lxml/trunk/setup.py Log: actually it's better to simply remove lxml.tests out of site-packages + package_data is new in 2.4 and would have broken 2.3 compat Modified: lxml/trunk/CHANGES.txt ============================================================================== 
--- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Mar 22 22:48:01 2006 @@ -1,6 +1,14 @@ lxml changelog ============== +1.0 +=== + +Bugs fixed +---------- + +* Removed lxml.tests out of the package (while remaining in the source tarball) + 0.9 (2006-03-20) ================ Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Wed Mar 22 22:48:01 2006 @@ -52,8 +52,7 @@ ], package_dir = {'': 'src'}, - packages = ['lxml', 'lxml.tests'], - package_data = {'lxml.tests': ['*.xml', '*.rng', '*.xslt']}, + packages = ['lxml'], ext_modules = [ Extension( "lxml.etree", sources = sources, From ogrisel at codespeak.net Wed Mar 22 22:51:04 2006 From: ogrisel at codespeak.net (ogrisel@codespeak.net) Date: Wed Mar 22 22:51:05 2006 Subject: [Lxml-checkins] r24829 - lxml/branch/lxml-0.9.x Message-ID: <20060322215104.E103810145@code0.codespeak.net> Author: ogrisel Date: Wed Mar 22 22:51:04 2006 New Revision: 24829 Modified: lxml/branch/lxml-0.9.x/CHANGES.txt lxml/branch/lxml-0.9.x/setup.py Log: same changed in trunk and 0.9.x branch as this is a bugfix Modified: lxml/branch/lxml-0.9.x/CHANGES.txt ============================================================================== --- lxml/branch/lxml-0.9.x/CHANGES.txt (original) +++ lxml/branch/lxml-0.9.x/CHANGES.txt Wed Mar 22 22:51:04 2006 @@ -1,6 +1,14 @@ lxml changelog ============== +0.9.1 +===== + +Bugs fixed +---------- + +* Removed lxml.tests out of the package (while remaining in the source tarball) + 0.9 (2006-03-20) ================ Modified: lxml/branch/lxml-0.9.x/setup.py ============================================================================== --- lxml/branch/lxml-0.9.x/setup.py (original) +++ lxml/branch/lxml-0.9.x/setup.py Wed Mar 22 22:51:04 2006 @@ -52,7 +52,7 @@ ], package_dir = {'': 'src'}, - packages = ['lxml', 'lxml.tests'], + packages = ['lxml'], ext_modules = [ Extension( 
"lxml.etree", sources = sources, From ogrisel at codespeak.net Wed Mar 22 22:51:10 2006 From: ogrisel at codespeak.net (ogrisel@codespeak.net) Date: Wed Mar 22 22:51:11 2006 Subject: [Lxml-checkins] r24830 - lxml/trunk Message-ID: <20060322215110.119F31014C@code0.codespeak.net> Author: ogrisel Date: Wed Mar 22 22:51:09 2006 New Revision: 24830 Modified: lxml/trunk/CHANGES.txt Log: same changed in trunk and 0.9.x branch as this is a bugfix Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Mar 22 22:51:09 2006 @@ -1,8 +1,8 @@ lxml changelog ============== -1.0 -=== +0.9.1 +===== Bugs fixed ---------- From scoder at codespeak.net Thu Mar 23 06:49:57 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 06:49:59 2006 Subject: [Lxml-checkins] r24852 - lxml/trunk Message-ID: <20060323054957.9A10410153@code0.codespeak.net> Author: scoder Date: Thu Mar 23 06:49:56 2006 New Revision: 24852 Modified: lxml/trunk/CHANGES.txt Log: englishification :) Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Mar 23 06:49:56 2006 @@ -7,7 +7,7 @@ Bugs fixed ---------- -* Removed lxml.tests out of the package (while remaining in the source tarball) +* lxml.tests package will no longer be installed (is still in source tar) 0.9 (2006-03-20) ================ From scoder at codespeak.net Thu Mar 23 06:50:39 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 06:50:39 2006 Subject: [Lxml-checkins] r24853 - lxml/branch/lxml-0.9.x Message-ID: <20060323055039.0CA7010153@code0.codespeak.net> Author: scoder Date: Thu Mar 23 06:50:38 2006 New Revision: 24853 Modified: lxml/branch/lxml-0.9.x/CHANGES.txt Log: merged in doc update from trunk Modified: lxml/branch/lxml-0.9.x/CHANGES.txt 
============================================================================== --- lxml/branch/lxml-0.9.x/CHANGES.txt (original) +++ lxml/branch/lxml-0.9.x/CHANGES.txt Thu Mar 23 06:50:38 2006 @@ -7,7 +7,7 @@ Bugs fixed ---------- -* Removed lxml.tests out of the package (while remaining in the source tarball) +* lxml.tests package will no longer be installed (is still in source tar) 0.9 (2006-03-20) ================ From scoder at codespeak.net Thu Mar 23 07:28:14 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 07:28:16 2006 Subject: [Lxml-checkins] r24854 - lxml/trunk Message-ID: <20060323062814.0C9E710153@code0.codespeak.net> Author: scoder Date: Thu Mar 23 07:28:13 2006 New Revision: 24854 Modified: lxml/trunk/INSTALL.txt Log: note on running the test suite from the tar distribution Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Thu Mar 23 07:28:13 2006 @@ -50,7 +50,7 @@ python setup.py install If you do not want to install lxml right away, but first test it from the -source directory, you can do this:: +source directory, you can build it in-place like this:: python setup.py build_ext -i @@ -134,16 +134,17 @@ you install them instead of the official Pyrex version. +Running the tests and reporting errors +-------------------------------------- - -Running the tests ------------------ - -You can run the main tests by using:: +The source distribution (tgz) contains a test suite for lxml. You can run it +from the top-level directory:: python test.py -Alternatively, you can use:: +Note that the test script only tests the in-place build (see "Installation" +above), as it searches the "src" directory. 
You can use the following +one-step command to trigger an in-place build and test it:: make test @@ -156,9 +157,9 @@ python selftest2.py -If the tests give failures, errors, or worse, segmentation faults, -we'd really like to know. Please contact us on the `mailing list`_, -and please specify the version of libxml2, libxslt and Python you were -using. +If the tests give failures, errors, or worse, segmentation faults, we'd really +like to know. Please contact us on the `mailing list`_, and please specify the +version of lxml, libxml2, libxslt and Python you were using, as well as your +operating system type (Linux, Windows, MacOs, ...). .. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev From scoder at codespeak.net Thu Mar 23 07:28:36 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 07:28:37 2006 Subject: [Lxml-checkins] r24855 - lxml/branch/lxml-0.9.x Message-ID: <20060323062836.6696310153@code0.codespeak.net> Author: scoder Date: Thu Mar 23 07:28:30 2006 New Revision: 24855 Modified: lxml/branch/lxml-0.9.x/INSTALL.txt Log: merged in doc update from trunk Modified: lxml/branch/lxml-0.9.x/INSTALL.txt ============================================================================== --- lxml/branch/lxml-0.9.x/INSTALL.txt (original) +++ lxml/branch/lxml-0.9.x/INSTALL.txt Thu Mar 23 07:28:30 2006 @@ -50,7 +50,7 @@ python setup.py install If you do not want to install lxml right away, but first test it from the -source directory, you can do this:: +source directory, you can build it in-place like this:: python setup.py build_ext -i @@ -134,16 +134,17 @@ you install them instead of the official Pyrex version. +Running the tests and reporting errors +-------------------------------------- - -Running the tests ------------------ - -You can run the main tests by using:: +The source distribution (tgz) contains a test suite for lxml. 
You can run it +from the top-level directory:: python test.py -Alternatively, you can use:: +Note that the test script only tests the in-place build (see "Installation" +above), as it searches the "src" directory. You can use the following +one-step command to trigger an in-place build and test it:: make test @@ -156,9 +157,9 @@ python selftest2.py -If the tests give failures, errors, or worse, segmentation faults, -we'd really like to know. Please contact us on the `mailing list`_, -and please specify the version of libxml2, libxslt and Python you were -using. +If the tests give failures, errors, or worse, segmentation faults, we'd really +like to know. Please contact us on the `mailing list`_, and please specify the +version of lxml, libxml2, libxslt and Python you were using, as well as your +operating system type (Linux, Windows, MacOs, ...). .. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev From scoder at codespeak.net Thu Mar 23 08:41:31 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 08:41:33 2006 Subject: [Lxml-checkins] r24856 - lxml/trunk/src/lxml Message-ID: <20060323074131.BED1B10151@code0.codespeak.net> Author: scoder Date: Thu Mar 23 08:41:30 2006 New Revision: 24856 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/python.pxd Log: more fast paths through _createElement, makes Element() another 20-50% faster in common cases Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 23 08:41:30 2006 @@ -1089,14 +1089,17 @@ cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf, object attrib, object extra) except NULL: cdef xmlNode* c_node - if attrib is None: - attrib = {} - attrib.update(extra) + if python.PyObject_IsTrue(extra): + if attrib is None: + attrib = extra + else: + attrib.update(extra) c_node = tree.xmlNewDocNode(c_doc, NULL, name_utf, 
NULL) - for name, value in attrib.items(): - attr_name_utf = _utf8(name) - value_utf = _utf8(value) - tree.xmlNewProp(c_node, attr_name_utf, value_utf) + if python.PyObject_IsTrue(attrib): + for name, value in attrib.items(): + attr_name_utf = _utf8(name) + value_utf = _utf8(value) + tree.xmlNewProp(c_node, attr_name_utf, value_utf) return c_node cdef xmlNode* _createComment(xmlDoc* c_doc, char* text): Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Thu Mar 23 08:41:30 2006 @@ -34,6 +34,7 @@ cdef int PySequence_Check(object instance) cdef int PyType_Check(object instance) cdef int PyCallable_Check(object instance) + cdef int PyObject_IsTrue(object instance) cdef int PyObject_IsInstance(object instance, object classes) cdef int PyObject_HasAttr(object obj, object attr) From scoder at codespeak.net Thu Mar 23 09:07:12 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 09:07:14 2006 Subject: [Lxml-checkins] r24857 - lxml/trunk/src/lxml Message-ID: <20060323080712.C927810151@code0.codespeak.net> Author: scoder Date: Thu Mar 23 09:07:11 2006 New Revision: 24857 Modified: lxml/trunk/src/lxml/sax.py Log: fixed bug: sax.py used internal function _getNsTag without caring about the side effect of UTF-8 conversion Modified: lxml/trunk/src/lxml/sax.py ============================================================================== --- lxml/trunk/src/lxml/sax.py (original) +++ lxml/trunk/src/lxml/sax.py Thu Mar 23 09:07:11 2006 @@ -1,5 +1,11 @@ from xml.sax.handler import ContentHandler -from lxml.etree import ElementTree, Element, SubElement, _getNsTag +from lxml.etree import ElementTree, Element, SubElement + +def _getNsTag(tag): + if tag[0] == '{': + return tag[1:].split('}', 1) + else: + return tag class ElementTreeContentHandler(object, ContentHandler): """Build an lxml ElementTree from SAX events. 
From scoder at codespeak.net Thu Mar 23 09:09:45 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 09:09:46 2006 Subject: [Lxml-checkins] r24858 - lxml/trunk/src/lxml Message-ID: <20060323080945.694A710151@code0.codespeak.net> Author: scoder Date: Thu Mar 23 09:09:34 2006 New Revision: 24858 Modified: lxml/trunk/src/lxml/sax.py Log: bugfix to bugfix: return tuple also in the default case Modified: lxml/trunk/src/lxml/sax.py ============================================================================== --- lxml/trunk/src/lxml/sax.py (original) +++ lxml/trunk/src/lxml/sax.py Thu Mar 23 09:09:34 2006 @@ -5,7 +5,7 @@ if tag[0] == '{': return tag[1:].split('}', 1) else: - return tag + return None, tag class ElementTreeContentHandler(object, ContentHandler): """Build an lxml ElementTree from SAX events. From scoder at codespeak.net Thu Mar 23 09:10:27 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 09:10:28 2006 Subject: [Lxml-checkins] r24859 - lxml/branch/lxml-0.9.x/src/lxml Message-ID: <20060323081027.6596310151@code0.codespeak.net> Author: scoder Date: Thu Mar 23 09:10:26 2006 New Revision: 24859 Modified: lxml/branch/lxml-0.9.x/src/lxml/sax.py Log: bug fix from trunk: sax.py used internal function _getNsTag without caring about the side effect of UTF-8 conversion Modified: lxml/branch/lxml-0.9.x/src/lxml/sax.py ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/sax.py (original) +++ lxml/branch/lxml-0.9.x/src/lxml/sax.py Thu Mar 23 09:10:26 2006 @@ -1,5 +1,11 @@ from xml.sax.handler import ContentHandler -from lxml.etree import ElementTree, Element, SubElement, _getNsTag +from lxml.etree import ElementTree, Element, SubElement + +def _getNsTag(tag): + if tag[0] == '{': + return tag[1:].split('}', 1) + else: + return None, tag class ElementTreeContentHandler(object, ContentHandler): """Build an lxml ElementTree from SAX events. 
From scoder at codespeak.net Thu Mar 23 09:11:59 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 09:12:00 2006 Subject: [Lxml-checkins] r24860 - lxml/trunk/src/lxml Message-ID: <20060323081159.976B210151@code0.codespeak.net> Author: scoder Date: Thu Mar 23 09:11:53 2006 New Revision: 24860 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/python.pxd Log: clean up, make _getNsTag and _getFilenameForFile internal C functions (speeds up calling) Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 23 09:11:53 2006 @@ -1089,13 +1089,13 @@ cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf, object attrib, object extra) except NULL: cdef xmlNode* c_node - if python.PyObject_IsTrue(extra): + if extra: if attrib is None: attrib = extra else: attrib.update(extra) c_node = tree.xmlNewDocNode(c_doc, NULL, name_utf, NULL) - if python.PyObject_IsTrue(attrib): + if attrib: for name, value in attrib.items(): attr_name_utf = _utf8(name) value_utf = _utf8(value) @@ -1528,7 +1528,7 @@ else: raise TypeError, "Argument must be string or unicode." -def _getNsTag(tag): +cdef _getNsTag(tag): """Given a tag, find namespace URI and tag name. Return None for NS uri if no namespace URI available. """ @@ -1562,7 +1562,7 @@ else: return s -def _getFilenameForFile(source): +cdef _getFilenameForFile(source): """Given a Python File or Gzip object, give filename back. Returns None if not a file object. 
Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Thu Mar 23 09:11:53 2006 @@ -34,7 +34,6 @@ cdef int PySequence_Check(object instance) cdef int PyType_Check(object instance) cdef int PyCallable_Check(object instance) - cdef int PyObject_IsTrue(object instance) cdef int PyObject_IsInstance(object instance, object classes) cdef int PyObject_HasAttr(object obj, object attr) From scoder at codespeak.net Thu Mar 23 09:23:27 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 09:23:28 2006 Subject: [Lxml-checkins] r24861 - lxml/branch/lxml-0.9.x/src/lxml Message-ID: <20060323082327.EE1CD10151@code0.codespeak.net> Author: scoder Date: Thu Mar 23 09:23:26 2006 New Revision: 24861 Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Log: merged in updates from trunk: performance updates in _createElement, C-ification of helper functions Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Thu Mar 23 09:23:26 2006 @@ -1089,14 +1089,17 @@ cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf, object attrib, object extra) except NULL: cdef xmlNode* c_node - if attrib is None: - attrib = {} - attrib.update(extra) + if extra: + if attrib is None: + attrib = extra + else: + attrib.update(extra) c_node = tree.xmlNewDocNode(c_doc, NULL, name_utf, NULL) - for name, value in attrib.items(): - attr_name_utf = _utf8(name) - value_utf = _utf8(value) - tree.xmlNewProp(c_node, attr_name_utf, value_utf) + if attrib: + for name, value in attrib.items(): + attr_name_utf = _utf8(name) + value_utf = _utf8(value) + tree.xmlNewProp(c_node, attr_name_utf, value_utf) return c_node cdef xmlNode* _createComment(xmlDoc* c_doc, char* text): @@ 
-1525,7 +1528,7 @@ else: raise TypeError, "Argument must be string or unicode." -def _getNsTag(tag): +cdef _getNsTag(tag): """Given a tag, find namespace URI and tag name. Return None for NS uri if no namespace URI available. """ @@ -1559,7 +1562,7 @@ else: return s -def _getFilenameForFile(source): +cdef _getFilenameForFile(source): """Given a Python File or Gzip object, give filename back. Returns None if not a file object. From scoder at codespeak.net Thu Mar 23 10:12:55 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 10:12:56 2006 Subject: [Lxml-checkins] r24863 - in lxml/pyrex: Pyrex/Compiler dist Message-ID: <20060323091255.22CA210151@code0.codespeak.net> Author: scoder Date: Thu Mar 23 10:12:31 2006 New Revision: 24863 Modified: lxml/pyrex/Pyrex/Compiler/Code.py lxml/pyrex/Pyrex/Compiler/ExprNodes.py lxml/pyrex/Pyrex/Compiler/Main.py lxml/pyrex/Pyrex/Compiler/Nodes.py lxml/pyrex/dist/Pyrex-0.9.3.1-1.noarch.rpm lxml/pyrex/dist/Pyrex-0.9.3.1-1.src.rpm lxml/pyrex/dist/Pyrex-0.9.3.1.tar.gz Log: reverted last updates regarding Python version specific code (too difficult to use), replaced by conditional compilation to make sure the required Python API is available at compile time Modified: lxml/pyrex/Pyrex/Compiler/Code.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Code.py (original) +++ lxml/pyrex/Pyrex/Compiler/Code.py Thu Mar 23 10:12:31 2006 @@ -21,7 +21,7 @@ in_try_finally = 0 - def __init__(self, outfile_name, python_version=(2,1,0)): + def __init__(self, outfile_name): self.f = open_new_file(outfile_name) self.level = 0 self.bol = 1 @@ -30,7 +30,6 @@ self.error_label = None self.filename_table = {} self.filename_list = [] - self.python_version = python_version def putln(self, code = ""): if self.marker and self.bol: @@ -57,6 +56,18 @@ if dl > 0: self.level += dl + def begin_require_python(self, hex_version): + self.putln("#if PY_VERSION_HEX >= %#010X" % hex_version) 
+ + def else_require_python(self, hex_version=None): + if hex_version is not None: + self.putln("#elif PY_VERSION_HEX >= %#010X" % hex_version) + else: + self.putln("#else") + + def end_require_python(self): + self.putln("#endif") + def increase_indent(self): self.level = self.level + 1 Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/ExprNodes.py (original) +++ lxml/pyrex/Pyrex/Compiler/ExprNodes.py Thu Mar 23 10:12:31 2006 @@ -1759,38 +1759,39 @@ # Tuple constructor. def generate_operation_code(self, code): - if code.python_version >= (2,4): - args = [''] - for arg in self.args: - result = arg.result - args.append(result) - if not arg.result_in_temp(): - code.put_incref(result, arg.type) - arg_string = ', '.join(args) - code.putln( - "%s = PyTuple_Pack(%s%s); if (!%s) %s" % ( - self.result, - len(self.args), - arg_string, - self.result, - code.error_goto(self.pos))) - else: # Python version < 2.4 + code.begin_require_python(0x020400F0) + args = [''] + for arg in self.args: + result = arg.result + args.append(result) + if not arg.result_in_temp(): + code.put_incref(result, arg.type) + arg_string = ', '.join(args) + code.putln( + "%s = PyTuple_Pack(%s%s); if (!%s) %s" % ( + self.result, + len(self.args), + arg_string, + self.result, + code.error_goto(self.pos))) + code.else_require_python() # else: Python version < 2.4 + code.putln( + "%s = PyTuple_New(%s); if (!%s) %s" % ( + self.result, + len(self.args), + self.result, + code.error_goto(self.pos))) + for i in range(len(self.args)): + arg = self.args[i] + result = arg.result + if not arg.result_in_temp(): + code.put_incref(result, arg.type) code.putln( - "%s = PyTuple_New(%s); if (!%s) %s" % ( - self.result, - len(self.args), + "PyTuple_SET_ITEM(%s, %s, %s);" % ( self.result, - code.error_goto(self.pos))) - for i in range(len(self.args)): - arg = self.args[i] - result = arg.result - if not 
arg.result_in_temp(): - code.put_incref(result, arg.type) - code.putln( - "PyTuple_SET_ITEM(%s, %s, %s);" % ( - self.result, - i, - result)) + i, + result)) + code.end_require_python() def generate_subexpr_disposal_code(self, code): # We call generate_post_assignment_code here instead Modified: lxml/pyrex/Pyrex/Compiler/Main.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Main.py (original) +++ lxml/pyrex/Pyrex/Compiler/Main.py Thu Mar 23 10:12:31 2006 @@ -30,10 +30,9 @@ # modules {string : ModuleScope} # include_directories [string] - def __init__(self, include_directories, target_version): + def __init__(self, include_directories): self.modules = {"__builtin__" : BuiltinScope()} self.include_directories = include_directories - self.target_version = target_version def find_module(self, module_name, relative_to = None, pos = None, need_pxd = 1): @@ -231,13 +230,6 @@ self.object_file = None self.extension_file = None -def python_target_version(options): - if options.python_version: - return tuple(map(int, options.python_version.split('.'))) - else: - return (2,1,0) - - def compile(source, options = None, c_compile = 0, c_link = 0): """ compile(source, options = default_options) @@ -252,8 +244,7 @@ options.c_only = 0 if c_link: options.obj_only = 0 - context = Context(options.include_path, - python_target_version(options)) + context = Context(options.include_path) return context.compile(source, options) #------------------------------------------------------------------------ @@ -273,8 +264,7 @@ sources = args if options.show_version: print >>sys.stderr, "Pyrex version %s" % Version.version - context = Context(options.include_path, - python_target_version(options)) + context = Context(options.include_path) for source in sources: try: result = context.compile(source, options) @@ -295,7 +285,6 @@ default_options = CompilationOptions( show_version = 0, use_listing_file = 0, - python_version = "2.1.0", 
errors_to_stderr = 1, c_only = 1, obj_only = 1, Modified: lxml/pyrex/Pyrex/Compiler/Nodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Nodes.py (original) +++ lxml/pyrex/Pyrex/Compiler/Nodes.py Thu Mar 23 10:12:31 2006 @@ -127,15 +127,14 @@ if entry.visibility == 'public': public_extension_types.append(entry) if public_vars_and_funcs or public_extension_types: - python_version = env.context.target_version #import os #outname_base, _ = os.path.splitext(result.c_file) #result.h_file = outname_base + ".h" #result.i_file = outname_base + ".pxi" result.h_file = replace_suffix(result.c_file, ".h") result.i_file = replace_suffix(result.c_file, ".pxi") - h_code = Code.CCodeWriter(result.h_file, python_version) - i_code = Code.PyrexCodeWriter(result.i_file, python_version) + h_code = Code.CCodeWriter(result.h_file) + i_code = Code.PyrexCodeWriter(result.i_file) for entry in public_vars_and_funcs: h_code.putln("extern %s;" % entry.type.declaration_code( @@ -163,10 +162,9 @@ i_code.dedent() def generate_c_code(self, env, result): - python_version = env.context.target_version modules = [] self.find_referenced_modules(env, modules, {}) - code = Code.CCodeWriter(result.c_file, python_version) + code = Code.CCodeWriter(result.c_file) code.init_labels() self.generate_module_preamble(env, modules, code) for module in modules: Modified: lxml/pyrex/dist/Pyrex-0.9.3.1-1.noarch.rpm ============================================================================== Binary files. No diff available. Modified: lxml/pyrex/dist/Pyrex-0.9.3.1-1.src.rpm ============================================================================== Binary files. No diff available. Modified: lxml/pyrex/dist/Pyrex-0.9.3.1.tar.gz ============================================================================== Binary files. No diff available. 
From scoder at codespeak.net Thu Mar 23 10:14:36 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 10:14:38 2006 Subject: [Lxml-checkins] r24864 - lxml/pyrex/Pyrex/Compiler Message-ID: <20060323091436.DC3A010151@code0.codespeak.net> Author: scoder Date: Thu Mar 23 10:14:35 2006 New Revision: 24864 Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py Log: comments Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/ExprNodes.py (original) +++ lxml/pyrex/Pyrex/Compiler/ExprNodes.py Thu Mar 23 10:14:35 2006 @@ -1760,6 +1760,7 @@ def generate_operation_code(self, code): code.begin_require_python(0x020400F0) + # Python version >= 2.4 args = [''] for arg in self.args: result = arg.result @@ -1774,7 +1775,8 @@ arg_string, self.result, code.error_goto(self.pos))) - code.else_require_python() # else: Python version < 2.4 + code.else_require_python() + # else: Python version < 2.4 code.putln( "%s = PyTuple_New(%s); if (!%s) %s" % ( self.result, From scoder at codespeak.net Thu Mar 23 10:50:43 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 10:50:45 2006 Subject: [Lxml-checkins] r24865 - lxml/trunk/src/lxml Message-ID: <20060323095043.0554E100AA@code0.codespeak.net> Author: scoder Date: Thu Mar 23 10:50:37 2006 New Revision: 24865 Modified: lxml/trunk/src/lxml/etree.pyx Log: small C-ification Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 23 10:50:37 2006 @@ -81,7 +81,7 @@ return _elementFactory(self, c_node) def buildNewPrefix(self): - ns = "ns%d" % self._ns_counter + ns = python.PyString_FromFormat("ns%d", self._ns_counter) self._ns_counter = self._ns_counter + 1 return ns From scoder at codespeak.net Thu Mar 23 11:59:15 2006 From: scoder at codespeak.net 
(scoder@codespeak.net) Date: Thu Mar 23 11:59:16 2006 Subject: [Lxml-checkins] r24869 - lxml/trunk/src/lxml Message-ID: <20060323105915.3056D10137@code0.codespeak.net> Author: scoder Date: Thu Mar 23 11:59:03 2006 New Revision: 24869 Modified: lxml/trunk/src/lxml/etree.h lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/python.pxd Log: make string -> char* conversion explicit (C macro '_cstr') , replace it with straight Python API call Modified: lxml/trunk/src/lxml/etree.h ============================================================================== --- lxml/trunk/src/lxml/etree.h (original) +++ lxml/trunk/src/lxml/etree.h Thu Mar 23 11:59:03 2006 @@ -4,6 +4,7 @@ #define isinstance(a,b) PyObject_IsInstance(a,b) #define hasattr(a,b) PyObject_HasAttr(a,b) #define callable(a) PyCallable_Check(a) +#define _cstr(s) PyString_AS_STRING(s) #define _isElement(c_node) \ ((c_node)->type == XML_ELEMENT_NODE || \ (c_node)->type == XML_COMMENT_NODE) Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 23 11:59:03 2006 @@ -1,6 +1,6 @@ cimport tree, python from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement -from python cimport isinstance, hasattr +from python cimport isinstance, hasattr, callable, _cstr cimport xpath cimport xslt cimport xmlerror @@ -96,7 +96,7 @@ # create ns if existing ns cannot be found # try to simulate ElementTree's namespace prefix creation prefix = self.buildNewPrefix() - c_ns = tree.xmlNewNs(c_node, href, prefix) + c_ns = tree.xmlNewNs(c_node, href, _cstr(prefix)) return c_ns cdef void _setNodeNs(self, xmlNode* c_node, char* href): @@ -122,10 +122,10 @@ c_doc = self._c_doc for prefix, href in nsmap.items(): href_utf = _utf8(href) - c_href = href_utf + c_href = _cstr(href_utf) if prefix is not None: prefix_utf = _utf8(prefix) - c_prefix = prefix_utf + c_prefix = _cstr(prefix_utf) else: c_prefix 
= NULL # add namespace with prefix if ns is not already known @@ -513,10 +513,10 @@ def __set__(self, value): cdef xmlNs* c_ns ns, text = _getNsTag(value) - tree.xmlNodeSetName(self._c_node, text) + tree.xmlNodeSetName(self._c_node, _cstr(text)) if ns is None: return - self._doc._setNodeNs(self._c_node, ns) + self._doc._setNodeNs(self._c_node, _cstr(ns)) # not in ElementTree, read-only property prefix: @@ -543,7 +543,7 @@ # now add new text node with value at start text = _utf8(value) c_text_node = tree.xmlNewDocText(self._doc._c_doc, - text) + _cstr(text)) if self._c_node.children is NULL: tree.xmlAddChild(self._c_node, c_text_node) else: @@ -561,7 +561,7 @@ if value is None: return text = _utf8(value) - c_text_node = tree.xmlNewDocText(self._doc._c_doc, text) + c_text_node = tree.xmlNewDocText(self._doc._c_doc, _cstr(text)) # XXX what if we're the top element? tree.xmlAddNextSibling(self._c_node, c_text_node) @@ -579,7 +579,7 @@ def __getslice__(self, start, stop): cdef xmlNode* c_node cdef _Document doc - cdef int c + cdef int c, c_stop # this does not work for negative start, stop, however, # python seems to convert these to positive start, stop before # calling, so this all works perfectly (at the cost of a len() call) @@ -587,9 +587,10 @@ if c_node is NULL: return [] c = start + c_stop = stop result = [] doc = self._doc - while c_node is not NULL and c < stop: + while c_node is not NULL and c < c_stop: if _isElement(c_node): ret = python.PyList_Append(result, _elementFactory(doc, c_node)) if ret: @@ -692,11 +693,13 @@ # XXX more redundancy, but might be slightly faster than # return self.attrib.get(key, default) cdef char* cresult + cdef char* c_tag ns, tag = _getNsTag(key) + c_tag = _cstr(tag) if ns is None: - cresult = tree.xmlGetNoNsProp(self._c_node, tag) + cresult = tree.xmlGetNoNsProp(self._c_node, c_tag) else: - cresult = tree.xmlGetNsProp(self._c_node, tag, ns) + cresult = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns)) if cresult is NULL: result = 
default else: @@ -839,22 +842,28 @@ # MANIPULATORS def __setitem__(self, key, value): cdef xmlNs* c_ns + cdef char* c_value + cdef char* c_tag ns, tag = _getNsTag(key) + c_tag = _cstr(tag) value = _utf8(value) + c_value = _cstr(value) if ns is None: - tree.xmlSetProp(self._c_node, tag, value) + tree.xmlSetProp(self._c_node, c_tag, c_value) else: - c_ns = self._doc._findOrBuildNodeNs(self._c_node, ns) - tree.xmlSetNsProp(self._c_node, c_ns, tag, value) + c_ns = self._doc._findOrBuildNodeNs(self._c_node, _cstr(ns)) + tree.xmlSetNsProp(self._c_node, c_ns, c_tag, c_value) def __delitem__(self, key): cdef xmlNs* c_ns cdef xmlAttr* c_attr + cdef char* c_tag ns, tag = _getNsTag(key) + c_tag = _cstr(tag) if ns is None: - c_attr = tree.xmlHasProp(self._c_node, tag) + c_attr = tree.xmlHasProp(self._c_node, c_tag) else: - c_attr = tree.xmlHasNsProp(self._c_node, tag, ns) + c_attr = tree.xmlHasNsProp(self._c_node, c_tag, _cstr(ns)) if c_attr is NULL: # XXX free namespace that is not in use..? raise KeyError, key @@ -870,11 +879,13 @@ def __getitem__(self, key): cdef xmlNs* c_ns cdef char* cresult + cdef char* c_tag ns, tag = _getNsTag(key) + c_tag = _cstr(tag) if ns is None: - cresult = tree.xmlGetNoNsProp(self._c_node, tag) + cresult = tree.xmlGetNoNsProp(self._c_node, c_tag) else: - cresult = tree.xmlGetNsProp(self._c_node, tag, ns) + cresult = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns)) if cresult is NULL: # XXX free namespace that is not in use..? 
raise KeyError, key @@ -908,7 +919,7 @@ c_node = (self._c_node.properties) while c_node is not NULL: if c_node.type == tree.XML_ATTRIBUTE_NODE: - result.append(_namespacedName(c_node)) + python.PyList_Append(result, _namespacedName(c_node)) c_node = c_node.next return result @@ -918,7 +929,7 @@ c_node = (self._c_node.properties) while c_node is not NULL: if c_node.type == tree.XML_ATTRIBUTE_NODE: - result.append(self._getValue(c_node)) + python.PyList_Append(result, self._getValue(c_node)) c_node = c_node.next return result @@ -936,7 +947,7 @@ c_node = (self._c_node.properties) while c_node is not NULL: if c_node.type == tree.XML_ATTRIBUTE_NODE: - result.append(( + python.PyList_Append(result, ( _namespacedName(c_node), self._getValue(c_node) )) @@ -946,11 +957,13 @@ def has_key(self, key): cdef xmlNs* c_ns cdef char* result + cdef char* c_tag ns, tag = _getNsTag(key) + c_tag = _cstr(tag) if ns is None: - result = tree.xmlGetNoNsProp(self._c_node, tag) + result = tree.xmlGetNoNsProp(self._c_node, c_tag) else: - result = tree.xmlGetNsProp(self._c_node, tag, ns) + result = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns)) if result is not NULL: tree.xmlFree(result) return True @@ -960,11 +973,13 @@ def __contains__(self, key): cdef xmlNs* c_ns cdef char* result + cdef char* c_tag ns, tag = _getNsTag(key) + c_tag = _cstr(tag) if ns is None: - result = tree.xmlGetNoNsProp(self._c_node, tag) + result = tree.xmlGetNoNsProp(self._c_node, c_tag) else: - result = tree.xmlGetNsProp(self._c_node, tag, ns) + result = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns)) if result is not NULL: tree.xmlFree(result) return True @@ -1064,11 +1079,11 @@ self._iterator = iter(element_iterator) ns_href, name = _getNsTag(tag) self._pystrings = (ns_href, name) # keep Python references - self._name = name + self._name = _cstr(name) if ns_href is None: self._href = NULL else: - self._href = ns_href + self._href = _cstr(ns_href) def __iter__(self): return self def __next__(self): @@ -1094,12 
+1109,12 @@ attrib = extra else: attrib.update(extra) - c_node = tree.xmlNewDocNode(c_doc, NULL, name_utf, NULL) + c_node = tree.xmlNewDocNode(c_doc, NULL, _cstr(name_utf), NULL) if attrib: for name, value in attrib.items(): attr_name_utf = _utf8(name) value_utf = _utf8(value) - tree.xmlNewProp(c_node, attr_name_utf, value_utf) + tree.xmlNewProp(c_node, _cstr(attr_name_utf), _cstr(value_utf)) return c_node cdef xmlNode* _createComment(xmlDoc* c_doc, char* text): @@ -1210,7 +1225,7 @@ if encoding in ('utf8', 'UTF8', 'utf-8'): encoding = 'UTF-8' doc = element._doc - enc = encoding + enc = _cstr(encoding) # it is necessary to *and* find the encoding handler *and* use # encoding during output enchandler = tree.xmlFindCharEncodingHandler(enc) @@ -1521,7 +1536,7 @@ cdef object _utf8(object s): if python.PyString_Check(s): - assert not isutf8(s), "All strings must be Unicode or ASCII" + assert not isutf8(_cstr(s)), "All strings must be Unicode or ASCII" return s elif python.PyUnicode_Check(s): return python.PyUnicode_AsUTF8String(s) @@ -1536,7 +1551,7 @@ cdef char* c_pos cdef int nslen tag = _utf8(tag) - c_tag = tag + c_tag = _cstr(tag) if c_tag[0] == c'{': c_pos = tree.xmlStrchr(c_tag+1, c'}') if c_pos is NULL: Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Thu Mar 23 11:59:03 2006 @@ -41,3 +41,4 @@ cdef int isinstance(object instance, object classes) cdef int hasattr(object obj, object attr) cdef int callable(object obj) + cdef char* _cstr(object s) From scoder at codespeak.net Thu Mar 23 12:35:34 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 12:35:36 2006 Subject: [Lxml-checkins] r24870 - lxml/trunk Message-ID: <20060323113534.162C71013B@code0.codespeak.net> Author: scoder Date: Thu Mar 23 12:35:33 2006 New Revision: 24870 Modified: lxml/trunk/bench.py Log: bench.py option '-c' to zero out 
callgrind data before starting benchmark Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Thu Mar 23 12:35:33 2006 @@ -394,6 +394,7 @@ if __name__ == '__main__': import_lxml = True + callgrind_zero = False if len(sys.argv) > 1: try: sys.argv.remove('-i') @@ -407,6 +408,12 @@ except ValueError: pass + try: + sys.argv.remove('-c') + callgrind_zero = True + except ValueError: + pass + _etrees = [] if import_lxml: from lxml import etree @@ -494,6 +501,11 @@ print " T%d:" % (i+1), ' '.join("%6.4f" % t for t in tree_times) print + if callgrind_zero: + cmd = open("callgrind.cmd", 'w') + cmd.write('Zero\n') + cmd.close() + for bench_calls in izip(*benchmarks): for lib, (bench, benchmark_setup) in enumerate(izip(benchmark_suites, bench_calls)): bench_name, method_call = benchmark_setup[:2] From scoder at codespeak.net Thu Mar 23 12:49:25 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 12:49:26 2006 Subject: [Lxml-checkins] r24871 - lxml/trunk Message-ID: <20060323114925.3FBDB1013B@code0.codespeak.net> Author: scoder Date: Thu Mar 23 12:49:23 2006 New Revision: 24871 Modified: lxml/trunk/bench.py Log: new benchmark for repeated tag retrieval Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Thu Mar 23 12:49:23 2006 @@ -343,6 +343,11 @@ for child in root: child.tag + def bench_tag_repeat(self, root): + for child in root: + for i in repeat(0, 100): + child.tag + @with_text(utext=True, text=True, no_text=True) def bench_text(self, root): for child in root: @@ -466,6 +471,7 @@ for tree in tree_set ] times = [] + args = () for i in range(3): gc.collect() gc.disable() @@ -478,6 +484,7 @@ t = 1000.0 * t / len(call_repeat) times.append(t) gc.enable() + del args return times def build_treeset_name(trees, tn, an): From scoder 
at codespeak.net Thu Mar 23 12:50:40 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 12:50:41 2006 Subject: [Lxml-checkins] r24872 - lxml/trunk/src/lxml Message-ID: <20060323115040.B088A1013B@code0.codespeak.net> Author: scoder Date: Thu Mar 23 12:50:39 2006 New Revision: 24872 Modified: lxml/trunk/src/lxml/etree.pyx Log: cached element.tag property Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 23 12:50:39 2006 @@ -364,6 +364,8 @@ return result cdef class _Element(_NodeBase): + cdef object _tag + # MANIPULATORS def __setitem__(self, index, _NodeBase element): @@ -508,11 +510,15 @@ # PROPERTIES property tag: def __get__(self): - return _namespacedName(self._c_node) + if self._tag is not None: + return self._tag + self._tag = _namespacedName(self._c_node) + return self._tag def __set__(self, value): cdef xmlNs* c_ns ns, text = _getNsTag(value) + self._tag = value tree.xmlNodeSetName(self._c_node, _cstr(text)) if ns is None: return @@ -776,6 +782,7 @@ else: assert 0, "Unknown node type: %s" % c_node.type result = element_class() + result._tag = None result._doc = doc result._c_node = c_node result._proxy_type = PROXY_ELEMENT From scoder at codespeak.net Thu Mar 23 12:55:32 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 12:55:33 2006 Subject: [Lxml-checkins] r24873 - in lxml/branch/lxml-0.9.x: . 
src/lxml Message-ID: <20060323115532.563201013B@code0.codespeak.net> Author: scoder Date: Thu Mar 23 12:55:31 2006 New Revision: 24873 Modified: lxml/branch/lxml-0.9.x/bench.py lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Log: merged in updates from trunk: cache for element.tag and benchmark Modified: lxml/branch/lxml-0.9.x/bench.py ============================================================================== --- lxml/branch/lxml-0.9.x/bench.py (original) +++ lxml/branch/lxml-0.9.x/bench.py Thu Mar 23 12:55:31 2006 @@ -343,6 +343,11 @@ for child in root: child.tag + def bench_tag_repeat(self, root): + for child in root: + for i in repeat(0, 100): + child.tag + @with_text(utext=True, text=True, no_text=True) def bench_text(self, root): for child in root: @@ -459,6 +464,7 @@ for tree in tree_set ] times = [] + args = () for i in range(3): gc.collect() gc.disable() @@ -471,6 +477,7 @@ t = 1000.0 * t / len(call_repeat) times.append(t) gc.enable() + del args return times def build_treeset_name(trees, tn, an): Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Thu Mar 23 12:55:31 2006 @@ -364,6 +364,8 @@ return result cdef class _Element(_NodeBase): + cdef object _tag + # MANIPULATORS def __setitem__(self, index, _NodeBase element): @@ -508,11 +510,15 @@ # PROPERTIES property tag: def __get__(self): - return _namespacedName(self._c_node) + if self._tag is not None: + return self._tag + self._tag = _namespacedName(self._c_node) + return self._tag def __set__(self, value): cdef xmlNs* c_ns ns, text = _getNsTag(value) + self._tag = value tree.xmlNodeSetName(self._c_node, text) if ns is None: return @@ -773,6 +779,7 @@ else: assert 0, "Unknown node type: %s" % c_node.type result = element_class() + result._tag = None result._doc = doc result._c_node = c_node result._proxy_type = PROXY_ELEMENT From 
scoder at codespeak.net Thu Mar 23 13:08:28 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 13:08:30 2006 Subject: [Lxml-checkins] r24874 - lxml/trunk Message-ID: <20060323120828.7E1FB10142@code0.codespeak.net> Author: scoder Date: Thu Mar 23 13:08:26 2006 New Revision: 24874 Modified: lxml/trunk/CHANGES.txt Log: updated CHANGES.txt Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Mar 23 13:08:26 2006 @@ -4,9 +4,18 @@ 0.9.1 ===== +Features added +-------------- + +* Speedup for repeatedly accessing element tag names + +* Minor API performance improvements + Bugs fixed ---------- +* sax.py was handling UTF-8 encoded tag names where it shouldn't + * lxml.tests package will no longer be installed (is still in source tar) 0.9 (2006-03-20) From scoder at codespeak.net Thu Mar 23 13:09:47 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 13:09:48 2006 Subject: [Lxml-checkins] r24875 - lxml/branch/lxml-0.9.x Message-ID: <20060323120947.46D8C10142@code0.codespeak.net> Author: scoder Date: Thu Mar 23 13:09:41 2006 New Revision: 24875 Modified: lxml/branch/lxml-0.9.x/CHANGES.txt Log: merged in updated CHANGES.txt from trunk for 0.9.1 Modified: lxml/branch/lxml-0.9.x/CHANGES.txt ============================================================================== --- lxml/branch/lxml-0.9.x/CHANGES.txt (original) +++ lxml/branch/lxml-0.9.x/CHANGES.txt Thu Mar 23 13:09:41 2006 @@ -4,9 +4,18 @@ 0.9.1 ===== +Features added +-------------- + +* Speedup for repeatedly accessing element tag names + +* Minor API performance improvements + Bugs fixed ---------- +* sax.py was handling UTF-8 encoded tag names where it shouldn't + * lxml.tests package will no longer be installed (is still in source tar) 0.9 (2006-03-20) From scoder at codespeak.net Thu Mar 23 13:18:07 2006 From: scoder at codespeak.net 
(scoder@codespeak.net) Date: Thu Mar 23 13:18:08 2006 Subject: [Lxml-checkins] r24876 - lxml/branch/lxml-0.9.x Message-ID: <20060323121807.AC34F10142@code0.codespeak.net> Author: scoder Date: Thu Mar 23 13:18:01 2006 New Revision: 24876 Modified: lxml/branch/lxml-0.9.x/version.txt Log: version set to 0.9.1 Modified: lxml/branch/lxml-0.9.x/version.txt ============================================================================== --- lxml/branch/lxml-0.9.x/version.txt (original) +++ lxml/branch/lxml-0.9.x/version.txt Thu Mar 23 13:18:01 2006 @@ -1 +1 @@ -0.9 +0.9.1 From scoder at codespeak.net Thu Mar 23 13:48:10 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 13:48:12 2006 Subject: [Lxml-checkins] r24880 - lxml/branch/lxml-0.9.x Message-ID: <20060323124810.F168210144@code0.codespeak.net> Author: scoder Date: Thu Mar 23 13:48:09 2006 New Revision: 24880 Modified: lxml/branch/lxml-0.9.x/CHANGES.txt lxml/branch/lxml-0.9.x/setup.py Log: set zip_safe flag for setuptools Modified: lxml/branch/lxml-0.9.x/CHANGES.txt ============================================================================== --- lxml/branch/lxml-0.9.x/CHANGES.txt (original) +++ lxml/branch/lxml-0.9.x/CHANGES.txt Thu Mar 23 13:48:09 2006 @@ -7,6 +7,8 @@ Features added -------------- +* zip_safe flag allows setuptools to install lxml as zipped egg + * Speedup for repeatedly accessing element tag names * Minor API performance improvements Modified: lxml/branch/lxml-0.9.x/setup.py ============================================================================== --- lxml/branch/lxml-0.9.x/setup.py (original) +++ lxml/branch/lxml-0.9.x/setup.py Thu Mar 23 13:48:09 2006 @@ -4,14 +4,16 @@ wf, rf, ef = os.popen3(cmd) return rf.read().strip().split(' ') +setup_args = {} + try: from setuptools import setup from setuptools.extension import Extension + setup_args['zip_safe'] = True except ImportError: from distutils.core import setup from distutils.extension import Extension -setup_args = {}
try: from Pyrex.Distutils import build_ext as build_pyx sources = ["src/lxml/etree.pyx"] From scoder at codespeak.net Thu Mar 23 13:50:26 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 13:50:28 2006 Subject: [Lxml-checkins] r24881 - lxml/trunk Message-ID: <20060323125026.EB32D10144@code0.codespeak.net> Author: scoder Date: Thu Mar 23 13:50:25 2006 New Revision: 24881 Modified: lxml/trunk/CHANGES.txt lxml/trunk/setup.py Log: merged in zip_safe flag from 0.9 branch Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Mar 23 13:50:25 2006 @@ -7,6 +7,8 @@ Features added -------------- +* zip_safe flag allows setuptools to install lxml as zipped egg + * Speedup for repeatedly accessing element tag names * Minor API performance improvements Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Thu Mar 23 13:50:25 2006 @@ -4,14 +4,16 @@ wf, rf, ef = os.popen3(cmd) return rf.read().strip().split(' ') +setup_args = {} + try: from setuptools import setup from setuptools.extension import Extension + setup_args['zip_safe'] = True except ImportError: from distutils.core import setup from distutils.extension import Extension -setup_args = {} try: from Pyrex.Distutils import build_ext as build_pyx sources = ["src/lxml/etree.pyx"] From scoder at codespeak.net Thu Mar 23 17:00:02 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 17:00:03 2006 Subject: [Lxml-checkins] r24889 - lxml/trunk Message-ID: <20060323160002.3D7FD10142@code0.codespeak.net> Author: scoder Date: Thu Mar 23 16:59:56 2006 New Revision: 24889 Modified: lxml/trunk/setup.py Log: read latest version notes from CHANGES.txt into package data Modified: lxml/trunk/setup.py 
============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Thu Mar 23 16:59:56 2006 @@ -5,6 +5,8 @@ return rf.read().strip().split(' ') setup_args = {} +changelog_text = "" +version = open('version.txt').read().strip() try: from setuptools import setup @@ -22,9 +24,32 @@ print "*NOTE*: Trying to build without Pyrex, needs pre-generated 'src/lxml/etree.c' !" sources = ["src/lxml/etree.c"] +try: + changelog = open("CHANGES.txt", 'r') +except: + print "*NOTE*: couldn't open CHANGES.txt !" +else: + inside = 0 + changelog_lines = [] + for line in changelog: + if line.startswith('====='): + inside += 1 + if inside > 3: + break + if inside > 1: + changelog_lines.append(line) + elif version in line: + changelog_lines.append(line) + inside += 1 + + if changelog_lines: + changelog_text = ''.join(changelog_lines[:-1]) + + changelog.close() + setup( name = "lxml", - version = open('version.txt').read().strip(), + version = version, author="lxml dev team", author_email="lxml-dev@codespeak.net", maintainer="lxml dev team", @@ -39,7 +64,8 @@ It extends the ElementTree API significantly to offer support for XPath, RelaxNG, XML Schema, XSLT, C14N and much more. 
-""", + +""" + changelog_text, classifiers = [ 'Development Status :: 5 - Production/Stable', From scoder at codespeak.net Thu Mar 23 17:01:09 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 23 17:01:10 2006 Subject: [Lxml-checkins] r24890 - lxml/branch/lxml-0.9.x Message-ID: <20060323160109.B3DB510142@code0.codespeak.net> Author: scoder Date: Thu Mar 23 17:01:07 2006 New Revision: 24890 Modified: lxml/branch/lxml-0.9.x/setup.py Log: read latest version notes from CHANGES.txt into package data Modified: lxml/branch/lxml-0.9.x/setup.py ============================================================================== --- lxml/branch/lxml-0.9.x/setup.py (original) +++ lxml/branch/lxml-0.9.x/setup.py Thu Mar 23 17:01:07 2006 @@ -5,6 +5,8 @@ return rf.read().strip().split(' ') setup_args = {} +changelog_text = "" +version = open('version.txt').read().strip() try: from setuptools import setup @@ -22,9 +24,32 @@ print "*NOTE*: Trying to build without Pyrex, needs pre-generated 'src/lxml/etree.c' !" sources = ["src/lxml/etree.c"] +try: + changelog = open("CHANGES.txt", 'r') +except: + print "*NOTE*: couldn't open CHANGES.txt !" +else: + inside = 0 + changelog_lines = [] + for line in changelog: + if line.startswith('====='): + inside += 1 + if inside > 3: + break + if inside > 1: + changelog_lines.append(line) + elif version in line: + changelog_lines.append(line) + inside += 1 + + if changelog_lines: + changelog_text = ''.join(changelog_lines[:-1]) + + changelog.close() + setup( name = "lxml", - version = open('version.txt').read().strip(), + version = version, author="lxml dev team", author_email="lxml-dev@codespeak.net", maintainer="lxml dev team", @@ -39,7 +64,8 @@ It extends the ElementTree API significantly to offer support for XPath, RelaxNG, XML Schema, XSLT, C14N and much more. 
-""", + +""" + changelog_text, classifiers = [ 'Development Status :: 5 - Production/Stable', From scoder at codespeak.net Fri Mar 24 08:11:12 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 24 08:11:16 2006 Subject: [Lxml-checkins] r24933 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20060324071112.7E7D310145@code0.codespeak.net> Author: scoder Date: Fri Mar 24 08:11:10 2006 New Revision: 24933 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/sax.py lxml/trunk/src/lxml/tests/test_sax.py Log: lxml.sax updates: - added startElement and endElement to ElementTreeContentHandler (namespace-free events) - default keyword arguments for empty attributes - raise SaxError when opening and closing tags do not match - some clean up and more test cases Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Mar 24 08:11:10 2006 @@ -7,6 +7,13 @@ Features added -------------- +* lxml.sax.ElementTreeContentHandler checks closing elements and raises + SaxError on mismatch + +* lxml.sax.ElementTreeContentHandler now supports namespace-less SAX events + (startElement, endElement) and defaults to empty attributes (keyword + argument) + * zip_safe flag allows setuptools to install lxml as zipped egg * Speedup for repeatedly accessing element tag names Modified: lxml/trunk/src/lxml/sax.py ============================================================================== --- lxml/trunk/src/lxml/sax.py (original) +++ lxml/trunk/src/lxml/sax.py Fri Mar 24 08:11:10 2006 @@ -1,11 +1,14 @@ from xml.sax.handler import ContentHandler -from lxml.etree import ElementTree, Element, SubElement +from lxml.etree import ElementTree, Element, SubElement, LxmlError + +class SaxError(LxmlError): + pass def _getNsTag(tag): if tag[0] == '{': - return tag[1:].split('}', 1) + return tuple(tag[1:].split('}', 1)) else: - return None, tag + return (None, 
tag) class ElementTreeContentHandler(object, ContentHandler): """Build an lxml ElementTree from SAX events. @@ -47,27 +50,30 @@ if prefix is None: self._default_ns = ns_uri_list[-1] - def startElementNS(self, name, qname, attributes): - ns_uri, local_name = name + def startElementNS(self, ns_name, qname, attributes=None): + ns_uri, local_name = ns_name if ns_uri: - el_name = "{%s}%s" % name + el_name = "{%s}%s" % ns_name elif self._default_ns: el_name = "{%s}%s" % (self._default_ns, local_name) else: el_name = local_name - try: - iter_attributes = attributes.iteritems() - except AttributeError: - iter_attributes = attributes.items() - - attrs = {} - for name_tuple, value in iter_attributes: - if name_tuple[0]: - attr_name = "{%s}%s" % name_tuple - else: - attr_name = name_tuple[1] - attrs[attr_name] = value + if attributes: + attrs = {} + try: + iter_attributes = attributes.iteritems() + except AttributeError: + iter_attributes = attributes.items() + + for name_tuple, value in iter_attributes: + if name_tuple[0]: + attr_name = "{%s}%s" % name_tuple + else: + attr_name = name_tuple[1] + attrs[attr_name] = value + else: + attrs = None element_stack = self._element_stack if self._root is None: @@ -79,8 +85,17 @@ self._new_mappings.clear() - def endElementNS(self, name, qname): - self._element_stack.pop() + def endElementNS(self, ns_name, qname): + element = self._element_stack.pop() + tag = element.tag + if ns_name != _getNsTag(tag): + raise SaxError, "Unexpected element closed: {%s}%s" % ns_name + + def startElement(self, name, attributes=None): + self.startElementNS((None, name), name, attributes) + + def endElement(self, name): + self.endElementNS((None, name), name) def characters(self, data): last_element = self._element_stack[-1] @@ -111,20 +126,22 @@ def _recursive_saxify(self, element, prefixes): new_prefixes = [] - if element.attrib: + build_qname = self._build_qname + attribs = element.items() + if attribs: attr_values = {} attr_qnames = {} - for 
attr_ns_name, value in element.attrib.items(): + for attr_ns_name, value in attribs: attr_ns_tuple = _getNsTag(attr_ns_name) attr_values[attr_ns_tuple] = value - attr_qnames[attr_ns_tuple] = _build_qname( + attr_qnames[attr_ns_tuple] = build_qname( attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes) sax_attributes = self._attr_class(attr_values, attr_qnames) else: sax_attributes = self._empty_attributes ns_uri, local_name = _getNsTag(element.tag) - qname = _build_qname(ns_uri, local_name, prefixes, new_prefixes) + qname = build_qname(ns_uri, local_name, prefixes, new_prefixes) content_handler = self._content_handler for prefix, uri in new_prefixes: @@ -141,15 +158,15 @@ if element.tail: content_handler.characters(element.tail) + def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes): + if ns_uri is None: + return local_name + try: + prefix = prefixes[ns_uri] + except KeyError: + prefix = prefixes[ns_uri] = u'ns%02d' % len(prefixes) + new_prefixes.append( (prefix, ns_uri) ) + return prefix + ':' + local_name + def saxify(element_or_tree, content_handler): return ElementTreeProducer(element_or_tree, content_handler).saxify() - -def _build_qname(ns_uri, local_name, prefixes, new_prefixes): - if ns_uri is None: - return local_name - try: - prefix = prefixes[ns_uri] - except KeyError: - prefix = prefixes[ns_uri] = u'ns%02d' % len(prefixes) - new_prefixes.append( (prefix, ns_uri) ) - return prefix + ':' + local_name Modified: lxml/trunk/src/lxml/tests/test_sax.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_sax.py (original) +++ lxml/trunk/src/lxml/tests/test_sax.py Fri Mar 24 08:11:10 2006 @@ -24,6 +24,12 @@ self.assertEquals('abbbba', xml_out) + def test_etree_sax_attributes(self): + tree = self.parse('abba') + xml_out = self._saxify_serialize(tree) + self.assertEquals('abba', + xml_out) + def test_etree_sax_ns1(self): tree = self.parse('abbbba') new_tree = 
self._saxify_unsaxify(tree) @@ -122,6 +128,36 @@ self.assertEqual(root[1].tag, '{blaA}c') + def test_etree_sax_no_ns(self): + handler = sax.ElementTreeContentHandler() + handler.startDocument() + handler.startElement('a', {}) + handler.startElement('b', {}) + handler.endElement('b') + handler.startElement('c') # with empty attributes + handler.endElement('c') + handler.endElement('a') + handler.endDocument() + + new_tree = handler.etree + root = new_tree.getroot() + self.assertEqual(root.tag, 'a') + self.assertEqual(root[0].tag, 'b') + self.assertEqual(root[1].tag, 'c') + + def test_etree_sax_error(self): + handler = sax.ElementTreeContentHandler() + handler.startDocument() + handler.startElement('a') + self.assertRaises(sax.SaxError, handler.endElement, 'b') + + def test_etree_sax_error2(self): + handler = sax.ElementTreeContentHandler() + handler.startDocument() + handler.startElement('a') + handler.startElement('b') + self.assertRaises(sax.SaxError, handler.endElement, 'a') + def _saxify_unsaxify(self, saxifiable): handler = sax.ElementTreeContentHandler() sax.ElementTreeProducer(saxifiable, handler).saxify() From scoder at codespeak.net Fri Mar 24 08:15:38 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 24 08:15:40 2006 Subject: [Lxml-checkins] r24934 - in lxml/branch/lxml-0.9.x: . 
src/lxml src/lxml/tests Message-ID: <20060324071538.925E410145@code0.codespeak.net> Author: scoder Date: Fri Mar 24 08:15:22 2006 New Revision: 24934 Modified: lxml/branch/lxml-0.9.x/CHANGES.txt lxml/branch/lxml-0.9.x/src/lxml/sax.py lxml/branch/lxml-0.9.x/src/lxml/tests/test_sax.py Log: merged in sax updates from trunk Modified: lxml/branch/lxml-0.9.x/CHANGES.txt ============================================================================== --- lxml/branch/lxml-0.9.x/CHANGES.txt (original) +++ lxml/branch/lxml-0.9.x/CHANGES.txt Fri Mar 24 08:15:22 2006 @@ -7,6 +7,13 @@ Features added -------------- +* lxml.sax.ElementTreeContentHandler checks closing elements and raises + SaxError on mismatch + +* lxml.sax.ElementTreeContentHandler now supports namespace-less SAX events + (startElement, endElement) and defaults to empty attributes (keyword + argument) + * zip_safe flag allows setuptools to install lxml as zipped egg * Speedup for repeatedly accessing element tag names Modified: lxml/branch/lxml-0.9.x/src/lxml/sax.py ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/sax.py (original) +++ lxml/branch/lxml-0.9.x/src/lxml/sax.py Fri Mar 24 08:15:22 2006 @@ -1,11 +1,14 @@ from xml.sax.handler import ContentHandler -from lxml.etree import ElementTree, Element, SubElement +from lxml.etree import ElementTree, Element, SubElement, LxmlError + +class SaxError(LxmlError): + pass def _getNsTag(tag): if tag[0] == '{': - return tag[1:].split('}', 1) + return tuple(tag[1:].split('}', 1)) else: - return None, tag + return (None, tag) class ElementTreeContentHandler(object, ContentHandler): """Build an lxml ElementTree from SAX events. 
@@ -47,27 +50,30 @@ if prefix is None: self._default_ns = ns_uri_list[-1] - def startElementNS(self, name, qname, attributes): - ns_uri, local_name = name + def startElementNS(self, ns_name, qname, attributes=None): + ns_uri, local_name = ns_name if ns_uri: - el_name = "{%s}%s" % name + el_name = "{%s}%s" % ns_name elif self._default_ns: el_name = "{%s}%s" % (self._default_ns, local_name) else: el_name = local_name - try: - iter_attributes = attributes.iteritems() - except AttributeError: - iter_attributes = attributes.items() - - attrs = {} - for name_tuple, value in iter_attributes: - if name_tuple[0]: - attr_name = "{%s}%s" % name_tuple - else: - attr_name = name_tuple[1] - attrs[attr_name] = value + if attributes: + attrs = {} + try: + iter_attributes = attributes.iteritems() + except AttributeError: + iter_attributes = attributes.items() + + for name_tuple, value in iter_attributes: + if name_tuple[0]: + attr_name = "{%s}%s" % name_tuple + else: + attr_name = name_tuple[1] + attrs[attr_name] = value + else: + attrs = None element_stack = self._element_stack if self._root is None: @@ -79,8 +85,17 @@ self._new_mappings.clear() - def endElementNS(self, name, qname): - self._element_stack.pop() + def endElementNS(self, ns_name, qname): + element = self._element_stack.pop() + tag = element.tag + if ns_name != _getNsTag(tag): + raise SaxError, "Unexpected element closed: {%s}%s" % ns_name + + def startElement(self, name, attributes=None): + self.startElementNS((None, name), name, attributes) + + def endElement(self, name): + self.endElementNS((None, name), name) def characters(self, data): last_element = self._element_stack[-1] @@ -111,20 +126,22 @@ def _recursive_saxify(self, element, prefixes): new_prefixes = [] - if element.attrib: + build_qname = self._build_qname + attribs = element.items() + if attribs: attr_values = {} attr_qnames = {} - for attr_ns_name, value in element.attrib.items(): + for attr_ns_name, value in attribs: attr_ns_tuple = 
_getNsTag(attr_ns_name) attr_values[attr_ns_tuple] = value - attr_qnames[attr_ns_tuple] = _build_qname( + attr_qnames[attr_ns_tuple] = build_qname( attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes) sax_attributes = self._attr_class(attr_values, attr_qnames) else: sax_attributes = self._empty_attributes ns_uri, local_name = _getNsTag(element.tag) - qname = _build_qname(ns_uri, local_name, prefixes, new_prefixes) + qname = build_qname(ns_uri, local_name, prefixes, new_prefixes) content_handler = self._content_handler for prefix, uri in new_prefixes: @@ -141,15 +158,15 @@ if element.tail: content_handler.characters(element.tail) + def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes): + if ns_uri is None: + return local_name + try: + prefix = prefixes[ns_uri] + except KeyError: + prefix = prefixes[ns_uri] = u'ns%02d' % len(prefixes) + new_prefixes.append( (prefix, ns_uri) ) + return prefix + ':' + local_name + def saxify(element_or_tree, content_handler): return ElementTreeProducer(element_or_tree, content_handler).saxify() - -def _build_qname(ns_uri, local_name, prefixes, new_prefixes): - if ns_uri is None: - return local_name - try: - prefix = prefixes[ns_uri] - except KeyError: - prefix = prefixes[ns_uri] = u'ns%02d' % len(prefixes) - new_prefixes.append( (prefix, ns_uri) ) - return prefix + ':' + local_name Modified: lxml/branch/lxml-0.9.x/src/lxml/tests/test_sax.py ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/tests/test_sax.py (original) +++ lxml/branch/lxml-0.9.x/src/lxml/tests/test_sax.py Fri Mar 24 08:15:22 2006 @@ -24,6 +24,12 @@ self.assertEquals('abbbba', xml_out) + def test_etree_sax_attributes(self): + tree = self.parse('abba') + xml_out = self._saxify_serialize(tree) + self.assertEquals('abba', + xml_out) + def test_etree_sax_ns1(self): tree = self.parse('abbbba') new_tree = self._saxify_unsaxify(tree) @@ -122,6 +128,36 @@ self.assertEqual(root[1].tag, '{blaA}c') 
+ def test_etree_sax_no_ns(self): + handler = sax.ElementTreeContentHandler() + handler.startDocument() + handler.startElement('a', {}) + handler.startElement('b', {}) + handler.endElement('b') + handler.startElement('c') # with empty attributes + handler.endElement('c') + handler.endElement('a') + handler.endDocument() + + new_tree = handler.etree + root = new_tree.getroot() + self.assertEqual(root.tag, 'a') + self.assertEqual(root[0].tag, 'b') + self.assertEqual(root[1].tag, 'c') + + def test_etree_sax_error(self): + handler = sax.ElementTreeContentHandler() + handler.startDocument() + handler.startElement('a') + self.assertRaises(sax.SaxError, handler.endElement, 'b') + + def test_etree_sax_error2(self): + handler = sax.ElementTreeContentHandler() + handler.startDocument() + handler.startElement('a') + handler.startElement('b') + self.assertRaises(sax.SaxError, handler.endElement, 'a') + def _saxify_unsaxify(self, saxifiable): handler = sax.ElementTreeContentHandler() sax.ElementTreeProducer(saxifiable, handler).saxify() From scoder at codespeak.net Fri Mar 24 08:32:14 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 24 08:32:15 2006 Subject: [Lxml-checkins] r24935 - lxml/pyrex/Pyrex/Compiler Message-ID: <20060324073214.618FE10145@code0.codespeak.net> Author: scoder Date: Fri Mar 24 08:32:12 2006 New Revision: 24935 Modified: lxml/pyrex/Pyrex/Compiler/Nodes.py Log: applied 01-fix-unused-variables.patch by David M. 
Cooke (Oct 2005) Modified: lxml/pyrex/Pyrex/Compiler/Nodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Nodes.py (original) +++ lxml/pyrex/Pyrex/Compiler/Nodes.py Fri Mar 24 08:32:12 2006 @@ -510,7 +510,12 @@ code.putln( "static void %s(PyObject *o) {" % scope.mangle_internal("tp_dealloc")) - self.generate_self_cast(scope, code) + # only need the object cast to the type if we need to decref + # some instance attributes + for entry in scope.var_entries: + if entry.type.is_pyobject: + self.generate_self_cast(scope, code) + break self.generate_usr_dealloc_call(scope, code) for entry in scope.var_entries: if entry.type.is_pyobject: @@ -554,9 +559,13 @@ code.putln( "static int %s(PyObject *o, visitproc v, void *a) {" % scope.mangle_internal("tp_traverse")) - code.putln( - "int e;") - self.generate_self_cast(scope, code) + # only need e, p if we have object attributes + for entry in scope.var_entries: + if entry.type.is_pyobject: + code.putln( + "int e;") + self.generate_self_cast(scope, code) + break if base_type: code.putln( "%s->tp_traverse(o, v, a);" % @@ -585,7 +594,11 @@ code.putln( "static int %s(PyObject *o) {" % scope.mangle_internal("tp_clear")) - self.generate_self_cast(scope, code) + # only need cast to self type if have object attributes to dereference + for entry in scope.var_entries: + if entry.type.is_pyobject: + self.generate_self_cast(scope, code) + break if base_type: code.putln( "%s->tp_clear(o);" % From scoder at codespeak.net Fri Mar 24 08:45:06 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 24 08:45:13 2006 Subject: [Lxml-checkins] r24936 - lxml/pyrex/Pyrex/Compiler Message-ID: <20060324074506.0DF0B10145@code0.codespeak.net> Author: scoder Date: Fri Mar 24 08:45:00 2006 New Revision: 24936 Modified: lxml/pyrex/Pyrex/Compiler/Version.py Log: added '_lxml' suffix to Pyrex version to distinguish from official versions Modified: 
lxml/pyrex/Pyrex/Compiler/Version.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Version.py (original) +++ lxml/pyrex/Pyrex/Compiler/Version.py Fri Mar 24 08:45:00 2006 @@ -1 +1 @@ -version = '0.9.3.1' +version = '0.9.3.1_lxml' From scoder at codespeak.net Fri Mar 24 08:58:26 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 24 08:58:27 2006 Subject: [Lxml-checkins] r24937 - lxml/pyrex/Pyrex/Compiler Message-ID: <20060324075826.9F27810145@code0.codespeak.net> Author: scoder Date: Fri Mar 24 08:58:25 2006 New Revision: 24937 Modified: lxml/pyrex/Pyrex/Compiler/Code.py lxml/pyrex/Pyrex/Compiler/Nodes.py Log: applied new 02-emit-only-used-labels.patch by David M. Cooke Modified: lxml/pyrex/Pyrex/Compiler/Code.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Code.py (original) +++ lxml/pyrex/Pyrex/Compiler/Code.py Fri Mar 24 08:58:25 2006 @@ -91,6 +91,7 @@ def init_labels(self): self.label_counter = 0 + self.used_labels = {} self.return_label = self.new_label() self.new_error_label() self.continue_label = None @@ -146,8 +147,16 @@ self.set_all_labels(new_labels) return old_labels + def mark_label_used(self, lbl): + self.used_labels[lbl] = 1 + def put_label(self, lbl): - self.putln("%s:;" % lbl) + if lbl in self.used_labels: + self.putln("%s:;" % lbl) + + def put_goto(self, lbl): + self.putln("goto %s;" % lbl) + self.mark_label_used(lbl) def put_var_declarations(self, entries, static = 0, dll_linkage = None): for entry in entries: @@ -257,6 +266,7 @@ term)) def error_goto(self, pos): + self.mark_label_used(self.error_label) return "{%s = %s[%s]; %s = %s; goto %s;}" % ( Naming.filename_cname, Naming.filetable_cname, Modified: lxml/pyrex/Pyrex/Compiler/Nodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Nodes.py (original) +++ 
lxml/pyrex/Pyrex/Compiler/Nodes.py Fri Mar 24 08:58:25 2006 @@ -1707,7 +1707,7 @@ val = self.return_type.default_value if val: code.putln("%s = %s;" % (Naming.retval_cname, val)) - code.putln("goto %s;" % code.return_label) + code.put_goto(code.return_label) # ----- Error cleanup code.put_label(code.error_label) code.put_var_xdecrefs(lenv.temp_entries) @@ -2699,9 +2699,7 @@ if not code.break_label: error(self.pos, "break statement not inside loop") else: - code.putln( - "goto %s;" % - code.break_label) + code.put_goto(code.break_label) class ContinueStatNode(StatNode): @@ -2715,9 +2713,7 @@ elif not code.continue_label: error(self.pos, "continue statement not inside loop") else: - code.putln( - "goto %s;" % - code.continue_label) + code.put_goto(code.continue_label) class ReturnStatNode(StatNode): @@ -2776,9 +2772,7 @@ "%s = %s;" % ( Naming.retval_cname, self.return_type.default_value)) - code.putln( - "goto %s;" % - code.return_label) + code.put_goto(code.return_label) class RaiseStatNode(StatNode): @@ -2940,9 +2934,7 @@ "if (%s) {" % self.condition.result) self.body.generate_execution_code(code) - code.putln( - "goto %s;" % - end_label) + code.put_goto(end_label) code.putln("}") @@ -2971,12 +2963,12 @@ old_loop_labels = code.new_loop_labels() code.putln( "while (1) {") - code.put_label(code.continue_label) self.condition.generate_evaluation_code(code) code.putln( "if (!%s) break;" % self.condition.result) self.body.generate_execution_code(code) + code.put_label(code.continue_label) code.putln("}") break_label = code.break_label code.set_loop_labels(old_loop_labels) @@ -3024,10 +3016,10 @@ self.iterator.generate_evaluation_code(code) code.putln( "for (;;) {") - code.put_label(code.continue_label) self.item.generate_evaluation_code(code) self.target.generate_assignment_code(self.item, code) self.body.generate_execution_code(code) + code.put_label(code.continue_label) code.putln( "}") break_label = code.break_label @@ -3178,9 +3170,7 @@ 
self.else_clause.generate_execution_code(code) code.putln( "}") - code.putln( - "goto %s;" % - end_label) + code.put_goto(end_label) code.put_label(our_error_label) code.put_var_xdecrefs_clear(self.cleanup_list) default_clause_seen = 0 @@ -3192,9 +3182,7 @@ error(except_clause.pos, "Default except clause not last") except_clause.generate_handling_code(code, end_label) if not default_clause_seen: - code.putln( - "goto %s;" % - code.error_label) + code.put_goto(code.error_label) code.put_label(end_label) @@ -3259,9 +3247,7 @@ else: self.exc_value.generate_disposal_code(code) self.body.generate_execution_code(code) - code.putln( - "goto %s;" - % end_label) + code.put_goto(end_label) code.putln( "}") @@ -3334,6 +3320,7 @@ code.putln( "__pyx_why = 0; goto %s;" % catch_label) + code.mark_label_used(catch_label) for i in range(len(new_labels)): if new_labels[i] and new_labels[i] <> "": if new_labels[i] == new_error_label: @@ -3345,6 +3332,7 @@ new_labels[i], i+1, catch_label)) + code.mark_label_used(catch_label) code.put_label(catch_label) code.set_all_labels(old_labels) self.finally_clause.generate_execution_code(code) @@ -3359,6 +3347,7 @@ "case %s: goto %s;" % ( i+1, old_labels[i])) + code.mark_label_used(old_labels[i]) code.putln( "}") code.putln( @@ -3378,9 +3367,7 @@ code.putln( "%s = %s;" % ( self.lineno_var, Naming.lineno_cname)) - code.putln( - "goto %s;" % - catch_label) + code.put_goto(catch_label) code.putln( "}") @@ -3398,9 +3385,7 @@ code.putln( "%s = 0;" % var) - code.putln( - "goto %s;" % - error_label) + code.put_goto(error_label) code.putln( "}") From scoder at codespeak.net Fri Mar 24 09:59:56 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 24 09:59:58 2006 Subject: [Lxml-checkins] r24939 - lxml/trunk/src/lxml Message-ID: <20060324085956.C7C4D1014D@code0.codespeak.net> Author: scoder Date: Fri Mar 24 09:59:54 2006 New Revision: 24939 Modified: lxml/trunk/src/lxml/xslt.pxi Log: added _cstr() calls in xslt.pxi (reason as before: 
make C string conversion explicit) Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Fri Mar 24 09:59:54 2006 @@ -233,13 +233,15 @@ def _contextRegisterExtensionFunction(self, ns_uri_utf, name_utf): if ns_uri_utf is None: raise XSLTExtensionError, "extensions must have non-empty namespaces" - xslt.xsltRegisterExtFunction(self._xsltCtxt, - name_utf, ns_uri_utf, _xpathCallback) + xslt.xsltRegisterExtFunction( + self._xsltCtxt, _cstr(name_utf), _cstr(ns_uri_utf), + _xpathCallback) def _contextUnregisterExtensionFunction(self, ns_uri_utf, name_utf): if ns_uri_utf is not None: - xslt.xsltRegisterExtFunction(self._xsltCtxt, - name_utf, ns_uri_utf, _xpathCallback) + xslt.xsltRegisterExtFunction( + self._xsltCtxt, _cstr(name_utf), _cstr(ns_uri_utf), + _xpathCallback) cdef class XSLT: @@ -311,12 +313,12 @@ keep_ref = [] for key, value in _kw.items(): k = _utf8(key) - keep_ref.append(k) + python.PyList_Append(keep_ref, k) v = _utf8(value) - keep_ref.append(v) - params[i] = k + python.PyList_Append(keep_ref, v) + params[i] = _cstr(k) i = i + 1 - params[i] = v + params[i] = _cstr(v) i = i + 1 params[i] = NULL else: @@ -426,31 +428,34 @@ cdef void _registerVariable(self, name_utf, value): xpath.xmlXPathRegisterVariable( - self._xpathCtxt, name_utf, _wrapXPathObject(value)) + self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value)) cdef void _unregisterVariable(self, name_utf): cdef xpath.xmlXPathContext* xpathCtxt cdef xpath.xmlXPathObject* xpathVarValue xpathCtxt = self._xpathCtxt - xpathVarValue = xpath.xmlXPathVariableLookup(xpathCtxt, name_utf) + xpathVarValue = xpath.xmlXPathVariableLookup(xpathCtxt, _cstr(name_utf)) if xpathVarValue is not NULL: - xpath.xmlXPathRegisterVariable(xpathCtxt, name_utf, NULL) + xpath.xmlXPathRegisterVariable(xpathCtxt, _cstr(name_utf), NULL) xpath.xmlXPathFreeObject(xpathVarValue) def 
_contextRegisterExtensionFunction(self, ns_uri_utf, name_utf): if ns_uri_utf is not None: - xpath.xmlXPathRegisterFuncNS(self._xpathCtxt, - name_utf, ns_uri_utf, _xpathCallback) + xpath.xmlXPathRegisterFuncNS( + self._xpathCtxt, _cstr(name_utf), _cstr(ns_uri_utf), + _xpathCallback) else: - xpath.xmlXPathRegisterFunc(self._xpathCtxt, name_utf, - _xpathCallback) + xpath.xmlXPathRegisterFunc( + self._xpathCtxt, _cstr(name_utf), + _xpathCallback) def _contextUnregisterExtensionFunction(self, ns_uri_utf, name_utf): if ns_uri_utf is not None: - xpath.xmlXPathRegisterFuncNS(self._xpathCtxt, - name_utf, ns_uri_utf, NULL) + xpath.xmlXPathRegisterFuncNS( + self._xpathCtxt, _cstr(name_utf), _cstr(ns_uri_utf), NULL) else: - xpath.xmlXPathRegisterFunc(self._xpathCtxt, name_utf, NULL) + xpath.xmlXPathRegisterFunc( + self._xpathCtxt, _cstr(name_utf), NULL) cdef class XPathEvaluatorBase: @@ -535,7 +540,7 @@ self._context.registerVariables(variable_dict) path = _utf8(path) - xpathObj = xpath.xmlXPathEvalExpression(path, xpathCtxt) + xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt) self._context.unregister_context() return self._handle_result(xpathObj, self._doc) @@ -582,7 +587,7 @@ XPathEvaluatorBase.__init__(self, namespaces, extensions, None) self.path = path path = _utf8(path) - self._xpath = xpath.xmlXPathCompile(path) + self._xpath = xpath.xmlXPathCompile(_cstr(path)) if self._xpath is NULL: raise XPathSyntaxError, "Error in xpath expression." @@ -660,7 +665,7 @@ obj = _utf8(obj) if python.PyString_Check(obj): # XXX use the Wrap variant? Or leak... 
- return xpath.xmlXPathNewCString(obj) + return xpath.xmlXPathNewCString(_cstr(obj)) if python.PyBool_Check(obj): return xpath.xmlXPathNewBoolean(obj) if python.PyNumber_Check(obj): From scoder at codespeak.net Fri Mar 24 21:48:30 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 24 21:48:31 2006 Subject: [Lxml-checkins] r24977 - lxml/pyrex/Pyrex/Compiler Message-ID: <20060324204830.136A610137@code0.codespeak.net> Author: scoder Date: Fri Mar 24 21:48:29 2006 New Revision: 24977 Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py Log: reverted empty tuple removal patch in favour of replacing indirect PyObject_CallObject() calls by straight PyObject_Call() Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/ExprNodes.py (original) +++ lxml/pyrex/Pyrex/Compiler/ExprNodes.py Fri Mar 24 21:48:29 2006 @@ -1191,10 +1191,9 @@ self.self = function.obj function.obj = CloneNode(self.self) if self.function.type.is_pyobject: - if self.args: - self.arg_tuple = TupleNode(self.pos, args = self.args) - self.arg_tuple.analyse_types(env) + self.arg_tuple = TupleNode(self.pos, args = self.args) self.args = None + self.arg_tuple.analyse_types(env) self.type = PyrexTypes.py_object_type self.is_temp = 1 else: @@ -1286,15 +1285,11 @@ def generate_result_code(self, code): #print_call_chain("SimpleCallNode.generate_result_code") ### if self.function.type.is_pyobject: - if self.arg_tuple: - arg_result = self.arg_tuple.result - else: - arg_result = "0" code.putln( - "%s = PyObject_CallObject(%s, %s); if (!%s) %s" % ( + "%s = PyObject_Call(%s, %s, 0); if (!%s) %s" % ( self.result, self.function.result, - arg_result, + self.arg_tuple.result, self.result, code.error_goto(self.pos))) elif self.function.type.is_cfunction: @@ -1759,24 +1754,25 @@ # Tuple constructor. 
def generate_operation_code(self, code): - code.begin_require_python(0x020400F0) - # Python version >= 2.4 - args = [''] - for arg in self.args: - result = arg.result - args.append(result) - if not arg.result_in_temp(): - code.put_incref(result, arg.type) - arg_string = ', '.join(args) - code.putln( - "%s = PyTuple_Pack(%s%s); if (!%s) %s" % ( - self.result, - len(self.args), - arg_string, - self.result, - code.error_goto(self.pos))) - code.else_require_python() - # else: Python version < 2.4 + if self.args: + code.begin_require_python(0x020400F0) + # non-empty tuple and Python version >= 2.4 + args = [''] + for arg in self.args: + result = arg.result + args.append(result) + if not arg.result_in_temp(): + code.put_incref(result, arg.type) + arg_string = ', '.join(args) + code.putln( + "%s = PyTuple_Pack(%s%s); if (!%s) %s" % ( + self.result, + len(self.args), + arg_string, + self.result, + code.error_goto(self.pos))) + code.else_require_python() + # else: empty tuple or Python version < 2.4 code.putln( "%s = PyTuple_New(%s); if (!%s) %s" % ( self.result, @@ -1793,7 +1789,8 @@ self.result, i, result)) - code.end_require_python() + if self.args: + code.end_require_python() def generate_subexpr_disposal_code(self, code): # We call generate_post_assignment_code here instead From scoder at codespeak.net Fri Mar 24 22:35:14 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 24 22:35:15 2006 Subject: [Lxml-checkins] r24979 - lxml/pyrex/Pyrex/Compiler Message-ID: <20060324213514.4CB811012D@code0.codespeak.net> Author: scoder Date: Fri Mar 24 22:35:13 2006 New Revision: 24979 Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py Log: removed PyTuple_Pack calls used in Py2.4 - reason: PyTuple_SET_ITEM + PyObject_Call is now faster than PyTuple_Pack + PyObject_CallObject before Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/ExprNodes.py (original) +++ 
lxml/pyrex/Pyrex/Compiler/ExprNodes.py Fri Mar 24 22:35:13 2006 @@ -1754,25 +1754,6 @@ # Tuple constructor. def generate_operation_code(self, code): - if self.args: - code.begin_require_python(0x020400F0) - # non-empty tuple and Python version >= 2.4 - args = [''] - for arg in self.args: - result = arg.result - args.append(result) - if not arg.result_in_temp(): - code.put_incref(result, arg.type) - arg_string = ', '.join(args) - code.putln( - "%s = PyTuple_Pack(%s%s); if (!%s) %s" % ( - self.result, - len(self.args), - arg_string, - self.result, - code.error_goto(self.pos))) - code.else_require_python() - # else: empty tuple or Python version < 2.4 code.putln( "%s = PyTuple_New(%s); if (!%s) %s" % ( self.result, @@ -1789,8 +1770,6 @@ self.result, i, result)) - if self.args: - code.end_require_python() def generate_subexpr_disposal_code(self, code): # We call generate_post_assignment_code here instead From scoder at codespeak.net Sun Mar 26 12:53:35 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 26 12:53:39 2006 Subject: [Lxml-checkins] r25008 - in lxml/branch/htmlparser: . 
doc src/lxml src/lxml/tests Message-ID: <20060326105335.1E28F10079@code0.codespeak.net> Author: scoder Date: Sun Mar 26 12:53:24 2006 New Revision: 25008 Added: lxml/branch/htmlparser/bench.py - copied unchanged from r25007, lxml/trunk/bench.py lxml/branch/htmlparser/doc/extensions.txt - copied unchanged from r25007, lxml/trunk/doc/extensions.txt lxml/branch/htmlparser/src/lxml/etree.h - copied unchanged from r25007, lxml/trunk/src/lxml/etree.h lxml/branch/htmlparser/src/lxml/htmlparser.pxd lxml/branch/htmlparser/src/lxml/python.pxd - copied unchanged from r25007, lxml/trunk/src/lxml/python.pxd lxml/branch/htmlparser/src/lxml/xmlerror.pxi - copied unchanged from r25007, lxml/trunk/src/lxml/xmlerror.pxi Removed: lxml/branch/htmlparser/doc/xpath.txt Modified: lxml/branch/htmlparser/CHANGES.txt lxml/branch/htmlparser/INSTALL.txt lxml/branch/htmlparser/MANIFEST.in lxml/branch/htmlparser/Makefile lxml/branch/htmlparser/doc/api.txt lxml/branch/htmlparser/doc/main.txt lxml/branch/htmlparser/doc/namespace_extensions.txt lxml/branch/htmlparser/doc/sax.txt lxml/branch/htmlparser/setup.py lxml/branch/htmlparser/src/lxml/etree.pyx lxml/branch/htmlparser/src/lxml/nsclasses.pxi lxml/branch/htmlparser/src/lxml/parser.pxi lxml/branch/htmlparser/src/lxml/proxy.pxi lxml/branch/htmlparser/src/lxml/relaxng.pxi lxml/branch/htmlparser/src/lxml/sax.py lxml/branch/htmlparser/src/lxml/tests/test_elementtree.py lxml/branch/htmlparser/src/lxml/tests/test_etree.py lxml/branch/htmlparser/src/lxml/tests/test_nsclasses.py lxml/branch/htmlparser/src/lxml/tests/test_relaxng.py lxml/branch/htmlparser/src/lxml/tests/test_sax.py lxml/branch/htmlparser/src/lxml/tests/test_unicode.py lxml/branch/htmlparser/src/lxml/tests/test_xpathevaluator.py lxml/branch/htmlparser/src/lxml/tests/test_xslt.py lxml/branch/htmlparser/src/lxml/tree.pxd lxml/branch/htmlparser/src/lxml/xmlschema.pxi lxml/branch/htmlparser/src/lxml/xslt.pxi lxml/branch/htmlparser/version.txt Log: very preliminary unusable state, merged in 
changes from trunk revision: updated to revision 25007 to reflect changes in lxml 0.9 Modified: lxml/branch/htmlparser/CHANGES.txt ============================================================================== --- lxml/branch/htmlparser/CHANGES.txt (original) +++ lxml/branch/htmlparser/CHANGES.txt Sun Mar 26 12:53:24 2006 @@ -1,12 +1,64 @@ lxml changelog ============== -Under development -================= +0.9.1 +===== Features added -------------- +* lxml.sax.ElementTreeContentHandler checks closing elements and raises + SaxError on mismatch + +* lxml.sax.ElementTreeContentHandler now supports namespace-less SAX events + (startElement, endElement) and defaults to empty attributes (keyword + argument) + +* zip_safe flag allows setuptools to install lxml as zipped egg + +* Speedup for repeatedly accessing element tag names + +* Minor API performance improvements + +Bugs fixed +---------- + +* sax.py was handling UTF-8 encoded tag names where it shouldn't + +* lxml.tests package will no longer be installed (is still in source tar) + +0.9 (2006-03-20) +================ + +Features added +-------------- + +* Error logging API for libxml2 error messages + +* Various performance improvements + +* Benchmark script for lxml, ElementTree and cElementTree + +* Support for registering extension functions through new FunctionNamespace + class (see doc/extensions.txt) + +* Support for variables in XPath expressions (also in XPath class) + +* XPath class for compiled XPath expressions + +* XMLID module level function + +* XMLParser API for customized libxml2 parser configuration + +* Support for custom Element classes through new Namespace API (see + doc/namespace_extensions.txt) + +* Common exception base class LxmlError for module exceptions + +* real iterator support in iter(Element), Element.getiterator() + +* XSLT objects are callable, result trees support str() + * Added MANIFEST.in for easier creation of RPM files. 
* 'getparent' method on elements allows navigation to an element's @@ -15,6 +67,19 @@ * Python core compatible SAX tree builder and SAX event generator. See doc/sax.txt for more information. +Bugs fixed +---------- + +* Segfaults and memory leaks in various API functions of Element + +* Segfault in XSLT.tostring() + +* ElementTree objects no longer interfere, Elements can be root of different + ElementTrees at the same time + +* document('') now works in XSLT documents read from files (in-memory + documents cannot support this due to libxslt deficiencies) + 0.8 (2005-11-03) ================ Modified: lxml/branch/htmlparser/INSTALL.txt ============================================================================== --- lxml/branch/htmlparser/INSTALL.txt (original) +++ lxml/branch/htmlparser/INSTALL.txt Sun Mar 26 12:53:24 2006 @@ -4,6 +4,8 @@ Requirements ------------ +You need Python 2.3 or later. + You need libxml2 and libxslt, in particular: * libxml 2.6.16 (newer versions should work). It can be found here: @@ -12,40 +14,98 @@ * libxslt 1.1.12 (newer versions should work). It can be found here: http://xmlsoft.org/XSLT/downloads.html -You also need Pyrex (0.9.3) to compile the software. It can be found -here: +See below for instructions how to get these for Windows. On MacOS-X 10.4, you +can use the installed system libraries and the binary egg distribution of +lxml. + +If you want to build lxml from SVN, you also need Pyrex_. If you are using a +released version of lxml, it should come with the generated C file in the +source distribution, so no Pyrex is needed in that case. + +.. _Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ + +See also the notes on building with gcc 4.0 below if you are having +trouble with Pyrex. 
-* http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ +If you have read these instructions and still cannot manage to install lxml, +you can check the archives of the `mailing list`_ to see if your problem is +known or otherwise send a mail to the list. + + .. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev -You also need Python 2.3 (Python 2.4 also ought to work). Installation ------------ -Type:: +If you have easy_install_, you can run the following as super-user:: - python setup.py install + easy_install lxml + +.. _easy_install: http://peak.telecommunity.com/DevCenter/EasyInstall -to compile and install the library. +This has been reported to work on Linux, MacOS-X 10.4 and Windows, as long as +libxml2 and libxslt are installed. To compile and install lxml without +easy_install, download the source tar-ball, unpack it and type:: -It's also possible to do this:: + python setup.py install + +If you do not want to install lxml right away, but first test it from the +source directory, you can build it in-place like this:: - python2.3 setup.py build_ext -i + python setup.py build_ext -i or just:: make -This will not install lxml, but if you place lxml's "src" on your -PYTHONPATH somehow, you can import it and play with it. +If you then place lxml's "src" directory on your PYTHONPATH somehow, you can +import lxml.etree and play with it. + + +Installation on Windows +----------------------- + +As always, installation on Windows is different. If you do not want to go +through the hassle of compiling everything by hand, you can use the binary +distribution of libxml2 and libxslt. It is available here: + +http://www.zlatkovic.com/libxml.en.html + +Note that you need both libxml2 and libxslt, as well as iconv and zlib. 
You +can then download a binary version of lxml 0.9 for Python 2.4 from the +following address: -Building lxml with gcc 4.0 --------------------------- +http://carcass.dhs.org/lxml-0.9.win32-py2.4.exe -Pyrex 0.9.3 generates C code that gcc 4.0 does not accept. Pending an -official release of a version of Pyrex that does work with gcc 4.0, -here's a patch to Pyrex that makes lxml compile and appear to work -with gcc 4.0: +or the egg distribution from + +http://cheeseshop.python.org/pypi/lxml + +The egg can directly be installed using easy_install_. Both builds were kindly +contributed by Steve Howe. If they do not work for you, feel free to report to +the mailing list. + + +Building lxml with gcc 4.0 or Python 2.4 +---------------------------------------- + +Pyrex 0.9.3.1 generates C code that gcc 4.0 does not accept. Pending an +official release of a version of Pyrex that does work with gcc 4.0, the lxml +project currently provides an updated version of Pyrex in its Subversion +repository: + +http://codespeak.net/svn/lxml/pyrex/ + +To install it, you can just download one of the following files: + +http://codespeak.net/svn/lxml/pyrex/dist/Pyrex-0.9.3.1.tar.gz + +http://codespeak.net/svn/lxml/pyrex/dist/Pyrex-0.9.3.1-1.src.rpm + +It is based on Pyrex 0.9.3.1 and contains a number of patches that make lxml +compile and appear to work with gcc 4.0. If you use this version, you can +simply skip the rest of the section. In case you want to apply them yourself, +the first one is: http://codespeak.net/lxml/Pyrex-0.9.3-gcc4.patch @@ -55,57 +115,51 @@ http://codespeak.net/lxml/Pyrex-0.9.3-gcc4-small.patch -It may however actually be that at the time you read this, this extra -patch has been applied by the distributions as well. +It may however actually be that at the time you read this, this extra patch +has been applied by the distributions as well. 
You may still encounter the +following problem when building the extension on Python 2.4:: -Troubleshooting ---------------- + TypeError: swig_sources() takes exactly 2 arguments (3 given) -lxml's setup.py tries to be smart and uses libxml2's xml2-config to -find the installation path of libxml2. If this cannot be found or -doesn't work for some reason or another, try editing the setup.py, -by changing this:: - - # if you want to configure include dir manually, you can do so here, - # for instance: - # include_dirs = ['/usr/include/libxml2'] - include_dirs = guess_include_dirs() - -Into something like this:: - - include_dirs = ['/usr/include/libxml2'] - -If that still doesn't work, try registering the extension in a -different way entirely; there's a commented block of code at the -bottom of setup.py with an example. +To fix this, look for the following line in Pyrex/Distutils/build_ext.py +(around line 35):: -If you still have trouble, contact us on the `mailing list`_. + def swig_sources (self, sources): + +and change it to:: + + def swig_sources (self, sources, *otherargs): + +The above install files have these changes applied. It should do no harm if +you install them instead of the official Pyrex version. -.. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev -Running the tests ------------------ +Running the tests and reporting errors +-------------------------------------- -You can run the main tests by using:: +The source distribution (tgz) contains a test suite for lxml. You can run it +from the top-level directory:: python test.py -Alternatively, you can use:: +Note that the test script only tests the in-place build (see "Installation" +above), as it searches the "src" directory. 
You can use the following +one-step command to trigger an in-place build and test it:: make test To run the ElementTree and cElementTree compatibility tests, make sure you have lxml on your PYTHONPATH first, then run:: - python2.3 selftest.py + python selftest.py and:: - python2.3 selftest2.py + python selftest2.py -If the tests give failures, errors, or worse, segmentation faults, -we'd really like to know. Please contact us on the `mailing list`_, -and please specify the version of libxml2, libxslt and Python you were -using. +If the tests give failures, errors, or worse, segmentation faults, we'd really +like to know. Please contact us on the `mailing list`_, and please specify the +version of lxml, libxml2, libxslt and Python you were using, as well as your +operating system type (Linux, Windows, MacOs, ...). .. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev Modified: lxml/branch/htmlparser/MANIFEST.in ============================================================================== --- lxml/branch/htmlparser/MANIFEST.in (original) +++ lxml/branch/htmlparser/MANIFEST.in Sun Mar 26 12:53:24 2006 @@ -1,4 +1,5 @@ include setup.py MANIFEST.in *.txt -recursive-include src *.pyx *.pxd *.pxi *.py etree.c +recursive-include src *.pyx *.pxd *.pxi *.py etree.c etree.h recursive-include src/lxml/tests *.rng *.xslt *.xml -recursive-include doc *.txt +recursive-include doc *.txt *.xml *.mgp +exclude doc/pyrex.txt Modified: lxml/branch/htmlparser/Makefile ============================================================================== --- lxml/branch/htmlparser/Makefile (original) +++ lxml/branch/htmlparser/Makefile Sun Mar 26 12:53:24 2006 @@ -18,6 +18,9 @@ test_inplace: inplace $(PYTHON) test.py $(TESTFLAGS) $(TESTOPTS) +bench_inplace: inplace + $(PYTHON) bench.py -i + ftest_build: build $(PYTHON) test.py -f $(TESTFLAGS) $(TESTOPTS) @@ -27,6 +30,8 @@ # XXX What should the default be? 
test: test_inplace +bench: bench_inplace + ftest: ftest_inplace clean: Modified: lxml/branch/htmlparser/doc/api.txt ============================================================================== --- lxml/branch/htmlparser/doc/api.txt (original) +++ lxml/branch/htmlparser/doc/api.txt Sun Mar 26 12:53:24 2006 @@ -39,6 +39,37 @@ +Error handling on exceptions +---------------------------- + +Libxml2 provides error messages for failures, be it during parsing, XPath +evaluation or schema validation. Whenever an exception is raised, you can +retrieve the errors that occurred and "might have" led to the problem:: + + >>> lxml.etree.clearErrorLog() + >>> broken_xml = '' + >>> try: + ... lxml.etree.parse(StringIO(broken_xml)) + ... except lxml.etree.XMLSyntaxError, e: + ... pass # just put the exception into e + >>> log = e.error_log.filter_levels(lxml.etree.ErrorLevels.FATAL) + >>> print log + :1:FATAL:PARSER:ERR_TAG_NOT_FINISHED: Premature end of data in tag a line 1 + +This might look a little cryptic at first, but it is the information that +libxml2 gives you. At least the message at the end should give you a hint +what went wrong and you can see that the fatal error (FATAL) happened during +parsing (PARSER) line 1 of a string (, or filename if available). +Here, PARSER is the so-called error domain, see lxml.etree.ErrorDomains for +that. You can get it from a log entry like this:: + + >>> entry = log[0] + >>> print entry.domain_name, entry.type_name, entry.filename + PARSER ERR_TAG_NOT_FINISHED + +XSLT error messages are not currently available through the lxml API. 
+ + xpath method on ElementTree, Element ------------------------------------ @@ -97,6 +128,7 @@ >>> r[0].text 'Text' + XSLT ---- @@ -170,6 +202,7 @@ >>> str(result) '\nA\n' + RelaxNG ------- @@ -202,14 +235,29 @@ >>> relaxng.validate(doc2) 0 -Similar to XSLT, there's also a less efficient but easier shortcut -method to do RelaxNG validation:: +Starting with version 0.9, lxml now has a simple API to report the errors +generated by libxml2. If you want to find out why the validation failed in the +second case, you can look up the error log of the validation process and check +it for relevant messages:: + + >>> log = relaxng.error_log + >>> print log.filter_from_errors() + :1:ERROR:RELAXNGV:ERR_LT_IN_ATTRIBUTE: Did not expect element c there + +You can see that the error (ERROR) happened during RelaxNG validation +(RELAXNGV). The message then tells you what went wrong. Note that this error log +is local to the RelaxNG object. It will only contain log entries that +appeared during the validation. + +Similar to XSLT, there's also a less efficient but easier shortcut method to +do RelaxNG validation:: >>> doc.relaxng(relaxng_doc) 1 >>> doc2.relaxng(relaxng_doc) 0 + XMLSchema --------- @@ -245,14 +293,28 @@ >>> xmlschema.validate(doc2) 0 -Similar to XSLT and RelaxNG, there's also a less efficient but easier -shortcut method to do XML Schema validation:: +Error reporting works like for the RelaxNG class:: + + >>> log = xmlschema.error_log + >>> errors = log.filter_from_errors() + >>> print errors[0].domain_name + SCHEMASV + >>> print errors[0].type_name + SCHEMAV_ELEMENT_CONTENT + +If you were to print this log entry, you would get something like the following:: + + :1:ERROR::SCHEMAV_ELEMENT_CONTENT: Element 'c': This element is not expected. Expected is ( b ). 
+ +Similar to XSLT and RelaxNG, there's also a less efficient but easier shortcut +method to do XML Schema validation:: >>> doc.xmlschema(xmlschema_doc) 1 >>> doc2.xmlschema(xmlschema_doc) 0 + xinclude -------- @@ -270,6 +332,7 @@ >>> lxml.etree.tostring(tree.getroot()) '\n\n\n' + write_c14n on ElementTree ------------------------- Modified: lxml/branch/htmlparser/doc/main.txt ============================================================================== --- lxml/branch/htmlparser/doc/main.txt (original) +++ lxml/branch/htmlparser/doc/main.txt Sun Mar 26 12:53:24 2006 @@ -16,6 +16,8 @@ News ---- +* 2006-03-20: `lxml 0.9`_ released (`changes for 0.9`_) + * 2005-11-03: `lxml 0.8`_ released (`changes for 0.8`_) * 2005-06-15: `lxml 0.7`_ released (`changes for 0.7`_) @@ -26,6 +28,8 @@ * 2005-04-08: `lxml 0.5`_ released! +.. _`lxml 0.9`: lxml-0.9.tgz + .. _`lxml 0.8`: lxml-0.8.tgz .. _`lxml 0.7`: lxml-0.7.tgz @@ -36,6 +40,8 @@ .. _`lxml 0.5`: lxml-0.5.tgz +.. _`CHANGES for 0.9`: changes-0.9.html + .. _`CHANGES for 0.8`: changes-0.8.html .. _`CHANGES for 0.7`: changes-0.7.html @@ -53,8 +59,15 @@ lxml also `extends this API`_ to expose libxml2 and libxslt specific functionality, such as XPath_, `Relax NG`_, `XML Schema`_, `XSLT`_, and -`c14n`_. There is also more `detailed information`_ about what's -possible with XPath. +`c14n`_. Python code can be called from XPath expressions and XSLT stylesheets +through the use of `extension functions`_. + +In addition to the ElementTree API, lxml also features an API for +`implementing namespaces`_ using tag specific element classes. This is a +simple way to write arbitrary XML driven APIs on top of lxml. + +lxml also offers a `SAX compliant API`_, that works with the SAX support +in the standard library. .. _`ElementTree API`: http://effbot.org/zone/element-index.htm @@ -62,7 +75,7 @@ .. _`extends this API`: api.html -.. _`detailed information`: xpath.html +.. _`extension functions`: extensions.html .. 
_XPath: http://www.w3.org/TR/xpath @@ -74,6 +87,10 @@ .. _`c14n`: http://www.w3.org/TR/2001/REC-xml-c14n-20010315 +.. _`implementing namespaces`: namespace_extensions.html + +.. _`SAX compliant API`: sax.html + Mailing list ------------ @@ -84,6 +101,8 @@ Download -------- +* `lxml 0.9`_ (2006-03-20) + * `lxml 0.8`_ (2005-11-03) * `lxml 0.7`_ (2005-06-15) Modified: lxml/branch/htmlparser/doc/namespace_extensions.txt ============================================================================== --- lxml/branch/htmlparser/doc/namespace_extensions.txt (original) +++ lxml/branch/htmlparser/doc/namespace_extensions.txt Sun Mar 26 12:53:24 2006 @@ -2,20 +2,24 @@ Implementing namespaces with the Namespace class ================================================ +Also see `extensions`_. + +.. _`extensions`: extensions.html + Imagine, you have a namespace called 'http://hui.de/honk' and have to treat all of its elements in a specific way, say, to find out if they are really honking. You could provide a function called 'is_honking' that handles that:: ->>> def is_honking(honk_element): -... return honk_element.get('honking') == 'true' + >>> def is_honking(honk_element): + ... return honk_element.get('honking') == 'true' Then you can use it:: ->>> from lxml.etree import XML ->>> honk_element = XML('') ->>> print is_honking(honk_element) -True + >>> from lxml.etree import XML + >>> honk_element = XML('') + >>> print is_honking(honk_element) + True Not too bad, right? Now, imagine, you only want to do that to certain elements from that namespace and prevent others from being passed to @@ -31,34 +35,34 @@ =================== lxml allows you to implement namespaces, in a rather literal -sense. You can do the above like this: +sense. You can do the above like this:: ->>> from lxml.etree import Namespace, ElementBase ->>> class HonkElement(ElementBase): -... def honking(self): -... return self.get('honking') == 'true' -... 
honking = property(honking) + >>> from lxml.etree import Namespace, ElementBase + >>> class HonkElement(ElementBase): + ... def honking(self): + ... return self.get('honking') == 'true' + ... honking = property(honking) Now you can build the new namespace by calling the Namespace class:: ->>> namespace = Namespace('http://hui.de/honk') + >>> namespace = Namespace('http://hui.de/honk') -and then register the new element with that namespace:: +and then register the new element type with that namespace:: ->>> namespace['honk'] = HonkElement + >>> namespace['honk'] = HonkElement After this, you create and use your XML elements:: ->>> honk_element = XML('') ->>> print honk_element.honking -True + >>> honk_element = XML('') + >>> print honk_element.honking + True The same works when creating elements by hand:: ->>> from lxml.etree import Element ->>> honk_element = Element('{http://hui.de/honk}honk', honking='true') ->>> print honk_element.honking -True + >>> from lxml.etree import Element + >>> honk_element = Element('{http://hui.de/honk}honk', honking='true') + >>> print honk_element.honking + True Essentially, what this allows you to do, is giving elements a specific API based on their namespace and element name. @@ -71,17 +75,17 @@ constructor, neither must there be any internal state (except for their XML representation). Element instances are created and garbage collected at need, so there is no way to predict when and how often a -constructor would be called. Even worse, when the __init__ method is -called, the object may not even be initialized yet to represent the -XML tag, so there is not much use in providing an __init__ method in -subclasses. +constructor would be called. Even worse, when the ``__init__`` method +is called, the object may not even be initialized yet to represent the +XML tag, so there is not much use in providing an ``__init__`` method +in subclasses. However, there is one possible way to do things on element -initialization. 
Element classes have an _init() method that can be +initialization. Element classes have an ``_init()`` method that can be overridden. It can be used to modify the XML tree, e.g. to construct special children or verify and update attributes. -The semantics of _init() are as follows:: +The semantics of ``_init()`` are as follows: * It is called at least once on element instantiation time. That is, when a Python representation of the element is created. At that @@ -109,16 +113,13 @@ Example:: ->>> honk_element = XML('') ->>> print honk_element.honking -True ->>> print honk_element[0].honking -Traceback (most recent call last): - File "/var/tmp/python-2.4.2-root/usr/lib/python2.4/doctest.py", line 1243, in __run - compileflags, 1) in test.globs - File "", line 1, in ? - print honk_element[0].honking -AttributeError: 'etree._Element' object has no attribute 'honking' + >>> honk_element = XML('') + >>> print honk_element.honking + True + >>> print honk_element[0].honking + Traceback (most recent call last): + ... + AttributeError: 'etree._Element' object has no attribute 'honking' You can therefore provide one implementation per element name in each namespace and have lxml select the right one on the fly. If you want @@ -132,51 +133,27 @@ class for a namespace, that is used if no specific element class is provided. Again, you only have to pass None as an element name:: ->>> class HonkNSElement(ElementBase): -... def honk(self): -... return "HONK" ->>> namespace[None] = HonkNSElement - ->>> class HonkElement(HonkNSElement): -... def honking(self): -... return self.get('honking') == 'true' -... honking = property(honking) ->>> namespace['honk'] = HonkElement + >>> class HonkNSElement(ElementBase): + ... def honk(self): + ... return "HONK" + >>> namespace[None] = HonkNSElement + + >>> class HonkElement(HonkNSElement): + ... def honking(self): + ... return self.get('honking') == 'true' + ... 
honking = property(honking) + >>> namespace['honk'] = HonkElement Now you can use your new namespace:: ->>> honk_element = XML('') ->>> print honk_element.honking -True ->>> print honk_element.honk() -HONK ->>> print honk_element[0].honk() -HONK ->>> print honk_element[0].honking -Traceback (most recent call last): - File "/var/tmp/python-2.4.2-root/usr/lib/python2.4/doctest.py", line 1243, in __run - compileflags, 1) in test.globs - File "", line 1, in ? - print honk_element[0].honking -AttributeError: 'HonkNSElement' object has no attribute 'honking' - - -XPath extension functions -========================= - -The same API is used for extension functions in XPath. If you -associate a name in the Namespace with a callable object (that is not -a subclass of ElementBase), it will be used as extension function in -XPath evaluations. - ->>> from lxml.etree import Namespace ->>> def tag_of(context, elem): -... return elem[0].tag ->>> namespace = Namespace('myfunctions') ->>> namespace['tagname'] = tag_of - -You can then use your new function in XPath expressions: - ->>> element = XML('') ->>> element.xpath('f:tagname(//honk)', {'f' : 'myfunctions'}) -'honk' + >>> honk_element = XML('') + >>> print honk_element.honking + True + >>> print honk_element.honk() + HONK + >>> print honk_element[0].honk() + HONK + >>> print honk_element[0].honking + Traceback (most recent call last): + ... + AttributeError: 'HonkNSElement' object has no attribute 'honking' Modified: lxml/branch/htmlparser/doc/sax.txt ============================================================================== --- lxml/branch/htmlparser/doc/sax.txt (original) +++ lxml/branch/htmlparser/doc/sax.txt Sun Mar 26 12:53:24 2006 @@ -7,7 +7,7 @@ compatible with that in the Python core (xml.sax), so is useful for interfacing lxml with code that uses the Python core SAX facilities. 
-Producing SAX events for an ElementTree or Element +Producing SAX events from an ElementTree or Element -------------------------------------------------- Let's make a tree we can generate SAX events for:: Deleted: /lxml/branch/htmlparser/doc/xpath.txt ============================================================================== --- /lxml/branch/htmlparser/doc/xpath.txt Sun Mar 26 12:53:24 2006 +++ (empty file) @@ -1,149 +0,0 @@ -XPath extension functions -========================= - -This document describes how to deal with XPath extension -functions. This documentation is preliminary as the API is still in -flux. - -An extension function is defined in Python. In order to use it in -XPath, it needs to have a name by which it can be called in XPath, and -an optional namespace URI. - -As the first argument a function will always receive the -XPathEvaluator object that is currently in the process of evaluating -the XPath expression. - -First, let's create a simple XPath function:: - - >>> def foo(evaluator, a): - ... return "Hello %s" % a - -Now we need to register it as part of an extension. An extension is a -simple dictionary with tuple keys and function values. The tuple keys -are composed of a namespace URI (or `None`), and the name of the -function in XPath. We'll use the namespace URI `None` for now, to -indicate the function isn't in any particular namespace:: - - >>> extension = { (None, 'foo') : foo } - -Now we're going to create an XPath evaluator. To do that, we first need a -document that the evaluator is evaluating against:: - - >>> from lxml import etree - >>> from StringIO import StringIO - >>> f = StringIO('') - >>> doc = etree.parse(f) - -The XPathEvaluator takes the document, an optional dictionary of -namespace prefix to namespace URI mappings, and an optional list of -extensions. 
We'll just pass in extensions for now:: - - >>> e = etree.XPathEvaluator(doc, extensions=[extension]) - -Now we can use the evaluator to make XPath queries against the document:: - - >>> r = e.evaluate('/a') - >>> r[0].tag - 'a' - -This is not using the extension function. We'll try a very simple -XPath query that does now. It doesn't really use the document at all:: - - >>> e.evaluate("foo('world')") - 'Hello world' - -Let's create a slightly more complicated extension now, one that uses -a namespaced function. We'll just reuse the function foo, but register -it under a different name, and a namespace:: - - >>> extension2 = { ('http://codespeak.net/ns/test', 'different-name') : foo } - -Now let's set up an evaluator to use it. We'll also register our -original extension. As we want to use a namespace function, we first -need to register a namespace prefix we can use in the XPath -expression, so that we can access the namespace. This is just like when -you'd want to access a namespaced XML element or attribute:: - - >>> e = etree.XPathEvaluator(doc, - ... namespaces={'test': 'http://codespeak.net/ns/test'}, - ... extensions=[extension, extension2]) - -Since we registered the original extension too for this evaluator, our -`foo` extension function still works:: - - >>> e.evaluate("foo('world')") - 'Hello world' - -But now, we also have access to our namespaced `different-name` -extension function:: - - >>> e.evaluate("test:different-name('there')") - 'Hello there' - -Besides strings, it is possible to return a number of different objects -from extension functions, such as numbers (floats) and booleans:: - - >>> def returnsFloat(evaluator): - ... return 1.7 - >>> def returnsBool(evaluator): - ... return True - >>> extension3 = { (None, 'returnsFloat') : returnsFloat, - ...
(None, 'returnsBool') : returnsBool } - >>> e = etree.XPathEvaluator(doc, None, extensions=[extension3]) - >>> e.evaluate("returnsFloat()") - 1.7 - >>> e.evaluate("returnsBool()") - True - -It's also possible to register namespaces with an evaluator later on:: - - >>> f = StringIO('') - >>> ns_doc = etree.parse(f) - >>> e = etree.XPathEvaluator(ns_doc) - >>> e.registerNamespace('foo', 'http://codespeak.net/ns/test') - >>> e.evaluate('/foo:a')[0].tag - '{http://codespeak.net/ns/test}a' - -Note: the following is rather shaky and likely won't work yet in the real world. - -It is also possible to return lists of nodes, and this way it is possible -to return XML structures:: - - >>> def returnsNodeSet(evaluator): - ... results = etree.Element('results') - ... result = etree.SubElement(results, 'result') - ... result.text = "Alpha" - ... result2 = etree.SubElement(results, 'result') - ... result2.text = "Beta" - ... result3 = etree.SubElement(results, 'result') - ... result3.text = "Gamma" - ...
return [results] - >>> extension4 = { (None, 'returnsNodeSet') : returnsNodeSet } - >>> e = etree.XPathEvaluator(doc, None, extensions=[extension4]) - >>> r = e.evaluate("returnsNodeSet()") - >>> len(r) - 1 - >>> t = r[0] - >>> t.tag - 'results' - >>> len(t) - 3 - >>> t[0].tag - 'result' - >>> t[0].text - 'Alpha' - >>> t[1].text - 'Beta' - -It's even possible to filter that result set with another XPath -expression:: - - >>> r = e.evaluate("returnsNodeSet()/result") - >>> len(r) - 3 - >>> r[0].tag - 'result' - >>> r[1].tag - 'result' - >>> r[0].text - 'Alpha' Modified: lxml/branch/htmlparser/setup.py ============================================================================== --- lxml/branch/htmlparser/setup.py (original) +++ lxml/branch/htmlparser/setup.py Sun Mar 26 12:53:24 2006 @@ -1,30 +1,91 @@ import os +def flags(cmd): + wf, rf, ef = os.popen3(cmd) + return rf.read().strip().split(' ') + +setup_args = {} +changelog_text = "" +version = open('version.txt').read().strip() + try: from setuptools import setup from setuptools.extension import Extension + setup_args['zip_safe'] = True except ImportError: from distutils.core import setup from distutils.extension import Extension -from Pyrex.Distutils import build_ext as build_pyx +try: + from Pyrex.Distutils import build_ext as build_pyx + sources = ["src/lxml/etree.pyx"] + setup_args['cmdclass'] = {'build_ext' : build_pyx} +except ImportError: + print "*NOTE*: Trying to build without Pyrex, needs pre-generated 'src/lxml/etree.c' !" + sources = ["src/lxml/etree.c"] -def flags(cmd): - wf, rf, ef = os.popen3(cmd) - return rf.read().strip().split(' ') +try: + changelog = open("CHANGES.txt", 'r') +except: + print "*NOTE*: couldn't open CHANGES.txt !" 
+else: + inside = 0 + changelog_lines = [] + for line in changelog: + if line.startswith('====='): + inside += 1 + if inside > 3: + break + if inside > 1: + changelog_lines.append(line) + elif version in line: + changelog_lines.append(line) + inside += 1 + + if changelog_lines: + changelog_text = ''.join(changelog_lines[:-1]) + + changelog.close() setup( name = "lxml", - version = open('version.txt').read().strip(), - maintainer = 'Infrae', - maintainer_email="faassen@infrae.com", + version = version, + author="lxml dev team", + author_email="lxml-dev@codespeak.net", + maintainer="lxml dev team", + maintainer_email="lxml-dev@codespeak.net", + url="http://codespeak.net/lxml", + + description="Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API.", + + long_description="""\ +lxml is a Pythonic binding for the libxml2 and libxslt libraries. It provides +safe and convenient access to these libraries using the ElementTree API. + +It extends the ElementTree API significantly to offer support for XPath, +RelaxNG, XML Schema, XSLT, C14N and much more. 
+ +""" + changelog_text, + + classifiers = [ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'Intended Audience :: Information Technology', + 'License :: OSI Approved :: BSD License', + 'Programming Language :: Python', + 'Programming Language :: C', + 'Operating System :: OS Independent', + 'Topic :: Text Processing :: Markup :: XML', + 'Topic :: Software Development :: Libraries :: Python Modules' + ], + package_dir = {'': 'src'}, - packages = ['lxml', 'lxml.tests'], - ext_modules = [ - Extension( - "lxml.etree", - sources = ["src/lxml/etree.pyx"], - extra_compile_args = ['-w'] + flags('xslt-config --cflags'), - extra_link_args = flags('xslt-config --libs'))], - cmdclass = {'build_ext': build_pyx} + packages = ['lxml'], + ext_modules = [ Extension( + "lxml.etree", + sources = sources, + extra_compile_args = ['-w'] + flags('xslt-config --cflags'), + extra_link_args = flags('xslt-config --libs') + )], + **setup_args ) Modified: lxml/branch/htmlparser/src/lxml/etree.pyx ============================================================================== --- lxml/branch/htmlparser/src/lxml/etree.pyx (original) +++ lxml/branch/htmlparser/src/lxml/etree.pyx Sun Mar 26 12:53:24 2006 @@ -1,36 +1,39 @@ -cimport tree -from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs +cimport tree, python +from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement +from python cimport isinstance, hasattr, callable, _cstr cimport xpath cimport xslt cimport xmlerror cimport xinclude cimport c14n cimport cstd -import re, types +import re import _elementpath from StringIO import StringIO import sys -# should libxml2/libxslt be allowed to shout? 
-DEBUG = False - -cdef int PROXY_ELEMENT -cdef int PROXY_ATTRIB -cdef int PROXY_ATTRIB_ITER -cdef int PROXY_ELEMENT_ITER - -PROXY_ELEMENT = 0 -PROXY_ATTRIB = 1 -PROXY_ATTRIB_ITER = 2 -PROXY_ELEMENT_ITER = 3 - - # the rules # any libxml C argument/variable is prefixed with c_ # any non-public function/class is prefixed with an underscore # instance creation is always through factories +ctypedef enum LXML_PROXY_TYPE: + PROXY_ELEMENT + PROXY_ATTRIB + +# what to do with libxml2/libxslt error messages? +# 0 : drop +# 1 : use log +cdef int __DEBUG +__DEBUG = 1 + +# maximum number of lines in the libxml2/xslt log if __DEBUG == 1 +cdef int __MAX_LOG_SIZE +__MAX_LOG_SIZE = 100 + +# make the compiled-in debug state publicly available +DEBUG = __DEBUG # Error superclass for ElementTree compatibility class Error(Exception): @@ -38,10 +41,12 @@ # module level superclass for all exceptions class LxmlError(Error): - pass + def __init__(self, *args): + Error.__init__(self, *args) + self.error_log = __copyGlobalErrorLog() # superclass for all syntax errors -class LxmlSyntaxError(SyntaxError, LxmlError): +class LxmlSyntaxError(LxmlError, SyntaxError): pass class XIncludeError(LxmlError): @@ -68,11 +73,6 @@ #print self._c_doc.dict is theParser._c_dict tree.xmlFreeDoc(self._c_doc) - def buildNewPrefix(self): - ns = "ns%d" % self._ns_counter - self._ns_counter = self._ns_counter + 1 - return ns - def getroot(self): cdef xmlNode* c_node c_node = tree.xmlDocGetRootElement(self._c_doc) @@ -80,6 +80,65 @@ return None return _elementFactory(self, c_node) + def buildNewPrefix(self): + ns = python.PyString_FromFormat("ns%d", self._ns_counter) + self._ns_counter = self._ns_counter + 1 + return ns + + cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, char* href): + """Get or create namespace structure for a node. 
+ """ + cdef xmlNs* c_ns + # look for existing ns + c_ns = tree.xmlSearchNsByHref(self._c_doc, c_node, href) + if c_ns is not NULL: + return c_ns + # create ns if existing ns cannot be found + # try to simulate ElementTree's namespace prefix creation + prefix = self.buildNewPrefix() + c_ns = tree.xmlNewNs(c_node, href, _cstr(prefix)) + return c_ns + + cdef void _setNodeNs(self, xmlNode* c_node, char* href): + "Lookup namespace structure and set it for the node." + cdef xmlNs* c_ns + c_ns = self._findOrBuildNodeNs(c_node, href) + tree.xmlSetNs(c_node, c_ns) + + cdef void _setNodeNamespaces(self, xmlNode* c_node, + object node_ns_utf, object nsmap): + """Lookup current namespace prefixes, then set namespace structure for + node and register new ns-prefix mappings. + """ + cdef xmlNs* c_ns + cdef xmlDoc* c_doc + cdef char* c_prefix + cdef char* c_href + if not nsmap: + if node_ns_utf is not None: + self._setNodeNs(c_node, node_ns_utf) + return + + c_doc = self._c_doc + for prefix, href in nsmap.items(): + href_utf = _utf8(href) + c_href = _cstr(href_utf) + if prefix is not None: + prefix_utf = _utf8(prefix) + c_prefix = _cstr(prefix_utf) + else: + c_prefix = NULL + # add namespace with prefix if ns is not already known + c_ns = tree.xmlSearchNsByHref(c_doc, c_node, c_href) + if c_ns is NULL: + c_ns = tree.xmlNewNs(c_node, c_href, c_prefix) + if href_utf == node_ns_utf: + tree.xmlSetNs(c_node, c_ns) + node_ns_utf = None + + if node_ns_utf is not None: + self._setNodeNs(c_node, node_ns_utf) + cdef _Document _parseDocument(source, parser): cdef xmlDoc* c_doc # XXX simplistic (c)StringIO support @@ -139,25 +198,6 @@ this if they recursively call _init() in the superclasses. """ - cdef xmlNs* _getNs(self, char* href): - """Get or create namespace structure. 
- """ - cdef xmlDoc* c_doc - cdef xmlNode* c_node - - c_doc = self._doc._c_doc - c_node = self._c_node - cdef xmlNs* c_ns - # look for existing ns - c_ns = tree.xmlSearchNsByHref(c_doc, c_node, href) - if c_ns is not NULL: - return c_ns - # create ns if existing ns cannot be found - # try to simulate ElementTree's namespace prefix creation - prefix = self._doc.buildNewPrefix() - c_ns = tree.xmlNewNs(c_node, href, prefix) - return c_ns - cdef class _ElementTree: cdef _Document _doc cdef _NodeBase _context_node @@ -188,7 +228,7 @@ def getiterator(self, tag=None): root = self.getroot() if root is None: - return [] + return () return root.getiterator(tag) def find(self, path): @@ -213,7 +253,7 @@ return root.findall(path) # extensions to ElementTree API - def xpath(self, path, namespaces=None): + def xpath(self, _path, namespaces=None, **_variables): """XPath evaluate in context of document. namespaces is an optional dictionary with prefix to namespace URI @@ -228,9 +268,9 @@ against the same document, it is more efficient to use XPathEvaluator directly. """ - return XPathDocumentEvaluator(self, namespaces).evaluate(path) + return XPathDocumentEvaluator(self._doc, namespaces).evaluate(_path, **_variables) - def xslt(self, xslt, **kw): + def xslt(self, _xslt, extensions=None, **_kw): """Transform this document using other document. xslt is a tree that should be XSLT @@ -242,8 +282,8 @@ multiple documents, it is more efficient to use the XSLT class directly. """ - style = XSLT(xslt) - return style(self, **kw) + style = XSLT(_xslt, extensions) + return style(self, **_kw) def relaxng(self, relaxng): """Validate this document using other document. 
@@ -324,19 +364,23 @@ return result cdef class _Element(_NodeBase): + cdef object _tag + # MANIPULATORS def __setitem__(self, index, _NodeBase element): cdef xmlNode* c_node cdef xmlNode* c_next + cdef int foreign c_node = _findChild(self._c_node, index) if c_node is NULL: raise IndexError + foreign = self._doc is not element._doc c_next = element._c_node.next _removeText(c_node.next) tree.xmlReplaceNode(c_node, element._c_node) _moveTail(c_next, element._c_node) - changeDocumentBelow(element, self._doc) + changeDocumentBelow(element, self._doc, foreign) def __delitem__(self, index): cdef xmlNode* c_node @@ -355,12 +399,12 @@ cdef xmlNode* c_node cdef xmlNode* c_next cdef _Element mynode + cdef int foreign # first, find start of slice c_node = _findChild(self._c_node, start) # now delete the slice - _deleteSlice(c_node, start, stop) - # now find start of slice again, for insertion (just before it) - c_node = _findChild(self._c_node, start) + if start != stop: + c_node = _deleteSlice(c_node, start, stop) # if the insertion point is at the end, append there if c_node is NULL: for node in value: @@ -368,7 +412,9 @@ return # if the next element is in the list, insert before it for node in value: + _raiseIfNone(node) mynode = node + foreign = self._doc is not mynode._doc # store possible text tail c_next = mynode._c_node.next # now move node previous to insertion point @@ -377,7 +423,7 @@ # and move tail just behind his node _moveTail(c_next, mynode._c_node) # move it into a new document - changeDocumentBelow(mynode, self._doc) + changeDocumentBelow(mynode, self._doc, foreign) def __deepcopy__(self, memo): return self.__copy__() @@ -396,33 +442,39 @@ def append(self, _Element element): cdef xmlNode* c_next - cdef xmlNode* c_next2 + cdef xmlNode* c_node + cdef int foreign + _raiseIfNone(element) + foreign = self._doc is not element._doc + c_node = element._c_node # store possible text node - c_next = element._c_node.next + c_next = c_node.next # XXX what if element is 
coming from a different document? - tree.xmlUnlinkNode(element._c_node) + tree.xmlUnlinkNode(c_node) # move node itself - tree.xmlAddChild(self._c_node, element._c_node) - _moveTail(c_next, element._c_node) + tree.xmlAddChild(self._c_node, c_node) + _moveTail(c_next, c_node) # uh oh, elements may be pointing to different doc when # parent element has moved; change them too.. - changeDocumentBelow(element, self._doc) + changeDocumentBelow(element, self._doc, foreign) def clear(self): cdef xmlAttr* c_attr cdef xmlAttr* c_attr_next cdef xmlNode* c_node cdef xmlNode* c_node_next - self.text = None - self.tail = None + c_node = self._c_node + # remove self.text and self.tail + _removeText(c_node.children) + _removeText(c_node.next) # remove all attributes - c_attr = self._c_node.properties + c_attr = c_node.properties while c_attr is not NULL: c_attr_next = c_attr.next tree.xmlRemoveProp(c_attr) c_attr = c_attr_next # remove all subelements - c_node = self._c_node.children + c_node = c_node.children while c_node is not NULL: c_node_next = c_node.next if _isElement(c_node): @@ -434,40 +486,43 @@ def insert(self, index, _Element element): cdef xmlNode* c_node cdef xmlNode* c_next + cdef int foreign + _raiseIfNone(element) c_node = _findChild(self._c_node, index) if c_node is NULL: self.append(element) return + foreign = self._doc is not element._doc c_next = element._c_node.next tree.xmlAddPrevSibling(c_node, element._c_node) _moveTail(c_next, element._c_node) - changeDocumentBelow(element, self._doc) + changeDocumentBelow(element, self._doc, foreign) def remove(self, _Element element): cdef xmlNode* c_node - c_node = self._c_node.children - while c_node is not NULL: - if c_node is element._c_node: - _removeText(element._c_node.next) - tree.xmlUnlinkNode(element._c_node) - return - c_node = c_node.next - else: - raise ValueError, "Matching element could not be found" + _raiseIfNone(element) + c_node = element._c_node + if c_node.parent is not self._c_node: + raise 
ValueError, "Element is not a child of this node." + _removeText(c_node.next) + tree.xmlUnlinkNode(c_node) # PROPERTIES property tag: def __get__(self): - return _namespacedName(self._c_node) + if self._tag is not None: + return self._tag + self._tag = _namespacedName(self._c_node) + return self._tag def __set__(self, value): cdef xmlNs* c_ns ns, text = _getNsTag(value) - tree.xmlNodeSetName(self._c_node, text) + self._tag = value + tree.xmlNodeSetName(self._c_node, _cstr(text)) if ns is None: return - c_ns = self._getNs(ns) - tree.xmlSetNs(self._c_node, c_ns) + self._doc._setNodeNs(self._c_node, _cstr(ns)) # not in ElementTree, read-only property prefix: @@ -483,7 +538,6 @@ property text: def __get__(self): - cdef xmlNode* c_node return _collectText(self._c_node.children) def __set__(self, value): @@ -493,9 +547,9 @@ if value is None: return # now add new text node with value at start - text = value.encode('UTF-8') + text = _utf8(value) c_text_node = tree.xmlNewDocText(self._doc._c_doc, - text) + _cstr(text)) if self._c_node.children is NULL: tree.xmlAddChild(self._c_node, c_text_node) else: @@ -504,7 +558,6 @@ property tail: def __get__(self): - cdef xmlNode* c_node return _collectText(self._c_node.next) def __set__(self, value): @@ -513,8 +566,8 @@ _removeText(self._c_node.next) if value is None: return - text = value.encode('UTF-8') - c_text_node = tree.xmlNewDocText(self._doc._c_doc, text) + text = _utf8(value) + c_text_node = tree.xmlNewDocText(self._doc._c_doc, _cstr(text)) # XXX what if we're the top element? 
tree.xmlAddNextSibling(self._c_node, c_text_node) @@ -531,7 +584,8 @@ def __getslice__(self, start, stop): cdef xmlNode* c_node - cdef int c + cdef _Document doc + cdef int c, c_stop # this does not work for negative start, stop, however, # python seems to convert these to positive start, stop before # calling, so this all works perfectly (at the cost of a len() call) @@ -539,10 +593,14 @@ if c_node is NULL: return [] c = start + c_stop = stop result = [] - while c_node is not NULL and c < stop: + doc = self._doc + while c_node is not NULL and c < c_stop: if _isElement(c_node): - result.append(_elementFactory(self._doc, c_node)) + ret = python.PyList_Append(result, _elementFactory(doc, c_node)) + if ret: + raise c = c + 1 c_node = c_node.next return result @@ -558,47 +616,96 @@ c_node = c_node.next return c + def __nonzero__(self): + cdef xmlNode* c_node + c_node = _findChildBackwards(self._c_node, 0) + return c_node != NULL + def __iter__(self): return ElementChildIterator(self) def index(self, _Element x, start=None, stop=None): - cdef int k + cdef int k + cdef int l + cdef int c_stop + cdef int c_start cdef xmlNode* c_child - k = 0 - c_child = self._c_node.children + cdef xmlNode* c_start_node + _raiseIfNone(x) + c_child = x._c_node + if c_child.parent is not self._c_node: + raise ValueError, "Element is not a child of this node." 
- # account for negative start and stop by turning them into positive - if start is not None and start < 0: - start = len(self) + start - if stop is not None and stop < 0: - stop = len(self) + stop - - while c_child is not NULL: - if _isElement(c_child): - if c_child is x._c_node: - if ((start is None or k >= start) and - (stop is None or k < stop)): - return k - else: - # since there is only a single element to be found - # if we found it out of range, we will not find - # it anymore in the range, so we bail out - raise ValueError, "list.index(x): x not in list" - else: + if start is None: + c_start = 0 + else: + c_start = start + if stop is None: + c_stop = 0 + else: + c_stop = stop + if c_stop == 0 or \ + c_start >= c_stop and (c_stop > 0 or c_start < 0): + raise ValueError, "list.index(x): x not in slice" + + # for negative slice indices, check slice before searching index + if c_start < 0 or c_stop < 0: + # start from right, at most up to leftmost(c_start, c_stop) + if c_start < c_stop: + k = -c_start + else: + k = -c_stop + c_start_node = self._c_node.last + l = 1 + while c_start_node != c_child and l < k: + if _isElement(c_start_node): + l = l + 1 + c_start_node = c_start_node.prev + if c_start_node == c_child: + # found! before slice end? 
+ if c_stop < 0 and l <= -c_stop: + raise ValueError, "list.index(x): x not in slice" + elif c_start < 0: + raise ValueError, "list.index(x): x not in slice" + + # now determine the index backwards from child + c_child = c_child.prev + k = 0 + if c_stop > 0: + # we can optimize: stop after c_stop elements if not found + while c_child != NULL and k < c_stop: + if _isElement(c_child): k = k + 1 - c_child = c_child.next - - raise ValueError, "list index(x): x not in list" + c_child = c_child.prev + if k < c_stop: + return k + else: + # traverse all + while c_child != NULL: + if _isElement(c_child): + k = k + 1 + c_child = c_child.prev + if c_start > 0: + if k >= c_start: + return k + else: + return k + if c_start or c_stop: + raise ValueError, "list.index(x): x not in slice" + else: + raise ValueError, "list.index(x): x not in list" def get(self, key, default=None): # XXX more redundancy, but might be slightly faster than # return self.attrib.get(key, default) cdef char* cresult + cdef char* c_tag ns, tag = _getNsTag(key) + c_tag = _cstr(tag) if ns is None: - cresult = tree.xmlGetNoNsProp(self._c_node, tag) + cresult = tree.xmlGetNoNsProp(self._c_node, c_tag) else: - cresult = tree.xmlGetNsProp(self._c_node, tag, ns) + cresult = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns)) if cresult is NULL: result = default else: @@ -614,11 +721,16 @@ def getchildren(self): cdef xmlNode* c_node + cdef _Document doc + cdef int ret result = [] + doc = self._doc c_node = self._c_node.children while c_node is not NULL: if _isElement(c_node): - result.append(_elementFactory(self._doc, c_node)) + ret = python.PyList_Append(result, _elementFactory(doc, c_node)) + if ret: + raise c_node = c_node.next return result @@ -630,17 +742,11 @@ return None def getiterator(self, tag=None): - result = [] - if tag == "*": - tag = None - if tag is None or self.tag == tag: - result.append(self) - for node in self: - result.extend(node.getiterator(tag)) - return result - - # XXX this doesn't work yet 
- # return _docOrderIteratorFactory(self._doc, self._c_node, tag) + iterator = ElementDepthFirstIterator(self) + if tag is None or tag == '*': + return iterator + else: + return ElementTagFilter(iterator, tag) def makeelement(self, tag, attrib): return Element(tag, attrib) @@ -654,8 +760,8 @@ def findall(self, path): return _elementpath.findall(self, path) - def xpath(self, path, namespaces=None): - return XPathElementEvaluator(self, namespaces).evaluate(path) + def xpath(self, _path, namespaces=None, **_variables): + return XPathElementEvaluator(self, namespaces).evaluate(_path, **_variables) cdef _Element _elementFactory(_Document doc, xmlNode* c_node): cdef _Element result @@ -671,11 +777,12 @@ else: c_ns_href = c_node.ns.href element_class = _find_element_class(c_ns_href, c_node.name) - result = element_class() elif c_node.type == tree.XML_COMMENT_NODE: - result = _Comment() + element_class = _Comment else: assert 0, "Unknown node type: %s" % c_node.type + result = element_class() + result._tag = None result._doc = doc result._c_node = c_node result._proxy_type = PROXY_ELEMENT @@ -707,7 +814,7 @@ # ACCESSORS def __repr__(self): - return "" % id(self) + return "" % self.text def __getitem__(self, n): raise IndexError @@ -742,22 +849,28 @@ # MANIPULATORS def __setitem__(self, key, value): cdef xmlNs* c_ns + cdef char* c_value + cdef char* c_tag ns, tag = _getNsTag(key) - value = value.encode('UTF-8') + c_tag = _cstr(tag) + value = _utf8(value) + c_value = _cstr(value) if ns is None: - tree.xmlSetProp(self._c_node, tag, value) + tree.xmlSetProp(self._c_node, c_tag, c_value) else: - c_ns = self._getNs(ns) - tree.xmlSetNsProp(self._c_node, c_ns, tag, value) + c_ns = self._doc._findOrBuildNodeNs(self._c_node, _cstr(ns)) + tree.xmlSetNsProp(self._c_node, c_ns, c_tag, c_value) def __delitem__(self, key): cdef xmlNs* c_ns cdef xmlAttr* c_attr + cdef char* c_tag ns, tag = _getNsTag(key) + c_tag = _cstr(tag) if ns is None: - c_attr = tree.xmlHasProp(self._c_node, tag) + 
c_attr = tree.xmlHasProp(self._c_node, c_tag) else: - c_attr = tree.xmlHasNsProp(self._c_node, tag, ns) + c_attr = tree.xmlHasNsProp(self._c_node, c_tag, _cstr(ns)) if c_attr is NULL: # XXX free namespace that is not in use..? raise KeyError, key @@ -773,11 +886,13 @@ def __getitem__(self, key): cdef xmlNs* c_ns cdef char* cresult + cdef char* c_tag ns, tag = _getNsTag(key) + c_tag = _cstr(tag) if ns is None: - cresult = tree.xmlGetNoNsProp(self._c_node, tag) + cresult = tree.xmlGetNoNsProp(self._c_node, c_tag) else: - cresult = tree.xmlGetNsProp(self._c_node, tag, ns) + cresult = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns)) if cresult is NULL: # XXX free namespace that is not in use..? raise KeyError, key @@ -811,7 +926,7 @@ c_node = (self._c_node.properties) while c_node is not NULL: if c_node.type == tree.XML_ATTRIBUTE_NODE: - result.append(_namespacedName(c_node)) + python.PyList_Append(result, _namespacedName(c_node)) c_node = c_node.next return result @@ -821,7 +936,7 @@ c_node = (self._c_node.properties) while c_node is not NULL: if c_node.type == tree.XML_ATTRIBUTE_NODE: - result.append(self._getValue(c_node)) + python.PyList_Append(result, self._getValue(c_node)) c_node = c_node.next return result @@ -839,7 +954,7 @@ c_node = (self._c_node.properties) while c_node is not NULL: if c_node.type == tree.XML_ATTRIBUTE_NODE: - result.append(( + python.PyList_Append(result, ( _namespacedName(c_node), self._getValue(c_node) )) @@ -849,11 +964,13 @@ def has_key(self, key): cdef xmlNs* c_ns cdef char* result + cdef char* c_tag ns, tag = _getNsTag(key) + c_tag = _cstr(tag) if ns is None: - result = tree.xmlGetNoNsProp(self._c_node, tag) + result = tree.xmlGetNoNsProp(self._c_node, c_tag) else: - result = tree.xmlGetNsProp(self._c_node, tag, ns) + result = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns)) if result is not NULL: tree.xmlFree(result) return True @@ -863,11 +980,13 @@ def __contains__(self, key): cdef xmlNs* c_ns cdef char* result + cdef char* c_tag 
ns, tag = _getNsTag(key) + c_tag = _cstr(tag) if ns is None: - result = tree.xmlGetNoNsProp(self._c_node, tag) + result = tree.xmlGetNoNsProp(self._c_node, c_tag) else: - result = tree.xmlGetNsProp(self._c_node, tag, ns) + result = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns)) if result is not NULL: tree.xmlFree(result) return True @@ -888,44 +1007,121 @@ cdef class ElementChildIterator: # we keep Python references here to control GC - cdef object _node - def __init__(self, node): # Python ref! + cdef _NodeBase _node + def __init__(self, _NodeBase node): # Python ref! cdef xmlNode* c_node - cdef _NodeBase base_node - base_node = <_NodeBase>node - c_node = _findChildForwards(base_node._c_node, 0) + c_node = _findChildForwards(node._c_node, 0) if c_node is NULL: self._node = None else: - self._node = _elementFactory(base_node._doc, c_node) + self._node = _elementFactory(node._doc, c_node) def __iter__(self): return self def __next__(self): cdef xmlNode* c_node - cdef _NodeBase base_node - current_node = self._node # Python ref! 
+ cdef _NodeBase current_node + # Python ref: + current_node = self._node if current_node is None: raise StopIteration - base_node = <_NodeBase>current_node - c_node = _nextElement(base_node._c_node) + c_node = _nextElement(current_node._c_node) if c_node is NULL: self._node = None else: - self._node = _elementFactory(base_node._doc, c_node) + # Python ref: + self._node = _elementFactory(current_node._doc, c_node) return current_node -cdef xmlNode* _createElement(xmlDoc* c_doc, object tag, +cdef class ElementDepthFirstIterator: + """Iterates over an element and its sub-elements in document order (depth + first pre-order).""" + # we keep Python references here to control GC + # keep next node to return and a stack of position state in the tree + cdef object _stack + cdef _NodeBase _next_node + def __init__(self, _NodeBase node): + cdef xmlNode* c_node + _raiseIfNone(node) + self._next_node = node + self._stack = [] + self._findAndPushNextNode(node) + def __iter__(self): + return self + def __next__(self): + cdef xmlNode* c_node + cdef _NodeBase next_node + current_node = self._next_node + if current_node is None: + raise StopIteration + stack = self._stack + if python.PyList_GET_SIZE(stack) == 0: + self._next_node = None + return current_node + next_node = stack[-1] + self._next_node = next_node + self._findAndPushNextNode(next_node) + return current_node + + cdef void _findAndPushNextNode(self, _NodeBase node): + cdef xmlNode* c_node + stack = self._stack + # try next child level until we hit a leaf + c_node = _findChildForwards(node._c_node, 0) + if c_node is NULL: + pop = stack.pop + while c_node is NULL and python.PyList_GET_SIZE(stack): + # walk up the stack until we find a sibling + node = pop() + c_node = _nextElement(node._c_node) + if c_node is not NULL: + python.PyList_Append( + stack, _elementFactory(node._doc, c_node)) + +cdef class ElementTagFilter: + cdef object _iterator + cdef object _pystrings + cdef char* _href + cdef char* _name + def 
__init__(self, element_iterator, tag): + self._iterator = iter(element_iterator) + ns_href, name = _getNsTag(tag) + self._pystrings = (ns_href, name) # keep Python references + self._name = _cstr(name) + if ns_href is None: + self._href = NULL + else: + self._href = _cstr(ns_href) + def __iter__(self): + return self + def __next__(self): + cdef _NodeBase node + while 1: + node = self._iterator.next() + if self._tagMatches(node._c_node): + return node + + cdef int _tagMatches(self, xmlNode* c_node): + if tree.strcmp(c_node.name, self._name) == 0: + if c_node.ns == NULL or c_node.ns.href == NULL: + return self._href == NULL + else: + return tree.strcmp(c_node.ns.href, self._href) == 0 + return 0 + +cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf, object attrib, object extra) except NULL: cdef xmlNode* c_node - tag_utf = tag.encode('UTF-8') - if attrib is None: - attrib = {} - attrib.update(extra) - c_node = tree.xmlNewDocNode(c_doc, NULL, tag_utf, NULL) - for name, value in attrib.items(): - name_utf = name.encode('UTF-8') - value_utf = value.encode('UTF-8') - tree.xmlNewProp(c_node, name_utf, value_utf) + if extra: + if attrib is None: + attrib = extra + else: + attrib.update(extra) + c_node = tree.xmlNewDocNode(c_doc, NULL, _cstr(name_utf), NULL) + if attrib: + for name, value in attrib.items(): + attr_name_utf = _utf8(name) + value_utf = _utf8(value) + tree.xmlNewProp(c_node, _cstr(attr_name_utf), _cstr(value_utf)) return c_node cdef xmlNode* _createComment(xmlDoc* c_doc, char* text): @@ -936,44 +1132,42 @@ # module-level API for ElementTree -def Element(tag, attrib=None, nsmap=None, **extra): - cdef _Document doc - cdef _Element result +def Element(_tag, attrib=None, nsmap=None, **_extra): cdef xmlNode* c_node cdef xmlDoc* c_doc + cdef _Document doc + ns_utf, name_utf = _getNsTag(_tag) c_doc = theParser.newDoc() - c_node = _createElement(c_doc, tag, attrib, extra) + c_node = _createElement(c_doc, name_utf, attrib, _extra) 
tree.xmlDocSetRootElement(c_doc, c_node) - # add namespaces to node if necessary - _addNamespaces(c_doc, c_node, nsmap) - # XXX hack for namespaces doc = _documentFactory(c_doc) - result = _elementFactory(doc, c_node) - result.tag = tag - return result + # add namespaces to node if necessary + doc._setNodeNamespaces(c_node, ns_utf, nsmap) + return _elementFactory(doc, c_node) def Comment(text=None): cdef _Document doc cdef xmlNode* c_node if text is None: - text = '' - text = ' %s ' % text.encode('UTF-8') + text = ' ' + else: + text = ' %s ' % _utf8(text) doc = _documentFactory( theParser.newDoc() ) c_node = _createComment(doc._c_doc, text) tree.xmlAddChild(doc._c_doc, c_node) return _commentFactory(doc, c_node) -def SubElement(_Element parent, tag, attrib=None, nsmap=None, **extra): - cdef xmlNode* c_node - cdef _Element element - c_node = _createElement(parent._doc._c_doc, tag, attrib, extra) - element = _elementFactory(parent._doc, c_node) - parent.append(element) +def SubElement(_Element _parent, _tag, attrib=None, nsmap=None, **_extra): + cdef xmlNode* c_node + cdef _Document doc + _raiseIfNone(_parent) + ns_utf, name_utf = _getNsTag(_tag) + doc = _parent._doc + c_node = _createElement(doc._c_doc, name_utf, attrib, _extra) + tree.xmlAddChild(_parent._c_node, c_node) # add namespaces to node if necessary - _addNamespaces(parent._doc._c_doc, c_node, nsmap) - # XXX hack for namespaces - element.tag = tag - return element + doc._setNodeNamespaces(c_node, ns_utf, nsmap) + return _elementFactory(doc, c_node) def ElementTree(_Element element=None, file=None, parser=None): cdef xmlNode* c_next @@ -1002,18 +1196,26 @@ def XML(text): cdef xmlDoc* c_doc - if isinstance(text, unicode): - text = _stripDeclaration(text.encode('UTF-8')) + if python.PyUnicode_Check(text): + text = _stripDeclaration(_utf8(text)) c_doc = theParser.parseDoc(text, None) return _documentFactory(c_doc).getroot() fromstring = XML +def XMLID(text): + root = XML(text) + dic = {} + for elem in 
root.xpath('//*[string(@id)]'): + python.PyDict_SetItem(dic, elem.get('id'), elem) + return (root, dic) + def iselement(element): return isinstance(element, _Element) def dump(_NodeBase elem): assert elem is not None, "Must supply element." + # better, but not ET compatible : _raiseIfNone(elem) _dumpToFile(sys.stdout, elem._doc._c_doc, elem._c_node) def tostring(_NodeBase element, encoding='us-ascii'): @@ -1023,13 +1225,14 @@ cdef char* enc assert element is not None + # better, but not ET compatible : _raiseIfNone(element) #if encoding is None: # encoding = 'UTF-8' if encoding in ('utf8', 'UTF8', 'utf-8'): encoding = 'UTF-8' doc = element._doc - enc = encoding + enc = _cstr(encoding) # it is necessary to *and* find the encoding handler *and* use # encoding during output enchandler = tree.xmlFindCharEncodingHandler(enc) @@ -1052,21 +1255,9 @@ doc = _parseDocument(source, parser) return ElementTree(doc.getroot()) -cdef _addNamespaces(xmlDoc* c_doc, xmlNode* c_node, object nsmap): - cdef xmlNs* c_ns - if nsmap is None: - return - for prefix, href in nsmap.items(): - # add namespace with prefix if ns is not already known - c_ns = tree.xmlSearchNsByHref(c_doc, c_node, href) - if c_ns is NULL: - if prefix is not None: - tree.xmlNewNs(c_node, href, prefix) - else: - tree.xmlNewNs(c_node, href, NULL) - # include submodules +include "xmlerror.pxi" # error and log handling include "nsclasses.pxi" # Namespace implementation and registry include "xslt.pxi" # XPath and XSLT include "relaxng.pxi" # RelaxNG @@ -1081,6 +1272,10 @@ # Private helper functions +cdef void _raiseIfNone(el): + if el is None: + raise TypeError, "Argument must not be None." 
+ cdef _Document _documentOrRaise(object input): cdef _Document doc doc = _documentOf(input) @@ -1163,13 +1358,13 @@ cdef _dumpToFile(f, xmlDoc* c_doc, xmlNode* c_node): - cdef tree.PyObject* o + cdef python.PyObject* o cdef tree.xmlOutputBuffer* c_buffer - if not tree.PyFile_Check(f): + if not python.PyFile_Check(f): raise ValueError, "Not a file" - o = f - c_buffer = tree.xmlOutputBufferCreateFile(tree.PyFile_AsFile(o), NULL) + o = f + c_buffer = tree.xmlOutputBufferCreateFile(python.PyFile_AsFile(o), NULL) tree.xmlNodeDumpOutput(c_buffer, c_doc, c_node, 0, 0, NULL) # dump next node if it's a text node _dumpNextNode(c_buffer, c_doc, c_node, NULL) @@ -1202,14 +1397,31 @@ If there was no text to collect, return None """ + cdef int scount + cdef char* text + cdef xmlNode* c_node_cur + # check for multiple text nodes + scount = 0 + text = NULL + c_node_cur = c_node + while c_node_cur is not NULL and c_node_cur.type == tree.XML_TEXT_NODE: + if c_node_cur.content[0] != c'\0': + text = c_node_cur.content + scount = scount + 1 + c_node_cur = c_node_cur.next + + # handle two most common cases first + if text is NULL: + return None + if scount == 1: + return funicode(text) + + # the rest is not performance critical anymore result = '' while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE: result = result + c_node.content c_node = c_node.next - if result: - return funicode(result) - else: - return None + return funicode(result) cdef _removeText(xmlNode* c_node): """Remove all text nodes. 
@@ -1290,17 +1502,18 @@ c_target = c_tail c_tail = c_next -cdef int _isElement(xmlNode* c_node): - return (c_node.type == tree.XML_ELEMENT_NODE or - c_node.type == tree.XML_COMMENT_NODE) +### see etree.h: +## cdef int _isElement(xmlNode* c_node): +## return (c_node.type == tree.XML_ELEMENT_NODE or +## c_node.type == tree.XML_COMMENT_NODE) -cdef void _deleteSlice(xmlNode* c_node, int start, int stop): +cdef xmlNode* _deleteSlice(xmlNode* c_node, int start, int stop): """Delete slice, starting with c_node, start counting at start, end at stop. """ cdef xmlNode* c_next cdef int c if c_node is NULL: - return + return NULL # now start deleting nodes c = start while c_node is not NULL and c < stop: @@ -1311,18 +1524,8 @@ _removeNode(c_node) c = c + 1 c_node = c_next + return c_node -def _getNsTag(tag): - """Given a tag, find namespace URI and tag name. - Return None for NS uri if no namespace URI available. - """ - tag = tag.encode('UTF-8') - if tag[0] == '{': - i = tag.find('}') - assert i != -1 - return tag[1:i], tag[i + 1:] - return None, tag - cdef int isutf8(char* string): cdef int i i = 0 @@ -1335,18 +1538,53 @@ cdef object funicode(char* s): if isutf8(s): - return tree.PyUnicode_DecodeUTF8(s, tree.strlen(s), "strict") - return tree.PyString_FromStringAndSize(s, tree.strlen(s)) + return python.PyUnicode_DecodeUTF8(s, tree.strlen(s), NULL) + return python.PyString_FromString(s) + +cdef object _utf8(object s): + if python.PyString_Check(s): + assert not isutf8(_cstr(s)), "All strings must be Unicode or ASCII" + return s + elif python.PyUnicode_Check(s): + return python.PyUnicode_AsUTF8String(s) + else: + raise TypeError, "Argument must be string or unicode." +cdef _getNsTag(tag): + """Given a tag, find namespace URI and tag name. + Return None for NS uri if no namespace URI available. 
+ """ + cdef char* c_tag + cdef char* c_pos + cdef int nslen + tag = _utf8(tag) + c_tag = _cstr(tag) + if c_tag[0] == c'{': + c_pos = tree.xmlStrchr(c_tag+1, c'}') + if c_pos is NULL: + raise ValueError, "Invalid tag name" + nslen = c_pos - c_tag - 1 + ns = python.PyString_FromStringAndSize(c_tag+1, nslen) + tag = python.PyString_FromString(c_pos+1) + else: + ns = None + return ns, tag + cdef object _namespacedName(xmlNode* c_node): + cdef char* href + cdef char* name + name = c_node.name if c_node.ns is NULL or c_node.ns.href is NULL: - return funicode(c_node.name) + return funicode(name) else: - # XXX optimize - s = "{%s}%s" % (c_node.ns.href, c_node.name) - return funicode(s) + href = c_node.ns.href + s = python.PyString_FromFormat("{%s}%s", href, name) + if isutf8(href) or isutf8(name): + return python.PyUnicode_FromEncodedObject(s, 'UTF-8', NULL) + else: + return s -def _getFilenameForFile(source): +cdef _getFilenameForFile(source): """Given a Python File or Gzip object, give filename back. Returns None if not a file object. @@ -1359,7 +1597,7 @@ return source.filename return None -cdef void changeDocumentBelow(_NodeBase node, _Document doc): +cdef void changeDocumentBelow(_NodeBase node, _Document doc, int recursive): """For a node and all nodes below, change document. A node can change document in certain operations as an XML @@ -1367,7 +1605,8 @@ tree below (including the current node). It also reconciliates namespaces so they're correct inside the new environment. 
""" - changeDocumentBelowHelper(node._c_node, doc) + if recursive: + changeDocumentBelowHelper(node._c_node, doc) tree.xmlReconciliateNs(doc._c_doc, node._c_node) cdef void changeDocumentBelowHelper(xmlNode* c_node, _Document doc): @@ -1400,27 +1639,3 @@ changeDocumentBelowHelper(c_current, doc) c_attr_current = c_attr_current.next - -################################################################################ -# DEBUG setup - -cdef void nullGenericErrorFunc(void* ctxt, char* msg, ...): - pass - -cdef void nullStructuredErrorFunc(void* userData, - xmlerror.xmlError* error): - pass - -cdef void _shutUpLibxmlErrors(): - xmlerror.xmlSetGenericErrorFunc(NULL, nullGenericErrorFunc) - xmlerror.xmlSetStructuredErrorFunc(NULL, nullStructuredErrorFunc) - -cdef void _shutUpLibxsltErrors(): - xslt.xsltSetGenericErrorFunc(NULL, nullGenericErrorFunc) - # xslt.xsltSetTransformErrorFunc - -# ugly global shutting up of all errors, but seems to work.. -if not DEBUG: - _shutUpLibxmlErrors() - _shutUpLibxsltErrors() - Added: lxml/branch/htmlparser/src/lxml/htmlparser.pxd ============================================================================== --- (empty file) +++ lxml/branch/htmlparser/src/lxml/htmlparser.pxd Sun Mar 26 12:53:24 2006 @@ -0,0 +1,16 @@ +from tree cimport xmlDoc, xmlDict +from xmlparser cimport xmlParserCtxt +from xmlerror cimport xmlError + +cdef extern from "libxml/HTMLparser.h": + ctypedef enum htmlParserOption: + HTML_PARSE_RECOVER # Relaxed parsing + HTML_PARSE_NOERROR # suppress error reports + HTML_PARSE_NOWARNING # suppress warning reports + HTML_PARSE_PEDANTIC # pedantic error reporting + HTML_PARSE_NOBLANKS # remove blank nodes + HTML_PARSE_NONET # Forbid network access + HTML_PARSE_COMPACT # compact small text nodes + + xmlParserCtxt* htmlCreateMemoryParserCtxt(char* buffer, + int size) Modified: lxml/branch/htmlparser/src/lxml/nsclasses.pxi ============================================================================== --- 
lxml/branch/htmlparser/src/lxml/nsclasses.pxi (original) +++ lxml/branch/htmlparser/src/lxml/nsclasses.pxi Sun Mar 26 12:53:24 2006 @@ -3,32 +3,53 @@ class NamespaceRegistryError(LxmlError): pass -class ElementBase(_Element): - """All classes in namespace implementations must inherit from this - one. Note that subclasses *must not* override __init__ or __new__ - as there is absolutely undefined when these objects will be - created or destroyed. All state must be kept in the underlying - XML.""" +cdef class ElementBase(_Element): + """All classes in namespace implementations must inherit from this one. + Note that subclasses *must not* override __init__ or __new__ as it is + absolutely undefined when these objects will be created or destroyed. All + persistent state of elements must be stored in the underlying XML.""" pass class XSLTElement(object): "NOT IMPLEMENTED YET!" pass -cdef object __NAMESPACE_CLASSES -__NAMESPACE_CLASSES = {} +cdef object __NAMESPACE_REGISTRIES +__NAMESPACE_REGISTRIES = {} + +cdef object __FUNCTION_NAMESPACE_REGISTRIES +__FUNCTION_NAMESPACE_REGISTRIES = {} def Namespace(ns_uri): + """Retrieve the namespace object associated with the given URI. Creates a + new one if it does not yet exist.""" + if ns_uri: + ns_utf = _utf8(ns_uri) + else: + ns_utf = None + try: + return __NAMESPACE_REGISTRIES[ns_utf] + except KeyError: + registry = __NAMESPACE_REGISTRIES[ns_utf] = \ + _NamespaceRegistry(ns_uri) + return registry + +def FunctionNamespace(ns_uri): + """Retrieve the function namespace object associated with the given + URI. Creates a new one if it does not yet exist. 
A function namespace can + only be used to register extension functions.""" if ns_uri: - ns_utf = ns_uri.encode('UTF-8') + ns_utf = _utf8(ns_uri) else: ns_utf = None try: - return __NAMESPACE_CLASSES[ns_utf] + return __FUNCTION_NAMESPACE_REGISTRIES[ns_utf] except KeyError: - registry = __NAMESPACE_CLASSES[ns_utf] = _NamespaceRegistry(ns_uri) + registry = __FUNCTION_NAMESPACE_REGISTRIES[ns_utf] = \ + _FunctionNamespaceRegistry(ns_uri) return registry + cdef class _NamespaceRegistry: "Dictionary-like registry for namespace implementations" cdef object _ns_uri @@ -56,11 +77,11 @@ self[name] = item def __setitem__(self, name, item): - if isinstance(item, (type, types.ClassType)) and issubclass(item, ElementBase): + if python.PyType_Check(item) and issubclass(item, ElementBase): d = self._classes elif name is None: raise NamespaceRegistryError, "Registered name can only be None for elements." - elif isinstance(item, (type, types.ClassType)) and issubclass(item, XSLTElement): + elif python.PyType_Check(item) and issubclass(item, XSLTElement): d = self._xslt_elements elif callable(item): d = self._extensions @@ -70,68 +91,128 @@ if name is None: name_utf = None else: - name_utf = name.encode('UTF-8') + name_utf = _utf8(name) d[name_utf] = item def __getitem__(self, name): - try: - return self._classes[name] - except KeyError: - return self._extensions[name] + name_utf = _utf8(name) + return self._get(name_utf) + + cdef object _get(self, object name): + cdef python.PyObject* dict_result + dict_result = python.PyDict_GetItem(self._classes, name) + if dict_result is NULL: + dict_result = python.PyDict_GetItem(self._extensions, name) + if dict_result is NULL: + raise KeyError, "Name not registered." 
+ return dict_result def clear(self): self._classes.clear() self._extensions.clear() + #self.self._xslt_elements.clear() -cdef object _find_all_namespaces(): - "Hack to register all extension functions in XSLT" - ns_uris = [] - for s in __NAMESPACE_CLASSES.keys(): - ns_uris.append(unicode(s, 'UTF-8')) - return ns_uris + def __repr__(self): + return "Namespace(%r)" % self._ns_uri -cdef _NamespaceRegistry _find_namespace_registry(object ns_uri): - if ns_uri: - ns_utf = ns_uri.encode('UTF-8') - else: - ns_utf = None - return __NAMESPACE_CLASSES[ns_utf] +cdef class _FunctionNamespaceRegistry(_NamespaceRegistry): + cdef object _prefix + cdef object _prefix_utf + property prefix: + "Namespace prefix for extension functions." + def __del__(self): + self._prefix = None # no prefix configured + def __get__(self): + return self._prefix + def __set__(self, prefix): + if prefix is None: + prefix = '' # empty prefix + self._prefix_utf = _utf8(prefix) + self._prefix = prefix + + def __setitem__(self, name, item): + if not callable(item): + raise NamespaceRegistryError, "Registered function must be callable." + if name is None: + name_utf = None + else: + name_utf = _utf8(name) + self._extensions[name_utf] = item + + cdef object _get(self, object name): + cdef python.PyObject* dict_result + dict_result = python.PyDict_GetItem(self._extensions, name) + if dict_result is NULL: + raise KeyError, "Name not registered." + return dict_result + + def __repr__(self): + return "FunctionNamespace(%r)" % self._ns_uri + +cdef object _find_all_extensions(): + "Internal lookup function to find all extension functions for XSLT/XPath." + cdef _NamespaceRegistry registry + ns_extensions = {} + for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.iteritems(): + if registry._extensions: + ns_extensions[ns_utf] = registry._extensions + return ns_extensions + +cdef object _find_all_extension_prefixes(): + "Internal lookup function to find all function prefixes for XSLT/XPath." 
+ cdef _FunctionNamespaceRegistry registry + ns_prefixes = {} + for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.iteritems(): + if registry._prefix_utf is not None: + ns_prefixes[registry._prefix_utf] = ns_utf + return ns_prefixes cdef _find_extensions(namespaces): + """Returns a dictionary that maps each namespace in the provided list to a + dictionary of name-function mappings defined under that namespace.""" + cdef python.PyObject* dict_result + cdef char* c_ns_utf extension_dict = {} for ns_uri in namespaces: - try: - extensions = _find_namespace_registry(ns_uri)._extensions - except KeyError: + if ns_uri is None: + ns_utf = None + else: + ns_utf = _utf8(ns_uri) + dict_result = python.PyDict_GetItem( + __FUNCTION_NAMESPACE_REGISTRIES, ns_utf) + if dict_result is NULL: continue + extensions = (<_NamespaceRegistry>dict_result)._extensions if extensions: - extension_dict[ns_uri] = extensions + python.PyDict_SetItem(extension_dict, ns_utf, extensions) return extension_dict cdef object _find_element_class(char* c_namespace_utf, char* c_element_name_utf): + cdef python.PyObject* dict_result cdef _NamespaceRegistry registry - element_name_utf = c_element_name_utf - if c_namespace_utf == NULL: - if element_name_utf[:1] == '{': - namespace_utf, element_name_utf = element_name_utf[1:].split('}', 1) - else: - namespace_utf = None + if c_namespace_utf is not NULL: + dict_result = python.PyDict_GetItemString( + __NAMESPACE_REGISTRIES, c_namespace_utf) else: - namespace_utf = c_namespace_utf - - try: - registry = __NAMESPACE_CLASSES[namespace_utf] - except KeyError: + dict_result = python.PyDict_GetItem( + __NAMESPACE_REGISTRIES, None) + if dict_result is NULL: return _Element + + registry = <_NamespaceRegistry>dict_result classes = registry._classes - try: - return classes[element_name_utf] - except KeyError: - pass - try: - return classes[None] - except KeyError: - return _Element + if c_element_name_utf is not NULL: + dict_result = python.PyDict_GetItemString( + 
classes, c_element_name_utf) + else: + dict_result = NULL + + if dict_result is NULL: + dict_result = python.PyDict_GetItem(classes, None) + if dict_result is not NULL: + return dict_result + else: + return _Element Modified: lxml/branch/htmlparser/src/lxml/parser.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/parser.pxi (original) +++ lxml/branch/htmlparser/src/lxml/parser.pxi Sun Mar 26 12:53:24 2006 @@ -1,24 +1,52 @@ # XML parser that provides dictionary sharing cimport xmlparser +cimport htmlparser from xmlparser cimport xmlParserCtxt, xmlDict class XMLSyntaxError(LxmlSyntaxError): pass -cdef int _DEFAULT_PARSE_OPTIONS -_DEFAULT_PARSE_OPTIONS = ( +cdef int _XML_DEFAULT_PARSE_OPTIONS +_XML_DEFAULT_PARSE_OPTIONS = ( xmlparser.XML_PARSE_NOENT | xmlparser.XML_PARSE_NOCDATA | xmlparser.XML_PARSE_NOWARNING | xmlparser.XML_PARSE_NOERROR ) -cdef int _ORIG_DEFAULT_PARSE_OPTIONS -_ORIG_DEFAULT_PARSE_OPTIONS = _DEFAULT_PARSE_OPTIONS +cdef int _XML_ORIG_DEFAULT_PARSE_OPTIONS +_XML_ORIG_DEFAULT_PARSE_OPTIONS = _XML_DEFAULT_PARSE_OPTIONS -cdef class XMLParser: +cdef int _HTML_DEFAULT_PARSE_OPTIONS +_HTML_DEFAULT_PARSE_OPTIONS = ( + htmlparser.HTML_PARSE_RECOVER | + htmlparser.HTML_PARSE_NOERROR | + htmlparser.HTML_PARSE_NOWARNING + ) + +cdef int _HTML_ORIG_DEFAULT_PARSE_OPTIONS +_HTML_ORIG_DEFAULT_PARSE_OPTIONS = _HTML_DEFAULT_PARSE_OPTIONS + + +cdef class _BaseParser: + cdef int _parse_options + + cdef xmlParserCtxt* newMemoryParserContext(self, text_utf): + return NULL + + cdef xmlParserCtxt* newFileParserContext(self): + return NULL + + cdef xmlDoc* parseMemory(self, xmlParserCtxt* ctx): + return NULL + + cdef xmlDoc* parseFile(self, xmlParserCtxt* ctx, char* filename, int options): + return NULL + + +cdef class XMLParser(_BaseParser): """The XML parser. Parsers can be supplied as additional argument to various parse functions of the lxml API. 
A default parser is always available and can be replaced by a call to the global function @@ -26,24 +54,18 @@ major run-time overhead. The keyword arguments in the constructor are mainly based on the libxml2 - parser configuration. The 'from_parser' keyword additionally allows to - provide a parser whose configurations is copied before applying the - additional arguments. Note that DTD validation obviously implies loading - the DTD. + parser configuration. A DTD will only be loaded if validation or + attribute default values are requested. """ - cdef int _parse_options - def __init__(self, load_dtd=False, validate_dtd=False, no_network=False, - ns_clean=False, from_parser=None): + def __init__(self, attribute_defaults=False, dtd_validation=False, + no_network=False, ns_clean=False): cdef int parse_options - if from_parser is not None: - parse_options = from_parser._parse_options - else: - parse_options = _ORIG_DEFAULT_PARSE_OPTIONS + parse_options = _XML_ORIG_DEFAULT_PARSE_OPTIONS - if validate_dtd: + if dtd_validation: parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD | \ xmlparser.XML_PARSE_DTDVALID - if load_dtd: + if attribute_defaults: parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD | \ xmlparser.XML_PARSE_DTDATTR if no_network: @@ -53,6 +75,58 @@ self._parse_options = parse_options + property error_log: + def __get__(self): + return __build_error_log_tuple(self) + +## def copy(self, attribute_defaults=None, dtd_validation=None, +## no_network=None, ns_clean=None): +## cdef int parse_options +## parse_options = self._parse_options +## if attribute_defaults is None: +## attribute_defaults = parse_options & xmlparser.XML_PARSE_DTDATTR +## if dtd_validation is None: +## dtd_validation = parse_options & xmlparser.XML_PARSE_DTDVALID +## if no_network is None: +## no_network = parse_options & xmlparser.XML_PARSE_NONET +## if ns_clean is None: +## ns_clean = parse_options & xmlparser.XML_PARSE_NSCLEAN + +## return 
self.__class__(attribute_defaults=attribute_defaults, +## dtd_validation=dtd_validation, +## no_network=no_network, ns_clean=ns_clean) + + cdef xmlParserCtxt* newMemoryParserContext(self, text_utf): + return xmlparser.xmlCreateDocParserCtxt(text_utf) + + cdef xmlDoc* parseMemory(self, xmlParserCtxt* ctx, text_utf): + pass + + +cdef class HTMLParser(_BaseParser): + """The HTML parser. This parser allows reading broken HTML into XML. + """ + cdef int _HTML_DEFAULT_PARSE_OPTIONS + def __init__(self, recover=True, compact_text=True, no_network=False, + from_parser=None): + cdef int parse_options + if from_parser is not None: + parse_options = (from_parser)._parse_options + else: + parse_options = _HTML_DEFAULT_PARSE_OPTIONS + + if not recover: + parse_options = parse_options & ~htmlparser.HTML_PARSE_RECOVER + if not compact_text: + parse_options = parse_options & ~htmlparser.HTML_PARSE_COMPACT + if no_network: + parse_options = parse_options | htmlparser.HTML_PARSE_NONET + + self._parse_options = parse_options + + cdef xmlParserCtxt* newMemoryParserContext(self, text_utf): + return htmlparser.htmlCreateMemoryParserCtxt(text_utf, len(text_utf)) + def set_default_parser(parser=None): """Set a default XMLParser. This parser is used globally whenever no @@ -61,9 +135,9 @@ parser is reset to the original configuration. 
""" if parser is not None: - _DEFAULT_PARSE_OPTIONS = (parser)._parse_options + _XML_DEFAULT_PARSE_OPTIONS = (parser)._parse_options else: - _DEFAULT_PARSE_OPTIONS = _ORIG_DEFAULT_PARSE_OPTIONS + _XML_DEFAULT_PARSE_OPTIONS = _XML_ORIG_DEFAULT_PARSE_OPTIONS cdef class Parser: @@ -91,7 +165,7 @@ if parser is not None: parse_options = (parser)._parse_options else: - parse_options = _DEFAULT_PARSE_OPTIONS + parse_options = _XML_DEFAULT_PARSE_OPTIONS self._initParse() pctxt = xmlparser.xmlCreateDocParserCtxt(text) @@ -123,7 +197,7 @@ if parser is not None: parse_options = (parser)._parse_options else: - parse_options = _DEFAULT_PARSE_OPTIONS + parse_options = _XML_DEFAULT_PARSE_OPTIONS self._initParse() pctxt = xmlparser.xmlNewParserCtxt() Modified: lxml/branch/htmlparser/src/lxml/proxy.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/proxy.pxi (original) +++ lxml/branch/htmlparser/src/lxml/proxy.pxi Sun Mar 26 12:53:24 2006 @@ -7,8 +7,8 @@ cdef struct _ProxyRef cdef struct _ProxyRef: - tree.PyObject* proxy - int type + python.PyObject* proxy + LXML_PROXY_TYPE type _ProxyRef* next ctypedef _ProxyRef ProxyRef @@ -30,43 +30,31 @@ cdef int hasProxy(xmlNode* c_node): return c_node._private is not NULL -cdef ProxyRef* createProxyRef(_NodeBase proxy, int proxy_type): - """Create a backpointer proxy reference for a proxy and type. - """ - cdef ProxyRef* result - result = cstd.malloc(sizeof(ProxyRef)) - result.proxy = proxy - result.type = proxy_type - result.next = NULL - return result - cdef void registerProxy(_NodeBase proxy, int proxy_type): """Register a proxy and type for the node it's proxying for. """ + cdef xmlNode* c_node cdef ProxyRef* ref - cdef ProxyRef* prev_ref # cannot register for NULL - if proxy._c_node is NULL: + c_node = proxy._c_node + if c_node is NULL: return # XXX should we check whether we ran into proxy_type before? 
#print "registering for:", proxy._c_node - ref = proxy._c_node._private - if ref is NULL: - proxy._c_node._private = createProxyRef(proxy, proxy_type) - return - while ref is not NULL: - prev_ref = ref - ref = ref.next - prev_ref.next = createProxyRef(proxy, proxy_type) + ref = cstd.malloc(sizeof(ProxyRef)) + ref.proxy = proxy + ref.type = proxy_type + ref.next = c_node._private + c_node._private = ref # prepend cdef void unregisterProxy(_NodeBase proxy): """Unregister a proxy for the node it's proxying for. """ - cdef tree.PyObject* proxy_ref + cdef python.PyObject* proxy_ref cdef ProxyRef* ref cdef ProxyRef* prev_ref cdef xmlNode* c_node - proxy_ref = proxy + proxy_ref = proxy c_node = proxy._c_node ref = c_node._private if ref.proxy == proxy_ref: Modified: lxml/branch/htmlparser/src/lxml/relaxng.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/relaxng.pxi (original) +++ lxml/branch/htmlparser/src/lxml/relaxng.pxi Sun Mar 26 12:53:24 2006 @@ -18,6 +18,7 @@ Can also load from filesystem directly given file object or filename. 
""" cdef relaxng.xmlRelaxNG* _c_schema + cdef _ErrorLog _error_log def __init__(self, _ElementTree etree=None, file=None): cdef relaxng.xmlRelaxNGParserCtxt* parser_ctxt @@ -38,6 +39,8 @@ if self._c_schema is NULL: raise RelaxNGParseError, "Document is not valid Relax NG" relaxng.xmlRelaxNGFreeParserCtxt(parser_ctxt) + + self._error_log = _ErrorLog() def __dealloc__(self): relaxng.xmlRelaxNGFree(self._c_schema) @@ -49,6 +52,7 @@ cdef xmlDoc* c_doc cdef relaxng.xmlRelaxNGValidCtxt* valid_ctxt cdef int ret + self._error_log.connect() valid_ctxt = relaxng.xmlRelaxNGNewValidCtxt(self._c_schema) c_doc = _fakeRootDoc(etree._doc._c_doc, etree._context_node._c_node) @@ -56,7 +60,11 @@ _destroyFakeDoc(etree._doc._c_doc, c_doc) relaxng.xmlRelaxNGFreeValidCtxt(valid_ctxt) + self._error_log.disconnect() if ret == -1: raise RelaxNGValidateError, "Internal error in Relax NG validation" return ret == 0 + property error_log: + def __get__(self): + return self._error_log.copy() Modified: lxml/branch/htmlparser/src/lxml/sax.py ============================================================================== --- lxml/branch/htmlparser/src/lxml/sax.py (original) +++ lxml/branch/htmlparser/src/lxml/sax.py Sun Mar 26 12:53:24 2006 @@ -1,5 +1,14 @@ from xml.sax.handler import ContentHandler -from lxml.etree import ElementTree, Element, SubElement, _getNsTag +from lxml.etree import ElementTree, Element, SubElement, LxmlError + +class SaxError(LxmlError): + pass + +def _getNsTag(tag): + if tag[0] == '{': + return tuple(tag[1:].split('}', 1)) + else: + return (None, tag) class ElementTreeContentHandler(object, ContentHandler): """Build an lxml ElementTree from SAX events. 
@@ -41,27 +50,30 @@ if prefix is None: self._default_ns = ns_uri_list[-1] - def startElementNS(self, name, qname, attributes): - ns_uri, local_name = name + def startElementNS(self, ns_name, qname, attributes=None): + ns_uri, local_name = ns_name if ns_uri: - el_name = "{%s}%s" % name + el_name = "{%s}%s" % ns_name elif self._default_ns: el_name = "{%s}%s" % (self._default_ns, local_name) else: el_name = local_name - try: - iter_attributes = attributes.iteritems() - except AttributeError: - iter_attributes = attributes.items() - - attrs = {} - for name_tuple, value in iter_attributes: - if name_tuple[0]: - attr_name = "{%s}%s" % name_tuple - else: - attr_name = name_tuple[1] - attrs[attr_name] = value + if attributes: + attrs = {} + try: + iter_attributes = attributes.iteritems() + except AttributeError: + iter_attributes = attributes.items() + + for name_tuple, value in iter_attributes: + if name_tuple[0]: + attr_name = "{%s}%s" % name_tuple + else: + attr_name = name_tuple[1] + attrs[attr_name] = value + else: + attrs = None element_stack = self._element_stack if self._root is None: @@ -73,8 +85,17 @@ self._new_mappings.clear() - def endElementNS(self, name, qname): - self._element_stack.pop() + def endElementNS(self, ns_name, qname): + element = self._element_stack.pop() + tag = element.tag + if ns_name != _getNsTag(tag): + raise SaxError, "Unexpected element closed: {%s}%s" % ns_name + + def startElement(self, name, attributes=None): + self.startElementNS((None, name), name, attributes) + + def endElement(self, name): + self.endElementNS((None, name), name) def characters(self, data): last_element = self._element_stack[-1] @@ -105,20 +126,22 @@ def _recursive_saxify(self, element, prefixes): new_prefixes = [] - if element.attrib: + build_qname = self._build_qname + attribs = element.items() + if attribs: attr_values = {} attr_qnames = {} - for attr_ns_name, value in element.attrib.items(): + for attr_ns_name, value in attribs: attr_ns_tuple = 
_getNsTag(attr_ns_name) attr_values[attr_ns_tuple] = value - attr_qnames[attr_ns_tuple] = _build_qname( + attr_qnames[attr_ns_tuple] = build_qname( attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes) sax_attributes = self._attr_class(attr_values, attr_qnames) else: sax_attributes = self._empty_attributes ns_uri, local_name = _getNsTag(element.tag) - qname = _build_qname(ns_uri, local_name, prefixes, new_prefixes) + qname = build_qname(ns_uri, local_name, prefixes, new_prefixes) content_handler = self._content_handler for prefix, uri in new_prefixes: @@ -135,15 +158,15 @@ if element.tail: content_handler.characters(element.tail) + def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes): + if ns_uri is None: + return local_name + try: + prefix = prefixes[ns_uri] + except KeyError: + prefix = prefixes[ns_uri] = u'ns%02d' % len(prefixes) + new_prefixes.append( (prefix, ns_uri) ) + return prefix + ':' + local_name + def saxify(element_or_tree, content_handler): return ElementTreeProducer(element_or_tree, content_handler).saxify() - -def _build_qname(ns_uri, local_name, prefixes, new_prefixes): - if ns_uri is None: - return local_name - try: - prefix = prefixes[ns_uri] - except KeyError: - prefix = prefixes[ns_uri] = u'ns%02d' % len(prefixes) - new_prefixes.append( (prefix, ns_uri) ) - return prefix + ':' + local_name Modified: lxml/branch/htmlparser/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/htmlparser/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/htmlparser/src/lxml/tests/test_elementtree.py Sun Mar 26 12:53:24 2006 @@ -12,8 +12,6 @@ from StringIO import StringIO import os, shutil, tempfile, copy -import gzip -import urllib2 from common_imports import etree, ElementTree, HelperTestCase, fileInTestDir, canonicalize @@ -336,6 +334,29 @@ self.assertEquals(0, len(root)) self.assertEquals('This is a text.', root.text) + def test_XMLID(self): + XMLID = 
self.etree.XMLID + XML = self.etree.XML + xml_text = ''' + +

...

+

...

+

Regular paragraph.

+

...

+
+ ''' + + root, dic = XMLID(xml_text) + root2 = XML(xml_text) + self.assertEquals(self._writeElement(root), + self._writeElement(root2)) + expected = { + "chapter1" : root[0], + "note1" : root[1], + "warn1" : root[3] + } + self.assertEquals(dic, expected) + def test_fromstring(self): fromstring = self.etree.fromstring @@ -570,6 +591,8 @@ b.tail = 'hoi' self.assertEquals('hoi', b.tail) + self.assertEquals('dag', + a.tail) def test_tail_append(self): Element = self.etree.Element @@ -742,6 +765,75 @@ '', other) + def test_del_insert(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + bs = SubElement(b, 'bs') + c = SubElement(a, 'c') + cs = SubElement(c, 'cs') + + el = a[0] + self.assertXML( + '', + a) + self.assertXML('', b) + self.assertXML('', c) + + del a[0] + self.assertXML( + '', + a) + self.assertXML('', b) + self.assertXML('', c) + + a.insert(0, el) + self.assertXML( + '', + a) + self.assertXML('', b) + self.assertXML('', c) + + def test_del_setitem(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + bs = SubElement(b, 'bs') + c = SubElement(a, 'c') + cs = SubElement(c, 'cs') + + el = a[0] + del a[0] + a[0] = el + self.assertXML( + '', + a) + self.assertXML('', b) + self.assertXML('', c) + + def test_del_setslice(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + bs = SubElement(b, 'bs') + c = SubElement(a, 'c') + cs = SubElement(c, 'cs') + + el = a[0] + del a[0] + a[0:0] = [el] + self.assertXML( + '', + a) + self.assertXML('', b) + self.assertXML('', c) + def test_delitem_tail(self): ElementTree = self.etree.ElementTree f = StringIO('B2C2') @@ -795,7 +887,7 @@ self.assertXML( '', a) - + def test_insert(self): Element = self.etree.Element SubElement = self.etree.SubElement @@ -1152,6 +1244,36 @@ [b, e], list(a)) + def test_delslice_negative1(self): + 
Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + e = SubElement(a, 'e') + + del a[1:-1] + self.assertEquals( + [b, e], + list(a)) + + def test_delslice_negative2(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + e = SubElement(a, 'e') + + del a[-3:-1] + self.assertEquals( + [b, e], + list(a)) + def test_delslice_tail(self): ElementTree = self.etree.ElementTree f = StringIO('B2C2D2E2') @@ -1209,6 +1331,23 @@ 'B2X2Y2Z2E2', a) + def test_setslice_negative(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + + x = Element('x') + y = Element('y') + + a[1:-1] = [x, y] + self.assertEquals( + [b, x, y, d], + list(a)) + def test_setslice_end(self): Element = self.etree.Element SubElement = self.etree.SubElement @@ -1318,14 +1457,37 @@ ns2 = 'http://xml.infrae.com/2' a = Element('{%s}a' % ns) b = SubElement(a, '{%s}b' % ns2) + c = SubElement(a, '{%s}c' % ns) self.assertEquals('{%s}a' % ns, a.tag) self.assertEquals('{%s}b' % ns2, b.tag) - self.assertEquals( - '{%s}a' % ns, a.tag) - self.assertEquals( - '{%s}b' % ns2, b.tag) + self.assertEquals('{%s}c' % ns, + c.tag) + self.assertEquals('{%s}a' % ns, + a.tag) + self.assertEquals('{%s}b' % ns2, + b.tag) + self.assertEquals('{%s}c' % ns, + c.tag) + + def test_ns_tag_parse(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + ElementTree = self.etree.ElementTree + + ns = 'http://xml.infrae.com/1' + ns2 = 'http://xml.infrae.com/2' + f = StringIO('' % (ns, ns2)) + t = ElementTree(file=f) + + a = t.getroot() + self.assertEquals('{%s}a' % ns, + a.tag) + self.assertEquals('{%s}b' % ns2, + a[0].tag) + self.assertEquals('{%s}b' % ns, + a[1].tag) def 
test_ns_attr(self): Element = self.etree.Element Modified: lxml/branch/htmlparser/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/htmlparser/src/lxml/tests/test_etree.py (original) +++ lxml/branch/htmlparser/src/lxml/tests/test_etree.py Sun Mar 26 12:53:24 2006 @@ -11,9 +11,6 @@ import unittest, doctest from StringIO import StringIO -import os, shutil, tempfile, copy -import gzip -import urllib2 from common_imports import etree, HelperTestCase, fileInTestDir, canonicalize @@ -28,6 +25,24 @@ self.assertRaises(SyntaxError, parse, f) f.close() + def test_parse_error_logging(self): + parse = self.etree.parse + # from StringIO + f = StringIO('') + self.etree.clearErrorLog() + try: + parse(f) + logs = None + except SyntaxError, e: + logs = e.error_log + f.close() + self.assert_([ log for log in logs + if 'mismatch' in log.message ]) + self.assert_([ log for log in logs + if 'PARSER' in log.domain_name]) + self.assert_([ log for log in logs + if 'TAG_NAME_MISMATCH' in log.type_name ]) + def test_parse_error_from_file(self): parse = self.etree.parse # from file @@ -191,6 +206,8 @@ ValueError, e.index, e[3], 0, 2) self.assertRaises( ValueError, e.index, e[8], 0, -3) + self.assertRaises( + ValueError, e.index, e[8], -5, -3) self.assertEquals( 8, e.index(e[8], 0, -1)) self.assertEquals( Modified: lxml/branch/htmlparser/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/branch/htmlparser/src/lxml/tests/test_nsclasses.py (original) +++ lxml/branch/htmlparser/src/lxml/tests/test_nsclasses.py Sun Mar 26 12:53:24 2006 @@ -5,11 +5,13 @@ namespace registry mechanism """ -import unittest +import unittest, doctest from common_imports import etree, HelperTestCase class ETreeNamespaceClassesTestCase(HelperTestCase): + assertFalse = HelperTestCase.failIf + class default_class(etree.ElementBase): pass class maeh_class(etree.ElementBase): @@ 
-65,9 +67,91 @@ etree.Namespace(u'ns11').clear() + def test_default_class(self): + bluff_dict = { + None : self.bluff_class, + 'maeh' : self.maeh_class + } + + ns = etree.Namespace("uri:nsDefClass") + ns.update(bluff_dict) + + tree = self.parse(u''' + + + + ''') + + el = tree.getroot() + self.assertFalse(isinstance(el, etree.ElementBase)) + for child in el[:-1]: + self.assert_(isinstance(child, etree.ElementBase), child.tag) + self.assertFalse(isinstance(el[-1], etree.ElementBase)) + + self.assert_(hasattr(el[0], 'bluff')) + self.assert_(hasattr(el[1], 'bluff')) + self.assert_(hasattr(el[2], 'maeh')) + self.assert_(hasattr(el[3], 'maeh')) + self.assertFalse(hasattr(el[4], 'maeh')) + del el + + ns.clear() + + def test_create_element(self): + bluff_dict = {u'bluff' : self.bluff_class} + etree.Namespace(u'ns20').update(bluff_dict) + + maeh_dict = {u'maeh' : self.maeh_class} + etree.Namespace(u'ns21').update(maeh_dict) + + el = etree.Element("{ns20}bluff") + self.assert_(hasattr(el, 'bluff')) + + child = etree.SubElement(el, "{ns21}maeh") + self.assert_(hasattr(child, 'maeh')) + child = etree.SubElement(el, "{ns20}bluff") + self.assert_(hasattr(child, 'bluff')) + child = etree.SubElement(el, "{ns21}bluff") + self.assertFalse(hasattr(child, 'bluff')) + self.assertFalse(hasattr(child, 'maeh')) + + self.assert_(hasattr(el[0], 'maeh')) + self.assert_(hasattr(el[1], 'bluff')) + self.assertFalse(hasattr(el[2], 'bluff')) + self.assertFalse(hasattr(el[2], 'maeh')) + + self.assertEquals(el.bluff(), u'bluff') + self.assertEquals(el[0].maeh(), u'maeh') + self.assertEquals(el[1].bluff(), u'bluff') + + etree.Namespace(u'ns20').clear() + etree.Namespace(u'ns21').clear() + + def test_create_element_default(self): + bluff_dict = {None : self.bluff_class} + etree.Namespace(u'ns30').update(bluff_dict) + + maeh_dict = {u'maeh' : self.maeh_class} + etree.Namespace(None).update(maeh_dict) + + el = etree.Element("{ns30}bluff") + etree.SubElement(el, "maeh") + self.assert_(hasattr(el, 
'bluff')) + self.assert_(hasattr(el[0], 'maeh')) + self.assertEquals(el.bluff(), u'bluff') + self.assertEquals(el[0].maeh(), u'maeh') + + etree.Namespace(None).clear() + etree.Namespace(u'ns30').clear() + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeNamespaceClassesTestCase)]) + optionflags = doctest.NORMALIZE_WHITESPACE|doctest.ELLIPSIS + suite.addTests( + [doctest.DocFileSuite('../../../doc/namespace_extensions.txt', + optionflags=optionflags)], + ) return suite if __name__ == '__main__': Modified: lxml/branch/htmlparser/src/lxml/tests/test_relaxng.py ============================================================================== --- lxml/branch/htmlparser/src/lxml/tests/test_relaxng.py (original) +++ lxml/branch/htmlparser/src/lxml/tests/test_relaxng.py Sun Mar 26 12:53:24 2006 @@ -25,6 +25,25 @@ self.assert_(schema.validate(tree_valid)) self.assert_(not schema.validate(tree_invalid)) + def test_relaxng_error(self): + tree_invalid = self.parse('') + schema = self.parse('''\ + + + + + + + +''') + schema = etree.RelaxNG(schema) + self.assert_(not schema.validate(tree_invalid)) + errors = schema.error_log + self.assert_([ log for log in errors + if log.level_name == "ERROR" ]) + self.assert_([ log for log in errors + if "not expect" in log.message ]) + def test_relaxng_invalid_schema(self): schema = self.parse('''\ Modified: lxml/branch/htmlparser/src/lxml/tests/test_sax.py ============================================================================== --- lxml/branch/htmlparser/src/lxml/tests/test_sax.py (original) +++ lxml/branch/htmlparser/src/lxml/tests/test_sax.py Sun Mar 26 12:53:24 2006 @@ -24,6 +24,12 @@ self.assertEquals('abbbba', xml_out) + def test_etree_sax_attributes(self): + tree = self.parse('abba') + xml_out = self._saxify_serialize(tree) + self.assertEquals('abba', + xml_out) + def test_etree_sax_ns1(self): tree = self.parse('abbbba') new_tree = self._saxify_unsaxify(tree) @@ -122,6 +128,36 @@ 
self.assertEqual(root[1].tag, '{blaA}c') + def test_etree_sax_no_ns(self): + handler = sax.ElementTreeContentHandler() + handler.startDocument() + handler.startElement('a', {}) + handler.startElement('b', {}) + handler.endElement('b') + handler.startElement('c') # with empty attributes + handler.endElement('c') + handler.endElement('a') + handler.endDocument() + + new_tree = handler.etree + root = new_tree.getroot() + self.assertEqual(root.tag, 'a') + self.assertEqual(root[0].tag, 'b') + self.assertEqual(root[1].tag, 'c') + + def test_etree_sax_error(self): + handler = sax.ElementTreeContentHandler() + handler.startDocument() + handler.startElement('a') + self.assertRaises(sax.SaxError, handler.endElement, 'b') + + def test_etree_sax_error2(self): + handler = sax.ElementTreeContentHandler() + handler.startDocument() + handler.startElement('a') + handler.startElement('b') + self.assertRaises(sax.SaxError, handler.endElement, 'a') + def _saxify_unsaxify(self, saxifiable): handler = sax.ElementTreeContentHandler() sax.ElementTreeProducer(saxifiable, handler).saxify() Modified: lxml/branch/htmlparser/src/lxml/tests/test_unicode.py ============================================================================== --- lxml/branch/htmlparser/src/lxml/tests/test_unicode.py (original) +++ lxml/branch/htmlparser/src/lxml/tests/test_unicode.py Sun Mar 26 12:53:24 2006 @@ -6,7 +6,7 @@ ascii_uni = u'a' -uni = u'?' +uni = u'?\uF8D2' # klingon etc. 
class UnicodeTestCase(unittest.TestCase): def test_unicode_xml(self): @@ -17,6 +17,11 @@ el = etree.Element(uni) self.assertEquals(uni, el.tag) + def test_unicode_nstag(self): + tag = u"{%s}%s" % (uni, uni) + el = etree.Element(tag) + self.assertEquals(tag, el.tag) + def test_unicode_attr(self): el = etree.Element('foo', {'bar': uni}) self.assertEquals(uni, el.attrib['bar']) Modified: lxml/branch/htmlparser/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/branch/htmlparser/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/branch/htmlparser/src/lxml/tests/test_xpathevaluator.py Sun Mar 26 12:53:24 2006 @@ -59,8 +59,8 @@ def test_xpath_list_comment(self): tree = self.parse('') - self.assertEquals([''], - tree.xpath('/a/node()')) + self.assertEquals([''], + map(repr, tree.xpath('/a/node()'))) def test_rel_xpath_boolean(self): root = etree.XML('') @@ -89,6 +89,9 @@ self.assertEquals( [root[0]], root.xpath('//baz:b', {'baz': 'uri:a'})) + self.assertRaises( + TypeError, + root.xpath, '//b', {None: 'uri:a'}) def test_xpath_error(self): tree = self.parse('') @@ -151,6 +154,81 @@ self.assertEquals('Hoi', r[0].text) self.assertEquals('Dag', r[1].text) + def test_xpath_variables(self): + x = self.parse('') + e = etree.XPathEvaluator(x) + + expr = "/a[@attr=$aval]" + r = e.evaluate(expr, aval=1) + self.assertEquals(0, len(r)) + + r = e.evaluate(expr, aval="true") + self.assertEquals(1, len(r)) + self.assertEquals("true", r[0].get('attr')) + + r = e.evaluate(expr, aval=True) + self.assertEquals(1, len(r)) + self.assertEquals("true", r[0].get('attr')) + + +class ETreeXPathClassTestCase(HelperTestCase): + "Tests for the XPath class" + def test_xpath_compile_doc(self): + x = self.parse('') + + expr = etree.XPath("/a[@attr != 'true']") + r = expr.evaluate(x) + self.assertEquals(0, len(r)) + + expr = etree.XPath("/a[@attr = 'true']") + r = expr.evaluate(x) + self.assertEquals(1, len(r)) + + expr = 
etree.XPath( expr.path ) + r = expr.evaluate(x) + self.assertEquals(1, len(r)) + + def test_xpath_compile_element(self): + x = self.parse('') + root = x.getroot() + + expr = etree.XPath("./b") + r = expr.evaluate(root) + self.assertEquals(1, len(r)) + self.assertEquals('b', r[0].tag) + + expr = etree.XPath("./*") + r = expr.evaluate(root) + self.assertEquals(2, len(r)) + + def test_xpath_compile_vars(self): + x = self.parse('') + + expr = etree.XPath("/a[@attr=$aval]") + r = expr.evaluate(x, aval=False) + self.assertEquals(0, len(r)) + + r = expr.evaluate(x, aval=True) + self.assertEquals(1, len(r)) + + def test_xpath_compile_error(self): + self.assertRaises(SyntaxError, etree.XPath, '\\fad') + +class ETreeETXPathClassTestCase(HelperTestCase): + "Tests for the ETXPath class" + def test_xpath_compile_ns(self): + x = self.parse('') + + expr = etree.ETXPath("/a/{nsa}b") + r = expr.evaluate(x) + self.assertEquals(1, len(r)) + self.assertEquals('{nsa}b', r[0].tag) + + expr = etree.ETXPath("/a/{nsb}b") + r = expr.evaluate(x) + self.assertEquals(1, len(r)) + self.assertEquals('{nsb}b', r[0].tag) + SAMPLE_XML = etree.parse(StringIO(""" text @@ -245,6 +323,8 @@ def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeXPathTestCase)]) + suite.addTests([unittest.makeSuite(ETreeXPathClassTestCase)]) + suite.addTests([unittest.makeSuite(ETreeETXPathClassTestCase)]) suite.addTests([doctest.DocTestSuite()]) return suite Modified: lxml/branch/htmlparser/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/branch/htmlparser/src/lxml/tests/test_xslt.py (original) +++ lxml/branch/htmlparser/src/lxml/tests/test_xslt.py Sun Mar 26 12:53:24 2006 @@ -29,6 +29,39 @@ B ''', st.tostring(res)) + def test_xslt_input(self): + tree = self.parse('BC') + style = self.parse('''\ + + + + + +''') + + st = etree.XSLT(style) + st = etree.XSLT(style.getroot()) + self.assertRaises(TypeError, etree.XSLT, None) + 
+ def test_xslt_input_partial_doc(self): + tree = self.parse('BC') + style = self.parse('''\ + + + + + + + +''') + + self.assertRaises(etree.XSLTParseError, etree.XSLT, style) + root_node = style.getroot() + self.assertRaises(etree.XSLTParseError, etree.XSLT, root_node) + st = etree.XSLT(root_node[0]) + def test_xslt_broken(self): tree = self.parse('') style = self.parse('''\ @@ -248,6 +281,43 @@ self.assertEquals(self._rootstring(result), 'C') + def test_extensions1(self): + tree = self.parse('B') + style = self.parse('''\ + + +''') + + def mytext(ctxt, values): + return 'X' * len(values) + + result = tree.xslt(style, {'testns' : {'mytext' : mytext}}) + self.assertEquals(self._rootstring(result), + 'X') + + def test_extensions2(self): + tree = self.parse('B') + style = self.parse('''\ + + +''') + + def mytext(ctxt, values): + return 'X' * len(values) + + namespace = etree.FunctionNamespace('testns') + namespace['mytext'] = mytext + + result = tree.xslt(style) + self.assertEquals(self._rootstring(result), + 'X') + def test_xslt_document_parse(self): # make sure document('') works from loaded files xslt = etree.XSLT(etree.parse(fileInTestDir("test-document.xslt"))) @@ -272,7 +342,7 @@ suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeXSLTTestCase)]) suite.addTests( - [doctest.DocFileSuite('../../../doc/xpath.txt')]) + [doctest.DocFileSuite('../../../doc/extensions.txt')]) return suite if __name__ == '__main__': Modified: lxml/branch/htmlparser/src/lxml/tree.pxd ============================================================================== --- lxml/branch/htmlparser/src/lxml/tree.pxd (original) +++ lxml/branch/htmlparser/src/lxml/tree.pxd Sun Mar 26 12:53:24 2006 @@ -3,16 +3,7 @@ cdef extern from "stdio.h": ctypedef struct FILE cdef int strlen(char* s) - -cdef extern from "Python.h": - ctypedef struct PyObject - - cdef FILE* PyFile_AsFile(PyObject* p) - cdef int PyFile_Check(object p) - cdef object PyFile_Name(object p) - cdef void Py_INCREF(object 
o) - cdef object PyUnicode_DecodeUTF8(char* s, int size, char* errors) - cdef object PyString_FromStringAndSize(char* s, int size) + cdef int strcmp(char* s1, char* s2) cdef extern from "libxml/encoding.h": ctypedef struct xmlCharEncodingHandler @@ -178,4 +169,7 @@ cdef extern from "libxml/xmlstring.h": cdef char* xmlStrdup(char* cur) - + cdef char* xmlStrchr(char* cur, char value) + +cdef extern from "etree.h": + cdef int _isElement(xmlNode* node) Modified: lxml/branch/htmlparser/src/lxml/xmlschema.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/xmlschema.pxi (original) +++ lxml/branch/htmlparser/src/lxml/xmlschema.pxi Sun Mar 26 12:53:24 2006 @@ -17,6 +17,7 @@ """Turn a document into an XML Schema validator. """ cdef xmlschema.xmlSchema* _c_schema + cdef _ErrorLog _error_log def __init__(self, _ElementTree etree): cdef _Document doc @@ -30,6 +31,7 @@ xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt) raise XMLSchemaParseError, "Document is not valid XML Schema" xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt) + self._error_log = _ErrorLog() def __dealloc__(self): xmlschema.xmlSchemaFree(self._c_schema) @@ -42,6 +44,7 @@ cdef xmlschema.xmlSchemaValidCtxt* valid_ctxt cdef xmlDoc* c_doc cdef int ret + self._error_log.connect() valid_ctxt = xmlschema.xmlSchemaNewValidCtxt(self._c_schema) c_doc = _fakeRootDoc(etree._doc._c_doc, etree._context_node._c_node) @@ -49,6 +52,11 @@ _destroyFakeDoc(etree._doc._c_doc, c_doc) xmlschema.xmlSchemaFreeValidCtxt(valid_ctxt) + self._error_log.disconnect() if ret == -1: raise XMLSchemaValidateError, "Internal error in XML Schema validation." 
return ret == 0 + + property error_log: + def __get__(self): + return self._error_log.copy() Modified: lxml/branch/htmlparser/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/xslt.pxi (original) +++ lxml/branch/htmlparser/src/lxml/xslt.pxi Sun Mar 26 12:53:24 2006 @@ -12,13 +12,13 @@ class XSLTSaveError(XSLTError): pass -class XPathError(LxmlError): +class XSLTExtensionError(XSLTError): pass -class XPathContextError(XPathError): +class XPathError(LxmlError): pass -class XPathNamespaceError(XPathError): +class XPathContextError(XPathError): pass class XPathResultError(XPathError): @@ -27,27 +27,245 @@ class XPathSyntaxError(LxmlSyntaxError): pass -cdef object _RE_STRINGS -cdef object _RE_NAMESPACES -_RE_STRINGS = re.compile('("[^"]*")|(\'[^\']*\')') -_RE_NAMESPACES = re.compile('{([^}]+)}') +################################################################################ +# support for extension functions in XPath/XSLT + +cdef class BaseContext: + cdef xpath.xmlXPathContext* _xpathCtxt + cdef _Document _doc + cdef object _extensions + cdef object _namespaces + cdef object _registered_namespaces + cdef object _registered_extensions + cdef object _extension_functions + cdef object _utf_refs + # for exception handling and temporary reference keeping: + cdef object _temp_elements + cdef object _temp_docs + cdef object _exc_info + + def __init__(self, namespaces, extensions): + self._xpathCtxt = NULL + self._utf_refs = {} + + # fix old format extensions + if isinstance(extensions, (list, tuple)): + new_extensions = {} + for extension in extensions: + for (ns_uri, name), function in extension.items(): + ns_utf = self._to_utf(ns_uri) + name_utf = self._to_utf(name) + try: + new_extensions[ns_utf][name_utf] = function + except KeyError: + new_extensions[ns_utf] = {name_utf : function} + extensions = new_extensions or None + + self._doc = None + self._exc_info = None + self._extensions = 
extensions + self._namespaces = namespaces + self._registered_namespaces = [] + self._registered_extensions = [] + self._extension_functions = {} + self._temp_elements = {} + self._temp_docs = {} + + cdef object _to_utf(self, s): + "Convert to UTF-8 and keep a reference to the encoded string" + cdef python.PyObject* dict_result + if s is None: + return None + dict_result = python.PyDict_GetItem(self._utf_refs, s) + if dict_result is not NULL: + return dict_result + utf = _utf8(s) + python.PyDict_SetItem(self._utf_refs, s, utf) + return utf + + cdef void _set_xpath_context(self, xpath.xmlXPathContext* xpathCtxt): + self._xpathCtxt = xpathCtxt + xpathCtxt.userData = self + + cdef _register_context(self, _Document doc, int allow_none_namespace): + self._doc = doc + self._exc_info = None + namespaces = self._namespaces + if namespaces is not None: + self.registerNamespaces(namespaces) + extensions = _find_extensions(namespaces.values()) + else: + extensions = _find_all_extensions() + if self._extensions is not None: + # add user provided extensions + extensions.update(self._extensions) + if extensions: + if not allow_none_namespace: + python.PyDict_DelItem(extensions, None) + self._registerExtensionFunctions(extensions) + + cdef _unregister_context(self): + self._unregisterExtensionFunctions() + self._unregisterNamespaces() + self._free_context() + + cdef _free_context(self): + self._registered_namespaces = [] + self._registered_extensions = [] + python.PyDict_Clear(self._utf_refs) + self._doc = None + if self._xpathCtxt is not NULL: + self._xpathCtxt.userData = NULL + self._xpathCtxt = NULL + + # namespaces (internal UTF-8 methods with leading '_') + + def addNamespace(self, prefix, uri): + if self._namespaces is None: + self._namespaces = {prefix : uri} + else: + self._namespaces[prefix] = uri + + def registerNamespaces(self, namespaces): + for prefix, uri in namespaces.items(): + self.registerNamespace(prefix, uri) + + def registerNamespace(self, prefix, ns_uri): + 
prefix_utf = self._to_utf(prefix) + ns_uri_utf = self._to_utf(ns_uri) + xpath.xmlXPathRegisterNs(self._xpathCtxt, prefix_utf, ns_uri_utf) + self._registered_namespaces.append(prefix_utf) + + cdef _unregisterNamespaces(self): + cdef xpath.xmlXPathContext* xpathCtxt + xpathCtxt = self._xpathCtxt + for prefix_utf in self._registered_namespaces: + xpath.xmlXPathRegisterNs(xpathCtxt, prefix_utf, NULL) + + # extension functions (internal UTF-8 methods with leading '_') + + def registerExtensionFunctions(self, extensions): + for ns_uri, extension in extensions.items(): + for name, function in extension.items(): + self.registerExtensionFunction(ns_uri, name, function) + + def registerExtensionFunction(self, ns_uri, name, function): + self._registerExtensionFunction( + self._to_utf(ns_uri), self._to_utf(name), function) + + cdef _registerExtensionFunctions(self, extensions_utf): + for ns_uri_utf, extension in extensions_utf.items(): + for name_utf, function in extension.items(): + self._registerExtensionFunction(ns_uri_utf, name_utf, function) + + cdef _registerExtensionFunction(self, ns_uri_utf, name_utf, function): + self._contextRegisterExtensionFunction(ns_uri_utf, name_utf) + self._extension_functions[(ns_uri_utf, name_utf)] = function + self._registered_extensions.append((ns_uri_utf, name_utf)) + + cdef _unregisterExtensionFunctions(self): + for ns_uri_utf, name_utf in self._registered_extensions: + self._contextUnregisterExtensionFunction(ns_uri_utf, name_utf) + + def find_extension(self, ns_uri_utf, name_utf): + return self._extension_functions[(ns_uri_utf, name_utf)] + + # Python reference keeping during XPath function evaluation + + cdef _release_temp_refs(self): + "Free temporarily referenced objects from this context." + python.PyDict_Clear(self._temp_elements) + python.PyDict_Clear(self._temp_docs) + + cdef _hold(self, obj): + """A way to temporarily hold references to nodes in the evaluator. 
+ + This is needed because otherwise nodes created in XPath extension + functions would be reference counted too soon, during the XPath + evaluation. This is most important in the case of exceptions. + """ + cdef _NodeBase element + if isinstance(obj, _NodeBase): + obj = (obj,) + elif not python.PySequence_Check(obj): + return + for o in obj: + if isinstance(o, _NodeBase): + element = <_NodeBase>o + #print "Holding element:", element._c_node + python.PyDict_SetItem(self._temp_elements, id(element), element) + #print "Holding document:", element._doc._c_doc + python.PyDict_SetItem(self._temp_docs, id(element._doc), element._doc) + ################################################################################ # XSLT +cdef class XSLTContext(BaseContext): + cdef xslt.xsltTransformContext* _xsltCtxt + def __init__(self, namespaces, extensions): + self._xsltCtxt = NULL + BaseContext.__init__(self, namespaces, extensions) + + cdef register_context(self, xslt.xsltTransformContext* xsltCtxt, _Document doc): + self._xsltCtxt = xsltCtxt + self._set_xpath_context(xsltCtxt.xpathCtxt) + self._register_context(doc, 0) + xsltCtxt.xpathCtxt.userData = self + + cdef unregister_context(self): + cdef xslt.xsltTransformContext* xsltCtxt + xsltCtxt = self._xsltCtxt + if xsltCtxt is NULL: + return + self._unregister_context() + self._xsltCtxt = NULL + + cdef free_context(self): + cdef xslt.xsltTransformContext* xsltCtxt + xsltCtxt = self._xsltCtxt + if xsltCtxt is NULL: + return + self._free_context() + self._xsltCtxt = NULL + xslt.xsltFreeTransformContext(xsltCtxt) + + def _contextRegisterExtensionFunction(self, ns_uri_utf, name_utf): + if ns_uri_utf is None: + raise XSLTExtensionError, "extensions must have non-empty namespaces" + xslt.xsltRegisterExtFunction( + self._xsltCtxt, _cstr(name_utf), _cstr(ns_uri_utf), + _xpathCallback) + + def _contextUnregisterExtensionFunction(self, ns_uri_utf, name_utf): + if ns_uri_utf is not None: + xslt.xsltRegisterExtFunction( + self._xsltCtxt, 
_cstr(name_utf), _cstr(ns_uri_utf), + _xpathCallback) + + cdef class XSLT: """Turn a document into an XSLT object. """ + cdef XSLTContext _context cdef xslt.xsltStylesheet* _c_style - def __init__(self, xslt_input): + def __init__(self, xslt_input, extensions=None): # make a copy of the document as stylesheet needs to assume it # doesn't change cdef xslt.xsltStylesheet* c_style cdef xmlDoc* c_doc + cdef xmlDoc* fake_c_doc cdef _Document doc + cdef _NodeBase root_node + doc = _documentOrRaise(xslt_input) - c_doc = tree.xmlCopyDoc(doc._c_doc, 1) + root_node = _rootNodeOf(xslt_input) + + fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) + c_doc = tree.xmlCopyDoc(fake_c_doc, 1) + _destroyFakeDoc(doc._c_doc, fake_c_doc) + # XXX work around bug in xmlCopyDoc (fix is upcoming in new release # of libxml2) if doc._c_doc.URL is not NULL: @@ -57,16 +275,19 @@ if c_style is NULL: raise XSLTParseError, "Cannot parse style sheet" self._c_style = c_style + + self._context = XSLTContext(None, extensions) # XXX is it worthwile to use xsltPrecomputeStylesheet here? 
def __dealloc__(self): # this cleans up copy of doc as well xslt.xsltFreeStylesheet(self._c_style) - + def __call__(self, _input, **_kw): cdef _Document input_doc cdef _NodeBase root_node cdef _Document result_doc + cdef xslt.xsltTransformContext* transform_ctxt cdef xmlDoc* c_result cdef xmlDoc* c_doc cdef char** params @@ -76,6 +297,13 @@ input_doc = _documentOrRaise(_input) root_node = _rootNodeOf(_input) + c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node) + + transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc) + if transform_ctxt is NULL: + _destroyFakeDoc(input_doc._c_doc, c_doc) + raise XSLTApplyError, "Error preparing stylesheet run" + if _kw: # allocate space for parameters # * 2 as we want an entry for both key and value, @@ -84,26 +312,31 @@ i = 0 keep_ref = [] for key, value in _kw.items(): - k = key.encode('UTF-8') - keep_ref.append(k) - v = value.encode('UTF-8') - keep_ref.append(v) - params[i] = k + k = _utf8(key) + python.PyList_Append(keep_ref, k) + v = _utf8(value) + python.PyList_Append(keep_ref, v) + params[i] = _cstr(k) i = i + 1 - params[i] = v + params[i] = _cstr(v) i = i + 1 params[i] = NULL else: params = NULL - c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node) - c_result = xslt.xsltApplyStylesheet(self._c_style, c_doc, params) - _destroyFakeDoc(input_doc._c_doc, c_doc) + self._context._release_temp_refs() + self._context.register_context(transform_ctxt, input_doc) + + c_result = xslt.xsltApplyStylesheetUser(self._c_style, c_doc, params, + NULL, NULL, transform_ctxt) if params is not NULL: - # deallocate space for parameters again + # deallocate space for parameters cstd.free(params) + self._context.free_context() + _destroyFakeDoc(input_doc._c_doc, c_doc) + if c_result is NULL: raise XSLTApplyError, "Error applying stylesheet" @@ -144,71 +377,142 @@ ################################################################################ # XPath -cdef class XPathDocumentEvaluator: +cdef class 
XPathContext(BaseContext): + cdef object _variables + cdef object _registered_variables + def __init__(self, namespaces, extensions, variables): + BaseContext.__init__(self, namespaces, extensions) + self._variables = variables + self._registered_variables = [] + + cdef register_context(self, xpath.xmlXPathContext* xpathCtxt, _Document doc): + self._set_xpath_context(xpathCtxt) + ns_prefixes = _find_all_extension_prefixes() + if ns_prefixes: + self.registerNamespaces(ns_prefixes) + self._register_context(doc, 1) + if self._variables is not None: + self.registerVariables(self._variables) + + cdef unregister_context(self): + cdef xpath.xmlXPathContext* xpathCtxt + xpathCtxt = self._xpathCtxt + if xpathCtxt is NULL: + return + xpathCtxt.userData = NULL + self._unregister_context() + self._unregisterVariables() + self._registered_variables = [] + self._xpathCtxt = NULL + + cdef free_context(self): + cdef xpath.xmlXPathContext* xpathCtxt + xpathCtxt = self._xpathCtxt + if xpathCtxt is NULL: + return + self._free_context() + self._registered_variables = [] + xpath.xmlXPathFreeContext(xpathCtxt) + + def registerVariables(self, variable_dict): + for name, value in variable_dict.items(): + self.registerVariable(name, value) + + cdef void _unregisterVariables(self): + for name in self._registered_variables: + self._unregisterVariable(name) + + def registerVariable(self, name, value): + self._registerVariable(self._to_utf(name), value) + self._registered_variables.append(name) + + cdef void _registerVariable(self, name_utf, value): + xpath.xmlXPathRegisterVariable( + self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value)) + + cdef void _unregisterVariable(self, name_utf): + cdef xpath.xmlXPathContext* xpathCtxt + cdef xpath.xmlXPathObject* xpathVarValue + xpathCtxt = self._xpathCtxt + xpathVarValue = xpath.xmlXPathVariableLookup(xpathCtxt, _cstr(name_utf)) + if xpathVarValue is not NULL: + xpath.xmlXPathRegisterVariable(xpathCtxt, _cstr(name_utf), NULL) + 
xpath.xmlXPathFreeObject(xpathVarValue) + + def _contextRegisterExtensionFunction(self, ns_uri_utf, name_utf): + if ns_uri_utf is not None: + xpath.xmlXPathRegisterFuncNS( + self._xpathCtxt, _cstr(name_utf), _cstr(ns_uri_utf), + _xpathCallback) + else: + xpath.xmlXPathRegisterFunc( + self._xpathCtxt, _cstr(name_utf), + _xpathCallback) + + def _contextUnregisterExtensionFunction(self, ns_uri_utf, name_utf): + if ns_uri_utf is not None: + xpath.xmlXPathRegisterFuncNS( + self._xpathCtxt, _cstr(name_utf), _cstr(ns_uri_utf), NULL) + else: + xpath.xmlXPathRegisterFunc( + self._xpathCtxt, _cstr(name_utf), NULL) + + +cdef class XPathEvaluatorBase: + cdef XPathContext _context + + def __init__(self, namespaces, extensions, variables=None): + self._context = XPathContext(namespaces, extensions, variables) + + cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, _Document doc): + _exc_info = self._context._exc_info + if _exc_info is not None: + type, value, traceback = _exc_info + raise type, value, traceback + if xpathObj is NULL: + raise XPathSyntaxError, "Error in xpath expression." + try: + result = _unwrapXPathObject(xpathObj, doc) + except XPathResultError: + xpath.xmlXPathFreeObject(xpathObj) + raise + xpath.xmlXPathFreeObject(xpathObj) + return result + + +cdef class XPathDocumentEvaluator(XPathEvaluatorBase): """Create an XPath evaluator for a document. 
""" cdef xpath.xmlXPathContext* _c_ctxt - cdef _NodeBase _root_node cdef _Document _doc - cdef object _extension_functions - cdef object _exc_info - cdef object _namespaces - cdef object _extensions - cdef object _temp_elements - cdef object _temp_docs - def __init__(self, input, namespaces=None, extensions=None): + def __init__(self, etree, namespaces=None, extensions=None): cdef xpath.xmlXPathContext* xpathCtxt cdef int ns_register_status - cdef _Document input_doc - cdef _NodeBase root_node - - input_doc = _documentOrRaise(input) - root_node = _rootNodeOf(input) + cdef _Document doc - xpathCtxt = xpath.xmlXPathNewContext(input_doc._c_doc) + if isinstance(etree, _Document): + doc = <_Document>etree # for internal use only! + else: + doc = (<_ElementTree>etree)._doc + + xpathCtxt = xpath.xmlXPathNewContext(doc._c_doc) if xpathCtxt is NULL: # XXX what triggers this exception? raise XPathContextError, "Unable to create new XPath context" - self._doc = input_doc - self._root_node = root_node + self._doc = doc self._c_ctxt = xpathCtxt - self._c_ctxt.userData = self - self._namespaces = namespaces - self._extensions = extensions - if namespaces is not None: - self.registerNamespaces(namespaces) - self._extension_functions = {} - if extensions is not None: - for extension in extensions: - self._extension_functions.update(extension) - for (ns_uri, name), function in extension.items(): - if ns_uri is not None: - xpath.xmlXPathRegisterFuncNS( - xpathCtxt, name, ns_uri, _xpathCallback) - else: - xpath.xmlXPathRegisterFunc( - xpathCtxt, name, _xpathCallback) - + XPathEvaluatorBase.__init__(self, namespaces, extensions) + def __dealloc__(self): xpath.xmlXPathFreeContext(self._c_ctxt) def registerNamespace(self, prefix, uri): """Register a namespace with the XPath context. """ - s_prefix = prefix.encode('UTF8') - s_uri = uri.encode('UTF8') - # XXX should check be done to verify namespace doesn't already exist? 
- ns_register_status = xpath.xmlXPathRegisterNs( - self._c_ctxt, s_prefix, s_uri) - if ns_register_status != 0: - # XXX doesn't seem to be possible to trigger this - # from Python - raise XPathNamespaceError, ( - "Unable to register namespaces with prefix " - "%s and uri %s" % (prefix, uri)) + self._context.addNamespace(prefix, uri) def registerNamespaces(self, namespaces): """Register a prefix -> uri dict. @@ -216,91 +520,140 @@ for prefix, uri in namespaces.items(): self.registerNamespace(prefix, uri) - def evaluate(self, path): - return self._evaluate(path, self._root_node._c_node) + def evaluate(self, _path, **_variables): + """Evaluate an XPath expression on the document. Variables + may be given as keyword arguments. Note that namespaces are + currently not supported for variables.""" + return self._evaluate(_path, NULL, _variables) - cdef object _evaluate(self, path, xmlNode* c_ctxt_node): - cdef xpath.xmlXPathObject* xpathObj + cdef object _evaluate(self, path, xmlNode* c_ctxt_node, variable_dict): + cdef xpath.xmlXPathContext* xpathCtxt + cdef xpath.xmlXPathObject* xpathObj cdef xmlNode* c_node + xpathCtxt = self._c_ctxt # if element context is requested; unfortunately need to modify ctxt - self._c_ctxt.node = c_ctxt_node + xpathCtxt.node = c_ctxt_node + + self._context._release_temp_refs() + self._context.register_context(xpathCtxt, self._doc) + self._context.registerVariables(variable_dict) + + path = _utf8(path) + xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt) + self._context.unregister_context() + + return self._handle_result(xpathObj, self._doc) - path = path.encode('UTF-8') - self._exc_info = None - self._release() - xpathObj = xpath.xmlXPathEvalExpression(path, self._c_ctxt) - if self._exc_info is not None: - type, value, traceback = self._exc_info - self._exc_info = None - raise type, value, traceback - if xpathObj is NULL: - raise XPathSyntaxError, "Error in xpath expression." 
- try: - result = _unwrapXPathObject(xpathObj, self._doc) - except XPathResultError: - #self._release() - xpath.xmlXPathFreeObject(xpathObj) - raise - xpath.xmlXPathFreeObject(xpathObj) - # release temporarily held python stuff - #self._release() - return result - #def clone(self): # # XXX pretty expensive so calling this from callback is probably # # not desirable # return XPathEvaluator(self._doc, self._namespaces, self._extensions) - def _release(self): - self._temp_elements = {} - self._temp_docs = {} - - def _hold(self, obj): - """A way to temporarily hold references to nodes in the evaluator. - - This is needed because otherwise nodes created in XPath extension - functions would be reference counted too soon, during the - XPath evaluation. - """ - cdef _NodeBase element - if isinstance(obj, _NodeBase): - obj = [obj] - if not type(obj) in (type([]), type(())): - return - for o in obj: - if isinstance(o, _NodeBase): - element = <_NodeBase>o - #print "Holding element:", element._c_node - self._temp_elements[id(element)] = element - #print "Holding document:", element._doc._c_doc - self._temp_docs[id(element._doc)] = element._doc - cdef class XPathElementEvaluator(XPathDocumentEvaluator): """Create an XPath evaluator for an element. """ cdef _Element _element - + def __init__(self, _Element element, namespaces=None, extensions=None): XPathDocumentEvaluator.__init__( self, element._doc, namespaces, extensions) self._element = element - - def evaluate(self, path): - return self._evaluate(path, self._element._c_node) -def XPathEvaluator(doc_or_element, namespaces=None, extensions=None): - if isinstance(doc_or_element, _ElementTree) or isinstance(doc_or_element, _Document): - return XPathDocumentEvaluator(doc_or_element, namespaces, extensions) + def evaluate(self, _path, **_variables): + """Evaluate an XPath expression on the element. Variables may + be given as keyword arguments. 
Note that namespaces are + currently not supported for variables.""" + return self._evaluate(_path, self._element._c_node, _variables) + +def XPathEvaluator(etree_or_element, namespaces=None, extensions=None): + if isinstance(etree_or_element, _ElementTree): + return XPathDocumentEvaluator(etree_or_element, namespaces, extensions) else: - return XPathElementEvaluator(doc_or_element, namespaces, extensions) - + return XPathElementEvaluator(etree_or_element, namespaces, extensions) + def Extension(module, function_mapping, ns_uri=None): - result = {} + functions = [] for function_name, xpath_name in function_mapping.items(): - result[(ns_uri, xpath_name)] = getattr(module, function_name) - return result + functions[xpath_name] = getattr(module, function_name) + return {ns_uri : functions} +cdef class XPath(XPathEvaluatorBase): + cdef xpath.xmlXPathCompExpr* _xpath + cdef object _prefix_map + cdef readonly object path + + def __init__(self, path, namespaces=None, extensions=None): + XPathEvaluatorBase.__init__(self, namespaces, extensions, None) + self.path = path + path = _utf8(path) + self._xpath = xpath.xmlXPathCompile(_cstr(path)) + if self._xpath is NULL: + raise XPathSyntaxError, "Error in xpath expression." 
+ + def __call__(self, _etree_or_element, **_variables): + cdef xpath.xmlXPathContext* xpathCtxt + cdef xpath.xmlXPathObject* xpathObj + cdef _Document document + cdef _NodeBase element + cdef XPathContext context + + document = _documentOrRaise(_etree_or_element) + element = _rootNodeOf(_etree_or_element) + + xpathCtxt = xpath.xmlXPathNewContext(document._c_doc) + xpathCtxt.node = element._c_node + + context = self._context + context._release_temp_refs() + context.register_context(xpathCtxt, document) + context.registerVariables(_variables) + + xpathObj = xpath.xmlXPathCompiledEval(self._xpath, xpathCtxt) + + context.unregister_context() + + xpath.xmlXPathFreeContext(xpathCtxt) + + return self._handle_result(xpathObj, document) + + def evaluate(self, _tree, **_variables): + return self(_tree, **_variables) + + def __dealloc__(self): + if self._xpath is not NULL: + xpath.xmlXPathFreeCompExpr(self._xpath) + +cdef object _replace_strings +cdef object _find_namespaces +_replace_strings = re.compile('("[^"]*")|(\'[^\']*\')').sub +_find_namespaces = re.compile('({[^}]+})').findall + +cdef class ETXPath(XPath): + """Special XPath class that supports the ElementTree {uri} notation for + namespaces.""" + def __init__(self, path, extensions=None): + path_utf, namespaces = self._nsextract_path(_utf8(path)) + XPath.__init__(self, funicode(path_utf), namespaces, extensions) + + cdef _nsextract_path(self, path_utf): + # replace {namespaces} by new prefixes + cdef int i + namespaces = {} + stripped_path = _replace_strings('', path_utf) # remove string literals + namespace_defs = [] + i = 1 + for namespace_def in _find_namespaces(stripped_path): + if namespace_def not in namespace_defs: + prefix = python.PyString_FromFormat("xpp%02d", i) + i = i+1 + python.PyList_Append(namespace_defs, namespace_def) + namespace = namespace_def[1:-1] # remove '{}' + python.PyDict_SetItem(namespaces, prefix, namespace) + prefix_str = prefix + ':' + # FIXME: this also replaces {namespaces} within 
strings! + path_utf = path_utf.replace(namespace_def, prefix_str) + return path_utf, namespaces ################################################################################ # helper functions @@ -308,19 +661,18 @@ cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL: cdef xpath.xmlNodeSet* resultSet cdef _NodeBase node - if isinstance(obj, str): + if python.PyUnicode_Check(obj): + obj = _utf8(obj) + if python.PyString_Check(obj): # XXX use the Wrap variant? Or leak... - return xpath.xmlXPathNewCString(obj) - if isinstance(obj, unicode): - obj = obj.encode("utf-8") - return xpath.xmlXPathNewCString(obj) - if isinstance(obj, types.BooleanType): + return xpath.xmlXPathNewCString(_cstr(obj)) + if python.PyBool_Check(obj): return xpath.xmlXPathNewBoolean(obj) - if isinstance(obj, (int, float)): + if python.PyNumber_Check(obj): return xpath.xmlXPathNewFloat(obj) if isinstance(obj, _NodeBase): - obj = [obj] - if isinstance(obj, (types.ListType, types.TupleType)): + obj = (obj,) + if python.PySequence_Check(obj): resultSet = xpath.xmlXPathNodeSetCreate(NULL) for element in obj: if isinstance(element, _NodeBase): @@ -368,7 +720,7 @@ return result for i from 0 <= i < xpathObj.nodesetval.nodeNr: c_node = xpathObj.nodesetval.nodeTab[i] - if c_node.type == tree.XML_ELEMENT_NODE: + if _isElement(c_node): element = _elementFactory(doc, c_node) result.append(element) elif c_node.type == tree.XML_TEXT_NODE: @@ -378,12 +730,6 @@ attr_value = funicode(s) tree.xmlFree(s) result.append(attr_value) - elif c_node.type == tree.XML_COMMENT_NODE: - s = tree.xmlNodeGetContent(c_node) - s2 = '' % s - comment_value = funicode(s2) - tree.xmlFree(s) - result.append(comment_value) else: print "Not yet implemented result node type:", c_node.type raise NotImplementedError @@ -393,10 +739,10 @@ cdef xpath.xmlXPathContext* rctxt cdef _Document doc cdef xpath.xmlXPathObject* obj - cdef XPathDocumentEvaluator evaluator - + cdef BaseContext extensions + rctxt = ctxt.context - + # get 
information on what function is called name = rctxt.function if rctxt.functionURI is not NULL: @@ -405,29 +751,29 @@ uri = None # get our evaluator - evaluator = (rctxt.userData) + extensions = (rctxt.userData) + + # lookup up the extension function in the context + f = extensions.find_extension(uri, name) - # lookup up the extension function in the evaluator - f = evaluator._extension_functions[(uri, name)] - args = [] - doc = evaluator._doc + doc = extensions._doc for i from 0 <= i < nargs: args.append(_unwrapXPathObject(xpath.valuePop(ctxt), doc)) args.reverse() try: # call the function - res = f(evaluator, *args) + res = f(None, *args) # hold python objects temporarily so that they won't get deallocated # during processing - evaluator._hold(res) + extensions._hold(res) # now wrap for XPath consumption obj = _wrapXPathObject(res) except: xpath.xmlXPathErr( ctxt, xmlerror.XML_XPATH_EXPR_ERROR - xmlerror.XML_XPATH_EXPRESSION_OK) - evaluator._exc_info = sys.exc_info() + extensions._exc_info = sys.exc_info() return xpath.valuePush(ctxt, obj) Modified: lxml/branch/htmlparser/version.txt ============================================================================== --- lxml/branch/htmlparser/version.txt (original) +++ lxml/branch/htmlparser/version.txt Sun Mar 26 12:53:24 2006 @@ -1 +1 @@ -0.8 +0.9 From scoder at codespeak.net Sun Mar 26 14:00:49 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 26 14:00:51 2006 Subject: [Lxml-checkins] r25009 - lxml/trunk/src/lxml Message-ID: <20060326120049.D729810084@code0.codespeak.net> Author: scoder Date: Sun Mar 26 14:00:48 2006 New Revision: 25009 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/parser.pxi Log: fixed potential bugs in API parse functions -> StringIO.getvalue() and FileLike.read() may return unicode strings! 
- new dedicated helper function _parseMemoryDocument to parse strings/unicode - use it in XML() and _parseDocument() Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Mar 26 14:00:48 2006 @@ -143,22 +143,26 @@ cdef xmlDoc* c_doc # XXX simplistic (c)StringIO support if hasattr(source, 'getvalue'): - c_doc = theParser.parseDoc(source.getvalue(), parser) - else: - filename = _getFilenameForFile(source) - # Support for unamed file-like object (eg urlgrabber.urlopen) - if not filename and hasattr(source, 'read'): - c_doc = theParser.parseDoc(source.read(), parser) - # Otherwise parse the file directly from the filesystem - else: - if filename is None: - filename = source - # open filename - c_doc = theParser.parseDocFromFile(filename, parser) - if c_doc is NULL: - return None - else: - return _documentFactory(c_doc) + return _parseMemoryDocument(source.getvalue(), parser) + + filename = _getFilenameForFile(source) + # Support for unamed file-like object (eg urlgrabber.urlopen) + if not filename and hasattr(source, 'read'): + return _parseMemoryDocument(source.read(), parser) + + # Otherwise parse the file directly from the filesystem + if filename is None: + filename = source + # open filename + c_doc = theParser.parseDocFromFile(filename, parser) + return _documentFactory(c_doc) + +cdef _Document _parseMemoryDocument(text, parser): + cdef xmlDoc* c_doc + if python.PyUnicode_Check(text): + text = _stripDeclaration(_utf8(text)) + c_doc = theParser.parseDoc(text, parser) + return _documentFactory(c_doc) cdef _Document _documentFactory(xmlDoc* c_doc): cdef _Document result @@ -1195,11 +1199,7 @@ return etree def XML(text): - cdef xmlDoc* c_doc - if python.PyUnicode_Check(text): - text = _stripDeclaration(_utf8(text)) - c_doc = theParser.parseDoc(text, None) - return _documentFactory(c_doc).getroot() + return 
_parseMemoryDocument(text, None).getroot() fromstring = XML Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Sun Mar 26 14:00:48 2006 @@ -110,7 +110,7 @@ parse_options = _DEFAULT_PARSE_OPTIONS self._initParse() - pctxt = xmlparser.xmlCreateDocParserCtxt(text) + pctxt = xmlparser.xmlCreateDocParserCtxt(_cstr(text)) if pctxt is NULL: raise XMLSyntaxError From scoder at codespeak.net Sun Mar 26 14:02:10 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 26 14:02:11 2006 Subject: [Lxml-checkins] r25010 - lxml/branch/htmlparser/src/lxml Message-ID: <20060326120210.E603F10084@code0.codespeak.net> Author: scoder Date: Sun Mar 26 14:02:09 2006 New Revision: 25010 Modified: lxml/branch/htmlparser/src/lxml/etree.pyx lxml/branch/htmlparser/src/lxml/parser.pxi Log: merged in unicode parsing fixes from trunk Modified: lxml/branch/htmlparser/src/lxml/etree.pyx ============================================================================== --- lxml/branch/htmlparser/src/lxml/etree.pyx (original) +++ lxml/branch/htmlparser/src/lxml/etree.pyx Sun Mar 26 14:02:09 2006 @@ -143,22 +143,26 @@ cdef xmlDoc* c_doc # XXX simplistic (c)StringIO support if hasattr(source, 'getvalue'): - c_doc = theParser.parseDoc(source.getvalue(), parser) - else: - filename = _getFilenameForFile(source) - # Support for unamed file-like object (eg urlgrabber.urlopen) - if not filename and hasattr(source, 'read'): - c_doc = theParser.parseDoc(source.read(), parser) - # Otherwise parse the file directly from the filesystem - else: - if filename is None: - filename = source - # open filename - c_doc = theParser.parseDocFromFile(filename, parser) - if c_doc is NULL: - return None - else: - return _documentFactory(c_doc) + return _parseMemoryDocument(source.getvalue(), parser) + + filename = _getFilenameForFile(source) + # Support for unamed 
file-like object (eg urlgrabber.urlopen) + if not filename and hasattr(source, 'read'): + return _parseMemoryDocument(source.read(), parser) + + # Otherwise parse the file directly from the filesystem + if filename is None: + filename = source + # open filename + c_doc = theParser.parseDocFromFile(filename, parser) + return _documentFactory(c_doc) + +cdef _Document _parseMemoryDocument(text, parser): + cdef xmlDoc* c_doc + if python.PyUnicode_Check(text): + text = _stripDeclaration(_utf8(text)) + c_doc = theParser.parseDoc(text, parser) + return _documentFactory(c_doc) cdef _Document _documentFactory(xmlDoc* c_doc): cdef _Document result @@ -1195,11 +1199,7 @@ return etree def XML(text): - cdef xmlDoc* c_doc - if python.PyUnicode_Check(text): - text = _stripDeclaration(_utf8(text)) - c_doc = theParser.parseDoc(text, None) - return _documentFactory(c_doc).getroot() + return _parseMemoryDocument(text, None).getroot() fromstring = XML Modified: lxml/branch/htmlparser/src/lxml/parser.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/parser.pxi (original) +++ lxml/branch/htmlparser/src/lxml/parser.pxi Sun Mar 26 14:02:09 2006 @@ -7,14 +7,6 @@ class XMLSyntaxError(LxmlSyntaxError): pass -cdef int _XML_DEFAULT_PARSE_OPTIONS -_XML_DEFAULT_PARSE_OPTIONS = ( - xmlparser.XML_PARSE_NOENT | - xmlparser.XML_PARSE_NOCDATA | - xmlparser.XML_PARSE_NOWARNING | - xmlparser.XML_PARSE_NOERROR - ) - cdef int _XML_ORIG_DEFAULT_PARSE_OPTIONS _XML_ORIG_DEFAULT_PARSE_OPTIONS = _XML_DEFAULT_PARSE_OPTIONS @@ -30,20 +22,42 @@ _HTML_ORIG_DEFAULT_PARSE_OPTIONS = _HTML_DEFAULT_PARSE_OPTIONS -cdef class _BaseParser: - cdef int _parse_options +cdef class _ParserContext: + cdef xmlDict* _c_dict + cdef int _initialized - cdef xmlParserCtxt* newMemoryParserContext(self, text_utf): - return NULL + def __init__(self): + self._c_dict = NULL + self._initialized = 0 - cdef xmlParserCtxt* newFileParserContext(self): - return NULL + def 
__dealloc__(self): + if self._c_dict is not NULL: + xmlparser.xmlDictFree(self._c_dict) - cdef xmlDoc* parseMemory(self, xmlParserCtxt* ctx): - return NULL + cdef void _initParse(self): + if not self._initialized: + xmlparser.xmlInitParser() + self._initialized = 1 - cdef xmlDoc* parseFile(self, xmlParserCtxt* ctx, char* filename, int options): - return NULL + cdef void _prepareParse(self, xmlParserCtxt* pctxt): + if self._c_dict is not NULL and pctxt.dict is not NULL: + xmlparser.xmlDictFree(pctxt.dict) + pctxt.dict = self._c_dict + xmlparser.xmlDictReference(pctxt.dict) + + cdef void _finalizeParse(self, xmlDoc* result): + # store dict of last object parsed if no shared dict yet + if self._c_dict is NULL: + #print "storing shared dict" + self._c_dict = result.dict + xmlparser.xmlDictReference(self._c_dict) + +cdef _ParserContext __GLOBAL_PARSER_CONTEXT +__GLOBAL_PARSER_CONTEXT = _ParserContext() + + +cdef class _BaseParser: + cdef int _parse_options cdef class XMLParser(_BaseParser): @@ -57,6 +71,7 @@ parser configuration. A DTD will only be loaded if validation or attribute default values are requested. """ + cdef int _DEFAULT_PARSE_OPTIONS def __init__(self, attribute_defaults=False, dtd_validation=False, no_network=False, ns_clean=False): cdef int parse_options @@ -75,38 +90,75 @@ self._parse_options = parse_options + cdef xmlDoc* parseDoc(self, text) except NULL: + """Parse document, share dictionary if possible. 
+ """ + cdef xmlDoc* result + cdef xmlParserCtxt* pctxt + cdef int parse_error + __GLOBAL_PARSER_CONTEXT._initParse() + pctxt = xmlparser.xmlCreateDocParserCtxt(text) + if pctxt is NULL: + raise XMLSyntaxError + + __GLOBAL_PARSER_CONTEXT._prepareParse(pctxt) + xmlparser.xmlCtxtUseOptions( + pctxt, + parse_options) + parse_error = xmlparser.xmlParseDocument(pctxt) + # in case of errors, clean up context plus any document + if parse_error != 0 or not pctxt.wellFormed: + if pctxt.myDoc is not NULL: + tree.xmlFreeDoc(pctxt.myDoc) + pctxt.myDoc = NULL + xmlparser.xmlFreeParserCtxt(pctxt) + raise XMLSyntaxError + result = pctxt.myDoc + __GLOBAL_PARSER_CONTEXT._finalizeParse(result) + xmlparser.xmlFreeParserCtxt(pctxt) + return result + + cdef xmlDoc* parseDocFromFile(self, char* filename) except NULL: + cdef xmlDoc* result + cdef xmlParserCtxt* pctxt + __GLOBAL_PARSER_CONTEXT._initParse() + pctxt = xmlparser.xmlNewParserCtxt() + __GLOBAL_PARSER_CONTEXT._prepareParse(pctxt) + # XXX set options twice? needed to shut up libxml2 + xmlparser.xmlCtxtUseOptions(pctxt, self._parse_options) + result = xmlparser.xmlCtxtReadFile(pctxt, filename, + NULL, parse_options) + if result is NULL: + if pctxt.lastError.domain == xmlerror.XML_FROM_IO: + raise IOError, "Could not open file %s" % filename + # in case of errors, clean up context plus any document + # XXX other errors? 
+ if not pctxt.wellFormed: + if pctxt.myDoc is not NULL: + tree.xmlFreeDoc(pctxt.myDoc) + pctxt.myDoc = NULL + xmlparser.xmlFreeParserCtxt(pctxt) + raise XMLSyntaxError + __GLOBAL_PARSER_CONTEXT._finalizeParse(result) + xmlparser.xmlFreeParserCtxt(pctxt) + return result + property error_log: def __get__(self): return __build_error_log_tuple(self) -## def copy(self, attribute_defaults=None, dtd_validation=None, -## no_network=None, ns_clean=None): -## cdef int parse_options -## parse_options = self._parse_options -## if attribute_defaults is None: -## attribute_defaults = parse_options & xmlparser.XML_PARSE_DTDATTR -## if dtd_validation is None: -## dtd_validation = parse_options & xmlparser.XML_PARSE_DTDVALID -## if no_network is None: -## no_network = parse_options & xmlparser.XML_PARSE_NONET -## if ns_clean is None: -## ns_clean = parse_options & xmlparser.XML_PARSE_NSCLEAN - -## return self.__class__(attribute_defaults=attribute_defaults, -## dtd_validation=dtd_validation, -## no_network=no_network, ns_clean=ns_clean) - - cdef xmlParserCtxt* newMemoryParserContext(self, text_utf): - return xmlparser.xmlCreateDocParserCtxt(text_utf) +XMLParser._DEFAULT_PARSE_OPTIONS = ( + xmlparser.XML_PARSE_NOENT | + xmlparser.XML_PARSE_NOCDATA | + xmlparser.XML_PARSE_NOWARNING | + xmlparser.XML_PARSE_NOERROR + ) - cdef xmlDoc* parseMemory(self, xmlParserCtxt* ctx, text_utf): - pass - cdef class HTMLParser(_BaseParser): """The HTML parser. This parser allows reading broken HTML into XML. """ - cdef int _HTML_DEFAULT_PARSE_OPTIONS + cdef int _DEFAULT_PARSE_OPTIONS def __init__(self, recover=True, compact_text=True, no_network=False, from_parser=None): cdef int parse_options @@ -124,8 +176,56 @@ self._parse_options = parse_options - cdef xmlParserCtxt* newMemoryParserContext(self, text_utf): - return htmlparser.htmlCreateMemoryParserCtxt(text_utf, len(text_utf)) + cdef xmlDoc* parseDoc(self, text) except NULL: + """Parse HTML document, share dictionary if possible. 
+ """ + cdef xmlDoc* result + cdef xmlParserCtxt* pctxt + cdef char* c_text + cdef int parse_error + cdef int c_len + __GLOBAL_PARSER_CONTEXT._initParse() + pctxt = htmlparser.htmlCreateMemoryParserCtxt( + _cstr(text), python.PyString_GET_SIZE(text)) + __GLOBAL_PARSER_CONTEXT._prepareParse(pctxt) + # XXX parser options? + parser_error = htmlparser.htmlParseDocument(pctxt) + #if parser_error != 0: + # if pctxt.myDoc is not NULL: + # tree.xmlFreeDoc(pctxt.myDoc) + # pctxt.myDoc = NULL + # htmlparser.htmlFreeParserCtxt(pctxt) + # raise SyntaxError + result = pctxt.myDoc + __GLOBAL_PARSER_CONTEXT._finalizeParse(result) + htmlparser.htmlFreeParserCtxt(pctxt) + return result + + cdef xmlDoc* parseDocFromFile(self, char* filename) except NULL: + cdef xmlDoc* result + cdef xmlParserCtxt* pctxt + cdef int parser_error + __GLOBAL_PARSER_CONTEXT._initParse() + pctxt = htmlparser.htmlCreateFileParserCtxt(filename, NULL) + __GLOBAL_PARSER_CONTEXT._prepareParse(pctxt) + + parser_error = htmlparser.htmlParseDocument(pctxt) + #if parser_error != 0: + # if pctxt.myDoc is not NULL: + # tree.xmlFreeDoc(pctxt.myDoc) + # pctxt.myDoc = NULL + # htmlparser.htmlFreeParserCtxt(pctxt) + # raise SyntaxError + result = pctxt.myDoc + __GLOBAL_PARSER_CONTEXT._finalizeParse(result) + htmlparser.htmlFreeParserCtxt(pctxt) + return result + +HTMLParser._DEFAULT_PARSE_OPTIONS = ( + htmlparser.HTML_PARSE_RECOVER | + htmlparser.HTML_PARSE_NOERROR | + htmlparser.HTML_PARSE_NOWARNING + ) def set_default_parser(parser=None): @@ -141,7 +241,6 @@ cdef class Parser: - cdef xmlDict* _c_dict cdef int _parser_initialized @@ -168,7 +267,7 @@ parse_options = _XML_DEFAULT_PARSE_OPTIONS self._initParse() - pctxt = xmlparser.xmlCreateDocParserCtxt(text) + pctxt = xmlparser.xmlCreateDocParserCtxt(_cstr(text)) if pctxt is NULL: raise XMLSyntaxError From scoder at codespeak.net Sun Mar 26 14:10:15 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 26 14:10:17 2006 Subject: [Lxml-checkins] r25011 - 
lxml/trunk/src/lxml/tests Message-ID: <20060326121015.6DACA10084@code0.codespeak.net> Author: scoder Date: Sun Mar 26 14:10:14 2006 New Revision: 25011 Modified: lxml/trunk/src/lxml/tests/test_unicode.py Log: new test case for unicode StringIO Modified: lxml/trunk/src/lxml/tests/test_unicode.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_unicode.py (original) +++ lxml/trunk/src/lxml/tests/test_unicode.py Sun Mar 26 14:10:14 2006 @@ -30,6 +30,10 @@ el = etree.Comment(uni) self.assertEquals(' %s ' % uni, el.text) + def test_unicode_parse_stringio(self): + el = etree.parse(StringIO(u'

%s

' % uni)).getroot() + self.assertEquals(uni, el.text) + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(UnicodeTestCase)]) From scoder at codespeak.net Sun Mar 26 14:14:05 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 26 14:14:06 2006 Subject: [Lxml-checkins] r25012 - lxml/branch/lxml-0.9.x/src/lxml/tests Message-ID: <20060326121405.929FF10084@code0.codespeak.net> Author: scoder Date: Sun Mar 26 14:13:45 2006 New Revision: 25012 Modified: lxml/branch/lxml-0.9.x/src/lxml/tests/test_unicode.py Log: merge from trunk: test case for stringio parse bug Modified: lxml/branch/lxml-0.9.x/src/lxml/tests/test_unicode.py ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/tests/test_unicode.py (original) +++ lxml/branch/lxml-0.9.x/src/lxml/tests/test_unicode.py Sun Mar 26 14:13:45 2006 @@ -30,6 +30,10 @@ el = etree.Comment(uni) self.assertEquals(' %s ' % uni, el.text) + def test_unicode_parse_stringio(self): + el = etree.parse(StringIO(u'

<p>%s</p>

' % uni)).getroot() + self.assertEquals(uni, el.text) + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(UnicodeTestCase)]) From scoder at codespeak.net Sun Mar 26 14:14:26 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 26 14:14:27 2006 Subject: [Lxml-checkins] r25013 - lxml/branch/lxml-0.9.x/src/lxml Message-ID: <20060326121426.A765210084@code0.codespeak.net> Author: scoder Date: Sun Mar 26 14:14:25 2006 New Revision: 25013 Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Log: merge from trunk: bug fix for parsing unicode StringIO values Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Sun Mar 26 14:14:25 2006 @@ -143,22 +143,26 @@ cdef xmlDoc* c_doc # XXX simplistic (c)StringIO support if hasattr(source, 'getvalue'): - c_doc = theParser.parseDoc(source.getvalue(), parser) - else: - filename = _getFilenameForFile(source) - # Support for unamed file-like object (eg urlgrabber.urlopen) - if not filename and hasattr(source, 'read'): - c_doc = theParser.parseDoc(source.read(), parser) - # Otherwise parse the file directly from the filesystem - else: - if filename is None: - filename = source - # open filename - c_doc = theParser.parseDocFromFile(filename, parser) - if c_doc is NULL: - return None - else: - return _documentFactory(c_doc) + return _parseMemoryDocument(source.getvalue(), parser) + + filename = _getFilenameForFile(source) + # Support for unamed file-like object (eg urlgrabber.urlopen) + if not filename and hasattr(source, 'read'): + return _parseMemoryDocument(source.read(), parser) + + # Otherwise parse the file directly from the filesystem + if filename is None: + filename = source + # open filename + c_doc = theParser.parseDocFromFile(filename, parser) + return _documentFactory(c_doc) + +cdef _Document 
_parseMemoryDocument(text, parser): + cdef xmlDoc* c_doc + if python.PyUnicode_Check(text): + text = _stripDeclaration(_utf8(text)) + c_doc = theParser.parseDoc(text, parser) + return _documentFactory(c_doc) cdef _Document _documentFactory(xmlDoc* c_doc): cdef _Document result @@ -1180,11 +1184,7 @@ return etree def XML(text): - cdef xmlDoc* c_doc - if python.PyUnicode_Check(text): - text = _stripDeclaration(_utf8(text)) - c_doc = theParser.parseDoc(text, None) - return _documentFactory(c_doc).getroot() + return _parseMemoryDocument(text, None).getroot() fromstring = XML From scoder at codespeak.net Sun Mar 26 18:09:23 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 26 18:09:24 2006 Subject: [Lxml-checkins] r25016 - lxml/branch/htmlparser/src/lxml/tests Message-ID: <20060326160923.842CE1009A@code0.codespeak.net> Author: scoder Date: Sun Mar 26 18:09:17 2006 New Revision: 25016 Modified: lxml/branch/htmlparser/src/lxml/tests/test_unicode.py Log: merged in changes from trunk Modified: lxml/branch/htmlparser/src/lxml/tests/test_unicode.py ============================================================================== --- lxml/branch/htmlparser/src/lxml/tests/test_unicode.py (original) +++ lxml/branch/htmlparser/src/lxml/tests/test_unicode.py Sun Mar 26 18:09:17 2006 @@ -30,6 +30,10 @@ el = etree.Comment(uni) self.assertEquals(' %s ' % uni, el.text) + def test_unicode_parse_stringio(self): + el = etree.parse(StringIO(u'

<p>%s</p>

' % uni)).getroot() + self.assertEquals(uni, el.text) + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(UnicodeTestCase)]) From scoder at codespeak.net Sun Mar 26 20:13:27 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 26 20:13:27 2006 Subject: [Lxml-checkins] r25017 - lxml/branch/htmlparser/src/lxml Message-ID: <20060326181327.1BA5710086@code0.codespeak.net> Author: scoder Date: Sun Mar 26 20:13:25 2006 New Revision: 25017 Modified: lxml/branch/htmlparser/src/lxml/etree.pyx lxml/branch/htmlparser/src/lxml/htmlparser.pxd lxml/branch/htmlparser/src/lxml/parser.pxi Log: non-working version that implements the complete API Modified: lxml/branch/htmlparser/src/lxml/etree.pyx ============================================================================== --- lxml/branch/htmlparser/src/lxml/etree.pyx (original) +++ lxml/branch/htmlparser/src/lxml/etree.pyx Sun Mar 26 20:13:25 2006 @@ -1198,6 +1198,9 @@ return etree +def HTML(text): + return _parseMemoryDocument(text, __DEFAULT_HTML_PARSER).getroot() + def XML(text): return _parseMemoryDocument(text, None).getroot() Modified: lxml/branch/htmlparser/src/lxml/htmlparser.pxd ============================================================================== --- lxml/branch/htmlparser/src/lxml/htmlparser.pxd (original) +++ lxml/branch/htmlparser/src/lxml/htmlparser.pxd Sun Mar 26 20:13:25 2006 @@ -12,5 +12,7 @@ HTML_PARSE_NONET # Forbid network access HTML_PARSE_COMPACT # compact small text nodes - xmlParserCtxt* htmlCreateMemoryParserCtxt(char* buffer, - int size) + xmlParserCtxt* htmlCreateMemoryParserCtxt(char* buffer, int size) + xmlParserCtxt* htmlCreateFileParserCtxt(char* filename, char* encoding) + void htmlFreeParserCtxt(xmlParserCtxt* ctxt) + int htmlParseDocument(xmlParserCtxt* ctxt) Modified: lxml/branch/htmlparser/src/lxml/parser.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/parser.pxi 
(original) +++ lxml/branch/htmlparser/src/lxml/parser.pxi Sun Mar 26 20:13:25 2006 @@ -7,21 +7,6 @@ class XMLSyntaxError(LxmlSyntaxError): pass -cdef int _XML_ORIG_DEFAULT_PARSE_OPTIONS -_XML_ORIG_DEFAULT_PARSE_OPTIONS = _XML_DEFAULT_PARSE_OPTIONS - - -cdef int _HTML_DEFAULT_PARSE_OPTIONS -_HTML_DEFAULT_PARSE_OPTIONS = ( - htmlparser.HTML_PARSE_RECOVER | - htmlparser.HTML_PARSE_NOERROR | - htmlparser.HTML_PARSE_NOWARNING - ) - -cdef int _HTML_ORIG_DEFAULT_PARSE_OPTIONS -_HTML_ORIG_DEFAULT_PARSE_OPTIONS = _HTML_DEFAULT_PARSE_OPTIONS - - cdef class _ParserContext: cdef xmlDict* _c_dict cdef int _initialized @@ -52,15 +37,47 @@ self._c_dict = result.dict xmlparser.xmlDictReference(self._c_dict) + cdef void _resetDict(self, xmlDoc* result): + cdef xmlDict* d + if self._c_dict is NULL: + # we need to get dict from the new document if it's there, + # otherwise make one + if result.dict is not NULL: + d = result.dict + else: + d = xmlparser.xmlDictCreate() + result.dict = d + self._c_dict = d + xmlparser.xmlDictReference(self._c_dict) + else: + # we need to reuse the central dict and get rid of the new one + if result.dict is not NULL: + xmlparser.xmlDictFree(result.dict) + result.dict = self._c_dict + xmlparser.xmlDictReference(result.dict) + cdef _ParserContext __GLOBAL_PARSER_CONTEXT __GLOBAL_PARSER_CONTEXT = _ParserContext() -cdef class _BaseParser: - cdef int _parse_options +cdef class BaseParser: + cdef _ErrorLog _error_log + def __init__(self): + self._error_log = _ErrorLog() + property error_log: + def __get__(self): + return self._error_log.copy() + +cdef int _XML_DEFAULT_PARSE_OPTIONS +_XML_DEFAULT_PARSE_OPTIONS = ( + xmlparser.XML_PARSE_NOENT | + xmlparser.XML_PARSE_NOCDATA | + xmlparser.XML_PARSE_NOWARNING | + xmlparser.XML_PARSE_NOERROR + ) -cdef class XMLParser(_BaseParser): +cdef class XMLParser(BaseParser): """The XML parser. Parsers can be supplied as additional argument to various parse functions of the lxml API. 
A default parser is always available and can be replaced by a call to the global function @@ -71,11 +88,12 @@ parser configuration. A DTD will only be loaded if validation or attribute default values are requested. """ - cdef int _DEFAULT_PARSE_OPTIONS + cdef int _parse_options def __init__(self, attribute_defaults=False, dtd_validation=False, no_network=False, ns_clean=False): cdef int parse_options - parse_options = _XML_ORIG_DEFAULT_PARSE_OPTIONS + BaseParser.__init__(self) + parse_options = _XML_DEFAULT_PARSE_OPTIONS if dtd_validation: parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD | \ @@ -96,15 +114,16 @@ cdef xmlDoc* result cdef xmlParserCtxt* pctxt cdef int parse_error + self._error_log.connect() __GLOBAL_PARSER_CONTEXT._initParse() - pctxt = xmlparser.xmlCreateDocParserCtxt(text) + pctxt = xmlparser.xmlCreateDocParserCtxt(_cstr(text)) if pctxt is NULL: raise XMLSyntaxError __GLOBAL_PARSER_CONTEXT._prepareParse(pctxt) xmlparser.xmlCtxtUseOptions( pctxt, - parse_options) + self._parse_options) parse_error = xmlparser.xmlParseDocument(pctxt) # in case of errors, clean up context plus any document if parse_error != 0 or not pctxt.wellFormed: @@ -116,18 +135,20 @@ result = pctxt.myDoc __GLOBAL_PARSER_CONTEXT._finalizeParse(result) xmlparser.xmlFreeParserCtxt(pctxt) + self._error_log.disconnect() return result cdef xmlDoc* parseDocFromFile(self, char* filename) except NULL: cdef xmlDoc* result cdef xmlParserCtxt* pctxt + self._error_log.connect() __GLOBAL_PARSER_CONTEXT._initParse() pctxt = xmlparser.xmlNewParserCtxt() __GLOBAL_PARSER_CONTEXT._prepareParse(pctxt) # XXX set options twice? 
needed to shut up libxml2 xmlparser.xmlCtxtUseOptions(pctxt, self._parse_options) result = xmlparser.xmlCtxtReadFile(pctxt, filename, - NULL, parse_options) + NULL, self._parse_options) if result is NULL: if pctxt.lastError.domain == xmlerror.XML_FROM_IO: raise IOError, "Could not open file %s" % filename @@ -141,31 +162,43 @@ raise XMLSyntaxError __GLOBAL_PARSER_CONTEXT._finalizeParse(result) xmlparser.xmlFreeParserCtxt(pctxt) + self._error_log.disconnect() return result - property error_log: - def __get__(self): - return __build_error_log_tuple(self) +cdef XMLParser __DEFAULT_PARSER +__DEFAULT_PARSER = XMLParser() + +cdef XMLParser __ORIG_DEFAULT_PARSER +__ORIG_DEFAULT_PARSER = __DEFAULT_PARSER + +def set_default_parser(parser=None): + """Set a default XMLParser. This parser is used globally whenever no + parser is supplied to the various parse functions of the lxml API. If + this function is called without a parser (or if it is None), the default + parser is reset to the original configuration. + """ + if parser is not None: + __DEFAULT_PARSER = parser + else: + __DEFAULT_PARSER = __ORIG_DEFAULT_PARSER -XMLParser._DEFAULT_PARSE_OPTIONS = ( - xmlparser.XML_PARSE_NOENT | - xmlparser.XML_PARSE_NOCDATA | - xmlparser.XML_PARSE_NOWARNING | - xmlparser.XML_PARSE_NOERROR - ) +cdef int _HTML_DEFAULT_PARSE_OPTIONS +_HTML_DEFAULT_PARSE_OPTIONS = ( + htmlparser.HTML_PARSE_RECOVER | + htmlparser.HTML_PARSE_NOERROR | + htmlparser.HTML_PARSE_NOWARNING + ) -cdef class HTMLParser(_BaseParser): +cdef class HTMLParser(BaseParser): """The HTML parser. This parser allows reading broken HTML into XML. 
""" - cdef int _DEFAULT_PARSE_OPTIONS + cdef int _parse_options def __init__(self, recover=True, compact_text=True, no_network=False, from_parser=None): cdef int parse_options - if from_parser is not None: - parse_options = (from_parser)._parse_options - else: - parse_options = _HTML_DEFAULT_PARSE_OPTIONS + BaseParser.__init__(self) + parse_options = _HTML_DEFAULT_PARSE_OPTIONS if not recover: parse_options = parse_options & ~htmlparser.HTML_PARSE_RECOVER @@ -184,6 +217,7 @@ cdef char* c_text cdef int parse_error cdef int c_len + self._error_log.connect() __GLOBAL_PARSER_CONTEXT._initParse() pctxt = htmlparser.htmlCreateMemoryParserCtxt( _cstr(text), python.PyString_GET_SIZE(text)) @@ -199,12 +233,14 @@ result = pctxt.myDoc __GLOBAL_PARSER_CONTEXT._finalizeParse(result) htmlparser.htmlFreeParserCtxt(pctxt) + self._error_log.disconnect() return result cdef xmlDoc* parseDocFromFile(self, char* filename) except NULL: cdef xmlDoc* result cdef xmlParserCtxt* pctxt cdef int parser_error + self._error_log.connect() __GLOBAL_PARSER_CONTEXT._initParse() pctxt = htmlparser.htmlCreateFileParserCtxt(filename, NULL) __GLOBAL_PARSER_CONTEXT._prepareParse(pctxt) @@ -219,146 +255,37 @@ result = pctxt.myDoc __GLOBAL_PARSER_CONTEXT._finalizeParse(result) htmlparser.htmlFreeParserCtxt(pctxt) + self._error_log.disconnect() return result -HTMLParser._DEFAULT_PARSE_OPTIONS = ( - htmlparser.HTML_PARSE_RECOVER | - htmlparser.HTML_PARSE_NOERROR | - htmlparser.HTML_PARSE_NOWARNING - ) - - -def set_default_parser(parser=None): - """Set a default XMLParser. This parser is used globally whenever no - parser is supplied to the various parse functions of the lxml API. If - this function is called without a parser (or if it is None), the default - parser is reset to the original configuration. 
- """ - if parser is not None: - _XML_DEFAULT_PARSE_OPTIONS = (parser)._parse_options - else: - _XML_DEFAULT_PARSE_OPTIONS = _XML_ORIG_DEFAULT_PARSE_OPTIONS - +cdef HTMLParser __DEFAULT_HTML_PARSER +__DEFAULT_HTML_PARSER = HTMLParser() cdef class Parser: - cdef xmlDict* _c_dict - cdef int _parser_initialized - - def __init__(self): - self._c_dict = NULL - self._parser_initialized = 0 - - def __dealloc__(self): - #print "cleanup parser" - if self._c_dict is not NULL: - #print "freeing dictionary (cleanup parser)" - xmlparser.xmlDictFree(self._c_dict) - cdef xmlDoc* parseDoc(self, text, parser) except NULL: """Parse document, share dictionary if possible. """ - cdef xmlDoc* result - cdef xmlParserCtxt* pctxt - cdef int parse_error - - if parser is not None: - parse_options = (parser)._parse_options + if parser is None: + return __DEFAULT_PARSER.parseDoc(text) + elif isinstance(parser, XMLParser): + return (parser).parseDoc(text) + elif isinstance(parser, HTMLParser): + return (parser).parseDoc(text) else: - parse_options = _XML_DEFAULT_PARSE_OPTIONS - - self._initParse() - pctxt = xmlparser.xmlCreateDocParserCtxt(_cstr(text)) - if pctxt is NULL: - raise XMLSyntaxError - - self._prepareParse(pctxt) - xmlparser.xmlCtxtUseOptions( - pctxt, - parse_options) - parse_error = xmlparser.xmlParseDocument(pctxt) - # in case of errors, clean up context plus any document - if parse_error != 0 or not pctxt.wellFormed: - if pctxt.myDoc is not NULL: - tree.xmlFreeDoc(pctxt.myDoc) - pctxt.myDoc = NULL - xmlparser.xmlFreeParserCtxt(pctxt) - raise XMLSyntaxError - result = pctxt.myDoc - self._finalizeParse(result) - xmlparser.xmlFreeParserCtxt(pctxt) - return result + raise TypeError, "invalid parser" cdef xmlDoc* parseDocFromFile(self, char* filename, parser) except NULL: - cdef int parse_options - cdef xmlDoc* result - cdef xmlParserCtxt* pctxt - - if parser is not None: - parse_options = (parser)._parse_options + if parser is None: + return 
__DEFAULT_PARSER.parseDocFromFile(filename) + elif isinstance(parser, XMLParser): + return (parser).parseDocFromFile(filename) + elif isinstance(parser, HTMLParser): + return (parser).parseDocFromFile(filename) else: - parse_options = _XML_DEFAULT_PARSE_OPTIONS + raise TypeError, "invalid parser" - self._initParse() - pctxt = xmlparser.xmlNewParserCtxt() - self._prepareParse(pctxt) - # XXX set options twice? needed to shut up libxml2 - xmlparser.xmlCtxtUseOptions(pctxt, parse_options) - result = xmlparser.xmlCtxtReadFile(pctxt, filename, - NULL, parse_options) - if result is NULL: - if pctxt.lastError.domain == xmlerror.XML_FROM_IO: - raise IOError, "Could not open file %s" % filename - # in case of errors, clean up context plus any document - # XXX other errors? - if not pctxt.wellFormed: - if pctxt.myDoc is not NULL: - tree.xmlFreeDoc(pctxt.myDoc) - pctxt.myDoc = NULL - xmlparser.xmlFreeParserCtxt(pctxt) - raise XMLSyntaxError - self._finalizeParse(result) - xmlparser.xmlFreeParserCtxt(pctxt) - return result - - cdef void _initParse(self): - if not self._parser_initialized: - xmlparser.xmlInitParser() - self._parser_initialized = 1 - - cdef void _prepareParse(self, xmlParserCtxt* pctxt): - if self._c_dict is not NULL and pctxt.dict is not NULL: - #print "sharing dictionary (parseDoc)" - xmlparser.xmlDictFree(pctxt.dict) - pctxt.dict = self._c_dict - xmlparser.xmlDictReference(pctxt.dict) - - cdef void _finalizeParse(self, xmlDoc* result): - # store dict of last object parsed if no shared dict yet - if self._c_dict is NULL: - #print "storing shared dict" - self._c_dict = result.dict - xmlparser.xmlDictReference(self._c_dict) - cdef xmlDoc* newDoc(self): cdef xmlDoc* result - cdef xmlDict* d - result = tree.xmlNewDoc("1.0") - - if self._c_dict is NULL: - # we need to get dict from the new document if it's there, - # otherwise make one - if result.dict is not NULL: - d = result.dict - else: - d = xmlparser.xmlDictCreate() - result.dict = d - self._c_dict = d - 
xmlparser.xmlDictReference(self._c_dict) - else: - # we need to reuse the central dict and get rid of the new one - if result.dict is not NULL: - xmlparser.xmlDictFree(result.dict) - result.dict = self._c_dict - xmlparser.xmlDictReference(result.dict) + __GLOBAL_PARSER_CONTEXT._resetDict(result) return result From scoder at codespeak.net Sun Mar 26 21:38:34 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Mar 26 21:38:40 2006 Subject: [Lxml-checkins] r25018 - lxml/branch/htmlparser/src/lxml Message-ID: <20060326193834.69BC71008F@code0.codespeak.net> Author: scoder Date: Sun Mar 26 21:38:23 2006 New Revision: 25018 Modified: lxml/branch/htmlparser/src/lxml/htmlparser.pxd lxml/branch/htmlparser/src/lxml/parser.pxi lxml/branch/htmlparser/src/lxml/python.pxd Log: some rewrites, still not working Modified: lxml/branch/htmlparser/src/lxml/htmlparser.pxd ============================================================================== --- lxml/branch/htmlparser/src/lxml/htmlparser.pxd (original) +++ lxml/branch/htmlparser/src/lxml/htmlparser.pxd Sun Mar 26 21:38:23 2006 @@ -16,3 +16,10 @@ xmlParserCtxt* htmlCreateFileParserCtxt(char* filename, char* encoding) void htmlFreeParserCtxt(xmlParserCtxt* ctxt) int htmlParseDocument(xmlParserCtxt* ctxt) + + xmlDoc* htmlCtxtReadFile(xmlParserCtxt* ctxt, + char* filename, char* encoding, + int options) + xmlDoc* htmlCtxtReadDoc(xmlParserCtxt* ctxt, + char* buffer, char* URL, char* encoding, + int options) Modified: lxml/branch/htmlparser/src/lxml/parser.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/parser.pxi (original) +++ lxml/branch/htmlparser/src/lxml/parser.pxi Sun Mar 26 21:38:23 2006 @@ -7,7 +7,12 @@ class XMLSyntaxError(LxmlSyntaxError): pass +class HTMLSyntaxError(LxmlSyntaxError): + pass + cdef class _ParserContext: + """Global parser context to share dictionary if possible. 
+ """ cdef xmlDict* _c_dict cdef int _initialized @@ -24,7 +29,7 @@ xmlparser.xmlInitParser() self._initialized = 1 - cdef void _prepareParse(self, xmlParserCtxt* pctxt): + cdef void _initContext(self, xmlParserCtxt* pctxt): if self._c_dict is not NULL and pctxt.dict is not NULL: xmlparser.xmlDictFree(pctxt.dict) pctxt.dict = self._c_dict @@ -87,8 +92,11 @@ The keyword arguments in the constructor are mainly based on the libxml2 parser configuration. A DTD will only be loaded if validation or attribute default values are requested. + + Note that you must not share parsers between threads. """ cdef int _parse_options + cdef xmlParserCtxt* _file_parser_ctxt def __init__(self, attribute_defaults=False, dtd_validation=False, no_network=False, ns_clean=False): cdef int parse_options @@ -107,6 +115,7 @@ parse_options = parse_options | xmlparser.XML_PARSE_NSCLEAN self._parse_options = parse_options + self._file_parser_ctxt = NULL cdef xmlDoc* parseDoc(self, text) except NULL: """Parse document, share dictionary if possible. @@ -120,7 +129,7 @@ if pctxt is NULL: raise XMLSyntaxError - __GLOBAL_PARSER_CONTEXT._prepareParse(pctxt) + __GLOBAL_PARSER_CONTEXT._initContext(pctxt) xmlparser.xmlCtxtUseOptions( pctxt, self._parse_options) @@ -143,8 +152,11 @@ cdef xmlParserCtxt* pctxt self._error_log.connect() __GLOBAL_PARSER_CONTEXT._initParse() - pctxt = xmlparser.xmlNewParserCtxt() - __GLOBAL_PARSER_CONTEXT._prepareParse(pctxt) + pctxt = self._file_parser_ctxt + if pctxt is NULL: + pctxt = xmlparser.xmlNewParserCtxt() + __GLOBAL_PARSER_CONTEXT._initContext(pctxt) + self._file_parser_ctxt = pctxt # XXX set options twice? 
needed to shut up libxml2 xmlparser.xmlCtxtUseOptions(pctxt, self._parse_options) result = xmlparser.xmlCtxtReadFile(pctxt, filename, @@ -158,10 +170,8 @@ if pctxt.myDoc is not NULL: tree.xmlFreeDoc(pctxt.myDoc) pctxt.myDoc = NULL - xmlparser.xmlFreeParserCtxt(pctxt) raise XMLSyntaxError __GLOBAL_PARSER_CONTEXT._finalizeParse(result) - xmlparser.xmlFreeParserCtxt(pctxt) self._error_log.disconnect() return result @@ -192,8 +202,12 @@ cdef class HTMLParser(BaseParser): """The HTML parser. This parser allows reading broken HTML into XML. + + Note that you must not share parsers between threads. """ cdef int _parse_options + cdef xmlParserCtxt* _memory_parser_ctxt + cdef xmlParserCtxt* _file_parser_ctxt def __init__(self, recover=True, compact_text=True, no_network=False, from_parser=None): cdef int parse_options @@ -208,6 +222,8 @@ parse_options = parse_options | htmlparser.HTML_PARSE_NONET self._parse_options = parse_options + self._memory_parser_ctxt = NULL + self._file_parser_ctxt = NULL cdef xmlDoc* parseDoc(self, text) except NULL: """Parse HTML document, share dictionary if possible. @@ -215,26 +231,26 @@ cdef xmlDoc* result cdef xmlParserCtxt* pctxt cdef char* c_text - cdef int parse_error cdef int c_len self._error_log.connect() __GLOBAL_PARSER_CONTEXT._initParse() - pctxt = htmlparser.htmlCreateMemoryParserCtxt( - _cstr(text), python.PyString_GET_SIZE(text)) - __GLOBAL_PARSER_CONTEXT._prepareParse(pctxt) + c_text = _cstr(text) + pctxt = self._memory_parser_ctxt + if pctxt is NULL: + pctxt = htmlparser.htmlCreateMemoryParserCtxt( + c_text, python.PyString_GET_SIZE(text)) + __GLOBAL_PARSER_CONTEXT._initContext(pctxt) + self._memory_parser_ctxt = pctxt # XXX parser options? 
- parser_error = htmlparser.htmlParseDocument(pctxt) - #if parser_error != 0: - # if pctxt.myDoc is not NULL: - # tree.xmlFreeDoc(pctxt.myDoc) - # pctxt.myDoc = NULL - # htmlparser.htmlFreeParserCtxt(pctxt) - # raise SyntaxError - result = pctxt.myDoc - __GLOBAL_PARSER_CONTEXT._finalizeParse(result) - htmlparser.htmlFreeParserCtxt(pctxt) + result = htmlparser.htmlCtxtReadDoc( + pctxt, c_text, NULL, NULL, self._parse_options) + if result is not NULL: + __GLOBAL_PARSER_CONTEXT._finalizeParse(result) self._error_log.disconnect() - return result + if result is NULL: + raise HTMLSyntaxError + else: + return result cdef xmlDoc* parseDocFromFile(self, char* filename) except NULL: cdef xmlDoc* result @@ -242,29 +258,26 @@ cdef int parser_error self._error_log.connect() __GLOBAL_PARSER_CONTEXT._initParse() - pctxt = htmlparser.htmlCreateFileParserCtxt(filename, NULL) - __GLOBAL_PARSER_CONTEXT._prepareParse(pctxt) - - parser_error = htmlparser.htmlParseDocument(pctxt) - #if parser_error != 0: - # if pctxt.myDoc is not NULL: - # tree.xmlFreeDoc(pctxt.myDoc) - # pctxt.myDoc = NULL - # htmlparser.htmlFreeParserCtxt(pctxt) - # raise SyntaxError - result = pctxt.myDoc - __GLOBAL_PARSER_CONTEXT._finalizeParse(result) - htmlparser.htmlFreeParserCtxt(pctxt) + pctxt = self._file_parser_ctxt + if pctxt is NULL: + pctxt = htmlparser.htmlCreateFileParserCtxt(filename, NULL) + __GLOBAL_PARSER_CONTEXT._initContext(pctxt) + self._file_parser_ctxt = pctxt + result = htmlparser.htmlCtxtReadFile( + pctxt, filename, NULL, self._parse_options) + if result is not NULL: + __GLOBAL_PARSER_CONTEXT._finalizeParse(result) self._error_log.disconnect() - return result + if result is NULL: + raise HTMLSyntaxError + else: + return result cdef HTMLParser __DEFAULT_HTML_PARSER __DEFAULT_HTML_PARSER = HTMLParser() cdef class Parser: cdef xmlDoc* parseDoc(self, text, parser) except NULL: - """Parse document, share dictionary if possible. 
- """ if parser is None: return __DEFAULT_PARSER.parseDoc(text) elif isinstance(parser, XMLParser): Modified: lxml/branch/htmlparser/src/lxml/python.pxd ============================================================================== --- lxml/branch/htmlparser/src/lxml/python.pxd (original) +++ lxml/branch/htmlparser/src/lxml/python.pxd Sun Mar 26 21:38:23 2006 @@ -14,6 +14,7 @@ char* errors) cdef object PyUnicode_DecodeUTF8(char* s, int size, char* errors) cdef object PyUnicode_AsUTF8String(object ustring) + cdef int PyString_GET_SIZE(object s) cdef object PyString_FromStringAndSize(char* s, int size) cdef object PyString_FromString(char* s) cdef object PyString_FromFormat(char* format, ...) From scoder at codespeak.net Mon Mar 27 11:44:38 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 27 11:44:39 2006 Subject: [Lxml-checkins] r25021 - lxml/pyrex/dist Message-ID: <20060327094438.A482010088@code0.codespeak.net> Author: scoder Date: Mon Mar 27 11:44:01 2006 New Revision: 25021 Modified: lxml/pyrex/dist/Pyrex-0.9.3.1-1.noarch.rpm lxml/pyrex/dist/Pyrex-0.9.3.1-1.src.rpm lxml/pyrex/dist/Pyrex-0.9.3.1.tar.gz Log: updated source and SRPM distributions of Pyrex to current SVN Modified: lxml/pyrex/dist/Pyrex-0.9.3.1-1.noarch.rpm ============================================================================== Binary files. No diff available. Modified: lxml/pyrex/dist/Pyrex-0.9.3.1-1.src.rpm ============================================================================== Binary files. No diff available. Modified: lxml/pyrex/dist/Pyrex-0.9.3.1.tar.gz ============================================================================== Binary files. No diff available. 
From scoder at codespeak.net Mon Mar 27 14:14:03 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 27 14:14:04 2006 Subject: [Lxml-checkins] r25029 - lxml/branch/htmlparser/src/lxml Message-ID: <20060327121403.3B1D410086@code0.codespeak.net> Author: scoder Date: Mon Mar 27 14:14:02 2006 New Revision: 25029 Modified: lxml/branch/htmlparser/src/lxml/parser.pxi Log: rewrite of parser context: simplified and more explicit on dict handling Modified: lxml/branch/htmlparser/src/lxml/parser.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/parser.pxi (original) +++ lxml/branch/htmlparser/src/lxml/parser.pxi Mon Mar 27 14:14:02 2006 @@ -10,8 +10,11 @@ class HTMLSyntaxError(LxmlSyntaxError): pass +class ParserError(LxmlError): + pass + cdef class _ParserContext: - """Global parser context to share dictionary if possible. + """Global parser context to share the string dictionary. """ cdef xmlDict* _c_dict cdef int _initialized @@ -24,42 +27,34 @@ if self._c_dict is not NULL: xmlparser.xmlDictFree(self._c_dict) - cdef void _initParse(self): + cdef void _initParser(self): if not self._initialized: xmlparser.xmlInitParser() self._initialized = 1 - cdef void _initContext(self, xmlParserCtxt* pctxt): - if self._c_dict is not NULL and pctxt.dict is not NULL: - xmlparser.xmlDictFree(pctxt.dict) + cdef void _initParserDict(self, xmlParserCtxt* pctxt): + "Assure we always use the same string dictionary." 
+ if self._c_dict is not NULL: + if pctxt.dict is not NULL: + xmlparser.xmlDictFree(pctxt.dict) pctxt.dict = self._c_dict xmlparser.xmlDictReference(pctxt.dict) - cdef void _finalizeParse(self, xmlDoc* result): - # store dict of last object parsed if no shared dict yet + cdef void _initDocDict(self, xmlDoc* result): + "Store dict of last object parsed if no shared dict yet" + if result is NULL: + return if self._c_dict is NULL: #print "storing shared dict" + if result.dict is NULL: + result.dict = xmlparser.xmlDictCreate() self._c_dict = result.dict - xmlparser.xmlDictReference(self._c_dict) - - cdef void _resetDict(self, xmlDoc* result): - cdef xmlDict* d - if self._c_dict is NULL: - # we need to get dict from the new document if it's there, - # otherwise make one - if result.dict is not NULL: - d = result.dict - else: - d = xmlparser.xmlDictCreate() - result.dict = d - self._c_dict = d - xmlparser.xmlDictReference(self._c_dict) - else: - # we need to reuse the central dict and get rid of the new one + xmlparser.xmlDictReference(result.dict) + elif result.dict != self._c_dict: if result.dict is not NULL: xmlparser.xmlDictFree(result.dict) result.dict = self._c_dict - xmlparser.xmlDictReference(result.dict) + xmlparser.xmlDictReference(self._c_dict) cdef _ParserContext __GLOBAL_PARSER_CONTEXT __GLOBAL_PARSER_CONTEXT = _ParserContext() @@ -100,6 +95,7 @@ def __init__(self, attribute_defaults=False, dtd_validation=False, no_network=False, ns_clean=False): cdef int parse_options + self._file_parser_ctxt = NULL BaseParser.__init__(self) parse_options = _XML_DEFAULT_PARSE_OPTIONS @@ -115,21 +111,23 @@ parse_options = parse_options | xmlparser.XML_PARSE_NSCLEAN self._parse_options = parse_options - self._file_parser_ctxt = NULL - cdef xmlDoc* parseDoc(self, text) except NULL: + def __dealloc__(self): + if self._file_parser_ctxt != NULL: + xmlparser.xmlFreeParserCtxt(self._file_parser_ctxt) + + cdef xmlDoc* parseDoc(self, text_utf) except NULL: """Parse document, share 
dictionary if possible. """ cdef xmlDoc* result cdef xmlParserCtxt* pctxt cdef int parse_error self._error_log.connect() - __GLOBAL_PARSER_CONTEXT._initParse() - pctxt = xmlparser.xmlCreateDocParserCtxt(_cstr(text)) + __GLOBAL_PARSER_CONTEXT._initParser() + pctxt = xmlparser.xmlCreateDocParserCtxt(_cstr(text_utf)) if pctxt is NULL: - raise XMLSyntaxError - - __GLOBAL_PARSER_CONTEXT._initContext(pctxt) + raise XMLSyntaxError, "Failed to create parser context" + __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) xmlparser.xmlCtxtUseOptions( pctxt, self._parse_options) @@ -142,7 +140,7 @@ xmlparser.xmlFreeParserCtxt(pctxt) raise XMLSyntaxError result = pctxt.myDoc - __GLOBAL_PARSER_CONTEXT._finalizeParse(result) + __GLOBAL_PARSER_CONTEXT._initDocDict(result) xmlparser.xmlFreeParserCtxt(pctxt) self._error_log.disconnect() return result @@ -151,12 +149,12 @@ cdef xmlDoc* result cdef xmlParserCtxt* pctxt self._error_log.connect() - __GLOBAL_PARSER_CONTEXT._initParse() + __GLOBAL_PARSER_CONTEXT._initParser() pctxt = self._file_parser_ctxt if pctxt is NULL: pctxt = xmlparser.xmlNewParserCtxt() - __GLOBAL_PARSER_CONTEXT._initContext(pctxt) self._file_parser_ctxt = pctxt + __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) # XXX set options twice? 
needed to shut up libxml2 xmlparser.xmlCtxtUseOptions(pctxt, self._parse_options) result = xmlparser.xmlCtxtReadFile(pctxt, filename, @@ -171,7 +169,7 @@ tree.xmlFreeDoc(pctxt.myDoc) pctxt.myDoc = NULL raise XMLSyntaxError - __GLOBAL_PARSER_CONTEXT._finalizeParse(result) + __GLOBAL_PARSER_CONTEXT._initDocDict(result) self._error_log.disconnect() return result @@ -196,8 +194,8 @@ cdef int _HTML_DEFAULT_PARSE_OPTIONS _HTML_DEFAULT_PARSE_OPTIONS = ( htmlparser.HTML_PARSE_RECOVER | - htmlparser.HTML_PARSE_NOERROR | - htmlparser.HTML_PARSE_NOWARNING + htmlparser.HTML_PARSE_NOWARNING | + htmlparser.HTML_PARSE_NOERROR ) cdef class HTMLParser(BaseParser): @@ -211,6 +209,8 @@ def __init__(self, recover=True, compact_text=True, no_network=False, from_parser=None): cdef int parse_options + self._memory_parser_ctxt = NULL + self._file_parser_ctxt = NULL BaseParser.__init__(self) parse_options = _HTML_DEFAULT_PARSE_OPTIONS @@ -222,10 +222,14 @@ parse_options = parse_options | htmlparser.HTML_PARSE_NONET self._parse_options = parse_options - self._memory_parser_ctxt = NULL - self._file_parser_ctxt = NULL - cdef xmlDoc* parseDoc(self, text) except NULL: + def __dealloc__(self): + if self._file_parser_ctxt != NULL: + htmlparser.htmlFreeParserCtxt(self._file_parser_ctxt) + if self._memory_parser_ctxt != NULL: + htmlparser.htmlFreeParserCtxt(self._memory_parser_ctxt) + + cdef xmlDoc* parseDoc(self, text_utf) except NULL: """Parse HTML document, share dictionary if possible. 
""" cdef xmlDoc* result @@ -233,45 +237,42 @@ cdef char* c_text cdef int c_len self._error_log.connect() - __GLOBAL_PARSER_CONTEXT._initParse() - c_text = _cstr(text) + __GLOBAL_PARSER_CONTEXT._initParser() + c_text = _cstr(text_utf) pctxt = self._memory_parser_ctxt if pctxt is NULL: pctxt = htmlparser.htmlCreateMemoryParserCtxt( - c_text, python.PyString_GET_SIZE(text)) - __GLOBAL_PARSER_CONTEXT._initContext(pctxt) + c_text, python.PyString_GET_SIZE(text_utf)) + if pctxt is NULL: + raise ParserError, "Failed to create parser context" self._memory_parser_ctxt = pctxt + __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) # XXX parser options? result = htmlparser.htmlCtxtReadDoc( pctxt, c_text, NULL, NULL, self._parse_options) - if result is not NULL: - __GLOBAL_PARSER_CONTEXT._finalizeParse(result) + __GLOBAL_PARSER_CONTEXT._initDocDict(result) self._error_log.disconnect() - if result is NULL: - raise HTMLSyntaxError - else: - return result + print result, text_utf + return result cdef xmlDoc* parseDocFromFile(self, char* filename) except NULL: cdef xmlDoc* result cdef xmlParserCtxt* pctxt cdef int parser_error self._error_log.connect() - __GLOBAL_PARSER_CONTEXT._initParse() + __GLOBAL_PARSER_CONTEXT._initParser() pctxt = self._file_parser_ctxt if pctxt is NULL: pctxt = htmlparser.htmlCreateFileParserCtxt(filename, NULL) - __GLOBAL_PARSER_CONTEXT._initContext(pctxt) + if pctxt is NULL: + raise ParserError, "Failed to create parser context" self._file_parser_ctxt = pctxt + __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) result = htmlparser.htmlCtxtReadFile( pctxt, filename, NULL, self._parse_options) - if result is not NULL: - __GLOBAL_PARSER_CONTEXT._finalizeParse(result) + __GLOBAL_PARSER_CONTEXT._initDocDict(result) self._error_log.disconnect() - if result is NULL: - raise HTMLSyntaxError - else: - return result + return result cdef HTMLParser __DEFAULT_HTML_PARSER __DEFAULT_HTML_PARSER = HTMLParser() @@ -300,5 +301,5 @@ cdef xmlDoc* newDoc(self): cdef xmlDoc* result 
result = tree.xmlNewDoc("1.0") - __GLOBAL_PARSER_CONTEXT._resetDict(result) + __GLOBAL_PARSER_CONTEXT._initDocDict(result) return result From scoder at codespeak.net Mon Mar 27 14:15:44 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 27 14:15:46 2006 Subject: [Lxml-checkins] r25030 - lxml/branch/htmlparser/src/lxml Message-ID: <20060327121544.065F510086@code0.codespeak.net> Author: scoder Date: Mon Mar 27 14:15:43 2006 New Revision: 25030 Modified: lxml/branch/htmlparser/src/lxml/etree.pyx Log: make all _Document methods C functions, work around for Pyrex bug when calling methods directly on C function results Modified: lxml/branch/htmlparser/src/lxml/etree.pyx ============================================================================== --- lxml/branch/htmlparser/src/lxml/etree.pyx (original) +++ lxml/branch/htmlparser/src/lxml/etree.pyx Mon Mar 27 14:15:43 2006 @@ -70,17 +70,17 @@ # the document #print "freeing document:", self._c_doc #displayNode(self._c_doc, 0) - #print self._c_doc.dict is theParser._c_dict + #print self._c_doc, self._c_doc.dict is __GLOBAL_PARSER_CONTEXT._c_dict tree.xmlFreeDoc(self._c_doc) - def getroot(self): + cdef getroot(self): cdef xmlNode* c_node c_node = tree.xmlDocGetRootElement(self._c_doc) if c_node is NULL: return None return _elementFactory(self, c_node) - def buildNewPrefix(self): + cdef buildNewPrefix(self): ns = python.PyString_FromFormat("ns%d", self._ns_counter) self._ns_counter = self._ns_counter + 1 return ns @@ -1199,10 +1199,14 @@ return etree def HTML(text): - return _parseMemoryDocument(text, __DEFAULT_HTML_PARSER).getroot() + cdef _Document doc + doc = _parseMemoryDocument(text, __DEFAULT_HTML_PARSER) + return doc.getroot() def XML(text): - return _parseMemoryDocument(text, None).getroot() + cdef _Document doc + doc = _parseMemoryDocument(text, None) + return doc.getroot() fromstring = XML From scoder at codespeak.net Mon Mar 27 21:35:13 2006 From: scoder at codespeak.net (scoder@codespeak.net) 
Date: Mon Mar 27 21:35:14 2006 Subject: [Lxml-checkins] r25049 - lxml/branch/htmlparser/src/lxml Message-ID: <20060327193513.579DE1007B@code0.codespeak.net> Author: scoder Date: Mon Mar 27 21:35:11 2006 New Revision: 25049 Modified: lxml/branch/htmlparser/src/lxml/etree.pyx lxml/branch/htmlparser/src/lxml/parser.pxi Log: remove class Parser and make its methods global helper functions, some clean up Modified: lxml/branch/htmlparser/src/lxml/etree.pyx ============================================================================== --- lxml/branch/htmlparser/src/lxml/etree.pyx (original) +++ lxml/branch/htmlparser/src/lxml/etree.pyx Mon Mar 27 21:35:11 2006 @@ -154,14 +154,14 @@ if filename is None: filename = source # open filename - c_doc = theParser.parseDocFromFile(filename, parser) + c_doc = _parseDocFromFile(filename, parser) return _documentFactory(c_doc) cdef _Document _parseMemoryDocument(text, parser): cdef xmlDoc* c_doc if python.PyUnicode_Check(text): text = _stripDeclaration(_utf8(text)) - c_doc = theParser.parseDoc(text, parser) + c_doc = _parseDoc(text, parser) return _documentFactory(c_doc) cdef _Document _documentFactory(xmlDoc* c_doc): @@ -435,7 +435,7 @@ def __copy__(self): cdef xmlNode* c_node cdef xmlDoc* c_doc - c_doc = theParser.newDoc() + c_doc = _newDoc() doc = _documentFactory(c_doc) c_node = tree.xmlDocCopyNode(self._c_node, c_doc, 1) tree.xmlDocSetRootElement(c_doc, c_node) @@ -1141,7 +1141,7 @@ cdef xmlDoc* c_doc cdef _Document doc ns_utf, name_utf = _getNsTag(_tag) - c_doc = theParser.newDoc() + c_doc = _newDoc() c_node = _createElement(c_doc, name_utf, attrib, _extra) tree.xmlDocSetRootElement(c_doc, c_node) doc = _documentFactory(c_doc) @@ -1156,7 +1156,7 @@ text = ' ' else: text = ' %s ' % _utf8(text) - doc = _documentFactory( theParser.newDoc() ) + doc = _documentFactory( _newDoc() ) c_node = _createComment(doc._c_doc, text) tree.xmlAddChild(doc._c_doc, c_node) return _commentFactory(doc, c_node) @@ -1185,7 +1185,7 @@ elif file is not 
None: doc = _parseDocument(file, parser) else: - doc = _documentFactory( theParser.newDoc() ) + doc = _documentFactory( _newDoc() ) etree = _elementTreeFactory(doc, element) @@ -1273,11 +1273,6 @@ include "proxy.pxi" # Proxy handling (element backpointers/memory/etc.) -# Instantiate globally shared XML parser to enable dictionary sharing -cdef Parser theParser -theParser = Parser() - - # Private helper functions cdef void _raiseIfNone(el): if el is None: Modified: lxml/branch/htmlparser/src/lxml/parser.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/parser.pxi (original) +++ lxml/branch/htmlparser/src/lxml/parser.pxi Mon Mar 27 21:35:11 2006 @@ -34,11 +34,12 @@ cdef void _initParserDict(self, xmlParserCtxt* pctxt): "Assure we always use the same string dictionary." - if self._c_dict is not NULL: - if pctxt.dict is not NULL: - xmlparser.xmlDictFree(pctxt.dict) - pctxt.dict = self._c_dict - xmlparser.xmlDictReference(pctxt.dict) + if self._c_dict is NULL or self._c_dict is pctxt.dict: + return + if pctxt.dict is not NULL: + xmlparser.xmlDictFree(pctxt.dict) + pctxt.dict = self._c_dict + xmlparser.xmlDictReference(pctxt.dict) cdef void _initDocDict(self, xmlDoc* result): "Store dict of last object parsed if no shared dict yet" @@ -69,6 +70,10 @@ return self._error_log.copy() +############################################################ +## XML parser +############################################################ + cdef int _XML_DEFAULT_PARSE_OPTIONS _XML_DEFAULT_PARSE_OPTIONS = ( xmlparser.XML_PARSE_NOENT | @@ -97,8 +102,8 @@ cdef int parse_options self._file_parser_ctxt = NULL BaseParser.__init__(self) - parse_options = _XML_DEFAULT_PARSE_OPTIONS + parse_options = _XML_DEFAULT_PARSE_OPTIONS if dtd_validation: parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD | \ xmlparser.XML_PARSE_DTDVALID @@ -116,7 +121,7 @@ if self._file_parser_ctxt != NULL: 
xmlparser.xmlFreeParserCtxt(self._file_parser_ctxt) - cdef xmlDoc* parseDoc(self, text_utf) except NULL: + cdef xmlDoc* _parseDoc(self, text_utf) except NULL: """Parse document, share dictionary if possible. """ cdef xmlDoc* result @@ -145,7 +150,7 @@ self._error_log.disconnect() return result - cdef xmlDoc* parseDocFromFile(self, char* filename) except NULL: + cdef xmlDoc* _parseDocFromFile(self, char* filename) except NULL: cdef xmlDoc* result cdef xmlParserCtxt* pctxt self._error_log.connect() @@ -173,10 +178,10 @@ self._error_log.disconnect() return result -cdef XMLParser __DEFAULT_PARSER +cdef BaseParser __DEFAULT_PARSER __DEFAULT_PARSER = XMLParser() -cdef XMLParser __ORIG_DEFAULT_PARSER +cdef BaseParser __ORIG_DEFAULT_PARSER __ORIG_DEFAULT_PARSER = __DEFAULT_PARSER def set_default_parser(parser=None): @@ -185,12 +190,18 @@ this function is called without a parser (or if it is None), the default parser is reset to the original configuration. """ - if parser is not None: + if parser is None: + __DEFAULT_PARSER = __ORIG_DEFAULT_PARSER + elif isinstance(parser, (HTMLParser, XMLParser)): __DEFAULT_PARSER = parser else: - __DEFAULT_PARSER = __ORIG_DEFAULT_PARSER + raise TypeError, "Invalid parser" +############################################################ +## HTML parser +############################################################ + cdef int _HTML_DEFAULT_PARSE_OPTIONS _HTML_DEFAULT_PARSE_OPTIONS = ( htmlparser.HTML_PARSE_RECOVER | @@ -212,8 +223,8 @@ self._memory_parser_ctxt = NULL self._file_parser_ctxt = NULL BaseParser.__init__(self) - parse_options = _HTML_DEFAULT_PARSE_OPTIONS + parse_options = _HTML_DEFAULT_PARSE_OPTIONS if not recover: parse_options = parse_options & ~htmlparser.HTML_PARSE_RECOVER if not compact_text: @@ -229,7 +240,7 @@ if self._memory_parser_ctxt != NULL: htmlparser.htmlFreeParserCtxt(self._memory_parser_ctxt) - cdef xmlDoc* parseDoc(self, text_utf) except NULL: + cdef xmlDoc* _parseDoc(self, text_utf) except NULL: """Parse HTML 
document, share dictionary if possible. """ cdef xmlDoc* result @@ -241,21 +252,20 @@ c_text = _cstr(text_utf) pctxt = self._memory_parser_ctxt if pctxt is NULL: - pctxt = htmlparser.htmlCreateMemoryParserCtxt( - c_text, python.PyString_GET_SIZE(text_utf)) + pctxt = htmlparser.htmlCreateMemoryParserCtxt('dummy', 5) if pctxt is NULL: + self._error_log.disconnect() raise ParserError, "Failed to create parser context" self._memory_parser_ctxt = pctxt __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) - # XXX parser options? result = htmlparser.htmlCtxtReadDoc( - pctxt, c_text, NULL, NULL, self._parse_options) + pctxt, c_text, '', NULL, self._parse_options) __GLOBAL_PARSER_CONTEXT._initDocDict(result) self._error_log.disconnect() - print result, text_utf + print hex(result), hex(result.dict), hex(__GLOBAL_PARSER_CONTEXT._c_dict), text_utf return result - cdef xmlDoc* parseDocFromFile(self, char* filename) except NULL: + cdef xmlDoc* _parseDocFromFile(self, char* filename) except NULL: cdef xmlDoc* result cdef xmlParserCtxt* pctxt cdef int parser_error @@ -265,6 +275,7 @@ if pctxt is NULL: pctxt = htmlparser.htmlCreateFileParserCtxt(filename, NULL) if pctxt is NULL: + self._error_log.disconnect() raise ParserError, "Failed to create parser context" self._file_parser_ctxt = pctxt __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) @@ -277,29 +288,32 @@ cdef HTMLParser __DEFAULT_HTML_PARSER __DEFAULT_HTML_PARSER = HTMLParser() -cdef class Parser: - cdef xmlDoc* parseDoc(self, text, parser) except NULL: - if parser is None: - return __DEFAULT_PARSER.parseDoc(text) - elif isinstance(parser, XMLParser): - return (parser).parseDoc(text) - elif isinstance(parser, HTMLParser): - return (parser).parseDoc(text) - else: - raise TypeError, "invalid parser" - - cdef xmlDoc* parseDocFromFile(self, char* filename, parser) except NULL: - if parser is None: - return __DEFAULT_PARSER.parseDocFromFile(filename) - elif isinstance(parser, XMLParser): - return (parser).parseDocFromFile(filename) - 
elif isinstance(parser, HTMLParser): - return (parser).parseDocFromFile(filename) - else: - raise TypeError, "invalid parser" +############################################################ +## helper functions for document creation +############################################################ + +cdef xmlDoc* _parseDoc(text_utf, parser) except NULL: + if parser is None: + parser = __DEFAULT_PARSER + if isinstance(parser, XMLParser): + return (parser)._parseDoc(text_utf) + elif isinstance(parser, HTMLParser): + return (parser)._parseDoc(text_utf) + else: + raise TypeError, "invalid parser" - cdef xmlDoc* newDoc(self): - cdef xmlDoc* result - result = tree.xmlNewDoc("1.0") - __GLOBAL_PARSER_CONTEXT._initDocDict(result) - return result +cdef xmlDoc* _parseDocFromFile(char* filename, parser) except NULL: + if parser is None: + parser = __DEFAULT_PARSER + if isinstance(parser, XMLParser): + return (parser)._parseDocFromFile(filename) + elif isinstance(parser, HTMLParser): + return (parser)._parseDocFromFile(filename) + else: + raise TypeError, "invalid parser" + +cdef xmlDoc* _newDoc(): + cdef xmlDoc* result + result = tree.xmlNewDoc("1.0") + __GLOBAL_PARSER_CONTEXT._initDocDict(result) + return result From scoder at codespeak.net Mon Mar 27 21:51:41 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 27 21:51:42 2006 Subject: [Lxml-checkins] r25050 - lxml/branch/htmlparser/src/lxml Message-ID: <20060327195141.9CC4C10041@code0.codespeak.net> Author: scoder Date: Mon Mar 27 21:51:40 2006 New Revision: 25050 Modified: lxml/branch/htmlparser/src/lxml/etree.pyx lxml/branch/htmlparser/src/lxml/parser.pxi Log: more clean up, fix _rootNodeOf() for _Document's Modified: lxml/branch/htmlparser/src/lxml/etree.pyx ============================================================================== --- lxml/branch/htmlparser/src/lxml/etree.pyx (original) +++ lxml/branch/htmlparser/src/lxml/etree.pyx Mon Mar 27 21:51:40 2006 @@ -1301,10 +1301,12 @@ cdef _NodeBase 
_rootNodeOf(object input): # call this to get the root node of a # _Document, _ElementTree or _NodeBase object - if hasattr(input, 'getroot'): # Document/ElementTree + if hasattr(input, 'getroot'): # ElementTree return <_NodeBase>(input.getroot()) elif isinstance(input, _NodeBase): return <_NodeBase>input + elif isinstance(input, _Document): + return (<_Document>input).getroot() else: return None Modified: lxml/branch/htmlparser/src/lxml/parser.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/parser.pxi (original) +++ lxml/branch/htmlparser/src/lxml/parser.pxi Mon Mar 27 21:51:40 2006 @@ -131,6 +131,7 @@ __GLOBAL_PARSER_CONTEXT._initParser() pctxt = xmlparser.xmlCreateDocParserCtxt(_cstr(text_utf)) if pctxt is NULL: + self._error_log.disconnect() raise XMLSyntaxError, "Failed to create parser context" __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) xmlparser.xmlCtxtUseOptions( @@ -143,6 +144,7 @@ tree.xmlFreeDoc(pctxt.myDoc) pctxt.myDoc = NULL xmlparser.xmlFreeParserCtxt(pctxt) + self._error_log.disconnect() raise XMLSyntaxError result = pctxt.myDoc __GLOBAL_PARSER_CONTEXT._initDocDict(result) @@ -166,6 +168,7 @@ NULL, self._parse_options) if result is NULL: if pctxt.lastError.domain == xmlerror.XML_FROM_IO: + self._error_log.disconnect() raise IOError, "Could not open file %s" % filename # in case of errors, clean up context plus any document # XXX other errors? 
@@ -173,6 +176,7 @@ if pctxt.myDoc is not NULL: tree.xmlFreeDoc(pctxt.myDoc) pctxt.myDoc = NULL + self._error_log.disconnect() raise XMLSyntaxError __GLOBAL_PARSER_CONTEXT._initDocDict(result) self._error_log.disconnect() @@ -206,6 +210,7 @@ _HTML_DEFAULT_PARSE_OPTIONS = ( htmlparser.HTML_PARSE_RECOVER | htmlparser.HTML_PARSE_NOWARNING | + htmlparser.HTML_PARSE_COMPACT | htmlparser.HTML_PARSE_NOERROR ) @@ -302,13 +307,13 @@ else: raise TypeError, "invalid parser" -cdef xmlDoc* _parseDocFromFile(char* filename, parser) except NULL: +cdef xmlDoc* _parseDocFromFile(filename, parser) except NULL: if parser is None: parser = __DEFAULT_PARSER if isinstance(parser, XMLParser): - return (parser)._parseDocFromFile(filename) + return (parser)._parseDocFromFile(_cstr(filename)) elif isinstance(parser, HTMLParser): - return (parser)._parseDocFromFile(filename) + return (parser)._parseDocFromFile(_cstr(filename)) else: raise TypeError, "invalid parser" From scoder at codespeak.net Mon Mar 27 22:57:11 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Mar 27 22:57:12 2006 Subject: [Lxml-checkins] r25053 - lxml/branch/htmlparser/src/lxml Message-ID: <20060327205711.17BA810079@code0.codespeak.net> Author: scoder Date: Mon Mar 27 22:57:09 2006 New Revision: 25053 Modified: lxml/branch/htmlparser/src/lxml/parser.pxi Log: small change Modified: lxml/branch/htmlparser/src/lxml/parser.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/parser.pxi (original) +++ lxml/branch/htmlparser/src/lxml/parser.pxi Mon Mar 27 22:57:09 2006 @@ -264,7 +264,7 @@ self._memory_parser_ctxt = pctxt __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) result = htmlparser.htmlCtxtReadDoc( - pctxt, c_text, '', NULL, self._parse_options) + pctxt, c_text, NULL, NULL, self._parse_options) __GLOBAL_PARSER_CONTEXT._initDocDict(result) self._error_log.disconnect() print hex(result), hex(result.dict), 
hex(__GLOBAL_PARSER_CONTEXT._c_dict), text_utf From scoder at codespeak.net Tue Mar 28 08:05:11 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 28 08:05:15 2006 Subject: [Lxml-checkins] r25061 - lxml/branch/htmlparser/src/lxml/tests Message-ID: <20060328060511.04D371007E@code0.codespeak.net> Author: scoder Date: Tue Mar 28 08:04:59 2006 New Revision: 25061 Added: lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py Log: added test case file Added: lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py ============================================================================== --- (empty file) +++ lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py Tue Mar 28 08:04:59 2006 @@ -0,0 +1,110 @@ +# -*- coding: UTF-8 -*- + +""" +HTML parser test cases for etree +""" + +import unittest +import tempfile + +from common_imports import etree, fileInTestDir, SillyFileLike, HelperTestCase + +class HtmlParserTestCaseBase(HelperTestCase): + """HTML parser test cases + """ + etree = etree + + html_str = "test

page title

" + broken_html_str = "

page title" + + def test_module_HTML(self): + print 1 + element = self.etree.HTML(self.html_str) + print 2, element + print 3, element.tag + print 4 + #del element + #print 5 + + def test_module_HTML2(self): + print 0 + element = self.etree.HTML(self.html_str) + print 1 + element = element[0][0] + print 2, element + print 3, element.tag + print 4 + #del element + #print 5 + + def test_module_parse_html(self): + # (c)ElementTree supports gzip instance as parse argument + print 1 + parser = self.etree.HTMLParser() + print 2 + filename = tempfile.mktemp(suffix=".html") + print 3 + open(filename, 'wb').write(self.html_str) + print 4 + f = open(filename, 'r') + print 5 + tree = self.etree.parse(f, parser) + print 6, self.etree.tostring(tree.getroot()) + self.assertEqual(self.etree.tostring(tree.getroot()), self.html_str) + + def _test_class_parse_filename(self): + # (c)ElementTree class ElementTree has a 'parse' method that returns + # the root of the tree + + # parse from filename + + filename = tempfile.mktemp(suffix=".xml") + open(filename, 'wb').write(self.root_str) + tree = self.etree.ElementTree() + root = tree.parse(filename) + self.assertEqual(self.etree.tostring(root), self.root_str) + + def _test_class_parse_filename_remove_previous(self): + filename = tempfile.mktemp(suffix=".xml") + open(filename, "wb").write(self.root_str) + tree = self.etree.ElementTree() + root = tree.parse(filename) + # and now do it again; previous content should still be there + root2 = tree.parse(filename) + self.assertEquals('a', root.tag) + # now remove all references to root2, and parse again + del root2 + root3 = tree.parse(filename) + # root2's memory should've been freed here + # XXX how to check? 
+ + def _test_class_parse_fileobject(self): + # (c)ElementTree class ElementTree has a 'parse' method that returns + # the root of the tree + + # parse from file object + + filename = tempfile.mktemp(suffix=".xml") + open(filename, 'wb').write(self.root_str) + f = open(filename, 'r') + tree = self.etree.ElementTree() + root = tree.parse(f) + self.assertEqual(self.etree.tostring(root), self.root_str) + + def _test_class_parse_unamed_fileobject(self): + # (c)ElementTree class ElementTree has a 'parse' method that returns + # the root of the tree + + # parse from unamed file object + f = SillyFileLike() + root = self.etree.ElementTree().parse(f) + self.assert_(root.tag.endswith('foo')) + + +def test_suite(): + suite = unittest.TestSuite() + #suite.addTests([unittest.makeSuite(HtmlParserTestCaseBase)]) + return suite + +if __name__ == '__main__': + unittest.main() From scoder at codespeak.net Tue Mar 28 08:07:21 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 28 08:07:22 2006 Subject: [Lxml-checkins] r25062 - lxml/branch/htmlparser/src/lxml Message-ID: <20060328060721.44FF61007E@code0.codespeak.net> Author: scoder Date: Tue Mar 28 08:07:19 2006 New Revision: 25062 Modified: lxml/branch/htmlparser/src/lxml/etree.pyx Log: call attemptDeallocation only if C-node is not empty Modified: lxml/branch/htmlparser/src/lxml/etree.pyx ============================================================================== --- lxml/branch/htmlparser/src/lxml/etree.pyx (original) +++ lxml/branch/htmlparser/src/lxml/etree.pyx Tue Mar 28 08:07:19 2006 @@ -71,7 +71,9 @@ #print "freeing document:", self._c_doc #displayNode(self._c_doc, 0) #print self._c_doc, self._c_doc.dict is __GLOBAL_PARSER_CONTEXT._c_dict + #print "DOC:", hex(self._c_doc) tree.xmlFreeDoc(self._c_doc) + #print "DONE:", hex(self._c_doc) cdef getroot(self): cdef xmlNode* c_node @@ -195,7 +197,7 @@ #displayNode(self._c_node, 0) if self._c_node is not NULL: unregisterProxy(self) - 
attemptDeallocation(self._c_node) + attemptDeallocation(self._c_node) def _init(self): """Called after object initialisation. Subclasses may override From scoder at codespeak.net Tue Mar 28 08:33:21 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 28 08:33:22 2006 Subject: [Lxml-checkins] r25063 - lxml/branch/htmlparser/src/lxml Message-ID: <20060328063321.55E1E1007E@code0.codespeak.net> Author: scoder Date: Tue Mar 28 08:33:20 2006 New Revision: 25063 Modified: lxml/branch/htmlparser/src/lxml/proxy.pxi Log: WORKSsvn di! - bug fix: XML_HTML_DOCUMENT_NODE is the same special case as XML_DOCUMENT_NODE Modified: lxml/branch/htmlparser/src/lxml/proxy.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/proxy.pxi (original) +++ lxml/branch/htmlparser/src/lxml/proxy.pxi Tue Mar 28 08:33:20 2006 @@ -103,7 +103,8 @@ while c_current is not NULL: #print "checking:", c_current.type # if we're still attached to the document, don't deallocate - if c_current.type == tree.XML_DOCUMENT_NODE: + if c_current.type == tree.XML_DOCUMENT_NODE or \ + c_current.type == tree.XML_HTML_DOCUMENT_NODE: #print "not freeing: still in doc" return NULL c_top = c_current From scoder at codespeak.net Tue Mar 28 08:33:32 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 28 08:33:33 2006 Subject: [Lxml-checkins] r25064 - lxml/branch/htmlparser/src/lxml Message-ID: <20060328063332.081B91007E@code0.codespeak.net> Author: scoder Date: Tue Mar 28 08:33:30 2006 New Revision: 25064 Modified: lxml/branch/htmlparser/src/lxml/etree.pyx Log: clean up Modified: lxml/branch/htmlparser/src/lxml/etree.pyx ============================================================================== --- lxml/branch/htmlparser/src/lxml/etree.pyx (original) +++ lxml/branch/htmlparser/src/lxml/etree.pyx Tue Mar 28 08:33:30 2006 @@ -71,9 +71,7 @@ #print "freeing document:", self._c_doc #displayNode(self._c_doc, 0) #print 
self._c_doc, self._c_doc.dict is __GLOBAL_PARSER_CONTEXT._c_dict - #print "DOC:", hex(self._c_doc) tree.xmlFreeDoc(self._c_doc) - #print "DONE:", hex(self._c_doc) cdef getroot(self): cdef xmlNode* c_node From scoder at codespeak.net Tue Mar 28 08:40:58 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 28 08:40:59 2006 Subject: [Lxml-checkins] r25065 - lxml/branch/htmlparser/src/lxml Message-ID: <20060328064058.C4E8F1007E@code0.codespeak.net> Author: scoder Date: Tue Mar 28 08:40:47 2006 New Revision: 25065 Modified: lxml/branch/htmlparser/src/lxml/parser.pxi Log: cleanup Modified: lxml/branch/htmlparser/src/lxml/parser.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/parser.pxi (original) +++ lxml/branch/htmlparser/src/lxml/parser.pxi Tue Mar 28 08:40:47 2006 @@ -267,7 +267,6 @@ pctxt, c_text, NULL, NULL, self._parse_options) __GLOBAL_PARSER_CONTEXT._initDocDict(result) self._error_log.disconnect() - print hex(result), hex(result.dict), hex(__GLOBAL_PARSER_CONTEXT._c_dict), text_utf return result cdef xmlDoc* _parseDocFromFile(self, char* filename) except NULL: From scoder at codespeak.net Tue Mar 28 08:42:15 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 28 08:42:17 2006 Subject: [Lxml-checkins] r25066 - lxml/branch/htmlparser/src/lxml/tests Message-ID: <20060328064215.C16DD1007E@code0.codespeak.net> Author: scoder Date: Tue Mar 28 08:42:04 2006 New Revision: 25066 Modified: lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py Log: cleanup, integrate html test cases in test suite Modified: lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py (original) +++ lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py Tue Mar 28 08:42:04 2006 @@ -15,41 +15,30 @@ etree = etree html_str = "test

page title

" - broken_html_str = "

page title" + broken_html_str = "test<body><h1>page title</body></html>" def test_module_HTML(self): - print 1 element = self.etree.HTML(self.html_str) - print 2, element - print 3, element.tag - print 4 - #del element - #print 5 + self.assertEqual(self.etree.tostring(element), + self.html_str) - def test_module_HTML2(self): - print 0 + def test_module_HTML_broken(self): + element = self.etree.HTML(self.broken_html_str) + self.assertEqual(self.etree.tostring(element), + self.html_str) + + def test_module_HTML_access(self): element = self.etree.HTML(self.html_str) - print 1 element = element[0][0] - print 2, element - print 3, element.tag - print 4 - #del element - #print 5 + self.assertEqual(element.tag, 'title') def test_module_parse_html(self): # (c)ElementTree supports gzip instance as parse argument - print 1 parser = self.etree.HTMLParser() - print 2 filename = tempfile.mktemp(suffix=".html") - print 3 open(filename, 'wb').write(self.html_str) - print 4 f = open(filename, 'r') - print 5 tree = self.etree.parse(f, parser) - print 6, self.etree.tostring(tree.getroot()) self.assertEqual(self.etree.tostring(tree.getroot()), self.html_str) def _test_class_parse_filename(self): @@ -103,7 +92,7 @@ def test_suite(): suite = unittest.TestSuite() - #suite.addTests([unittest.makeSuite(HtmlParserTestCaseBase)]) + suite.addTests([unittest.makeSuite(HtmlParserTestCaseBase)]) return suite if __name__ == '__main__': From scoder at codespeak.net Tue Mar 28 08:44:10 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 28 08:44:11 2006 Subject: [Lxml-checkins] r25067 - lxml/branch/htmlparser/src/lxml/tests Message-ID: <20060328064410.8DEEF10083@code0.codespeak.net> Author: scoder Date: Tue Mar 28 08:44:04 2006 New Revision: 25067 Modified: lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py Log: cleanup Modified: lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py ============================================================================== --- 
lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py (original) +++ lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py Tue Mar 28 08:44:04 2006 @@ -41,54 +41,6 @@ tree = self.etree.parse(f, parser) self.assertEqual(self.etree.tostring(tree.getroot()), self.html_str) - def _test_class_parse_filename(self): - # (c)ElementTree class ElementTree has a 'parse' method that returns - # the root of the tree - - # parse from filename - - filename = tempfile.mktemp(suffix=".xml") - open(filename, 'wb').write(self.root_str) - tree = self.etree.ElementTree() - root = tree.parse(filename) - self.assertEqual(self.etree.tostring(root), self.root_str) - - def _test_class_parse_filename_remove_previous(self): - filename = tempfile.mktemp(suffix=".xml") - open(filename, "wb").write(self.root_str) - tree = self.etree.ElementTree() - root = tree.parse(filename) - # and now do it again; previous content should still be there - root2 = tree.parse(filename) - self.assertEquals('a', root.tag) - # now remove all references to root2, and parse again - del root2 - root3 = tree.parse(filename) - # root2's memory should've been freed here - # XXX how to check? 
- - def _test_class_parse_fileobject(self): - # (c)ElementTree class ElementTree has a 'parse' method that returns - # the root of the tree - - # parse from file object - - filename = tempfile.mktemp(suffix=".xml") - open(filename, 'wb').write(self.root_str) - f = open(filename, 'r') - tree = self.etree.ElementTree() - root = tree.parse(f) - self.assertEqual(self.etree.tostring(root), self.root_str) - - def _test_class_parse_unamed_fileobject(self): - # (c)ElementTree class ElementTree has a 'parse' method that returns - # the root of the tree - - # parse from unamed file object - f = SillyFileLike() - root = self.etree.ElementTree().parse(f) - self.assert_(root.tag.endswith('foo')) - def test_suite(): suite = unittest.TestSuite() From scoder at codespeak.net Tue Mar 28 08:45:36 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 28 08:45:37 2006 Subject: [Lxml-checkins] r25068 - lxml/branch/htmlparser/src/lxml/tests Message-ID: <20060328064536.4283A10081@code0.codespeak.net> Author: scoder Date: Tue Mar 28 08:45:25 2006 New Revision: 25068 Modified: lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py Log: cleanup Modified: lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py (original) +++ lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py Tue Mar 28 08:45:25 2006 @@ -33,7 +33,6 @@ self.assertEqual(element.tag, 'title') def test_module_parse_html(self): - # (c)ElementTree supports gzip instance as parse argument parser = self.etree.HTMLParser() filename = tempfile.mktemp(suffix=".html") open(filename, 'wb').write(self.html_str) From scoder at codespeak.net Tue Mar 28 09:24:38 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 28 09:24:40 2006 Subject: [Lxml-checkins] r25069 - in lxml/branch/htmlparser/src/lxml: . 
tests Message-ID: <20060328072438.00C9710081@code0.codespeak.net> Author: scoder Date: Tue Mar 28 09:24:37 2006 New Revision: 25069 Modified: lxml/branch/htmlparser/src/lxml/etree.pyx lxml/branch/htmlparser/src/lxml/parser.pxi lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py Log: more cleanup, fix default parser handling Modified: lxml/branch/htmlparser/src/lxml/etree.pyx ============================================================================== --- lxml/branch/htmlparser/src/lxml/etree.pyx (original) +++ lxml/branch/htmlparser/src/lxml/etree.pyx Tue Mar 28 09:24:37 2006 @@ -141,12 +141,8 @@ cdef _Document _parseDocument(source, parser): cdef xmlDoc* c_doc - # XXX simplistic (c)StringIO support - if hasattr(source, 'getvalue'): - return _parseMemoryDocument(source.getvalue(), parser) - filename = _getFilenameForFile(source) - # Support for unamed file-like object (eg urlgrabber.urlopen) + # Support for unamed file-like object (StringIO, urlgrabber.urlopen, ...) if not filename and hasattr(source, 'read'): return _parseMemoryDocument(source.read(), parser) @@ -1205,7 +1201,7 @@ def XML(text): cdef _Document doc - doc = _parseMemoryDocument(text, None) + doc = _parseMemoryDocument(text, __DEFAULT_XML_PARSER) return doc.getroot() fromstring = XML Modified: lxml/branch/htmlparser/src/lxml/parser.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/parser.pxi (original) +++ lxml/branch/htmlparser/src/lxml/parser.pxi Tue Mar 28 09:24:37 2006 @@ -128,7 +128,6 @@ cdef xmlParserCtxt* pctxt cdef int parse_error self._error_log.connect() - __GLOBAL_PARSER_CONTEXT._initParser() pctxt = xmlparser.xmlCreateDocParserCtxt(_cstr(text_utf)) if pctxt is NULL: self._error_log.disconnect() @@ -156,7 +155,6 @@ cdef xmlDoc* result cdef xmlParserCtxt* pctxt self._error_log.connect() - __GLOBAL_PARSER_CONTEXT._initParser() pctxt = self._file_parser_ctxt if pctxt is NULL: pctxt = xmlparser.xmlNewParserCtxt() @@ 
-182,11 +180,12 @@ self._error_log.disconnect() return result -cdef BaseParser __DEFAULT_PARSER -__DEFAULT_PARSER = XMLParser() -cdef BaseParser __ORIG_DEFAULT_PARSER -__ORIG_DEFAULT_PARSER = __DEFAULT_PARSER +cdef XMLParser __DEFAULT_XML_PARSER +__DEFAULT_XML_PARSER = XMLParser() + +cdef BaseParser __DEFAULT_PARSER +__DEFAULT_PARSER = __DEFAULT_XML_PARSER def set_default_parser(parser=None): """Set a default XMLParser. This parser is used globally whenever no @@ -194,8 +193,9 @@ this function is called without a parser (or if it is None), the default parser is reset to the original configuration. """ + global __DEFAULT_PARSER if parser is None: - __DEFAULT_PARSER = __ORIG_DEFAULT_PARSER + __DEFAULT_PARSER = __DEFAULT_XML_PARSER elif isinstance(parser, (HTMLParser, XMLParser)): __DEFAULT_PARSER = parser else: @@ -222,8 +222,8 @@ cdef int _parse_options cdef xmlParserCtxt* _memory_parser_ctxt cdef xmlParserCtxt* _file_parser_ctxt - def __init__(self, recover=True, compact_text=True, no_network=False, - from_parser=None): + def __init__(self, recover=True, no_network=False, + compact_text=True, remove_blank_text=False): cdef int parse_options self._memory_parser_ctxt = NULL self._file_parser_ctxt = NULL @@ -236,6 +236,8 @@ parse_options = parse_options & ~htmlparser.HTML_PARSE_COMPACT if no_network: parse_options = parse_options | htmlparser.HTML_PARSE_NONET + if remove_blank_text: + parse_options = parse_options | htmlparser.HTML_PARSE_NOBLANKS self._parse_options = parse_options @@ -253,7 +255,6 @@ cdef char* c_text cdef int c_len self._error_log.connect() - __GLOBAL_PARSER_CONTEXT._initParser() c_text = _cstr(text_utf) pctxt = self._memory_parser_ctxt if pctxt is NULL: @@ -265,6 +266,9 @@ __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) result = htmlparser.htmlCtxtReadDoc( pctxt, c_text, NULL, NULL, self._parse_options) + if result is NULL: + self._error_log.disconnect() + raise HTMLSyntaxError __GLOBAL_PARSER_CONTEXT._initDocDict(result) 
self._error_log.disconnect() return result @@ -274,7 +278,6 @@ cdef xmlParserCtxt* pctxt cdef int parser_error self._error_log.connect() - __GLOBAL_PARSER_CONTEXT._initParser() pctxt = self._file_parser_ctxt if pctxt is NULL: pctxt = htmlparser.htmlCreateFileParserCtxt(filename, NULL) @@ -285,6 +288,9 @@ __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) result = htmlparser.htmlCtxtReadFile( pctxt, filename, NULL, self._parse_options) + if result is NULL: + self._error_log.disconnect() + raise HTMLSyntaxError __GLOBAL_PARSER_CONTEXT._initDocDict(result) self._error_log.disconnect() return result @@ -299,6 +305,7 @@ cdef xmlDoc* _parseDoc(text_utf, parser) except NULL: if parser is None: parser = __DEFAULT_PARSER + __GLOBAL_PARSER_CONTEXT._initParser() if isinstance(parser, XMLParser): return (<XMLParser>parser)._parseDoc(text_utf) elif isinstance(parser, HTMLParser): @@ -309,6 +316,7 @@ cdef xmlDoc* _parseDocFromFile(filename, parser) except NULL: if parser is None: parser = __DEFAULT_PARSER + __GLOBAL_PARSER_CONTEXT._initParser() if isinstance(parser, XMLParser): return (<XMLParser>parser)._parseDocFromFile(_cstr(filename)) elif isinstance(parser, HTMLParser): Modified: lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py (original) +++ lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py Tue Mar 28 09:24:37 2006 @@ -7,7 +7,7 @@ import unittest import tempfile -from common_imports import etree, fileInTestDir, SillyFileLike, HelperTestCase +from common_imports import StringIO, etree, fileInTestDir, SillyFileLike, HelperTestCase class HtmlParserTestCaseBase(HelperTestCase): """HTML parser test cases @@ -16,6 +16,10 @@ html_str = "<html><head><title>test

page title

" broken_html_str = "test<body><h1>page title</body></html>" + blank_text_html_str = "<html><head><title>

" + + def tearDown(self): + self.etree.set_default_parser() def test_module_HTML(self): element = self.etree.HTML(self.html_str) @@ -40,7 +44,17 @@ tree = self.etree.parse(f, parser) self.assertEqual(self.etree.tostring(tree.getroot()), self.html_str) - + def test_default_parser(self): + self.assertRaises(self.etree.XMLSyntaxError, + self.etree.parse, StringIO(self.broken_html_str)) + + self.etree.set_default_parser( self.etree.HTMLParser() ) + + tree = self.etree.parse(StringIO(self.broken_html_str)) + self.assertEqual(self.etree.tostring(tree.getroot()), + self.html_str) + + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(HtmlParserTestCaseBase)]) From scoder at codespeak.net Tue Mar 28 10:03:36 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 28 10:03:36 2006 Subject: [Lxml-checkins] r25070 - lxml/branch/htmlparser/src/lxml Message-ID: <20060328080336.4010510082@code0.codespeak.net> Author: scoder Date: Tue Mar 28 10:03:35 2006 New Revision: 25070 Modified: lxml/branch/htmlparser/src/lxml/parser.pxi lxml/branch/htmlparser/src/lxml/xmlparser.pxd Log: large cleanup to reduce code duplication, refactored parse result handling into base class, reuse context also for XML from-memory parsing Modified: lxml/branch/htmlparser/src/lxml/parser.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/parser.pxi (original) +++ lxml/branch/htmlparser/src/lxml/parser.pxi Tue Mar 28 10:03:35 2006 @@ -65,10 +65,20 @@ cdef _ErrorLog _error_log def __init__(self): self._error_log = _ErrorLog() + property error_log: def __get__(self): return self._error_log.copy() + cdef xmlDoc* _handleResult(self, xmlParserCtxt* ctxt, xmlDoc* result): + if not ctxt.wellFormed: + if result is not NULL: + tree.xmlFreeDoc(result) + result = NULL + __GLOBAL_PARSER_CONTEXT._initDocDict(result) + self._error_log.disconnect() + return result + 
############################################################ ## XML parser @@ -97,6 +107,7 @@ """ cdef int _parse_options cdef xmlParserCtxt* _file_parser_ctxt + cdef xmlParserCtxt* _memory_parser_ctxt def __init__(self, attribute_defaults=False, dtd_validation=False, no_network=False, ns_clean=False): cdef int parse_options @@ -120,6 +131,16 @@ def __dealloc__(self): if self._file_parser_ctxt != NULL: xmlparser.xmlFreeParserCtxt(self._file_parser_ctxt) + if self._memory_parser_ctxt != NULL: + xmlparser.xmlFreeParserCtxt(self._memory_parser_ctxt) + + cdef xmlParserCtxt* _createContext(self) except NULL: + cdef xmlParserCtxt* pctxt + pctxt = xmlparser.xmlNewParserCtxt() + if pctxt is NULL: + self._error_log.disconnect() + raise ParserError, "Failed to create parser context" + return pctxt cdef xmlDoc* _parseDoc(self, text_utf) except NULL: """Parse document, share dictionary if possible. @@ -128,27 +149,17 @@ cdef xmlParserCtxt* pctxt cdef int parse_error self._error_log.connect() - pctxt = xmlparser.xmlCreateDocParserCtxt(_cstr(text_utf)) + pctxt = self._memory_parser_ctxt if pctxt is NULL: - self._error_log.disconnect() - raise XMLSyntaxError, "Failed to create parser context" + pctxt = self._createContext() + self._memory_parser_ctxt = pctxt + __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) - xmlparser.xmlCtxtUseOptions( - pctxt, - self._parse_options) - parse_error = xmlparser.xmlParseDocument(pctxt) - # in case of errors, clean up context plus any document - if parse_error != 0 or not pctxt.wellFormed: - if pctxt.myDoc is not NULL: - tree.xmlFreeDoc(pctxt.myDoc) - pctxt.myDoc = NULL - xmlparser.xmlFreeParserCtxt(pctxt) - self._error_log.disconnect() + result = xmlparser.xmlCtxtReadDoc( + pctxt, _cstr(text_utf), NULL, NULL, self._parse_options) + result = self._handleResult(pctxt, result) + if result is NULL: raise XMLSyntaxError - result = pctxt.myDoc - __GLOBAL_PARSER_CONTEXT._initDocDict(result) - xmlparser.xmlFreeParserCtxt(pctxt) - 
self._error_log.disconnect() return result cdef xmlDoc* _parseDocFromFile(self, char* filename) except NULL: @@ -157,27 +168,19 @@ self._error_log.connect() pctxt = self._file_parser_ctxt if pctxt is NULL: - pctxt = xmlparser.xmlNewParserCtxt() + pctxt = self._createContext() self._file_parser_ctxt = pctxt + __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) - # XXX set options twice? needed to shut up libxml2 - xmlparser.xmlCtxtUseOptions(pctxt, self._parse_options) - result = xmlparser.xmlCtxtReadFile(pctxt, filename, - NULL, self._parse_options) + result = xmlparser.xmlCtxtReadFile( + pctxt, filename, NULL, self._parse_options) if result is NULL: if pctxt.lastError.domain == xmlerror.XML_FROM_IO: self._error_log.disconnect() raise IOError, "Could not open file %s" % filename - # in case of errors, clean up context plus any document - # XXX other errors? - if not pctxt.wellFormed: - if pctxt.myDoc is not NULL: - tree.xmlFreeDoc(pctxt.myDoc) - pctxt.myDoc = NULL - self._error_log.disconnect() + result = self._handleResult(pctxt, result) + if result is NULL: raise XMLSyntaxError - __GLOBAL_PARSER_CONTEXT._initDocDict(result) - self._error_log.disconnect() return result @@ -266,11 +269,9 @@ __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) result = htmlparser.htmlCtxtReadDoc( pctxt, c_text, NULL, NULL, self._parse_options) + result = self._handleResult(pctxt, result) if result is NULL: - self._error_log.disconnect() raise HTMLSyntaxError - __GLOBAL_PARSER_CONTEXT._initDocDict(result) - self._error_log.disconnect() return result cdef xmlDoc* _parseDocFromFile(self, char* filename) except NULL: @@ -288,11 +289,9 @@ __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) result = htmlparser.htmlCtxtReadFile( pctxt, filename, NULL, self._parse_options) + result = self._handleResult(pctxt, result) if result is NULL: - self._error_log.disconnect() raise HTMLSyntaxError - __GLOBAL_PARSER_CONTEXT._initDocDict(result) - self._error_log.disconnect() return result cdef HTMLParser 
__DEFAULT_HTML_PARSER Modified: lxml/branch/htmlparser/src/lxml/xmlparser.pxd ============================================================================== --- lxml/branch/htmlparser/src/lxml/xmlparser.pxd (original) +++ lxml/branch/htmlparser/src/lxml/xmlparser.pxd Tue Mar 28 10:03:35 2006 @@ -32,13 +32,11 @@ XML_PARSE_NOXINCNODE = 32768 # do not generate XINCLUDE START/END nodes cdef void xmlInitParser() - cdef xmlParserCtxt* xmlCreateDocParserCtxt(char* cur) cdef xmlParserCtxt* xmlNewParserCtxt() cdef void xmlFreeParserCtxt(xmlParserCtxt* ctxt) - - cdef int xmlCtxtUseOptions(xmlParserCtxt* ctxt, int options) - cdef int xmlParseDocument(xmlParserCtxt* ctxt) - cdef xmlDoc* xmlParseDoc(char* cur) + + cdef xmlDoc* xmlCtxtReadDoc(xmlParserCtxt* ctxt, + char* cur, char* URL, char* encoding, + int options) cdef xmlDoc* xmlCtxtReadFile(xmlParserCtxt* ctxt, char* filename, char* encoding, int options) - From scoder at codespeak.net Tue Mar 28 10:06:03 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 28 10:06:05 2006 Subject: [Lxml-checkins] r25071 - lxml/branch/htmlparser/src/lxml Message-ID: <20060328080603.E3AE210081@code0.codespeak.net> Author: scoder Date: Tue Mar 28 10:05:57 2006 New Revision: 25071 Modified: lxml/branch/htmlparser/src/lxml/parser.pxi Log: more cleanup Modified: lxml/branch/htmlparser/src/lxml/parser.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/parser.pxi (original) +++ lxml/branch/htmlparser/src/lxml/parser.pxi Tue Mar 28 10:05:57 2006 @@ -71,11 +71,12 @@ return self._error_log.copy() cdef xmlDoc* _handleResult(self, xmlParserCtxt* ctxt, xmlDoc* result): - if not ctxt.wellFormed: - if result is not NULL: - tree.xmlFreeDoc(result) - result = NULL - __GLOBAL_PARSER_CONTEXT._initDocDict(result) + if ctxt.wellFormed: + __GLOBAL_PARSER_CONTEXT._initDocDict(result) + elif result is not NULL: + # free broken document + tree.xmlFreeDoc(result) + result = NULL
self._error_log.disconnect() return result From scoder at codespeak.net Tue Mar 28 13:22:01 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 28 13:22:03 2006 Subject: [Lxml-checkins] r25078 - lxml/trunk/src/lxml Message-ID: <20060328112201.6BFB91007D@code0.codespeak.net> Author: scoder Date: Tue Mar 28 13:22:00 2006 New Revision: 25078 Modified: lxml/trunk/src/lxml/etree.pyx Log: copied changes from htmlparser branch: make _Document methods C functions Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Tue Mar 28 13:22:00 2006 @@ -73,14 +73,14 @@ #print self._c_doc.dict is theParser._c_dict tree.xmlFreeDoc(self._c_doc) - def getroot(self): + cdef getroot(self): cdef xmlNode* c_node c_node = tree.xmlDocGetRootElement(self._c_doc) if c_node is NULL: return None return _elementFactory(self, c_node) - def buildNewPrefix(self): + cdef buildNewPrefix(self): ns = python.PyString_FromFormat("ns%d", self._ns_counter) self._ns_counter = self._ns_counter + 1 return ns @@ -195,7 +195,7 @@ #displayNode(self._c_node, 0) if self._c_node is not NULL: unregisterProxy(self) - attemptDeallocation(self._c_node) + attemptDeallocation(self._c_node) def _init(self): """Called after object initialisation. 
Subclasses may override @@ -1299,10 +1299,12 @@ cdef _NodeBase _rootNodeOf(object input): # call this to get the root node of a # _Document, _ElementTree or _NodeBase object - if hasattr(input, 'getroot'): # Document/ElementTree + if hasattr(input, 'getroot'): # ElementTree return <_NodeBase>(input.getroot()) elif isinstance(input, _NodeBase): return <_NodeBase>input + elif isinstance(input, _Document): + return (<_Document>input).getroot() else: return None From scoder at codespeak.net Tue Mar 28 13:24:58 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 28 13:24:59 2006 Subject: [Lxml-checkins] r25079 - lxml/branch/lxml-0.9.x/src/lxml Message-ID: <20060328112458.57A9E1007D@code0.codespeak.net> Author: scoder Date: Tue Mar 28 13:24:57 2006 New Revision: 25079 Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Log: merged in changes from trunk: make _Document methods C functions Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Tue Mar 28 13:24:57 2006 @@ -73,14 +73,14 @@ #print self._c_doc.dict is theParser._c_dict tree.xmlFreeDoc(self._c_doc) - def getroot(self): + cdef getroot(self): cdef xmlNode* c_node c_node = tree.xmlDocGetRootElement(self._c_doc) if c_node is NULL: return None return _elementFactory(self, c_node) - def buildNewPrefix(self): + cdef buildNewPrefix(self): ns = "ns%d" % self._ns_counter self._ns_counter = self._ns_counter + 1 return ns @@ -195,7 +195,7 @@ #displayNode(self._c_node, 0) if self._c_node is not NULL: unregisterProxy(self) - attemptDeallocation(self._c_node) + attemptDeallocation(self._c_node) def _init(self): """Called after object initialisation. 
Subclasses may override @@ -1284,10 +1284,12 @@ cdef _NodeBase _rootNodeOf(object input): # call this to get the root node of a # _Document, _ElementTree or _NodeBase object - if hasattr(input, 'getroot'): # Document/ElementTree + if hasattr(input, 'getroot'): # ElementTree return <_NodeBase>(input.getroot()) elif isinstance(input, _NodeBase): return <_NodeBase>input + elif isinstance(input, _Document): + return (<_Document>input).getroot() else: return None From scoder at codespeak.net Tue Mar 28 13:37:03 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Mar 28 13:37:04 2006 Subject: [Lxml-checkins] r25080 - lxml/branch/htmlparser/src/lxml Message-ID: <20060328113703.7A7FC1007D@code0.codespeak.net> Author: scoder Date: Tue Mar 28 13:37:02 2006 New Revision: 25080 Modified: lxml/branch/htmlparser/src/lxml/python.pxd Log: removed unused declaration Modified: lxml/branch/htmlparser/src/lxml/python.pxd ============================================================================== --- lxml/branch/htmlparser/src/lxml/python.pxd (original) +++ lxml/branch/htmlparser/src/lxml/python.pxd Tue Mar 28 13:37:02 2006 @@ -14,7 +14,6 @@ char* errors) cdef object PyUnicode_DecodeUTF8(char* s, int size, char* errors) cdef object PyUnicode_AsUTF8String(object ustring) - cdef int PyString_GET_SIZE(object s) cdef object PyString_FromStringAndSize(char* s, int size) cdef object PyString_FromString(char* s) cdef object PyString_FromFormat(char* format, ...) 
From scoder at codespeak.net Wed Mar 29 14:58:37 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 29 14:58:38 2006 Subject: [Lxml-checkins] r25104 - lxml/branch/xsltext Message-ID: <20060329125837.9ECB3100A5@code0.codespeak.net> Author: scoder Date: Wed Mar 29 14:58:36 2006 New Revision: 25104 Added: lxml/branch/xsltext/ - copied from r25103, lxml/trunk/ Log: new branch for XSLT extension elements From scoder at codespeak.net Wed Mar 29 19:02:27 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 29 19:02:28 2006 Subject: [Lxml-checkins] r25118 - lxml/trunk/src/lxml Message-ID: <20060329170227.AAEB5100B0@code0.codespeak.net> Author: scoder Date: Wed Mar 29 19:02:26 2006 New Revision: 25118 Modified: lxml/trunk/src/lxml/proxy.pxi Log: bug fix: in XSLT output we may end up with HTML trees -> check for HTML document node in deallocation code Modified: lxml/trunk/src/lxml/proxy.pxi ============================================================================== --- lxml/trunk/src/lxml/proxy.pxi (original) +++ lxml/trunk/src/lxml/proxy.pxi Wed Mar 29 19:02:26 2006 @@ -103,7 +103,8 @@ while c_current is not NULL: #print "checking:", c_current.type # if we're still attached to the document, don't deallocate - if c_current.type == tree.XML_DOCUMENT_NODE: + if c_current.type == tree.XML_DOCUMENT_NODE or \ + c_current.type == tree.XML_HTML_DOCUMENT_NODE: #print "not freeing: still in doc" return NULL c_top = c_current From scoder at codespeak.net Wed Mar 29 19:03:00 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 29 19:03:01 2006 Subject: [Lxml-checkins] r25119 - lxml/branch/lxml-0.9.x/src/lxml Message-ID: <20060329170300.AE853100B0@code0.codespeak.net> Author: scoder Date: Wed Mar 29 19:02:59 2006 New Revision: 25119 Modified: lxml/branch/lxml-0.9.x/src/lxml/proxy.pxi Log: bug fix from trunk: in XSLT output we may end up with HTML trees -> check for HTML document node in deallocation code Modified: 
lxml/branch/lxml-0.9.x/src/lxml/proxy.pxi ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/proxy.pxi (original) +++ lxml/branch/lxml-0.9.x/src/lxml/proxy.pxi Wed Mar 29 19:02:59 2006 @@ -103,7 +103,8 @@ while c_current is not NULL: #print "checking:", c_current.type # if we're still attached to the document, don't deallocate - if c_current.type == tree.XML_DOCUMENT_NODE: + if c_current.type == tree.XML_DOCUMENT_NODE or \ + c_current.type == tree.XML_HTML_DOCUMENT_NODE: #print "not freeing: still in doc" return NULL c_top = c_current From scoder at codespeak.net Wed Mar 29 19:15:33 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 29 19:15:34 2006 Subject: [Lxml-checkins] r25121 - in lxml/trunk: . src/lxml/tests Message-ID: <20060329171533.14423100B5@code0.codespeak.net> Author: scoder Date: Wed Mar 29 19:15:31 2006 New Revision: 25121 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/tests/test_xslt.py Log: test case for HTML output method in XSLT Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Mar 29 19:15:31 2006 @@ -23,6 +23,8 @@ Bugs fixed ---------- +* Memory deallocation bug when using XSLT output method "html" + * sax.py was handling UTF-8 encoded tag names where it shouldn't * lxml.tests package will no longer be installed (is still in source tar) Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Wed Mar 29 19:15:31 2006 @@ -160,6 +160,23 @@ ''', st.tostring(res)) + def test_xslt_html_output(self): + tree = self.parse('BC') + style = self.parse('''\ + + + + + + +''') + + st = etree.XSLT(style) + res = st(tree) + self.assertEquals('''B''', + str(res).strip()) + def 
test_xslt_multiple_files(self): tree = etree.parse(fileInTestDir('test1.xslt')) st = etree.XSLT(tree) From scoder at codespeak.net Wed Mar 29 19:16:33 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 29 19:16:34 2006 Subject: [Lxml-checkins] r25122 - in lxml/branch/lxml-0.9.x: . src/lxml/tests Message-ID: <20060329171633.93F03100B5@code0.codespeak.net> Author: scoder Date: Wed Mar 29 19:16:32 2006 New Revision: 25122 Modified: lxml/branch/lxml-0.9.x/CHANGES.txt lxml/branch/lxml-0.9.x/src/lxml/tests/test_xslt.py Log: merge from trunk: test case for HTML output method in XSLT Modified: lxml/branch/lxml-0.9.x/CHANGES.txt ============================================================================== --- lxml/branch/lxml-0.9.x/CHANGES.txt (original) +++ lxml/branch/lxml-0.9.x/CHANGES.txt Wed Mar 29 19:16:32 2006 @@ -23,6 +23,8 @@ Bugs fixed ---------- +* Memory deallocation bug when using XSLT output method "html" + * sax.py was handling UTF-8 encoded tag names where it shouldn't * lxml.tests package will no longer be installed (is still in source tar) Modified: lxml/branch/lxml-0.9.x/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/tests/test_xslt.py (original) +++ lxml/branch/lxml-0.9.x/src/lxml/tests/test_xslt.py Wed Mar 29 19:16:32 2006 @@ -160,6 +160,23 @@ ''', st.tostring(res)) + def test_xslt_html_output(self): + tree = self.parse('BC') + style = self.parse('''\ + + + + + + +''') + + st = etree.XSLT(style) + res = st(tree) + self.assertEquals('''B''', + str(res).strip()) + def test_xslt_multiple_files(self): tree = etree.parse(fileInTestDir('test1.xslt')) st = etree.XSLT(tree) From scoder at codespeak.net Wed Mar 29 23:06:13 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Mar 29 23:06:15 2006 Subject: [Lxml-checkins] r25128 - lxml/branch/xsltext/src/lxml Message-ID: <20060329210613.828DC100A5@code0.codespeak.net> Author: scoder 
Date: Wed Mar 29 23:06:12 2006 New Revision: 25128 Modified: lxml/branch/xsltext/src/lxml/etree.pyx Log: cleanup of function _dumpNextNode: simplify condition Modified: lxml/branch/xsltext/src/lxml/etree.pyx ============================================================================== --- lxml/branch/xsltext/src/lxml/etree.pyx (original) +++ lxml/branch/xsltext/src/lxml/etree.pyx Wed Mar 29 23:06:12 2006 @@ -1377,11 +1377,9 @@ xmlNode* c_node, char* encoding): cdef xmlNode* c_next c_next = c_node.next - if not (c_next is not NULL and c_next.type == tree.XML_TEXT_NODE): - c_next = NULL - if c_next is not NULL: + if c_next is not NULL and c_next.type == tree.XML_TEXT_NODE: tree.xmlNodeDumpOutput(c_buffer, c_doc, c_next, 0, 0, encoding) - + cdef object _stripDeclaration(object xml_string): xml_string = xml_string.strip() if xml_string[:5] == ' Author: scoder Date: Wed Mar 29 23:08:03 2006 New Revision: 25129 Modified: lxml/trunk/src/lxml/etree.pyx Log: merge from branch: cleanup of function _dumpNextNode: simplify condition Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Mar 29 23:08:03 2006 @@ -1377,11 +1377,9 @@ xmlNode* c_node, char* encoding): cdef xmlNode* c_next c_next = c_node.next - if not (c_next is not NULL and c_next.type == tree.XML_TEXT_NODE): - c_next = NULL - if c_next is not NULL: + if c_next is not NULL and c_next.type == tree.XML_TEXT_NODE: tree.xmlNodeDumpOutput(c_buffer, c_doc, c_next, 0, 0, encoding) - + cdef object _stripDeclaration(object xml_string): xml_string = xml_string.strip() if xml_string[:5] == ' Author: scoder Date: Wed Mar 29 23:08:41 2006 New Revision: 25130 Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Log: merge from branch: cleanup of function _dumpNextNode: simplify condition Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx 
============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Wed Mar 29 23:08:41 2006 @@ -1362,11 +1362,9 @@ xmlNode* c_node, char* encoding): cdef xmlNode* c_next c_next = c_node.next - if not (c_next is not NULL and c_next.type == tree.XML_TEXT_NODE): - c_next = NULL - if c_next is not NULL: + if c_next is not NULL and c_next.type == tree.XML_TEXT_NODE: tree.xmlNodeDumpOutput(c_buffer, c_doc, c_next, 0, 0, encoding) - + cdef object _stripDeclaration(object xml_string): xml_string = xml_string.strip() if xml_string[:5] == ' Author: scoder Date: Thu Mar 30 10:52:58 2006 New Revision: 25131 Modified: lxml/branch/xsltext/src/lxml/nsclasses.pxi lxml/branch/xsltext/src/lxml/tests/test_xslt.py lxml/branch/xsltext/src/lxml/xslt.pxd lxml/branch/xsltext/src/lxml/xslt.pxi Log: main implementation done, API complete, segfaulting test case remains Modified: lxml/branch/xsltext/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/xsltext/src/lxml/nsclasses.pxi (original) +++ lxml/branch/xsltext/src/lxml/nsclasses.pxi Thu Mar 30 10:52:58 2006 @@ -10,64 +10,83 @@ persistent state of elements must be stored in the underlying XML.""" pass -class XSLTElement(object): - "NOT IMPLEMENTED YET!" - pass - -cdef object __NAMESPACE_REGISTRIES -__NAMESPACE_REGISTRIES = {} +cdef object __CLASS_NAMESPACE_REGISTRIES +__CLASS_NAMESPACE_REGISTRIES = {} cdef object __FUNCTION_NAMESPACE_REGISTRIES __FUNCTION_NAMESPACE_REGISTRIES = {} +cdef object __XSLT_ELEMENT_NAMESPACE_REGISTRIES +__XSLT_ELEMENT_NAMESPACE_REGISTRIES = {} + def Namespace(ns_uri): """Retrieve the namespace object associated with the given URI. 
Creates a new one if it does not yet exist.""" - if ns_uri: + if ns_uri is not None: ns_utf = _utf8(ns_uri) - else: - ns_utf = None try: - return __NAMESPACE_REGISTRIES[ns_utf] + return __CLASS_NAMESPACE_REGISTRIES[ns_utf] except KeyError: - registry = __NAMESPACE_REGISTRIES[ns_utf] = \ - _NamespaceRegistry(ns_uri) + registry = __CLASS_NAMESPACE_REGISTRIES[ns_utf] = \ + _ClassNamespaceRegistry(ns_uri) return registry def FunctionNamespace(ns_uri): - """Retrieve the function namespace object associated with the given - URI. Creates a new one if it does not yet exist. A function namespace can - only be used to register extension functions.""" - if ns_uri: + """Retrieve the function namespace object associated with the given URI. + Creates a new one if it does not yet exist. A function namespace can only + be used to register extension functions. + + Extension functions must have the signature + + result = function(_, xpath_value) + + The function receives the XPath call argument(s) and must return a value. + """ + if ns_uri is not None: ns_utf = _utf8(ns_uri) - else: - ns_utf = None try: return __FUNCTION_NAMESPACE_REGISTRIES[ns_utf] except KeyError: registry = __FUNCTION_NAMESPACE_REGISTRIES[ns_utf] = \ - _FunctionNamespaceRegistry(ns_uri) + _XPathFunctionNamespaceRegistry(ns_uri) return registry +def XSLTElementNamespace(ns_uri): + """Retrieve the XSLT element namespace object associated with the given + URI. Creates a new one if it does not yet exist. This namespace can only + be used to register functions for XSLT extension elements. + + The registered functions must have the signature + + result = function(_, subtree) + + 'subtree' is the current result tree from inside the element. It may be a + string value, a node or a sequence of nodes. The element function can + freely operate on this. + + The function must return either a None, a node, a sequence of nodes or a + string value. 
+ """ + if ns_uri is not None: + ns_utf = _utf8(ns_uri) + try: + return __XSLT_ELEMENT_NAMESPACE_REGISTRIES[ns_utf] + except KeyError: + registry = __XSLT_ELEMENT_NAMESPACE_REGISTRIES[ns_utf] = \ + _FunctionNamespaceRegistry(ns_uri) + return registry cdef class _NamespaceRegistry: - "Dictionary-like registry for namespace implementations" + "Dictionary-like namespace registry" cdef object _ns_uri - cdef object _classes - cdef object _extensions - cdef object _xslt_elements def __init__(self, ns_uri): self._ns_uri = ns_uri - self._classes = {} - self._extensions = {} - self._xslt_elements = {} def update(self, class_dict_iterable): - """Forgivingly update the registry. If registered values are - neither subclasses of ElementBase nor callable extension - functions, or if their name starts with '_', they will be - silently discarded. This allows registrations at the module or - class level using vars(), globals() etc.""" + """Forgivingly update the registry. If registered values do not match + the required type for this registry, or if their name starts with '_', + they will be silently discarded. This allows registrations at the + module or class level using vars(), globals() etc.""" if hasattr(class_dict_iterable, 'iteritems'): class_dict_iterable = class_dict_iterable.iteritems() elif hasattr(class_dict_iterable, 'items'): @@ -76,27 +95,10 @@ if (name is None or name[:1] != '_') and callable(item): self[name] = item - def __setitem__(self, name, item): - if python.PyType_Check(item) and issubclass(item, ElementBase): - d = self._classes - elif name is None: - raise NamespaceRegistryError, "Registered name can only be None for elements." - elif python.PyType_Check(item) and issubclass(item, XSLTElement): - d = self._xslt_elements - elif callable(item): - d = self._extensions - else: - raise NamespaceRegistryError, "Registered item must be callable." 
- - if name is None: - name_utf = None - else: - name_utf = _utf8(name) - d[name_utf] = item - def __getitem__(self, name): - name_utf = _utf8(name) - return self._get(name_utf) + if name is not None: + name = _utf8(name) + return self._get(name) cdef object _get(self, object name): cdef python.PyObject* dict_result @@ -107,37 +109,40 @@ raise KeyError, "Name not registered." return dict_result +cdef class _ClassNamespaceRegistry(_NamespaceRegistry): + "Dictionary-like registry for namespace implementation classes" + cdef object _classes + def __init__(self, ns_uri): + _NamespaceRegistry.__init__(self, ns_uri) + self._classes = {} + + def __setitem__(self, name, item): + if not python.PyType_Check(item) or \ + not issubclass(item, ElementBase): + raise NamespaceRegistryError, \ + "Registered item must be subtypes of ElementBase." + if name is not None: + name = _utf8(name) + self._classes[name] = item + def clear(self): self._classes.clear() - self._extensions.clear() - #self.self._xslt_elements.clear() def __repr__(self): return "Namespace(%r)" % self._ns_uri cdef class _FunctionNamespaceRegistry(_NamespaceRegistry): - cdef object _prefix - cdef object _prefix_utf - property prefix: - "Namespace prefix for extension functions." - def __del__(self): - self._prefix = None # no prefix configured - def __get__(self): - return self._prefix - def __set__(self, prefix): - if prefix is None: - prefix = '' # empty prefix - self._prefix_utf = _utf8(prefix) - self._prefix = prefix + cdef object _extensions + def __init__(self, ns_uri): + _NamespaceRegistry.__init__(self, ns_uri) + self._extensions = {} def __setitem__(self, name, item): if not callable(item): raise NamespaceRegistryError, "Registered function must be callable." 
- if name is None: - name_utf = None - else: - name_utf = _utf8(name) - self._extensions[name_utf] = item + if name is not None: + name = _utf8(name) + self._extensions[name] = item cdef object _get(self, object name): cdef python.PyObject* dict_result @@ -149,9 +154,27 @@ def __repr__(self): return "FunctionNamespace(%r)" % self._ns_uri + def clear(self): + self._extensions.clear() + +cdef class _XPathFunctionNamespaceRegistry(_FunctionNamespaceRegistry): + cdef object _prefix + cdef object _prefix_utf + property prefix: + "Namespace prefix for extension functions." + def __del__(self): + self._prefix = None # no prefix configured + def __get__(self): + return self._prefix + def __set__(self, prefix): + if prefix is None: + prefix = '' # empty prefix + self._prefix_utf = _utf8(prefix) + self._prefix = prefix + cdef object _find_all_extensions(): "Internal lookup function to find all extension functions for XSLT/XPath." - cdef _NamespaceRegistry registry + cdef _FunctionNamespaceRegistry registry ns_extensions = {} for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.iteritems(): if registry._extensions: @@ -160,47 +183,70 @@ cdef object _find_all_extension_prefixes(): "Internal lookup function to find all function prefixes for XSLT/XPath." 
- cdef _FunctionNamespaceRegistry registry + cdef _XPathFunctionNamespaceRegistry registry ns_prefixes = {} for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.iteritems(): if registry._prefix_utf is not None: ns_prefixes[registry._prefix_utf] = ns_utf return ns_prefixes -cdef _find_extensions(namespaces): +cdef object _find_extensions(namespaces): + """Returns a dictionary that maps each namespace in the provided list to a + dictionary of name-function mappings defined under that namespace.""" + cdef python.PyObject* dict_result + extension_dict = {} + for ns_uri in namespaces: + if ns_uri is not None: + ns_uri = _utf8(ns_uri) + dict_result = python.PyDict_GetItem( + __FUNCTION_NAMESPACE_REGISTRIES, ns_uri) + if dict_result is NULL: + continue + extensions = (<_FunctionNamespaceRegistry>dict_result)._extensions + if extensions: + python.PyDict_SetItem(extension_dict, ns_uri, extensions) + return extension_dict + +cdef object _find_all_extension_elements(): + "Internal lookup function to find all extension elements for XSLT." 
+ cdef _FunctionNamespaceRegistry registry + ns_extensions = {} + for (ns_utf, registry) in __XSLT_ELEMENT_NAMESPACE_REGISTRIES.iteritems(): + if registry._extensions: + ns_extensions[ns_utf] = registry._extensions + return ns_extensions + +cdef object _find_extension_elements(namespaces): """Returns a dictionary that maps each namespace in the provided list to a dictionary of name-function mappings defined under that namespace.""" cdef python.PyObject* dict_result - cdef char* c_ns_utf extension_dict = {} for ns_uri in namespaces: - if ns_uri is None: - ns_utf = None - else: - ns_utf = _utf8(ns_uri) + if ns_uri is not None: + ns_uri = _utf8(ns_uri) dict_result = python.PyDict_GetItem( - __FUNCTION_NAMESPACE_REGISTRIES, ns_utf) + __XSLT_ELEMENT_NAMESPACE_REGISTRIES, ns_uri) if dict_result is NULL: continue - extensions = (<_NamespaceRegistry>dict_result)._extensions + extensions = (<_FunctionNamespaceRegistry>dict_result)._extensions if extensions: - python.PyDict_SetItem(extension_dict, ns_utf, extensions) + python.PyDict_SetItem(extension_dict, ns_uri, extensions) return extension_dict cdef object _find_element_class(char* c_namespace_utf, char* c_element_name_utf): cdef python.PyObject* dict_result - cdef _NamespaceRegistry registry + cdef _ClassNamespaceRegistry registry if c_namespace_utf is not NULL: dict_result = python.PyDict_GetItemString( - __NAMESPACE_REGISTRIES, c_namespace_utf) + __CLASS_NAMESPACE_REGISTRIES, c_namespace_utf) else: dict_result = python.PyDict_GetItem( - __NAMESPACE_REGISTRIES, None) + __CLASS_NAMESPACE_REGISTRIES, None) if dict_result is NULL: return _Element - registry = <_NamespaceRegistry>dict_result + registry = <_ClassNamespaceRegistry>dict_result classes = registry._classes if c_element_name_utf is not NULL: Modified: lxml/branch/xsltext/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/branch/xsltext/src/lxml/tests/test_xslt.py (original) +++ 
lxml/branch/xsltext/src/lxml/tests/test_xslt.py Thu Mar 30 10:52:58 2006 @@ -308,7 +308,7 @@ ''') - def mytext(ctxt, values): + def mytext(_, values): return 'X' * len(values) namespace = etree.FunctionNamespace('testns') @@ -318,6 +318,78 @@ self.assertEquals(self._rootstring(result), 'X') + def test_extensions_error(self): + tree = self.parse('B') + style = self.parse('''\ + + +''') + + class LocalError(Exception): + pass + + def mytext(_, values): + raise LocalError + + namespace = etree.FunctionNamespace('testns') + namespace['mytext'] = mytext + + self.assertRaises(LocalError, tree.xslt, style) + + def test_xslt_extension_elements(self): + tree = self.parse('B') + style = self.parse('''\ + + + + +''') + + def myext(_, tree): + s = etree.Element("TEST") + s.append(tree) + return s + + namespace = etree.XSLTElementNamespace('testextns') + namespace['myext'] = myext + + result = tree.xslt(style) + self.assertEquals(self._rootstring(result), + 'B') + etree.XSLTElementNamespace('testextns').clear() + + def test_xslt_extension_elements_error(self): + tree = self.parse('B') + style = self.parse('''\ + + + + +''') + + class LocalError(Exception): + pass + + def myext(_, tree): + raise LocalError + + namespace = etree.XSLTElementNamespace('testextns') + namespace['myext'] = myext + + self.assertRaises(LocalError, tree.xslt, style) + etree.XSLTElementNamespace('testextns').clear() + def test_xslt_document_parse(self): # make sure document('') works from loaded files xslt = etree.XSLT(etree.parse(fileInTestDir("test-document.xslt"))) Modified: lxml/branch/xsltext/src/lxml/xslt.pxd ============================================================================== --- lxml/branch/xsltext/src/lxml/xslt.pxd (original) +++ lxml/branch/xsltext/src/lxml/xslt.pxd Thu Mar 30 10:52:58 2006 @@ -1,4 +1,4 @@ -from tree cimport xmlDoc +from tree cimport xmlDoc, xmlNode from xpath cimport xmlXPathContext, xmlXPathFunction cdef extern from "libxslt/xsltInternals.h": @@ -7,10 +7,20 @@ 
ctypedef struct xsltTransformContext: xmlXPathContext* xpathCtxt + xsltStylesheet* style + xmlDoc* output + xmlNode* insert + + ctypedef struct xsltElemPreComp: + pass cdef xsltStylesheet* xsltParseStylesheetDoc(xmlDoc* doc) cdef void xsltFreeStylesheet(xsltStylesheet* sheet) - + ctypedef void (*xsltTransformFunction)(xsltTransformContext* ctxt, + xmlNode* node, + xmlNode* instr, + xsltElemPreComp* comp) + #cdef extern from "libxslt/xslt.h": # pass @@ -19,6 +29,10 @@ char* name, char * URI, xmlXPathFunction function) + cdef int xsltRegisterExtElement(xsltTransformContext* ctxt, + char* name, + char * URI, + xsltTransformFunction function) cdef extern from "libxslt/transform.h": cdef xmlDoc* xsltApplyStylesheet(xsltStylesheet* style, xmlDoc* doc, @@ -36,7 +50,9 @@ int* doc_txt_len, xmlDoc* result, xsltStylesheet* style) - + cdef void xsltTransformError(xsltTransformContext* ctxt, + xsltStylesheet* style, + xmlNode* node, char* msg, ...) cdef void xsltSetGenericErrorFunc(void* ctxt, void (*handler)(void* ctxt, char* msg, ...)) cdef void xsltSetTransformErrorFunc(xsltTransformContext*, Modified: lxml/branch/xsltext/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/xsltext/src/lxml/xslt.pxi (original) +++ lxml/branch/xsltext/src/lxml/xslt.pxi Thu Mar 30 10:52:58 2006 @@ -36,8 +36,7 @@ cdef object _extensions cdef object _namespaces cdef object _registered_namespaces - cdef object _registered_extensions - cdef object _extension_functions + cdef object _registered_extension_functions cdef object _utf_refs # for exception handling and temporary reference keeping: cdef object _temp_elements @@ -66,8 +65,7 @@ self._extensions = extensions self._namespaces = namespaces self._registered_namespaces = [] - self._registered_extensions = [] - self._extension_functions = {} + self._registered_extension_functions = {} self._temp_elements = {} self._temp_docs = {} @@ -111,7 +109,6 @@ cdef _free_context(self): 
self._registered_namespaces = [] - self._registered_extensions = [] python.PyDict_Clear(self._utf_refs) self._doc = None if self._xpathCtxt is not NULL: @@ -141,6 +138,7 @@ xpathCtxt = self._xpathCtxt for prefix_utf in self._registered_namespaces: xpath.xmlXPathRegisterNs(xpathCtxt, prefix_utf, NULL) + del self._registered_namespaces[:] # extension functions (internal UTF-8 methods with leading '_') @@ -160,15 +158,15 @@ cdef _registerExtensionFunction(self, ns_uri_utf, name_utf, function): self._contextRegisterExtensionFunction(ns_uri_utf, name_utf) - self._extension_functions[(ns_uri_utf, name_utf)] = function - self._registered_extensions.append((ns_uri_utf, name_utf)) + self._registered_extension_functions[(ns_uri_utf, name_utf)] = function cdef _unregisterExtensionFunctions(self): - for ns_uri_utf, name_utf in self._registered_extensions: + for ns_uri_utf, name_utf in self._registered_extension_functions.iterkeys(): self._contextUnregisterExtensionFunction(ns_uri_utf, name_utf) + self._registered_extension_functions.clear() - def find_extension(self, ns_uri_utf, name_utf): - return self._extension_functions[(ns_uri_utf, name_utf)] + cdef find_extension(self, ns_uri_utf, name_utf): + return self._registered_extension_functions[(ns_uri_utf, name_utf)] # Python reference keeping during XPath function evaluation @@ -197,28 +195,58 @@ #print "Holding document:", element._doc._c_doc python.PyDict_SetItem(self._temp_docs, id(element._doc), element._doc) + # exception forwarding from extensions + + cdef void _store_exc_info(self): + self._exc_info = sys.exc_info() + + cdef int _raise(self, exception, message) except 1: + if self._exc_info is None: + if exception is None: + return 0 + else: + raise exception, message + else: + type, value, traceback = self._exc_info + raise type, value, traceback ################################################################################ # XSLT cdef class XSLTContext(BaseContext): + cdef object _extension_elements + cdef object 
_registered_extension_elements cdef xslt.xsltTransformContext* _xsltCtxt - def __init__(self, namespaces, extensions): + def __init__(self, namespaces, extensions, elements): self._xsltCtxt = NULL BaseContext.__init__(self, namespaces, extensions) + if not elements: + elements = {} + self._extension_elements = elements + self._registered_extension_elements = {} cdef register_context(self, xslt.xsltTransformContext* xsltCtxt, _Document doc): self._xsltCtxt = xsltCtxt self._set_xpath_context(xsltCtxt.xpathCtxt) self._register_context(doc, 0) xsltCtxt.xpathCtxt.userData = self + if self._namespaces: + elements = _find_extension_elements(self._namespaces.values()) + else: + elements = _find_all_extension_elements() + if elements: + self._registerExtensionElements(elements) + if self._extension_elements: + self.registerExtensionElements(self._extension_elements) cdef unregister_context(self): cdef xslt.xsltTransformContext* xsltCtxt + # don't care about extensions, etc. xsltCtxt = self._xsltCtxt if xsltCtxt is NULL: return self._unregister_context() + self._registered_extension_elements.clear() self._xsltCtxt = NULL cdef free_context(self): @@ -238,10 +266,33 @@ _xpathCallback) def _contextUnregisterExtensionFunction(self, ns_uri_utf, name_utf): - if ns_uri_utf is not None: - xslt.xsltRegisterExtFunction( - self._xsltCtxt, _cstr(name_utf), _cstr(ns_uri_utf), - _xpathCallback) + # don't care about unregistering, context will be discarded anyway + pass + + # extension element functions (internal UTF-8 methods with leading '_') + + def registerExtensionElements(self, extensions): + for ns_uri, extension in extensions.items(): + for name, function in extension.items(): + self.registerExtensionElement(ns_uri, name, function) + + def registerExtensionElement(self, ns_uri, name, function): + self._registerExtensionElement( + self._to_utf(ns_uri), self._to_utf(name), function) + + cdef _registerExtensionElements(self, extensions_utf): + for ns_uri_utf, extension in 
extensions_utf.items(): + for name_utf, function in extension.items(): + self._registerExtensionElement(ns_uri_utf, name_utf, function) + + cdef _registerExtensionElement(self, ns_uri_utf, name_utf, function): + xslt.xsltRegisterExtElement( + self._xsltCtxt, _cstr(name_utf), _cstr(ns_uri_utf), + _xsltCallback) + self._registered_extension_elements[(ns_uri_utf, name_utf)] = function + + cdef find_extension_element(self, ns_uri_utf, name_utf): + return self._registered_extension_elements[(ns_uri_utf, name_utf)] cdef class XSLT: @@ -250,7 +301,7 @@ cdef XSLTContext _context cdef xslt.xsltStylesheet* _c_style - def __init__(self, xslt_input, extensions=None): + def __init__(self, xslt_input, extensions=None, elements=None): # make a copy of the document as stylesheet needs to assume it # doesn't change cdef xslt.xsltStylesheet* c_style @@ -276,7 +327,7 @@ raise XSLTParseError, "Cannot parse style sheet" self._c_style = c_style - self._context = XSLTContext(None, extensions) + self._context = XSLTContext(None, extensions, elements) # XXX is it worthwile to use xsltPrecomputeStylesheet here? def __dealloc__(self): @@ -338,7 +389,8 @@ _destroyFakeDoc(input_doc._c_doc, c_doc) if c_result is NULL: - raise XSLTApplyError, "Error applying stylesheet" + self._context._raise(XSLTApplyError, + "Error applying stylesheet") result_doc = _documentFactory(c_result) return _xsltResultTreeFactory(result_doc, self) @@ -464,13 +516,11 @@ def __init__(self, namespaces, extensions, variables=None): self._context = XPathContext(namespaces, extensions, variables) - cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, _Document doc): - _exc_info = self._context._exc_info - if _exc_info is not None: - type, value, traceback = _exc_info - raise type, value, traceback + cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, + _Document doc): if xpathObj is NULL: - raise XPathSyntaxError, "Error in xpath expression." 
+ self._context._raise(XPathSyntaxError, + "Error in xpath expression.") try: result = _unwrapXPathObject(xpathObj, doc) except XPathResultError: @@ -750,7 +800,7 @@ else: uri = None - # get our evaluator + # get our context extensions = (rctxt.userData) # lookup up the extension function in the context @@ -774,6 +824,63 @@ xpath.xmlXPathErr( ctxt, xmlerror.XML_XPATH_EXPR_ERROR - xmlerror.XML_XPATH_EXPRESSION_OK) - extensions._exc_info = sys.exc_info() + extensions._store_exc_info() return xpath.valuePush(ctxt, obj) + +cdef void _xsltCallback(xslt.xsltTransformContext* ctxt, + xmlNode* c_tree_node, xmlNode* instr, + xslt.xsltElemPreComp* comp): + cdef xpath.xmlXPathContext* rctxt + cdef _Document doc + cdef _NodeBase insert_element + cdef XSLTContext extensions + cdef int remove_proxy + + # get our context + rctxt = ctxt.xpathCtxt + extensions = (rctxt.userData) + + # find name and namespace of called element + name = instr.name + if instr.ns is NULL or instr.ns.href is NULL: + uri = None + else: + uri = instr.ns.href + + # lookup up the extension function in the context + f = extensions.find_extension_element(uri, name) + + doc = _documentFactory(ctxt.output) + + insert_element = _elementFactory(doc, ctxt.insert) + current_tree = _elementFactory(doc, c_tree_node) + + try: + result = f(None, current_tree) + + if isinstance(result, _NodeBase): + insert_element.append(result) + elif python.PyString_Check(result) or PyUnicode_Check(result): + insert_element.text = result + elif python.PySequence_Check(result): + for node in result: + insert_element.append(node) + elif result is not None: + raise TypeError, "Invalid return value from extension element." + except Exception, e: + message = str(e) + xslt.xsltTransformError( + ctxt, ctxt.style, ctxt.insert, _cstr(message)) + extensions._store_exc_info() + + # prevent garbage collection of C structures in doc + doc._c_doc = NULL + #insert_element._c_node = NULL + + # FIXME: insert_element? current_tree? 
+ # if result_tree_node is in the result document it can be GCed, + # otherwise it won't be + #insert_element._c_node = NULL + #current_tree._c_node = NULL + #del insert_element, current_tree, doc From scoder at codespeak.net Thu Mar 30 11:55:47 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 30 11:55:49 2006 Subject: [Lxml-checkins] r25133 - in lxml/branch/xsltext/src/lxml: . tests Message-ID: <20060330095547.30B67100C4@code0.codespeak.net> Author: scoder Date: Thu Mar 30 11:55:46 2006 New Revision: 25133 Modified: lxml/branch/xsltext/src/lxml/tests/test_xslt.py lxml/branch/xsltext/src/lxml/xslt.pxi Log: fixed document references, new test case on accessing nodesets ; noticed general flaw regarding access to the content of the stylesheet element: need to compute it by hand?? Modified: lxml/branch/xsltext/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/branch/xsltext/src/lxml/tests/test_xslt.py (original) +++ lxml/branch/xsltext/src/lxml/tests/test_xslt.py Thu Mar 30 11:55:46 2006 @@ -365,6 +365,33 @@ 'B') etree.XSLTElementNamespace('testextns').clear() + def test_xslt_extension_elements_nodeset(self): + tree = self.parse('BC') + style = self.parse('''\ + + + + +''') + + def myext(_, tree): + print tree, len(tree) + s = etree.Element("TEST") + s.append(tree) + return s + + namespace = etree.XSLTElementNamespace('testextns') + namespace['myext'] = myext + + result = tree.xslt(style) + self.assertEquals(self._rootstring(result), + 'BC') + etree.XSLTElementNamespace('testextns').clear() + def test_xslt_extension_elements_error(self): tree = self.parse('B') style = self.parse('''\ Modified: lxml/branch/xsltext/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/xsltext/src/lxml/xslt.pxi (original) +++ lxml/branch/xsltext/src/lxml/xslt.pxi Thu Mar 30 11:55:46 2006 @@ -832,7 +832,9 @@ xmlNode* c_tree_node, xmlNode* 
instr, xslt.xsltElemPreComp* comp): cdef xpath.xmlXPathContext* rctxt - cdef _Document doc + cdef xmlNode* c_node + cdef _Document out_doc + cdef _Document tree_doc cdef _NodeBase insert_element cdef XSLTContext extensions cdef int remove_proxy @@ -848,13 +850,21 @@ else: uri = instr.ns.href - # lookup up the extension function in the context +## # build child tree +## c_node = instr.children +## while c_node is not NULL: +## if xslt.IS_XSLT_ELEM(c_node): +## pass +## c_node = c_node.next + + # lookup up the extension element in the context f = extensions.find_extension_element(uri, name) - doc = _documentFactory(ctxt.output) + out_doc = _documentFactory(ctxt.output) + tree_doc = _documentFactory(c_tree_node.doc) - insert_element = _elementFactory(doc, ctxt.insert) - current_tree = _elementFactory(doc, c_tree_node) + insert_element = _elementFactory(out_doc, ctxt.insert) + current_tree = _elementFactory(tree_doc, c_tree_node) try: result = f(None, current_tree) @@ -874,8 +884,9 @@ ctxt, ctxt.style, ctxt.insert, _cstr(message)) extensions._store_exc_info() - # prevent garbage collection of C structures in doc - doc._c_doc = NULL + # prevent garbage collection of document C structures + out_doc._c_doc = NULL + tree_doc._c_doc = NULL #insert_element._c_node = NULL # FIXME: insert_element? current_tree? 
From scoder at codespeak.net Thu Mar 30 19:44:49 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 30 19:44:51 2006 Subject: [Lxml-checkins] r25150 - lxml/trunk/src/lxml Message-ID: <20060330174449.F21FE100D1@code0.codespeak.net> Author: scoder Date: Thu Mar 30 19:44:48 2006 New Revision: 25150 Modified: lxml/trunk/src/lxml/etree.pyx Log: fix variable declaration in _Attrib._getValue() - use char* to avoid double conversion Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 30 19:44:48 2006 @@ -935,8 +935,8 @@ return result def values(self): - result = [] cdef xmlNode* c_node + result = [] c_node = (self._c_node.properties) while c_node is not NULL: if c_node.type == tree.XML_ATTRIBUTE_NODE: @@ -945,6 +945,7 @@ return result cdef object _getValue(self, xmlNode* c_node): + cdef char* value if c_node.ns is NULL or c_node.ns.href is NULL: value = tree.xmlGetNoNsProp(self._c_node, c_node.name) else: From scoder at codespeak.net Thu Mar 30 19:47:02 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 30 19:47:03 2006 Subject: [Lxml-checkins] r25151 - lxml/branch/lxml-0.9.x/src/lxml Message-ID: <20060330174702.3D996100D1@code0.codespeak.net> Author: scoder Date: Thu Mar 30 19:47:01 2006 New Revision: 25151 Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Log: merge from trunk: fix variable declaration in _Attrib._getValue() - use char* to avoid double conversion Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Thu Mar 30 19:47:01 2006 @@ -924,8 +924,8 @@ return result def values(self): - result = [] cdef xmlNode* c_node + result = [] c_node = (self._c_node.properties) while c_node is not NULL: 
if c_node.type == tree.XML_ATTRIBUTE_NODE: @@ -934,6 +934,7 @@ return result cdef object _getValue(self, xmlNode* c_node): + cdef char* value if c_node.ns is NULL or c_node.ns.href is NULL: value = tree.xmlGetNoNsProp(self._c_node, c_node.name) else: From scoder at codespeak.net Thu Mar 30 19:49:32 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 30 19:49:33 2006 Subject: [Lxml-checkins] r25152 - lxml/branch/xsltext/src/lxml Message-ID: <20060330174932.901F0100D1@code0.codespeak.net> Author: scoder Date: Thu Mar 30 19:49:31 2006 New Revision: 25152 Modified: lxml/branch/xsltext/src/lxml/etree.pyx Log: merged in fix from trunk Modified: lxml/branch/xsltext/src/lxml/etree.pyx ============================================================================== --- lxml/branch/xsltext/src/lxml/etree.pyx (original) +++ lxml/branch/xsltext/src/lxml/etree.pyx Thu Mar 30 19:49:31 2006 @@ -935,8 +935,8 @@ return result def values(self): - result = [] cdef xmlNode* c_node + result = [] c_node = (self._c_node.properties) while c_node is not NULL: if c_node.type == tree.XML_ATTRIBUTE_NODE: @@ -945,6 +945,7 @@ return result cdef object _getValue(self, xmlNode* c_node): + cdef char* value if c_node.ns is NULL or c_node.ns.href is NULL: value = tree.xmlGetNoNsProp(self._c_node, c_node.name) else: From scoder at codespeak.net Thu Mar 30 19:56:22 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 30 19:56:23 2006 Subject: [Lxml-checkins] r25153 - lxml/trunk/src/lxml Message-ID: <20060330175622.41DB3100D1@code0.codespeak.net> Author: scoder Date: Thu Mar 30 19:56:21 2006 New Revision: 25153 Modified: lxml/trunk/src/lxml/etree.pyx Log: factored out utility function for getting attribute value Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 30 19:56:21 2006 @@ -940,18 +940,10 @@ c_node = 
(self._c_node.properties) while c_node is not NULL: if c_node.type == tree.XML_ATTRIBUTE_NODE: - python.PyList_Append(result, self._getValue(c_node)) + python.PyList_Append( + result, _attributeValue(self._c_node, c_node)) c_node = c_node.next return result - - cdef object _getValue(self, xmlNode* c_node): - cdef char* value - if c_node.ns is NULL or c_node.ns.href is NULL: - value = tree.xmlGetNoNsProp(self._c_node, c_node.name) - else: - value = tree.xmlGetNsProp( - self._c_node, c_node.name, c_node.ns.href) - return funicode(value) def items(self): result = [] @@ -961,7 +953,7 @@ if c_node.type == tree.XML_ATTRIBUTE_NODE: python.PyList_Append(result, ( _namespacedName(c_node), - self._getValue(c_node) + _attributeValue(self._c_node, c_node) )) c_node = c_node.next return result @@ -1359,6 +1351,15 @@ c_root.children = c_root.last = c_root._private = NULL tree.xmlFreeDoc(c_doc) +cdef object _attributeValue(xmlNode* c_element, xmlNode* c_attrib_node): + cdef char* value + if c_attrib_node.ns is NULL or c_attrib_node.ns.href is NULL: + value = tree.xmlGetNoNsProp(c_element, c_attrib_node.name) + else: + value = tree.xmlGetNsProp(c_element, c_attrib_node.name, + c_attrib_node.ns.href) + return funicode(value) + cdef _dumpToFile(f, xmlDoc* c_doc, xmlNode* c_node): cdef python.PyObject* o From scoder at codespeak.net Thu Mar 30 20:02:48 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 30 20:02:49 2006 Subject: [Lxml-checkins] r25154 - lxml/branch/lxml-0.9.x/src/lxml Message-ID: <20060330180248.BC794100D1@code0.codespeak.net> Author: scoder Date: Thu Mar 30 20:02:47 2006 New Revision: 25154 Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Log: merge from trunk: factored out utility function for getting attribute value Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/etree.pyx (original) +++ 
lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Thu Mar 30 20:02:47 2006 @@ -929,18 +929,10 @@ c_node = (self._c_node.properties) while c_node is not NULL: if c_node.type == tree.XML_ATTRIBUTE_NODE: - result.append(self._getValue(c_node)) + python.PyList_Append( + result, _attributeValue(self._c_node, c_node)) c_node = c_node.next return result - - cdef object _getValue(self, xmlNode* c_node): - cdef char* value - if c_node.ns is NULL or c_node.ns.href is NULL: - value = tree.xmlGetNoNsProp(self._c_node, c_node.name) - else: - value = tree.xmlGetNsProp( - self._c_node, c_node.name, c_node.ns.href) - return funicode(value) def items(self): result = [] @@ -948,9 +940,9 @@ c_node = (self._c_node.properties) while c_node is not NULL: if c_node.type == tree.XML_ATTRIBUTE_NODE: - result.append(( + python.PyList_Append(result, ( _namespacedName(c_node), - self._getValue(c_node) + _attributeValue(self._c_node, c_node) )) c_node = c_node.next return result @@ -1344,6 +1336,15 @@ c_root.children = c_root.last = c_root._private = NULL tree.xmlFreeDoc(c_doc) +cdef object _attributeValue(xmlNode* c_element, xmlNode* c_attrib_node): + cdef char* value + if c_attrib_node.ns is NULL or c_attrib_node.ns.href is NULL: + value = tree.xmlGetNoNsProp(c_element, c_attrib_node.name) + else: + value = tree.xmlGetNsProp(c_element, c_attrib_node.name, + c_attrib_node.ns.href) + return funicode(value) + cdef _dumpToFile(f, xmlDoc* c_doc, xmlNode* c_node): cdef python.PyObject* o From scoder at codespeak.net Thu Mar 30 20:08:13 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 30 20:08:15 2006 Subject: [Lxml-checkins] r25155 - lxml/branch/xsltext/src/lxml Message-ID: <20060330180813.99F77100D1@code0.codespeak.net> Author: scoder Date: Thu Mar 30 20:08:12 2006 New Revision: 25155 Modified: lxml/branch/xsltext/src/lxml/etree.pyx Log: merge from trunk: factored out utility function for getting attribute value Modified: lxml/branch/xsltext/src/lxml/etree.pyx 
============================================================================== --- lxml/branch/xsltext/src/lxml/etree.pyx (original) +++ lxml/branch/xsltext/src/lxml/etree.pyx Thu Mar 30 20:08:12 2006 @@ -940,18 +940,10 @@ c_node = (self._c_node.properties) while c_node is not NULL: if c_node.type == tree.XML_ATTRIBUTE_NODE: - python.PyList_Append(result, self._getValue(c_node)) + python.PyList_Append( + result, _attributeValue(self._c_node, c_node)) c_node = c_node.next return result - - cdef object _getValue(self, xmlNode* c_node): - cdef char* value - if c_node.ns is NULL or c_node.ns.href is NULL: - value = tree.xmlGetNoNsProp(self._c_node, c_node.name) - else: - value = tree.xmlGetNsProp( - self._c_node, c_node.name, c_node.ns.href) - return funicode(value) def items(self): result = [] @@ -961,7 +953,7 @@ if c_node.type == tree.XML_ATTRIBUTE_NODE: python.PyList_Append(result, ( _namespacedName(c_node), - self._getValue(c_node) + _attributeValue(self._c_node, c_node) )) c_node = c_node.next return result @@ -1359,6 +1351,15 @@ c_root.children = c_root.last = c_root._private = NULL tree.xmlFreeDoc(c_doc) +cdef object _attributeValue(xmlNode* c_element, xmlNode* c_attrib_node): + cdef char* value + if c_attrib_node.ns is NULL or c_attrib_node.ns.href is NULL: + value = tree.xmlGetNoNsProp(c_element, c_attrib_node.name) + else: + value = tree.xmlGetNsProp(c_element, c_attrib_node.name, + c_attrib_node.ns.href) + return funicode(value) + cdef _dumpToFile(f, xmlDoc* c_doc, xmlNode* c_node): cdef python.PyObject* o From scoder at codespeak.net Thu Mar 30 20:16:22 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 30 20:16:23 2006 Subject: [Lxml-checkins] r25156 - lxml/branch/lxml-0.9.x Message-ID: <20060330181622.DBE46100D1@code0.codespeak.net> Author: scoder Date: Thu Mar 30 20:16:22 2006 New Revision: 25156 Modified: lxml/branch/lxml-0.9.x/CHANGES.txt Log: release date for 0.9.1 Modified: lxml/branch/lxml-0.9.x/CHANGES.txt 
============================================================================== --- lxml/branch/lxml-0.9.x/CHANGES.txt (original) +++ lxml/branch/lxml-0.9.x/CHANGES.txt Thu Mar 30 20:16:22 2006 @@ -1,8 +1,8 @@ lxml changelog ============== -0.9.1 -===== +0.9.1 (2006-03-30) +================== Features added -------------- From scoder at codespeak.net Thu Mar 30 20:30:57 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 30 20:30:58 2006 Subject: [Lxml-checkins] r25157 - lxml/branch/lxml-0.9.x/doc Message-ID: <20060330183057.5FB05100D1@code0.codespeak.net> Author: scoder Date: Thu Mar 30 20:30:56 2006 New Revision: 25157 Modified: lxml/branch/lxml-0.9.x/doc/main.txt Log: 0.9.1 links Modified: lxml/branch/lxml-0.9.x/doc/main.txt ============================================================================== --- lxml/branch/lxml-0.9.x/doc/main.txt (original) +++ lxml/branch/lxml-0.9.x/doc/main.txt Thu Mar 30 20:30:56 2006 @@ -16,6 +16,8 @@ News ---- +* 2006-03-30: `lxml 0.9.1`_ released (`changes for 0.9.1`_) + * 2006-03-20: `lxml 0.9`_ released (`changes for 0.9`_) * 2005-11-03: `lxml 0.8`_ released (`changes for 0.8`_) @@ -28,6 +30,8 @@ * 2005-04-08: `lxml 0.5`_ released! +.. _`lxml 0.9.1`: lxml-0.9.1.tgz + .. _`lxml 0.9`: lxml-0.9.tgz .. _`lxml 0.8`: lxml-0.8.tgz @@ -40,6 +44,8 @@ .. _`lxml 0.5`: lxml-0.5.tgz +.. _`CHANGES for 0.9.1`: changes-0.9.1.html + .. _`CHANGES for 0.9`: changes-0.9.html .. _`CHANGES for 0.8`: changes-0.8.html @@ -101,6 +107,8 @@ Download -------- +* `lxml 0.9.1`_ (2006-03-30) + * `lxml 0.9`_ (2006-03-20) * `lxml 0.8`_ (2005-11-03) From scoder at codespeak.net Thu Mar 30 20:32:51 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 30 20:32:52 2006 Subject: [Lxml-checkins] r25158 - in lxml/trunk: . 
doc Message-ID: <20060330183251.AE17B100D1@code0.codespeak.net> Author: scoder Date: Thu Mar 30 20:32:50 2006 New Revision: 25158 Modified: lxml/trunk/CHANGES.txt lxml/trunk/doc/main.txt Log: 0.9.1 doc merges from branch Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Mar 30 20:32:50 2006 @@ -1,8 +1,8 @@ lxml changelog ============== -0.9.1 -===== +0.9.1 (2006-03-30) +================== Features added -------------- Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Thu Mar 30 20:32:50 2006 @@ -16,6 +16,8 @@ News ---- +* 2006-03-30: `lxml 0.9.1`_ released (`changes for 0.9.1`_) + * 2006-03-20: `lxml 0.9`_ released (`changes for 0.9`_) * 2005-11-03: `lxml 0.8`_ released (`changes for 0.8`_) @@ -28,6 +30,8 @@ * 2005-04-08: `lxml 0.5`_ released! +.. _`lxml 0.9.1`: lxml-0.9.1.tgz + .. _`lxml 0.9`: lxml-0.9.tgz .. _`lxml 0.8`: lxml-0.8.tgz @@ -40,6 +44,8 @@ .. _`lxml 0.5`: lxml-0.5.tgz +.. _`CHANGES for 0.9.1`: changes-0.9.1.html + .. _`CHANGES for 0.9`: changes-0.9.html .. 
_`CHANGES for 0.8`: changes-0.8.html @@ -101,6 +107,8 @@ Download -------- +* `lxml 0.9.1`_ (2006-03-30) + * `lxml 0.9`_ (2006-03-20) * `lxml 0.8`_ (2005-11-03) From scoder at codespeak.net Thu Mar 30 21:02:31 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 30 21:02:31 2006 Subject: [Lxml-checkins] r25160 - lxml/trunk Message-ID: <20060330190231.0F19D100D0@code0.codespeak.net> Author: scoder Date: Thu Mar 30 21:02:30 2006 New Revision: 25160 Modified: lxml/trunk/setup.py Log: removed zip_safe flag - doesn't really work for C extensions Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Thu Mar 30 21:02:30 2006 @@ -11,7 +11,6 @@ try: from setuptools import setup from setuptools.extension import Extension - setup_args['zip_safe'] = True except ImportError: from distutils.core import setup from distutils.extension import Extension From scoder at codespeak.net Thu Mar 30 21:25:16 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 30 21:25:18 2006 Subject: [Lxml-checkins] r25162 - lxml/trunk Message-ID: <20060330192516.CFE97100D5@code0.codespeak.net> Author: scoder Date: Thu Mar 30 21:25:15 2006 New Revision: 25162 Modified: lxml/trunk/setup.py Log: set zip_safe flag to False to prevent library duplication Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Thu Mar 30 21:25:15 2006 @@ -11,6 +11,7 @@ try: from setuptools import setup from setuptools.extension import Extension + setup_args["zip_safe"] = False except ImportError: from distutils.core import setup from distutils.extension import Extension From scoder at codespeak.net Thu Mar 30 21:27:36 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 30 21:27:37 2006 Subject: [Lxml-checkins] r25163 - lxml/branch/lxml-0.9.x 
Message-ID: <20060330192736.40A6B100D5@code0.codespeak.net> Author: scoder Date: Thu Mar 30 21:27:35 2006 New Revision: 25163 Modified: lxml/branch/lxml-0.9.x/CHANGES.txt lxml/branch/lxml-0.9.x/setup.py Log: set zip_safe flag to False to prevent library duplication Modified: lxml/branch/lxml-0.9.x/CHANGES.txt ============================================================================== --- lxml/branch/lxml-0.9.x/CHANGES.txt (original) +++ lxml/branch/lxml-0.9.x/CHANGES.txt Thu Mar 30 21:27:35 2006 @@ -14,8 +14,6 @@ (startElement, endElement) and defaults to empty attributes (keyword argument) -* zip_safe flag allows setuptools to install lxml as zipped egg - * Speedup for repeatedly accessing element tag names * Minor API performance improvements Modified: lxml/branch/lxml-0.9.x/setup.py ============================================================================== --- lxml/branch/lxml-0.9.x/setup.py (original) +++ lxml/branch/lxml-0.9.x/setup.py Thu Mar 30 21:27:35 2006 @@ -11,7 +11,7 @@ try: from setuptools import setup from setuptools.extension import Extension - setup_args['zip_safe'] = True + setup_args['zip_safe'] = False except ImportError: from distutils.core import setup from distutils.extension import Extension From scoder at codespeak.net Thu Mar 30 21:32:12 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Mar 30 21:32:14 2006 Subject: [Lxml-checkins] r25166 - lxml/trunk Message-ID: <20060330193212.D86C8100D5@code0.codespeak.net> Author: scoder Date: Thu Mar 30 21:32:11 2006 New Revision: 25166 Modified: lxml/trunk/CHANGES.txt lxml/trunk/setup.py Log: merged in zip_safe doc update from 0.9.x branch Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Mar 30 21:32:11 2006 @@ -14,8 +14,6 @@ (startElement, endElement) and defaults to empty attributes (keyword argument) -* zip_safe flag allows setuptools to 
install lxml as zipped egg - * Speedup for repeatedly accessing element tag names * Minor API performance improvements Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Thu Mar 30 21:32:11 2006 @@ -11,7 +11,7 @@ try: from setuptools import setup from setuptools.extension import Extension - setup_args["zip_safe"] = False + setup_args['zip_safe'] = False except ImportError: from distutils.core import setup from distutils.extension import Extension From scoder at codespeak.net Fri Mar 31 10:21:36 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Mar 31 10:21:39 2006 Subject: [Lxml-checkins] r25179 - lxml/branch/xsltext/src/lxml Message-ID: <20060331082136.8B75D100D5@code0.codespeak.net> Author: scoder Date: Fri Mar 31 10:21:34 2006 New Revision: 25179 Modified: lxml/branch/xsltext/src/lxml/xslt.pxi Log: provide attribute dictionary and tag name for current XSLT element to extension element function Modified: lxml/branch/xsltext/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/xsltext/src/lxml/xslt.pxi (original) +++ lxml/branch/xsltext/src/lxml/xslt.pxi Fri Mar 31 10:21:34 2006 @@ -836,12 +836,13 @@ cdef _Document out_doc cdef _Document tree_doc cdef _NodeBase insert_element + cdef _NodeBase context_tree cdef XSLTContext extensions cdef int remove_proxy # get our context rctxt = ctxt.xpathCtxt - extensions = (rctxt.userData) + extensions = rctxt.userData # find name and namespace of called element name = instr.name @@ -864,10 +865,13 @@ tree_doc = _documentFactory(c_tree_node.doc) insert_element = _elementFactory(out_doc, ctxt.insert) - current_tree = _elementFactory(tree_doc, c_tree_node) + context_tree = _elementFactory(tree_doc, c_tree_node) + + instr_attribs = _attributeDict(instr) + instr_tag = _namespacedName(instr) try: - result = f(None, current_tree) + result = 
f(instr_tag, instr_attribs, context_tree) if isinstance(result, _NodeBase): insert_element.append(result) @@ -895,3 +899,21 @@ #insert_element._c_node = NULL #current_tree._c_node = NULL #del insert_element, current_tree, doc + +cdef object _attributeDict(xmlNode* c_element): + cdef xmlNode* c_node + result = {} + c_node = (c_element.properties) + while c_node is not NULL: + if c_node.type == tree.XML_ATTRIBUTE_NODE: + python.PyDict_SetItem( + result, _namespacedName(c_node), + _attributeValue(c_element, c_node) + ) + c_node = c_node.next + return result + +cdef xmlNode* _applyTemplate(xslt.xsltTransformContext* ctxt, xmlNode* c_node): + cdef xslt.xsltStylesheet* stylesheet + stylesheet = ctxt.style + From faassen at codespeak.net Fri Mar 31 17:19:08 2006 From: faassen at codespeak.net (faassen@codespeak.net) Date: Fri Mar 31 17:19:09 2006 Subject: [Lxml-checkins] r25189 - lxml/branch/lxml-0.9.x/doc Message-ID: <20060331151908.D050D100D6@code0.codespeak.net> Author: faassen Date: Fri Mar 31 17:19:07 2006 New Revision: 25189 Modified: lxml/branch/lxml-0.9.x/doc/sax.txt Log: Restructured Text fix. Modified: lxml/branch/lxml-0.9.x/doc/sax.txt ============================================================================== --- lxml/branch/lxml-0.9.x/doc/sax.txt (original) +++ lxml/branch/lxml-0.9.x/doc/sax.txt Fri Mar 31 17:19:07 2006 @@ -8,7 +8,7 @@ interfacing lxml with code that uses the Python core SAX facilities. Producing SAX events from an ElementTree or Element --------------------------------------------------- +--------------------------------------------------- Let's make a tree we can generate SAX events for:: From faassen at codespeak.net Fri Mar 31 17:27:43 2006 From: faassen at codespeak.net (faassen@codespeak.net) Date: Fri Mar 31 17:27:44 2006 Subject: [Lxml-checkins] r25190 - lxml/www Message-ID: <20060331152743.EAB7B100D6@code0.codespeak.net> Author: faassen Date: Fri Mar 31 17:27:42 2006 New Revision: 25190 Modified: lxml/www/publish.py Log: Updated. 
Modified: lxml/www/publish.py ============================================================================== --- lxml/www/publish.py (original) +++ lxml/www/publish.py Fri Mar 31 17:27:42 2006 @@ -5,7 +5,7 @@ os.mkdir(dirname) stylesheet_url = 'http://codespeak.net/lxml/style.css' for name in ['main.txt', 'intro.txt', 'api.txt', 'compatibility.txt', - 'xpath.txt']: + 'extensions.txt', 'namespace_extensions.txt', 'sax.txt']: path = os.path.join(lxml_path, 'doc', name) outname = os.path.splitext(name)[0] + '.html' outpath = os.path.join(dirname, outname) From faassen at codespeak.net Fri Mar 31 17:27:58 2006 From: faassen at codespeak.net (faassen@codespeak.net) Date: Fri Mar 31 17:27:59 2006 Subject: [Lxml-checkins] r25191 - lxml/branch/lxml-0.9.x/doc Message-ID: <20060331152758.E150E100D6@code0.codespeak.net> Author: faassen Date: Fri Mar 31 17:27:57 2006 New Revision: 25191 Modified: lxml/branch/lxml-0.9.x/doc/main.txt Log: Minor text tweak to point out cheeseshop on main page. Modified: lxml/branch/lxml-0.9.x/doc/main.txt ============================================================================== --- lxml/branch/lxml-0.9.x/doc/main.txt (original) +++ lxml/branch/lxml-0.9.x/doc/main.txt Fri Mar 31 17:27:57 2006 @@ -107,6 +107,8 @@ Download -------- +.. _`lxml at the Python cheeseshop`: http://cheeseshop.python.org/pypi/lxml/ + * `lxml 0.9.1`_ (2006-03-30) * `lxml 0.9`_ (2006-03-20) @@ -121,6 +123,10 @@ * `lxml 0.5`_ (2005-04-08) +Instead of downloading the source here, you can also find `lxml at the +Python cheeseshop`_ in source, egg and installer form for various +platforms. + See also the `installation instructions`_. .. 
_`installation instructions`: installation.html From faassen at codespeak.net Fri Mar 31 17:31:03 2006 From: faassen at codespeak.net (faassen@codespeak.net) Date: Fri Mar 31 17:31:05 2006 Subject: [Lxml-checkins] r25192 - lxml/trunk/doc Message-ID: <20060331153103.4A0E2100D6@code0.codespeak.net> Author: faassen Date: Fri Mar 31 17:31:02 2006 New Revision: 25192 Modified: lxml/trunk/doc/sax.txt Log: Restructured text fix. Modified: lxml/trunk/doc/sax.txt ============================================================================== --- lxml/trunk/doc/sax.txt (original) +++ lxml/trunk/doc/sax.txt Fri Mar 31 17:31:02 2006 @@ -8,7 +8,7 @@ interfacing lxml with code that uses the Python core SAX facilities. Producing SAX events from an ElementTree or Element --------------------------------------------------- +--------------------------------------------------- Let's make a tree we can generate SAX events for:: From faassen at codespeak.net Fri Mar 31 17:31:16 2006 From: faassen at codespeak.net (faassen@codespeak.net) Date: Fri Mar 31 17:31:17 2006 Subject: [Lxml-checkins] r25193 - lxml/trunk/doc Message-ID: <20060331153116.B29D7100D6@code0.codespeak.net> Author: faassen Date: Fri Mar 31 17:31:15 2006 New Revision: 25193 Modified: lxml/trunk/doc/main.txt Log: Add cheeseshop note on main page. Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Fri Mar 31 17:31:15 2006 @@ -121,6 +121,12 @@ * `lxml 0.5`_ (2005-04-08) +Instead of downloading the source here, you can also find `lxml at the +Python cheeseshop`_ in source, egg and installer form for various +platforms. + +.. _`lxml at the Python cheeseshop`: http://cheeseshop.python.org/pypi/lxml/ + See also the `installation instructions`_. .. 
_`installation instructions`: installation.html From scoder at codespeak.net Fri Mar 10 14:00:15 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 10 Mar 2006 13:00:15 -0000 Subject: [Lxml-checkins] r24225 - in lxml/pyrex: . Demos Doc Pyrex Pyrex/Compiler Pyrex/Distutils Pyrex/Mac Pyrex/Plex bin build build/bdist.linux-i686 build/bdist.linux-i686/rpm build/bdist.linux-i686/rpm/BUILD build/bdist.linux-i686/rpm/RPMS build/bdist.linux-i686/rpm/RPMS/noarch build/bdist.linux-i686/rpm/SOURCES build/bdist.linux-i686/rpm/SPECS build/bdist.linux-i686/rpm/SRPMS build/bdist.linux-x86_64 build/bdist.linux-x86_64/rpm build/bdist.linux-x86_64/rpm/BUILD build/bdist.linux-x86_64/rpm/RPMS build/bdist.linux-x86_64/rpm/RPMS/noarch build/bdist.linux-x86_64/rpm/SOURCES build/bdist.linux-x86_64/rpm/SPECS build/bdist.linux-x86_64/rpm/SRPMS dist Message-ID: <20060310130009.BC469100D4@code0.codespeak.net> Author: scoder Date: Fri Mar 10 13:58:37 2006 New Revision: 24225 Added: lxml/pyrex/CHANGES.txt (contents, props changed) lxml/pyrex/Demos/ lxml/pyrex/Demos/Makefile (contents, props changed) lxml/pyrex/Demos/Makefile.nodistutils (contents, props changed) lxml/pyrex/Demos/Setup.py (contents, props changed) lxml/pyrex/Demos/numeric_demo.pyx (contents, props changed) lxml/pyrex/Demos/primes.pyx (contents, props changed) lxml/pyrex/Demos/pyprimes.py (contents, props changed) lxml/pyrex/Demos/run_numeric_demo.py (contents, props changed) lxml/pyrex/Demos/run_primes.py (contents, props changed) lxml/pyrex/Demos/run_spam.py (contents, props changed) lxml/pyrex/Demos/spam.pyx (contents, props changed) lxml/pyrex/Doc/ lxml/pyrex/Doc/About.html (contents, props changed) lxml/pyrex/Doc/FAQ.html (contents, props changed) lxml/pyrex/Doc/extension_types.html (contents, props changed) lxml/pyrex/Doc/index.html (contents, props changed) lxml/pyrex/Doc/overview.html (contents, props changed) lxml/pyrex/Doc/primes.c (contents, props changed) lxml/pyrex/Doc/sharing.html (contents, 
props changed) lxml/pyrex/Doc/special_methods.html (contents, props changed) lxml/pyrex/INSTALL.txt (contents, props changed) lxml/pyrex/MANIFEST lxml/pyrex/MANIFEST.in (contents, props changed) lxml/pyrex/PKG-INFO lxml/pyrex/Pyrex/ lxml/pyrex/Pyrex/Compiler/ lxml/pyrex/Pyrex/Compiler/CmdLine.py (contents, props changed) lxml/pyrex/Pyrex/Compiler/Code.py (contents, props changed) lxml/pyrex/Pyrex/Compiler/Code.pyc (contents, props changed) lxml/pyrex/Pyrex/Compiler/DebugFlags.py (contents, props changed) lxml/pyrex/Pyrex/Compiler/DebugFlags.pyc (contents, props changed) lxml/pyrex/Pyrex/Compiler/Errors.py (contents, props changed) lxml/pyrex/Pyrex/Compiler/Errors.pyc (contents, props changed) lxml/pyrex/Pyrex/Compiler/ExprNodes.py (contents, props changed) lxml/pyrex/Pyrex/Compiler/ExprNodes.pyc (contents, props changed) lxml/pyrex/Pyrex/Compiler/Lexicon.pickle (contents, props changed) lxml/pyrex/Pyrex/Compiler/Lexicon.py (contents, props changed) lxml/pyrex/Pyrex/Compiler/Lexicon.pyc (contents, props changed) lxml/pyrex/Pyrex/Compiler/Main.py (contents, props changed) lxml/pyrex/Pyrex/Compiler/Main.pyc (contents, props changed) lxml/pyrex/Pyrex/Compiler/Naming.py (contents, props changed) lxml/pyrex/Pyrex/Compiler/Naming.pyc (contents, props changed) lxml/pyrex/Pyrex/Compiler/Nodes.py (contents, props changed) lxml/pyrex/Pyrex/Compiler/Nodes.pyc (contents, props changed) lxml/pyrex/Pyrex/Compiler/Options.py (contents, props changed) lxml/pyrex/Pyrex/Compiler/Options.pyc (contents, props changed) lxml/pyrex/Pyrex/Compiler/Parsing.py (contents, props changed) lxml/pyrex/Pyrex/Compiler/Parsing.pyc (contents, props changed) lxml/pyrex/Pyrex/Compiler/PyrexTypes.py (contents, props changed) lxml/pyrex/Pyrex/Compiler/PyrexTypes.pyc (contents, props changed) lxml/pyrex/Pyrex/Compiler/Scanning.py (contents, props changed) lxml/pyrex/Pyrex/Compiler/Scanning.pyc (contents, props changed) lxml/pyrex/Pyrex/Compiler/Symtab.py (contents, props changed) 
lxml/pyrex/Pyrex/Compiler/Symtab.pyc (contents, props changed) lxml/pyrex/Pyrex/Compiler/TypeSlots.py (contents, props changed) lxml/pyrex/Pyrex/Compiler/TypeSlots.pyc (contents, props changed) lxml/pyrex/Pyrex/Compiler/Version.py (contents, props changed) lxml/pyrex/Pyrex/Compiler/Version.pyc (contents, props changed) lxml/pyrex/Pyrex/Compiler/__init__.py (contents, props changed) lxml/pyrex/Pyrex/Compiler/__init__.pyc (contents, props changed) lxml/pyrex/Pyrex/Debugging.py (contents, props changed) lxml/pyrex/Pyrex/Debugging.pyc (contents, props changed) lxml/pyrex/Pyrex/Distutils/ lxml/pyrex/Pyrex/Distutils/__init__.py (contents, props changed) lxml/pyrex/Pyrex/Distutils/build_ext.py (contents, props changed) lxml/pyrex/Pyrex/Mac/ lxml/pyrex/Pyrex/Mac/DarwinSystem.py (contents, props changed) lxml/pyrex/Pyrex/Mac/Finder_Std_Suite.py (contents, props changed) lxml/pyrex/Pyrex/Mac/MPW_Misc_Suite.py (contents, props changed) lxml/pyrex/Pyrex/Mac/MacSystem.py (contents, props changed) lxml/pyrex/Pyrex/Mac/MacUtils.py (contents, props changed) lxml/pyrex/Pyrex/Mac/PS_Misc_Suite.py (contents, props changed) lxml/pyrex/Pyrex/Mac/PyServerMain.py (contents, props changed) lxml/pyrex/Pyrex/Mac/TS_Misc_Suite.py (contents, props changed) lxml/pyrex/Pyrex/Mac/__init__.py (contents, props changed) lxml/pyrex/Pyrex/Plex/ lxml/pyrex/Pyrex/Plex/Actions.py (contents, props changed) lxml/pyrex/Pyrex/Plex/Actions.pyc (contents, props changed) lxml/pyrex/Pyrex/Plex/DFA.py (contents, props changed) lxml/pyrex/Pyrex/Plex/DFA.pyc (contents, props changed) lxml/pyrex/Pyrex/Plex/Errors.py (contents, props changed) lxml/pyrex/Pyrex/Plex/Errors.pyc (contents, props changed) lxml/pyrex/Pyrex/Plex/Lexicons.py (contents, props changed) lxml/pyrex/Pyrex/Plex/Lexicons.pyc (contents, props changed) lxml/pyrex/Pyrex/Plex/Machines.py (contents, props changed) lxml/pyrex/Pyrex/Plex/Machines.pyc (contents, props changed) lxml/pyrex/Pyrex/Plex/Regexps.py (contents, props changed) 
lxml/pyrex/Pyrex/Plex/Regexps.pyc (contents, props changed) lxml/pyrex/Pyrex/Plex/Scanners.py (contents, props changed) lxml/pyrex/Pyrex/Plex/Scanners.pyc (contents, props changed) lxml/pyrex/Pyrex/Plex/Timing.py (contents, props changed) lxml/pyrex/Pyrex/Plex/Traditional.py (contents, props changed) lxml/pyrex/Pyrex/Plex/Transitions.py (contents, props changed) lxml/pyrex/Pyrex/Plex/Transitions.pyc (contents, props changed) lxml/pyrex/Pyrex/Plex/__init__.py (contents, props changed) lxml/pyrex/Pyrex/Plex/__init__.pyc (contents, props changed) lxml/pyrex/Pyrex/Plex/test_tm.py (contents, props changed) lxml/pyrex/Pyrex/Utils.py (contents, props changed) lxml/pyrex/Pyrex/Utils.pyc (contents, props changed) lxml/pyrex/Pyrex/__init__.py (contents, props changed) lxml/pyrex/Pyrex/__init__.pyc (contents, props changed) lxml/pyrex/README.txt (contents, props changed) lxml/pyrex/ToDo.txt (contents, props changed) lxml/pyrex/USAGE.txt (contents, props changed) lxml/pyrex/bin/ lxml/pyrex/bin/pyrexc (contents, props changed) lxml/pyrex/build/ lxml/pyrex/build/bdist.linux-i686/ lxml/pyrex/build/bdist.linux-i686/rpm/ lxml/pyrex/build/bdist.linux-i686/rpm/BUILD/ lxml/pyrex/build/bdist.linux-i686/rpm/RPMS/ lxml/pyrex/build/bdist.linux-i686/rpm/RPMS/noarch/ lxml/pyrex/build/bdist.linux-i686/rpm/SOURCES/ lxml/pyrex/build/bdist.linux-i686/rpm/SOURCES/pyrex-0.9.3.1.tar.gz (contents, props changed) lxml/pyrex/build/bdist.linux-i686/rpm/SPECS/ lxml/pyrex/build/bdist.linux-i686/rpm/SPECS/pyrex.spec lxml/pyrex/build/bdist.linux-i686/rpm/SRPMS/ lxml/pyrex/build/bdist.linux-x86_64/ lxml/pyrex/build/bdist.linux-x86_64/rpm/ lxml/pyrex/build/bdist.linux-x86_64/rpm/BUILD/ lxml/pyrex/build/bdist.linux-x86_64/rpm/RPMS/ lxml/pyrex/build/bdist.linux-x86_64/rpm/RPMS/noarch/ lxml/pyrex/build/bdist.linux-x86_64/rpm/SOURCES/ lxml/pyrex/build/bdist.linux-x86_64/rpm/SOURCES/Pyrex-0.9.3.1.tar.gz (contents, props changed) lxml/pyrex/build/bdist.linux-x86_64/rpm/SOURCES/pyrex-0.9.3.1.tar.gz (contents, 
props changed) lxml/pyrex/build/bdist.linux-x86_64/rpm/SPECS/ lxml/pyrex/build/bdist.linux-x86_64/rpm/SPECS/Pyrex.spec lxml/pyrex/build/bdist.linux-x86_64/rpm/SPECS/pyrex.spec lxml/pyrex/build/bdist.linux-x86_64/rpm/SRPMS/ lxml/pyrex/dist/ lxml/pyrex/dist/Pyrex-0.9.3.1-1.noarch.rpm (contents, props changed) lxml/pyrex/dist/Pyrex-0.9.3.1-1.src.rpm (contents, props changed) lxml/pyrex/dist/Pyrex-0.9.3.1.tar.gz (contents, props changed) lxml/pyrex/pyrexc.py (contents, props changed) lxml/pyrex/setup.py (contents, props changed) Log: import of patched Pyrex, version 0.9.3.1 Added: lxml/pyrex/CHANGES.txt ============================================================================== --- (empty file) +++ lxml/pyrex/CHANGES.txt Fri Mar 10 13:58:37 2006 @@ -0,0 +1,1069 @@ +0.9.3.1 +------- + +Bug fixes: + + - Fix generated code to compile with GCC 4.0 (remove usage of the + cast-as-lvalue extension). + ["Jeremy Katz" ] + + - Fix interoperability with Python 2.4. + ["Bob Ippolito" ] + +0.9.3 +----- + +Enhancements: + + - Types defined with a ctypedef in a 'cdef extern from' block + are now referred to by the typedef name in generated C code, + so it is no longer necessary to match the type in the C + header file exactly. + + - Conversion to/from unsigned long now done with + PyLong_AsUnsignedLong and PyLong_FromUnsignedLong. + [Dug Song ] + + - A struct, union or enum definition in a 'cdef extern from' + block may now be left empty (using 'pass'). This can be useful + if you need to declare a variable of that type, but don't need + to refer to any of its members. + + - More flexible about ordering of qualifiers such as 'long' and + 'unsigned'. + ["John (J5) Palmieri" ] + + +Bug fixes: + + - Non-interned string literals used in a Python class + definition did not work. + [Atsuo Ishimoto ] + [Andreas Kostyrka ] + + - Return types of the buffer interface functions for extension + types have been corrected. + [Dug Song ] + + - Added 'static' to declarations of string literals. 
+ [Phil Frost ] + + - Float literals are now copied directly to the C code as written, + to avoid problems with loss of precision. + [Mario Pernici ] + + - Inheriting from an extension type with C methods defined in + another Pyrex module did not work. + [Itamar Shtull-Trauring ] + +0.9.2.1 +------- + +Bug fixes: + + - Corrected an import statement setup.py, and made it + check for a unix platform in a more reliable way. + +0.9.2 +----- + +Enhancements: + + - Names of Python global variables and attributes are now + interned, and PyObject_GetAttr/SetAttr are used instead + of PyObject_GetAttrString/SetAttrString. String literals + which resemble Python identifiers are also interned. + + - String literals are now converted to Python objects only + once instead of every time they are used. + + - NUL characters are now allowed in Python string literals. + + - Added some missing error checking code to the beginning + of module init functions. It's unlikely the operations + involved would ever fail, but you never know. + +Bug fixes: + + - Corrected some problems introduced by moving the Plex + package. + +0.9.1.1 +------- + +Bug fixes: + + - Corrected a problem in the setup.py (pyrexc script incorrectly + named). + + - Updated the distutils extension to match changes in the + Pyrex compiler calling interface. + + - Doing 'make clean' in Demos/callback was removing a little too + much (that's why cheesefinder.c kept disappearing). + +0.9.1 +----- + +Enhancements: + + - A C method can now call an inherited C method by the usual + Python technique. + [Jiba ] + + - The __modname__ of a Python class is now set correctly. + [Paul Prescod ] + + - A MANIFEST.in file has been added to the distribution to + facilitate building rpms. + [contributed by Konrad Hinsen ] + +Bug fixes: + + - Conditional code now generated to allow for the renaming of LONG_LONG + to PY_LONG_LONG that occurred between Python 2.2 and 2.3. 
+ + - Header files referenced in cimported modules were not being included. + [Tom Popovich ] + + - References to C functions and variables in a cimported module were + not being recognised if made from within a local scope. + [Tom Popovich ] + + - Spurious declarations in code generated for a "finally" block. + [Brandon Long ] + + - Attempting to return a value from a __contains__ method didn't work. + [Andreas Kostyrka ] + + - Incorrect code generated for an extension type with C methods + inheriting from a base type with no C methods. + [Robin Becker ] + + - Failure to report an error if a C method was defined in the + implementation part of an extension type that was not declared + in the corresponding definition part. Documentation also updated + to explain that this is necessary. + [Jiba ] + + - Made it an error to forward-declare an extension type with + a different base class specification from its subsequent + definition. + [Jiba ] + + - C attributes of an extension type were not being propagated + through more than one level of inheritance. + [Jiba ] + + - If a garbage collection occurred early enough in the __new__ + method of an extension type with Python-valued C attributes, + a crash could occur in its tp_traverse function. + [reported by Jiba ] + [fix suggested by Paul Prescod ] + + - An empty vtable struct is no longer generated for extension + types with no C methods. + [Robin Becker ] + + - Memory was leaked in the sq_item function of an extension + type with a __getitem__ method. + [Atsuo Ishimoto ] + + - Code generated to work around a bug in some versions of Python + 2.2 which fails to initialise the tp_free slot correctly in + some circumstances. + [Matthias Baas ] + + - Compiler crash when defining an extension type with a base + class specified by a dotted name. + [Alain Pointdexter ] + + - Referencing an extension type defined in a cimported module + at run time did not work correctly. 
+ [Alain Pointdexter ] + + - Incorrect object struct code generated for an extension type + whose base class was defined in a .pxd file. + [Alain Pointdexter ] + + - Redeclaring a type that wasn't previously an extension type + as an extension type caused a compiler crash. + [Scott Robinson ] + + - Incorrect code was generated for return statements in a + special method with no return value. + [Gary Bishop ] + + - Single-line def statement did not work. + [Francois Pinard ] + +Modifications: + + - Only the last pathname component of the .pyx file is reported in + backtraces now. + [Bryan Weingarten ] + + - Documentation corrected to remove the erroneous statement that + extension classes can have a __del__ method. + [Bryan Weingarten ] + + - Note added to documentation explaining that it is not possible + for an extension type's __new__ method to explicitly call the + inherited __new__ method. + + - The version of Plex included with Pyrex is now installed + as a subpackage of the Pyrex package, rather than as a + top-level package, so as not to interfere with any other + version of Plex the user may have installed. + +0.9 +--- + +New features: + + - Extension types can have properties. See the new "Properties" + section in the "Extension Types" page. + + - An extension type can inherit from a builtin type or another + extension type. See "Subclassing" in the "Extension Types" page. + + - Extension types can have C methods, which can be overridden + in derived extension types. See "C Methods" in the "Extension Types" + page. + +Enhancements: + + - Conversion is now performed between C long longs and Python + long integers without chopping to the size of a C long. + Also the Python PY_LONG_LONG type is now used for long longs + for greater portability. + +Bug fixes: + + - Names were sometimes being generated that were insufficiently + unique in the presence of cimported declarations. 
+ + - Changed the way the included filename table is declared from + char *[] to char **, to stop MSVC from complaining about it + having an unknown size. + [Alexander A Naanou ] + + - Second argument of assert statement was not being coerced + to a Python value. + [Francois Pinard http://www.iro.umontreal.ca/~pinard] + + - Return statement without value wasn't accepted in some + extension type special methods when it should have been. + [Francois Pinard http://www.iro.umontreal.ca/~pinard] + + - Attempting to call a non-function C value crashed the + compiler. + [John J Lee ] + + - Functions declared as "except *" were not returning exceptions. + [John J Lee ] + + - A syntax warning from Plex about assignment to None has + been eliminated. + [Gordon Williams ] + + - Public function declaration with empty argument list was + producing (void) in .pxi file. + [Michael P. Dubner ] + + - Incorrect error signalling code was being generated in the + __hash__ special method of an extension type. + + +0.8.1 +----- + +Bug fixes: + + - Names of structs, unions and enums in external header + files were getting mangled when they shouldn't have been. + [Norman Shelley ] + + - Modified distutils extension so that it will stop before + compiling the C file if the Pyrex compiler reports errors. + [John J Lee ] + + +0.8 +--- + +New features: + + - INCOMPATIBLE CHANGE: The type object of an external extension + type is now imported at run time using the Python import + mechanism. To make this possible, an 'extern' extension type + declaration must DECLARE THE MODULE from which the extension + type originates. See the new version of the "Extension Types" + documentation for details. + + This change was made to eliminate the need for Pyrex to be + told the C name of the type object, or for the Pyrex module + to be linked against the object code providing the type object. + + You will have to update any existing external extension type + declarations that you are using. 
I'm sorry about that, but it + was too hard to support both the old and new ways. + + - Compile-time importing: A Pyrex module can now import declarations + from another Pyrex module using the new 'cimport' statement. See + the new section on "Sharing Declarations Between Pyrex Modules" in + the documentation. + +Minor improvements: + + - An error is reported if you declare a struct, union or + extension type using 'cdef' in one place and 'ctypedef' + in another. + + - Struct, union and extension types can only be forward- + declared using 'cdef', not 'ctypedef' (otherwise invalid + C code would be generated). + + - The 'global' statement can be used at the module level to + declare that a name is a module-level name rather than a + builtin. This can be used to access module attributes such + as __name__ that would otherwise be assumed to be builtins. + [Pat Maupin ] + + - The 'assert' statement now accepts a second argument. + [Francois Pinard ] + +Bug fixes: + + - When using Python 2.3, "True" or "False" could sometimes + turn up in generated code instead of "1" or "0". + [Adam Hixson ] + + - Function return value not always converted to or from a + Python object when it should have been. + + - Certain kinds of error in a function call expression + could crash the compiler. + ["Edward C. Jones" ] + + - Fixed memory leak in functions with * or ** args. + [Alexander A Naanou ] + + +0.7.1 +----- + +Bug fixes: + + - Calling a function declared as returning an extension + type could crash the compiler. + + - A function call with type errors in the argument list + could crash the compiler. + + - An 'else' clause on a for-from statement could crash + the compiler. + + - Incorrect casting code was generated when a generic + object argument of a special method was declared as + being of an extension type. + [Phillip J. Eby ] + + - A blank line that couldn't be interpreted wholly as + a valid indentation sequence caused a syntax error. 
+ In particular, a formfeed character on an otherwise + blank line wasn't accepted. + [Francois Pinard ] + + - Parallel assignments were incorrectly optimised. + + - A bare tuple constructor with an extra comma at the + end of a line caused a syntax error. + +0.7 +--- + +New features: + + - Attributes of extension types can be exposed to Python + code, either read/write or read-only. + + - Different internal and external names can be specified + for C entities. + + - None is a compile-time constant, and more efficient code + is generated to reference it. + + - Command line options for specifying directories to + search for include files. + +Enhancements: + + - More efficient code is generated for access to Python + valued C attributes of extension types. + + - Cosmetic code improvement: Less casting back and forth + between extension types and PyObject * when referencing + C members of the object struct. + + - C arguments and variables declared as an extension type + can take the value None. + + - Form feed characters are accepted as whitespace. + + - Function names in tracebacks are qualified with + module name and class name. + +Bug fixes: + + - A sufficiently complex expression in a boolean context + could cause code to be generated twice for the same + subexpression. + + - Incorrect casting code was generated when passing an + extension type to a function expecting a generic Python + object. + + - Executable statements are now disallowed inside a + cdef class block (previously they silently caused + crazy C code to be generated). + + - Tracebacks should now report the correct filename for + functions defined in files included with the 'include' + statement. + + - The documentation incorrectly claimed that an extension + type can't have a __del__ method. In fact, it can, and + it behaves as expected. + + +0.6.1 +----- + +Bug fixes: + + - Fixed broken distutils extension. 
+ + + +0.6 +--- + +New features: + + - Command line options for reporting version number, + requesting a listing file and specifying the name of + the generated C file. + + - An 'include' statement allows inclusion of declarations + from other Pyrex source files. + + - If there are any public declarations, a Pyrex include + file is generated (as well as a .h file) containing + declarations for them. + + - Extension types can be declared public, so their C + attributes are visible to other Pyrex and C code. + + - Try-except statements can now have an 'else' clause. + [Francois Pinard ] + + - Multiple simple statements can be placed on one line + separated by semicolons. + + - A suite consisting of a simple statement list can now + be placed on the same line after the colon in most + cases. + [Francois Pinard ] + + - The automatic coercion of a C string to a C char has + been removed (it proved to be too error-prone). + Instead, there is a new form of literal for C + character constants: c'X' + + - The __get__ special method (used by descriptor objects) + now allows for the possibility of the 2nd or 3rd + arguments being NULL. Also the __set__ method has been + split into two methods, __set__ and __delete__. + [Phillip J. Eby ] + +Bug fixes: + + - Values unpacked into a non-Python destination variable + were not being converted before assignment. + [Gareth Watts ] + + - Hex constants greater than 0x7fffffff caused compiler + to crash. [Gareth Watts ] + + - Type slots are no longer statically initialised with + extern function pointers, to avoid problems with + some compilers. The hack in the distutils extension + to work around this by compiling as C++ has been + disabled. [Phillip J. Eby ] + + - Fixed several more instances of the error-reporting + routine being called with arguments in the wrong + order. Hoping I've *finally* got all of them now... + + - Nested for-from loops used the same control variable. 
+ [Sebastien de Menten ] + + - Fixed some other error message related bugs. + [Francois Pinard ] + + - Assigning to slice didn't work. + [Francois Pinard ] + + - Temp variables were being declared as extension + types and then being assigned PyObject *'s. All + Python temp vars are now declared as PyObject *. + [Francois Pinard ] + +0.5 +--- + +Bug fixes: + + - Algorithm for allocating temp variables redesigned + to fix various errors concerning temp + variable re-use. + [Mark Rowe ] + + - Memory leak occurred sometimes when an implicit + type test was applied to the result of an + expression. + [christoph.wiedemann at daimlerchrysler.com] + + - __set__ method of extension types had wrong + signature. + [Josh Littlefield ] + +0.4.6 +----- + +Bug fixes: + + - Indexing multi-dimensional C arrays didn't + work. + [Gary Dietachmayer ] + + +0.4.5 +----- + +New features: + + - There is now a 'public' declaration for + making Pyrex-defined variables and functions + available to external C code. A .h file is + also generated if there are any public + declarations. + +Enhancements: + + - Defining __len__/__getitem__ methods in an + extension class fills sq_length/sq_item slots + as well as mp_length/mp_subscript. + [Matthias Baas ] + + - The Distutils extension now allows .c files + to be incorporated along with .pyx files. + [Modification to Distutils extension contributed + by Darrell Gallion .] + +Bug fixes: + + - Float literals without a decimal point + work again now. + [Mike Rovner ] + [Peter Lepage ] + + - Compiler crashed if exception value didn't + match function return type. + [Michael JasonSmith ] + + - The setup.py file should now install the + Lexicon.pickle file in the right place. + [Patch supplied by David M. Cooke + ] + + - Compiler crashed when compiling a C function that + returned an extension type. + [David M. Cooke + ] + + - Anonymous enum types did not have C code + suppressed inside an extern-from block. 
+ [Matthew Mueller ] + + +0.4.4 +----- + +Enhancements: + + - Tracebacks now extend into Pyrex function + calls and show line numbers in the Pyrex + source file. + + - Syntax for float literals made more lenient + (no longer requires digits both before and + after the point). + [Peter Lepage ] + + - Method calls can be made on string literals + (e.g. ",".join(x)). + [pedro_rodriguez at club-internet.fr] + +Bug fixes: + + - Incorrect refcount code generated when a + Python function needing argument type tests + had local Python variables. + [Matthias Baas ] + + - 'self' parameter of __getitem__ method of + extension type had wrong implicit type. + [Peter Lepage ] + + - Repaired breakage introduced by trying to + allow an empty parameter list to be written + as (void). No longer attempting to allow + this (too hard to parse correctly). + [Peter Lepage ] + + - Found bug in Plex 1.1.2 which was the *real* + cause of the two-newlines-in-a-row problem. + Removed the Opt(Eol)+Str("\n") hacks in + the scanner which were working around this + before. + [pedro_rodriguez at club-internet.fr] + + - __call__ special method of extension types + had wrong signature. + [Peter Lepage ] + + +0.4.3 +----- + +New language features: + + - For-from loop for iterating over integer + ranges, using pure C loop where possible. + +Enhancements: + + - sizeof() can now be applied to types as + well as variables. + + - Improved handling of forward-declared + extension types. + +Bug fixes: + + - Two newlines in a row in a triple quoted + string caused a parse error on some + platforms. + [Matthias Baas ] + + - Fixed problem with break and continue in + the else-clause of a loop. + + +0.4.2 +----- + +New language features: + + - C functions can be declared as having an + exception return value, which is checked + whenever the function is called. 
If an + exception is detected inside a C function + for which no exception value is declared, + a warning message is printed and the + exception is cleared. + + - Cascaded assignments (i.e. a = b = c) + are now supported. + + - Anonymous enum declarations are allowed, + for when you just want to declare constants. + + - The C types "long long" and "long double" + are now understood. Also, "int" is optional + after "short" or "long". + +Enhancements: + + - A * argument in a function call can now be + any sequence, not just a tuple. + + - A C char* or char[] will be turned into + a char by taking its first character if + used in a context where a char is required, + thus allowing a string literal to be used as + a char literal. + + - C string * C int or vice versa is now + interpreted as Python string replication. + + - Function arguments are checked for void or + incomplete type. + +Bug fixes: + + - Non-external extension types show up in the + module dict once more (this got broken in + 0.4.1). + + - A spurious decref has been removed from the + runtime support code for the "import" statement. + Hopefully this will prevent the crashes some + people have been experiencing when importing + builtin modules. + [Mathew Yeates ] + +0.4.1 +----- + +New language features: + + - "ctypedef struct/union/enum/class" statements + added, for use in extern-from blocks when a + header file uses a ctypedef to declare a + tagless struct, union or enum type. + + - "pass" allowed in an extern-from block. + + - "cdef extern from *" for when you don't want + to specify an include file name. + + - Argument names may be omitted in function + signatures when they're not needed. + + - New reserved word NULL for the null C pointer. + +Compiler enhancements: + + - Lexicon is now pickled in binary format, so + startup should be much faster on slower + machines. 
+ + - If Pyrex decides to rebuild the lexicon and + then finds that it can't write a pickle file, + it now prints a warning and carries on + instead of crashing. + + - Chat about hash codes and lexicon pickling + now turned off by default except when creating + a new lexicon (which ought never happen now + unless you change the scanner). + +Bug fixes: + + - Modified the runtime support code for "import" + statements, hopefully fixing problem with using + a Pyrex module in conjunction with py2exe. + + - DL_EXPORT now used in both the prototype and + definition of the module init function. + + - Exception state is now saved and restored around + calls to an extension type __dealloc__ method, + to avoid screwing up if the object is deallocated + while an exception is being propagated. + + - Making an attribute reference to a method of + an extension type caused a compiler crash. + + - Doc string in new-style class definition + caused a run-time error. + + - Insufficient parentheses were put around C type + casts. + + - Constructors for extension types are now read-only + C global variables instead of entries in the + module dict. This change was needed to prevent + Numeric from blowing up due to touching its + typeobject before import_numeric() could be called. + +0.4 +--- + +New features: + + - "cdef extern from" statement allows inclusion + of C header files to be specified, solving + a number of problems including: + - Clashes between Pyrex and C declarations, + due to "const" and other reasons + - Windows-specific features required in + function declarations + - Helping deal with types such as "size_t" + - Helping deal with functions defined as + macros + + - Access to internals of pre-existing extension + types is now possible by placing an extension + type declaration inside a "cdef extern from" + block. + +Bug fixes: + + - Error not reported properly when passing + wrong number of args to certain special + methods of extension types. 
+ [Mitch Chapman ] + + - Compile-time crash when defining an extension + type with a __hash__ method. + +Minor enhancements: + + - Hashing of the scanner source file made more + platform-independent, making spurious regeneration + of the pickle less likely. + + +0.3.4 +----- + +Bug fixes: + + - Runtime crash when using * or ** args in + a method of an extension type fixed. + [Matthew Mueller ] + + - Compiler crash when using default argument + values in a method of a Python class. + [Mike Rovner ] + +Enhancements: + + - Type slots filled with functions from outside + the extension module are now initialised dynamically, + which should eliminate at least some of the + "initialiser is not constant" problems experienced + on Windows. + [Marek Baczek ] + + - On Windows, __declspec(dllexport) is now used for + the module init func declaration (or should be -- + I haven't tested this). + [Marek Baczek ] + + - The compiler shouldn't attempt to rewrite the + Lexicon.pickle file unless the source has been + changed (hashing is used now instead of comparing + timestamps). So there should be no problem any more + with installing Pyrex read-only. + [fawcett at uwindsor.ca] + +0.3.3 +----- + +Bug fixes: + +* A void * can be assigned from any other +pointer type. +[piers at cs.su.oz.au] + +* File names in error messages no longer +quoted (this was apparently confusing some +editors). +[donut at azstarnet.com] + +* Reference to a struct member which is an +array is coerced to a pointer. +[donut at azstarnet.com] + +* Default argument values did not work +in methods of an extension type. +[donut at azstarnet.com] + +* Single or double quote characters in a +triple-quoted string didn't work. +[donut at azstarnet.com] + +* Using *args in a function definition +sometimes caused a crash at runtime. +[donut at azstarnet.com] + +* A hack is included which tries to make +functions in Python.h which use 'const' +accessible from Pyrex. But it doesn't +work on all platforms. 
Thinking about a +better solution. + + +New features: + +* Comment containing Pyrex version number +and date/time at top of generated C file. +[baas at ira.uka.de] + + +0.3.2 +----- + +Bug fixes: + +* The & operator works again. +[matthias.oberlaender at daimlerchrysler.com] +[baas at ira.uka.de] + +* The & operator had incorrect precedence. + +* "SystemError: 'finally' pops bad exception" +under some circumstances when raising an +exception. [baas at ira.uka.de] + +* Calling a Python function sometimes leaked +a reference. + +* Crash under some circumstances when casting +a Python object reference to a C pointer type. +[mpj17 at cosc.canterbury.ac.nz] + +* Crash when redeclaring a function. +[baas at ira.uka.de] + +* Crash when using a string constant inside +a Python class definition. +[mike at bindkey.com] + +* 2-element slice indexing expressions. +[mike at bindkey.com] + +* Crash when encountering mixed tabs and +spaces. +[mike at bindkey.com] + +New features: + +* A wider variety of constant expressions is +now accepted for enum values, array +dimensions, etc. +[mike at bindkey.com] + + +0.3.1 +----- + +New features: + +* More special methods for extension types: +__delitem__, __delslice__, __getattr__, +__setattr__, __delattr__ + +* Module-level variable of a Python object type +declared with 'cdef' is private to the module, and +held in a C variable instead of the module dict. + +* External C functions with variable argument lists +can be declared and called. + +* Pyrex-defined Python functions can have default +argument values and * and ** arguments, and can be +called with keyword arguments. + +* Pointer-to-function types can be declared. + +* Pyrex now supports a declaration syntax that +C doesn't! Example: + + cdef (int (*)()) foo() # function returning a function ptr + +* There is now a ctypedef statement. + +* Extension types can now be forward-declared. + +* All permutations of (non-Unicode) string literals +and escape codes should work now. 
+ +* Hex and octal integer literals. + +* Imaginary number literals. + +* Docstrings are now supported. + +Bug fixes: + +* Type tests are performed when using a Python object +in a context requiring a particular extension type. + +* Module-level variable holding the type object +of an extension type had incorrect type. + +0.3 +--- + +New features: + +* Extension types! Yay! + +0.2.2 +----- + +Bug fixes: + +* Fixed error message generation again after a previous +bug was accidentally re-introduced. + +* Removed the declaration of isspace() from the code +generated for print statement support (it's not needed +and was conflicting with the system-supplied one on +some platforms). + +0.2 +--- + +New features: + +* Executable statements are now allowed at the +top level of a module. + +* Python class definitions are now supported, with +the following limitations: + + - Class definitions are only allowed at the top + level of a module, not inside a control structure + or function or another class definition. + + - Assigning a Pyrex-defined Python function to a + class attribute outside of the class definition + will not create a method (because it's not an + interpreted Python function and therefore + won't trigger the bound-method creation magic). + + - The __metaclass__ mechanism and the creation of + new-style classes is not (yet) supported. + +* Casting between Python and non-Python types is +better supported. + +Bug fixes: + +* Fixed bug preventing for-loops from working. + + +0.1.1 +----- + +* I've discovered a flaw in my algorithm for releasing +temp variables. Fixing this properly will require some +extensive reworking; I've put in a hack in the meantime +which should work at the cost of using more temp variables +than are strictly necessary. + +* Fixed bug preventing access to builtin names from +working. This should also have fixed the import +statement, but I haven't tested it. + +* Fixed some errors in __Pyx_GetExcValue. 
+ +* Fixed bug causing boolean expressions to malfunction +sometimes. Added: lxml/pyrex/Demos/Makefile ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/Makefile Fri Mar 10 13:58:37 2006 @@ -0,0 +1,15 @@ +all: + python Setup.py build_ext --inplace + +test: all + python run_primes.py 20 + python run_numeric_demo.py + python run_spam.py + cd callback; $(MAKE) test + +clean: + @echo Cleaning Demos + @rm -f *.c *.o *.so *~ core + @rm -rf build + @cd callback; $(MAKE) clean + @cd embed; $(MAKE) clean Added: lxml/pyrex/Demos/Makefile.nodistutils ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/Makefile.nodistutils Fri Mar 10 13:58:37 2006 @@ -0,0 +1,21 @@ +PYHOME = $(HOME)/pkg/python/version +PYINCLUDE = \ + -I$(PYHOME)/include/python2.2 \ + -I$(PYHOME)/$(ARCH)/include/python2.2 + +%.c: %.pyx + ../bin/pyrexc $< + +%.o: %.c + gcc -c -fPIC $(PYINCLUDE) $< + +%.so: %.o + gcc -shared $< -lm -o $@ + +all: primes.so spam.so numeric_demo.so + +clean: + @echo Cleaning Demos + @rm -f *.c *.o *.so *~ core core.* + @cd callback; $(MAKE) clean + @cd embed; $(MAKE) clean Added: lxml/pyrex/Demos/Setup.py ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/Setup.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,13 @@ +from distutils.core import setup +from distutils.extension import Extension +from Pyrex.Distutils import build_ext + +setup( + name = 'Demos', + ext_modules=[ + Extension("primes", ["primes.pyx"]), + Extension("spam", ["spam.pyx"]), + Extension("numeric_demo", ["numeric_demo.pyx"]), + ], + cmdclass = {'build_ext': build_ext} +) Added: lxml/pyrex/Demos/numeric_demo.pyx ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/numeric_demo.pyx Fri Mar 10 13:58:37 2006 @@ -0,0 +1,39 @@ +# +# This example demonstrates how to access 
the internals +# of a Numeric array object. +# + +cdef extern from "Numeric/arrayobject.h": + + struct PyArray_Descr: + int type_num, elsize + char type + + ctypedef class Numeric.ArrayType [object PyArrayObject]: + cdef char *data + cdef int nd + cdef int *dimensions, *strides + cdef object base + cdef PyArray_Descr *descr + cdef int flags + +def print_2d_array(ArrayType a): + print "Type:", chr(a.descr.type) + if chr(a.descr.type) <> "f": + raise TypeError("Float array required") + if a.nd <> 2: + raise ValueError("2 dimensional array required") + cdef int nrows, ncols + cdef float *elems, x + nrows = a.dimensions[0] + ncols = a.dimensions[1] + elems = a.data + hyphen = "-" + divider = ("+" + 10 * hyphen) * ncols + "+" + print divider + for row in range(nrows): + for col in range(ncols): + x = elems[row * ncols + col] + print "| %8f" % x, + print "|" + print divider Added: lxml/pyrex/Demos/primes.pyx ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/primes.pyx Fri Mar 10 13:58:37 2006 @@ -0,0 +1,18 @@ +def primes(int kmax): + cdef int n, k, i + cdef int p[1000] + result = [] + if kmax > 1000: + kmax = 1000 + k = 0 + n = 2 + while k < kmax: + i = 0 + while i < k and n % p[i] <> 0: + i = i + 1 + if i == k: + p[k] = n + k = k + 1 + result.append(n) + n = n + 1 + return result Added: lxml/pyrex/Demos/pyprimes.py ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/pyprimes.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,13 @@ +def primes(kmax): + p = [] + k = 0 + n = 2 + while k < kmax: + i = 0 + while i < k and n % p[i] <> 0: + i = i + 1 + if i == k: + p.append(n) + k = k + 1 + n = n + 1 + return p Added: lxml/pyrex/Demos/run_numeric_demo.py ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/run_numeric_demo.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,5 @@ +import Numeric +import 
numeric_demo + +a = Numeric.array([[1.0, 3.5, 8.4], [2.3, 6.6, 4.1]], "f") +numeric_demo.print_2d_array(a) Added: lxml/pyrex/Demos/run_primes.py ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/run_primes.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,7 @@ +import sys +from primes import primes +if len(sys.argv) >= 2: + n = int(sys.argv[1]) +else: + n = 1000 +print primes(n) Added: lxml/pyrex/Demos/run_spam.py ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/run_spam.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,8 @@ +from spam import Spam + +s = Spam() +print "Created:", s +s.set_amount(42) +print "Amount =", s.get_amount() +s.describe() +s = None Added: lxml/pyrex/Demos/spam.pyx ============================================================================== --- (empty file) +++ lxml/pyrex/Demos/spam.pyx Fri Mar 10 13:58:37 2006 @@ -0,0 +1,22 @@ +# +# Example of an extension type. +# + +cdef class Spam: + + cdef int amount + + def __new__(self): + self.amount = 0 + + def __dealloc__(self): + print self.amount, "tons of spam is history." + + def get_amount(self): + return self.amount + + def set_amount(self, new_amount): + self.amount = new_amount + + def describe(self): + print self.amount, "tons of spam!" Added: lxml/pyrex/Doc/About.html ============================================================================== --- (empty file) +++ lxml/pyrex/Doc/About.html Fri Mar 10 13:58:37 2006 @@ -0,0 +1 @@ + About Pyrex


Pyrex

A language for writing Python extension modules

What is Pyrex all about?

Pyrex is a language specially designed for writing Python extension modules. It's designed to bridge the gap between the nice, high-level, easy-to-use world of Python and the messy, low-level world of C.

You may be wondering why anyone would want a special language for this. Python is really easy to extend using C or C++, isn't it? Why not just write your extension modules in one of those languages?

Well, if you've ever written an extension module for Python, you'll know that things are not as easy as all that. First of all, there is a fair bit of boilerplate code to write before you can even get off the ground. Then you're faced with the problem of converting between Python and C data types. For the basic types such as numbers and strings this is not too bad, but anything more elaborate and you're into picking Python objects apart using the Python/C API calls, which requires you to be meticulous about maintaining reference counts, checking for errors at every step and cleaning up properly if anything goes wrong. Any mistakes and you have a nasty crash that's very difficult to debug.

Various tools have been developed to ease some of the burdens of producing extension code, of which perhaps SWIG is the best known. SWIG takes a definition file consisting of a mixture of C code and specialised declarations, and produces an extension module. It writes all the boilerplate for you, and in many cases you can use it without knowing about the Python/C API. But you need to use API calls if any substantial restructuring of the data is required between Python and C.

What's more, SWIG gives you no help at all if you want to create a new built-in Python type. It will generate pure-Python classes which wrap (in a slightly unsafe manner) pointers to C data structures, but creation of true extension types is outside its scope.

Another notable attempt at making it easier to extend Python is PyInline, inspired by a similar facility for Perl. PyInline lets you embed pieces of C code in the midst of a Python file, and automatically extracts them and compiles them into an extension. But it only converts the basic types automatically, and as with SWIG, it doesn't address the creation of new Python types.

Pyrex aims to go far beyond what any of these previous tools provides. Pyrex deals with the basic types just as easily as SWIG, but it also lets you write code to convert between arbitrary Python data structures and arbitrary C data structures, in a simple and natural way, without knowing anything about the Python/C API. That's right -- nothing at all! Nor do you have to worry about reference counting or error checking -- it's all taken care of automatically, behind the scenes, just as it is in interpreted Python code. And what's more, Pyrex lets you define new built-in Python types just as easily as you can define new classes in Python.

Sound too good to be true? Read on and find out how it's done.

The Basics of Pyrex

The fundamental nature of Pyrex can be summed up as follows: Pyrex is Python with C data types.

Pyrex is Python: Almost any piece of Python code is also valid Pyrex code. (There are a few limitations, but this approximation will serve for now.) The Pyrex compiler will convert it into C code which makes equivalent calls to the Python/C API. In this respect, Pyrex is similar to the former Python2C project (to which I would supply a reference except that it no longer seems to exist).

...with C data types. But Pyrex is much more than that, because parameters and variables can be declared to have C data types. Code which manipulates Python values and C values can be freely intermixed, with conversions occurring automatically wherever possible. Reference count maintenance and error checking of Python operations are also automatic, and the full power of Python's exception handling facilities, including the try-except and try-finally statements, is available to you -- even in the midst of manipulating C data.

Here's a small example showing some of what can be done. It's a routine for finding prime numbers. You tell it how many primes you want, and it returns them as a Python list.

primes.pyx
 1  def primes(int kmax):
 2      cdef int n, k, i
 3      cdef int p[1000]
 4      result = []
 5      if kmax > 1000:
 6          kmax = 1000
 7      k = 0
 8      n = 2
 9      while k < kmax:
10          i = 0
11          while i < k and n % p[i] <> 0:
12              i = i + 1
13          if i == k:
14             p[k] = n
15             k = k + 1
16             result.append(n)
17          n = n + 1
18      return result
You'll see that it starts out just like a normal Python function definition, except that the parameter kmax is declared to be of type int. This means that the object passed will be converted to a C integer (or a TypeError will be raised if it can't be).

Lines 2 and 3 use the cdef statement to define some local C variables. Line 4 creates a Python list which will be used to return the result. You'll notice that this is done exactly the same way it would be in Python. Because the variable result hasn't been given a type, it is assumed to hold a Python object.

Lines 7-9 set up for a loop which will test candidate numbers for primeness until the required number of primes has been found. Lines 11-12, which try dividing a candidate by all the primes found so far, are of particular interest. Because no Python objects are referred to, the loop is translated entirely into C code, and thus runs very fast.

When a prime is found, lines 14-15 add it to the p array for fast access by the testing loop, and line 16 adds it to the result list. Again, you'll notice that line 16 looks very much like a Python statement, and in fact it is, with the twist that the C variable n is automatically converted to a Python object before being passed to the append method. Finally, at line 18, a normal Python return statement returns the result list.

Compiling primes.pyx with the Pyrex compiler produces an extension module which we can try out in the interactive interpreter as follows:

>>> import primes
>>> primes.primes(10)
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29]
>>>
See, it works! And if you're curious about how much work Pyrex has saved you, take a look at the C code generated for this module.

Language Details

For more about the Pyrex language, see the Language Overview.

Future Plans

Pyrex is not finished. Substantial tasks remaining include:
  • Support for certain Python language features which are planned but not yet implemented. See the Limitations section of the Language Overview for a current list.
  • C++ support. This could be a very big can of worms - careful thought required before going there.
  • Reading C/C++ header files directly would be very nice, but there are some severe problems that I will have to find solutions for first, such as what to do about preprocessor macros. My current thinking is to use a separate tool to convert .h files into Pyrex declarations, possibly with some manual intervention.
\ No newline at end of file Added: lxml/pyrex/Doc/FAQ.html ============================================================================== --- (empty file) +++ lxml/pyrex/Doc/FAQ.html Fri Mar 10 13:58:37 2006 @@ -0,0 +1 @@ + FAQ.html


Pyrex FAQ

Contents


How do I call Python/C API routines?

Declare them as C functions inside a cdef extern from block. Use the type name object for any parameters and return types which are Python object references. Don't use the word const anywhere. Here is an example which defines and uses the PyString_FromStringAndSize routine:
cdef extern from "Python.h":
    object PyString_FromStringAndSize(char *, int)

cdef char buf[42]
my_string = PyString_FromStringAndSize(buf, 42)

How do I convert a C string containing null bytes to a Python string?

Put in a declaration for the PyString_FromStringAndSize API routine and use that. See How do I call Python/C API routines?

How do I access the data inside a Numeric array object?

Use a cdef extern from block to include the Numeric header file and declare the array object as an external extension type. The following code illustrates how to do this:
cdef extern from "Numeric/arrayobject.h":

    struct PyArray_Descr:
        int type_num, elsize
        char type

    ctypedef class Numeric.ArrayType [object PyArrayObject]:
        cdef char *data
        cdef int nd
        cdef int *dimensions, *strides
        cdef object base
        cdef PyArray_Descr *descr
        cdef int flags

For more information about external extension types, see the "External Extension Types" section of the "Extension Types" documentation page.

Pyrex says my extension type object has no attribute 'rhubarb', but I know it does. What gives?

You're probably trying to access it through a reference which Pyrex thinks is a generic Python object. You need to tell Pyrex that it's a reference to your extension type by means of a declaration,
for example,
cdef class Vegetables:
    cdef int rhubarb

...
cdef Vegetables veg
veg.rhubarb = 42
Also see the "Attributes" section of the "Extension Types" documentation page.
--- \ No newline at end of file Added: lxml/pyrex/Doc/extension_types.html ============================================================================== --- (empty file) +++ lxml/pyrex/Doc/extension_types.html Fri Mar 10 13:58:37 2006 @@ -0,0 +1 @@ + Extension Types


Extension Types

Contents

Introduction

As well as creating normal user-defined classes with the Python class statement, Pyrex also lets you create new built-in Python types, known as extension types. You define an extension type using the cdef class statement. Here's an example:
cdef class Shrubbery:

    cdef int width, height

    def __init__(self, w, h):
        self.width = w
        self.height = h

    def describe(self):
        print "This shrubbery is", self.width, \
            "by", self.height, "cubits."

As you can see, a Pyrex extension type definition looks a lot like a Python class definition. Within it, you use the def statement to define methods that can be called from Python code. You can even define many of the special methods such as __init__ as you would in Python.

The main difference is that you can use the cdef statement to define attributes. The attributes may be Python objects (either generic or of a particular extension type), or they may be of any C data type. So you can use extension types to wrap arbitrary C data structures and provide a Python-like interface to them.

Attributes

Attributes of an extension type are stored directly in the object's C struct. The set of attributes is fixed at compile time; you can't add attributes to an extension type instance at run time simply by assigning to them, as you could with a Python class instance. (You can subclass the extension type in Python and add attributes to instances of the subclass, however.)

There are two ways that attributes of an extension type can be accessed: by Python attribute lookup, or by direct access to the C struct from Pyrex code. Python code is only able to access attributes of an extension type by the first method, but Pyrex code can use either method.

By default, extension type attributes are only accessible by direct access, not Python access, which means that they are not accessible from Python code. To make them accessible from Python code, you need to declare them as public or readonly. For example,

cdef class Shrubbery:
    cdef public int width, height
    cdef readonly float depth
makes the width and height attributes readable and writable from Python code, and the depth attribute readable but not writable.

Note that you can only expose simple C types, such as ints, floats and strings, for Python access. You can also expose Python-valued attributes, although read-write exposure is only possible for generic Python attributes (of type object). If the attribute is declared to be of an extension type, it must be exposed readonly.

Note also that the public and readonly options apply only to Python access, not direct access. All the attributes of an extension type are always readable and writable by direct access.

However, for direct access to be possible, the Pyrex compiler must know that you have an instance of that type, and not just a generic Python object. It knows this already in the case of the "self" parameter of the methods of that type, but in other cases you will have to tell it by means of a declaration. For example,

cdef widen_shrubbery(Shrubbery sh, extra_width):
    sh.width = sh.width + extra_width
If you attempt to access an extension type attribute through a generic object reference, Pyrex will use a Python attribute lookup. If the attribute is exposed for Python access (using public or readonly) then this will work, but it will be much slower than direct access.

Extension types and None

When you declare a parameter or C variable as being of an extension type, Pyrex will allow it to take on the value None as well as values of its declared type. This is analogous to the way a C pointer can take on the value NULL, and you need to exercise the same caution because of it. There is no problem as long as you are performing Python operations on it, because full dynamic type checking will be applied. However, when you access C attributes of an extension type (as in the widen_shrubbery function above), it's up to you to make sure the reference you're using is not None -- in the interests of efficiency, Pyrex does not check this.

You need to be particularly careful when exposing Python functions which take extension types as arguments. For example, if we wanted to make widen_shrubbery a Python function and simply wrote

def widen_shrubbery(Shrubbery sh, extra_width): # This is
    sh.width = sh.width + extra_width           # dangerous!
then users of our module could crash it by passing None for the sh parameter.

One way to fix this would be

def widen_shrubbery(Shrubbery sh, extra_width):
    if sh is None:
        raise TypeError
    sh.width = sh.width + extra_width
but since this is anticipated to be such a frequent requirement, Pyrex provides a more convenient way. Parameters of a Python function declared as an extension type can have a not None clause:
def widen_shrubbery(Shrubbery sh not None, extra_width):
    sh.width = sh.width + extra_width
Now the function will automatically check that sh is not None along with checking that it has the right type.

Note, however, that the not None clause can only be used in Python functions (defined with def) and not C functions (defined with cdef). If you need to check whether a parameter to a C function is None, you will need to do it yourself.

Some more things to note:

  • The self parameter of a method of an extension type is guaranteed never to be None.
  • When comparing a value with None, keep in mind that, if x is a Python object, x is None and x is not None are very efficient because they translate directly to C pointer comparisons, whereas x == None and x != None, or simply using x as a boolean value (as in if x: ...) will invoke Python operations and therefore be much slower.

Special methods

Although the principles are similar, there are substantial differences between many of the __xxx__ special methods of extension types and their Python counterparts. There is a separate page devoted to this subject, and you should read it carefully before attempting to use any special methods in your extension types.

Properties

There is a special syntax for defining properties in an extension class:
cdef class Spam:

    property cheese:

        "A doc string can go here."

        def __get__(self):
            # This is called when the property is read.
            ...

        def __set__(self, value):
            # This is called when the property is written.
            ...

        def __del__(self):
            # This is called when the property is deleted.
 

The __get__, __set__ and __del__ methods are all optional; if they are omitted, an exception will be raised when the corresponding operation is attempted.

Here's a complete example. It defines a property which adds to a list each time it is written to, returns the list when it is read, and empties the list when it is deleted.
 

cheesy.pyx Test input
cdef class CheeseShop:

  cdef object cheeses

  def __new__(self):
    self.cheeses = []

  property cheese:

    def __get__(self):
      return "We don't have: %s" % self.cheeses

    def __set__(self, value):
      self.cheeses.append(value)

    def __del__(self):
      del self.cheeses[:]

from cheesy import CheeseShop

shop = CheeseShop()
print shop.cheese

shop.cheese = "camembert"
print shop.cheese

shop.cheese = "cheddar"
print shop.cheese

del shop.cheese
print shop.cheese

Test output
We don't have: []
We don't have: ['camembert']
We don't have: ['camembert', 'cheddar']
We don't have: []

Subclassing

An extension type may inherit from a built-in type or another extension type:
cdef class Parrot:
    ...

cdef class Norwegian(Parrot):
    ...


A complete definition of the base type must be available to Pyrex, so if the base type is a built-in type, it must have been previously declared as an extern extension type. If the base type is defined in another Pyrex module, it must either be declared as an extern extension type or imported using the cimport statement.

An extension type can only have one base class (no multiple inheritance).

Pyrex extension types can also be subclassed in Python. A Python class can inherit from multiple extension types provided that the usual Python rules for multiple inheritance are followed (i.e. the C layouts of all the base classes must be compatible).

C methods

Extension types can have C methods as well as Python methods. Like C functions, C methods are declared using cdef instead of def. C methods are "virtual", and may be overridden in derived extension types.

pets.pyx
Output
cdef class Parrot:

  cdef void describe(self):
    print "This parrot is resting."

cdef class Norwegian(Parrot):

  cdef void describe(self):
    Parrot.describe(self)
    print "Lovely plumage!"


cdef Parrot p1, p2
p1 = Parrot()
p2 = Norwegian()
print "p1:"
p1.describe()
print "p2:"
p2.describe()

p1:
This parrot is resting.
p2:
This parrot is resting.
Lovely plumage!

The above example also illustrates that a C method can call an inherited C method using the usual Python technique, i.e.
Parrot.describe(self)

Forward-declaring extension types

Extension types can be forward-declared, like struct and union types. This will be necessary if you have two extension types that need to refer to each other, e.g.
cdef class Shrubbery # forward declaration

cdef class Shrubber:
    cdef Shrubbery work_in_progress

cdef class Shrubbery:
    cdef Shrubber creator

If you are forward-declaring an extension type that has a base class, you must specify the base class in both the forward declaration and its subsequent definition, for example,
cdef class A(B)

...

cdef class A(B):
    # attributes and methods

Public and external extension types

Extension types can be declared extern or public. An extern extension type declaration makes an extension type defined in external C code available to a Pyrex module. A public extension type declaration makes an extension type defined in a Pyrex module available to external C code.

External extension types

An extern extension type allows you to gain access to the internals of Python objects defined in the Python core or in a non-Pyrex extension module.
NOTE: In Pyrex versions before 0.8, extern extension types were also used to reference extension types defined in another Pyrex module. While you can still do that, Pyrex 0.8 and later provides a better mechanism for this. See Sharing C Declarations Between Pyrex Modules.
Here is an example which will let you get at the C-level members of the built-in complex object.
cdef extern from "complexobject.h":

    struct Py_complex:
        double real
        double imag

    ctypedef class __builtin__.complex [object PyComplexObject]:
        cdef Py_complex cval

# A function which uses the above type
def spam(complex c):
    print "Real:", c.cval.real
    print "Imag:", c.cval.imag

Some important things to note are:
  1. In this example, ctypedef class has been used. This is because, in the Python header files, the PyComplexObject struct is declared with
ctypedef struct {
    ...
} PyComplexObject;
  2. As well as the name of the extension type, the module in which its type object can be found is also specified. See the implicit importing section below.

  3. The part in square brackets tells Pyrex the name to use for the object's C struct, so it can generate code that matches what is declared in the header file. See the name specification clause section below.

  4. When declaring an external extension type, you don't declare any methods. Declaration of methods is not required in order to call them, because the calls are Python method calls. Also, as with structs and unions, if your extension class declaration is inside a cdef extern from block, you only need to declare those C members which you wish to access.

Implicit importing

Backwards Incompatibility Note: You will have to update any pre-0.8 Pyrex modules you have which use extern extension types. I apologise for this, but for complicated reasons it proved to be too difficult to continue supporting the old way of doing these while introducing the new features that I wanted.
Pyrex 0.8 and later requires you to include a module name in an extern extension class declaration, for example,
cdef extern class MyModule.Spam:
    ...
The type object will be implicitly imported from the specified module and bound to the corresponding name in this module. In other words, in this example an implicit
    from MyModule import Spam
statement will be executed at module load time.

The module name can be a dotted name to refer to a module inside a package hierarchy, for example,

cdef extern class My.Nested.Package.Spam:
    ...
You can also specify an alternative name under which to import the type using an as clause, for example,
    cdef extern class My.Nested.Package.Spam as Yummy:
       ...
which corresponds to the implicit import statement
    from My.Nested.Package import Spam as Yummy

Type names vs. constructor names

Inside a Pyrex module, the name of an extension type serves two distinct purposes. When used in an expression, it refers to a module-level global variable holding the type's constructor (i.e. its type-object). However, it can also be used as a C type name to declare variables, arguments and return values of that type.

When you declare

cdef extern class MyModule.Spam:
    ...
the name Spam serves both these roles. There may be other names by which you can refer to the constructor, but only Spam can be used as a type name. For example, if you were to explicitly import MyModule, you could use MyModule.Spam() to create a Spam instance, but you wouldn't be able to use MyModule.Spam as a type name.

When an as clause is used, the name specified in the as clause also takes over both roles. So if you declare

cdef extern class MyModule.Spam as Yummy:
    ...
then Yummy becomes both the type name and a name for the constructor. Again, there are other ways that you could get hold of the constructor, but only Yummy is usable as a type name.

Public extension types

An extension type can be declared public, in which case a .h file is generated containing declarations for its object struct and type object. By including the .h file in external C code that you write, that code can access the attributes of the extension type.

Name specification clause

The part of the class declaration in square brackets is a special feature only available for extern or public extension types. The full form of this clause is
[object object_struct_name, type type_object_name]
where object_struct_name is the name to assume for the type's C struct, and type_object_name is the name to assume for the type's statically declared type object. (The object and type clauses can be written in either order.)

If the extension type declaration is inside a cdef extern from block, the object clause is required, because Pyrex must be able to generate code that is compatible with the declarations in the header file. Otherwise, for extern extension types, the object clause is optional.

For public extension types, the object and type clauses are both required, because Pyrex must be able to generate code that is compatible with external C code.



Back to the Language Overview
 

\ No newline at end of file Added: lxml/pyrex/Doc/index.html ============================================================================== --- (empty file) +++ lxml/pyrex/Doc/index.html Fri Mar 10 13:58:37 2006 @@ -0,0 +1 @@ + Pyrex - Front Page  
Pyrex A smooth blend of the finest Python 
with the unsurpassed power 
of raw C.
Welcome to Pyrex, a language for writing Python extension modules. Pyrex makes creating an extension module almost as easy as creating a Python module! To find out more, consult one of the edifying documents below.

Documentation

About Pyrex

Read this to find out what Pyrex is all about and what it can do for you.

Language Overview

A description of all the features of the Pyrex language. This is the closest thing to a reference manual in existence yet.

FAQ

Want to know how to do something in Pyrex? Check here first.

Other Resources

Michael's Quick Guide to Pyrex

This tutorial-style presentation will take you through the steps of creating some Pyrex modules to wrap existing C libraries. Contributed by Michael JasonSmith.

Mail to the Author

If you have a question that's not answered by anything here, you're not sure about something, or you have a bug to report or a suggestion to make, or anything at all to say about Pyrex, feel free to email me: greg at cosc.canterbury.ac.nz
\ No newline at end of file Added: lxml/pyrex/Doc/overview.html ============================================================================== --- (empty file) +++ lxml/pyrex/Doc/overview.html Fri Mar 10 13:58:37 2006 @@ -0,0 +1 @@ + Pyrex Language Overview


Overview of the Pyrex Language 

This document informally describes the extensions to the Python language made by Pyrex. Some day there will be a reference manual covering everything in more detail.
 

Contents


Basics

This section describes the basic features of the Pyrex language. The facilities covered in this section allow you to create Python-callable functions that manipulate C data structures and convert between Python and C data types. Later sections will cover facilities for wrapping external C code, creating new Python types and cooperation between Pyrex modules.

Python functions vs. C functions

There are two kinds of function definition in Pyrex:

Python functions are defined using the def statement, as in Python. They take Python objects as parameters and return Python objects.

C functions are defined using the new cdef statement. They take either Python objects or C values as parameters, and can return either Python objects or C values.

Within a Pyrex module, Python functions and C functions can call each other freely, but only Python functions can be called from outside the module by interpreted Python code. So, any functions that you want to "export" from your Pyrex module must be declared as Python functions.

Parameters of either type of function can be declared to have C data types, using normal C declaration syntax. For example,

def spam(int i, char *s):
    ...
cdef int eggs(unsigned long l, float f):
    ...
When a parameter of a Python function is declared to have a C data type, it is passed in as a Python object and automatically converted to a C value, if possible. Automatic conversion is currently only possible for numeric types and string types; attempting to use any other type for the parameter of a Python function will result in a compile-time error.

C functions, on the other hand, can have parameters of any type, since they're passed in directly using a normal C function call.

Python objects as parameters and return values

If no type is specified for a parameter or return value, it is assumed to be a Python object. (Note that this is different from the C convention, where it would default to int.) For example, the following defines a C function that takes two Python objects as parameters and returns a Python object:
cdef spamobjs(x, y):
    ...
Reference counting for these objects is performed automatically according to the standard Python/C API rules (i.e. borrowed references are taken as parameters and a new reference is returned).

The name object can also be used to explicitly declare something as a Python object. This can be useful if the name being declared would otherwise be taken as the name of a type, for example,

cdef ftang(object int):
    ...
declares a parameter called int which is a Python object. You can also use object as the explicit return type of a function, e.g.
cdef object ftang(object int):
    ...
In the interests of clarity, it is probably a good idea to always be explicit about object parameters in C functions.

C variable and type definitions

The cdef statement is also used to declare C variables, either local or module-level:
cdef int i, j, k
cdef float f, g[42], *h
and C struct, union or enum types:
cdef struct Grail:
    int age
    float volume
cdef union Food:
    char *spam
    float *eggs
cdef enum CheeseType:
    cheddar, edam, 
    camembert
cdef enum CheeseState:
    hard = 1
    soft = 2
    runny = 3
There is currently no special syntax for defining a constant, but you can use an anonymous enum declaration for this purpose, for example,
cdef enum:
    tons_of_spam = 3
Note that the words struct, union and enum are used only when defining a type, not when referring to it. For example, to declare a variable pointing to a Grail you would write
cdef Grail *gp
and not
cdef struct Grail *gp # WRONG
There is also a ctypedef statement for giving names to types, e.g.
ctypedef unsigned long ULong
ctypedef int *IntPtr

Scope rules

Pyrex determines whether a variable belongs to a local scope, the module scope, or the built-in scope completely statically. As with Python, assigning to a variable which is not otherwise declared implicitly declares it to be a Python variable residing in the scope where it is assigned. Unlike Python, however, a name which is referred to but not declared or assigned is assumed to reside in the builtin scope, not the module scope. Names added to the module dictionary at run time will not shadow such names.

You can use a global statement at the module level to explicitly declare a name to be a module-level name when there would otherwise not be any indication of this, for example,

global __name__
print __name__
Without the global statement, the above would print the name of the builtins module.

Note: A consequence of these rules is that the module-level scope behaves the same way as a Python local scope if you refer to a variable before assigning to it. In particular, tricks such as the following will not work in Pyrex:
try:
  x = True
except NameError:
  True = 1
because, due to the assignment, the True will always be looked up in the module-level scope. You would have to do something like this instead:
import __builtin__
try:
  True = __builtin__.True
except AttributeError:
  True = 1

Statements and expressions

Control structures and expressions follow Python syntax for the most part. When applied to Python objects, they have the same semantics as in Python (unless otherwise noted). Most of the Python operators can also be applied to C values, with the obvious semantics.

If Python objects and C values are mixed in an expression, conversions are performed automatically between Python objects and C numeric or string types.

Reference counts are maintained automatically for all Python objects, and all Python operations are automatically checked for errors, with appropriate action taken.

Differences between C and Pyrex expression syntax

Pyrex also includes some C operations which have no direct Python equivalent. Some of them are expressed differently in Pyrex than in C.
  • There is no -> operator in Pyrex. Instead of p->x, use p.x

  •  
  • There is no * operator in Pyrex. Instead of *p, use p[0]

  •  
  • There is an & operator, with the same semantics as in C

  •  
  • The null C pointer is called NULL, not 0 (and NULL is a reserved word).

  •  
  • Character literals are written with a c prefix, for example:
    • c'X'
  • Type casts are written <type>value , for example:
    • cdef char *p, float *q
      p = <char*>q
    Warning: Don't attempt to use a typecast to convert between Python and C data types -- it won't do the right thing. Leave Pyrex to perform the conversion automatically.

Integer for-loops

You should be aware that a for-loop such as
for i in range(n):
    ...
won't be very fast, even if i and n are declared as C integers, because range is a Python function. For iterating over ranges of integers, Pyrex has another form of for-loop:
for i from 0 <= i < n:
    ...
If the loop variable and the lower and upper bounds are all C integers, this form of loop will be much faster, because Pyrex will translate it into pure C code.

Some things to note about the for-from loop:

  • The target expression must be a variable name.
  • The name between the lower and upper bounds must be the same as the target name.
  • The direction of iteration is determined by the relations. If they are both from the set {<, <=} then it is upwards; if they are both from the set {>, >=} then it is downwards. (Any other combination is disallowed.)
Like other Python looping statements, break and continue may be used in the body, and the loop may have an else clause.


Error return values

If you don't do anything special, a function declared with cdef that does not return a Python object has no way of reporting Python exceptions to its caller. If an exception is detected in such a function, a warning message is printed and the exception is ignored.

If you want a C function that does not return a Python object to be able to propagate exceptions to its caller, you need to declare an exception value for it. Here is an example:

cdef int spam() except -1:
    ...
With this declaration, whenever an exception occurs inside spam, it will immediately return with the value -1. Furthermore, whenever a call to spam returns -1, an exception will be assumed to have occurred and will be propagated.

When you declare an exception value for a function, you should never explicitly return that value. If all possible return values are legal and you can't reserve one entirely for signalling errors, you can use an alternative form of exception value declaration:

cdef int spam() except? -1:
    ...
The "?" indicates that the value -1 only indicates a possible error. In this case, Pyrex generates a call to PyErr_Occurred if the exception value is returned, to make sure it really is an error.

There is also a third form of exception value declaration:

cdef int spam() except *:
    ...
This form causes Pyrex to generate a call to PyErr_Occurred after every call to spam, regardless of what value it returns. If you have a function returning void that needs to propagate errors, you will have to use this form, since there isn't any return value to test.

Some things to note:

  • Currently, exception values can only be declared for functions returning an integer, float or pointer type, and the value must be a literal, not an expression (although it can be negative). The only possible pointer exception value is NULL. Void functions can only use the except * form.

  •  
  • The exception value specification is part of the signature of the function. If you're passing a pointer to a function as a parameter or assigning it to a variable, the declared type of the parameter or variable must have the same exception value specification (or lack thereof). Here is an example of a pointer-to-function declaration with an exception value:
    • int (*grail)(int, char *) except -1
  • You don't need to (and shouldn't) declare exception values for functions which return Python objects. Remember that a function with no declared return type implicitly returns a Python object.

Checking return values of non-Pyrex functions

It's important to understand that the except clause does not cause an error to be raised when the specified value is returned. For example, you can't write something like
cdef extern FILE *fopen(char *filename, char *mode) except NULL # WRONG!
and expect an exception to be automatically raised if a call to fopen returns NULL. The except clause doesn't work that way; its only purpose is for propagating exceptions that have already been raised, either by a Pyrex function or a C function that calls Python/C API routines. To get an exception from a non-Python-aware function such as fopen, you will have to check the return value and raise it yourself, for example,
cdef FILE *p
p = fopen("spam.txt", "r")
if p == NULL:
    raise SpamError("Couldn't open the spam file")


The include statement

For convenience, a large Pyrex module can be split up into a number of files which are put together using the include statement, for example
include "spamstuff.pxi"
The contents of the named file are textually included at that point. The included file can contain any complete top-level Pyrex statements, including other include statements. The include statement itself can only appear at the top level of a file.

The include statement can also be used in conjunction with public declarations to make C functions and variables defined in one Pyrex module accessible to another. However, note that some of these uses have been superseded by the facilities described in Sharing Declarations Between Pyrex Modules, and it is expected that use of the include statement for this purpose will be phased out altogether in future versions.


Interfacing with External C Code

One of the main uses of Pyrex is wrapping existing libraries of C code. This is achieved by using external declarations to declare the C functions and variables from the library that you want to use.

You can also use public declarations to make C functions and variables defined in a Pyrex module available to external C code. The need for this is expected to be less frequent, but you might want to do it, for example, if you are embedding Python in another application as a scripting language. Just as a Pyrex module can be used as a bridge to allow Python code to call C code, it can also be used to allow C code to call Python code.

External declarations

By default, C functions and variables declared at the module level are local to the module (i.e. they have the C static storage class). They can also be declared extern to specify that they are defined elsewhere, for example:
cdef extern int spam_counter
cdef extern void order_spam(int tons)

Referencing C header files

When you use an extern definition on its own as in the examples above, Pyrex includes a declaration for it in the generated C file. This can cause problems if the declaration doesn't exactly match the declaration that will be seen by other C code. If you're wrapping an existing C library, for example, it's important that the generated C code is compiled with exactly the same declarations as the rest of the library.

To achieve this, you can tell Pyrex that the declarations are to be found in a C header file, like this:

cdef extern from "spam.h":
    int spam_counter
    void order_spam(int tons)
The cdef extern from clause does three things:
  1. It directs Pyrex to place a #include statement for the named header file in the generated C code.
  2.  
  3. It prevents Pyrex from generating any C code for the declarations found in the associated block.
  4.  
  5. It treats all declarations within the block as though they started with cdef extern.
It's important to understand that Pyrex does not itself read the C header file, so you still need to provide Pyrex versions of any declarations from it that you use. However, the Pyrex declarations don't always have to exactly match the C ones, and in some cases they shouldn't or can't. In particular:
  1. Don't use const. Pyrex doesn't know anything about const, so just leave it out. Most of the time this shouldn't cause any problem, although on rare occasions you might have to use a cast. 1
  2.  
  3. Leave out any platform-specific extensions to C declarations such as __declspec().
  4.  
  5. If the header file declares a big struct and you only want to use a few members, you only need to declare the members you're interested in. Leaving the rest out doesn't do any harm, because the C compiler will use the full definition from the header file.

    In some cases, you might not need any of the struct's members, in which case you can just put pass in the body of the struct declaration, e.g.

        cdef extern from "foo.h":
            struct spam:
                pass


    Note that you can only do this inside a cdef extern from block; struct declarations anywhere else must be non-empty.

  6. If the header file uses typedef names such as size_t to refer to platform-dependent flavours of numeric types, you will need a corresponding ctypedef statement, but you don't need to match the type exactly, just use something of the right general kind (int, float, etc). For example,
    1. ctypedef int size_t
    will work okay whatever the actual size of a size_t is (provided the header file defines it correctly).
     
  7. If the header file uses macros to define constants, translate them into a dummy enum declaration.
  8.  
  9. If the header file defines a function using a macro, declare it as though it were an ordinary function, with appropriate argument and result types.
A few more tricks and tips:
  • If you want to include a C header because it's needed by another header, but don't want to use any declarations from it, put pass in the extern-from block:
      cdef extern from "spam.h":
          pass
  • If you want to include some external declarations, but don't want to specify a header file (because it's included by some other header that you've already included) you can put * in place of the header file name:
cdef extern from *:
    ...

Styles of struct, union and enum declaration

There are two main ways that structs, unions and enums can be declared in C header files: using a tag name, or using a typedef. There are also some variations based on various combinations of these.

It's important to make the Pyrex declarations match the style used in the header file, so that Pyrex can emit the right sort of references to the type in the code it generates. To make this possible, Pyrex provides two different syntaxes for declaring a struct, union or enum type. The style introduced above corresponds to the use of a tag name. To get the other style, you prefix the declaration with ctypedef, as illustrated below.

The following table shows the various possible styles that can be found in a header file, and the corresponding Pyrex declaration that you should put in the cdef extern from block. Struct declarations are used as an example; the same applies equally to union and enum declarations.

Note that in all the cases below, you refer to the type in Pyrex code simply as Foo, not struct Foo.
 
  C code Possibilities for corresponding Pyrex code Comments
1 struct Foo {
  ...
};
cdef struct Foo:
  ...
Pyrex will refer to the type as struct Foo in the generated C code.
2 typedef struct {
  ...
} Foo;
ctypedef struct Foo:
  ...
Pyrex will refer to the type simply as Foo in the generated C code.
3 typedef struct foo {
  ...
} Foo;
cdef struct foo:
  ...
ctypedef foo Foo #optional
If the C header uses both a tag and a typedef with different names, you can use either form of declaration in Pyrex (although if you need to forward reference the type, you'll have to use the first form).
ctypedef struct Foo:
  ...
4 typedef struct Foo {
  ...
} Foo;
cdef struct Foo:
  ...
If the header uses the same name for the tag and the typedef, you won't be able to include a ctypedef for it -- but then, it's not necessary.

Accessing Python/C API routines

One particular use of the cdef extern from statement is for gaining access to routines in the Python/C API. For example,
cdef extern from "Python.h":
    object PyString_FromStringAndSize(char *s, int len)
will allow you to create Python strings containing null bytes.


Resolving naming conflicts - C name specifications

Each Pyrex module has a single module-level namespace for both Python and C names. This can be inconvenient if you want to wrap some external C functions and provide the Python user with Python functions of the same names.

Pyrex 0.8 provides a couple of different ways of solving this problem. The best way, especially if you have many C functions to wrap, is probably to put the extern C function declarations into a different namespace using the facilities described in the section on sharing declarations between Pyrex modules.

The other way is to use a c name specification to give different Pyrex and C names to the C function. Suppose, for example, that you want to wrap an external function called eject_tomato. If you declare it as

cdef extern void c_eject_tomato "eject_tomato" (float speed)
then its name inside the Pyrex module will be c_eject_tomato, whereas its name in C will be eject_tomato. You can then wrap it with
def eject_tomato(speed):
  c_eject_tomato(speed)
so that users of your module can refer to it as eject_tomato.

Another use for this feature is referring to external names that happen to be Pyrex keywords. For example, if you want to call an external function called print, you can rename it to something else in your Pyrex module.

As well as functions, C names can be specified for variables, structs, unions, enums, struct and union members, and enum values. For example,

cdef extern int one "ein", two "zwei"
cdef extern float three "drei"

cdef struct spam "SPAM":
  int i "eye"
cdef enum surprise "inquisition":
  first "alpha"
  second "beta" = 3

Public Declarations

You can make C variables and functions defined in a Pyrex module accessible to external C code (or another Pyrex module) using the public keyword, as follows:
cdef public int spam # public variable declaration

cdef public void grail(int num_nuns): # public function declaration
    ...

If there are any public declarations in a Pyrex module, a .h file is generated containing equivalent C declarations for inclusion in other C code.

Pyrex also generates a .pxi file containing Pyrex versions of the declarations for inclusion in another Pyrex module using the include statement. If you use this, you will need to arrange for the module using the declarations to be linked against the module defining them, and for both modules to be available to the dynamic linker at run time. I haven't tested this, so I can't say how well it will work on the various platforms.

NOTE: If all you want to export is an extension type, there is now a better way -- see Sharing Declarations Between Pyrex Modules.


Extension Types

One of the most powerful features of Pyrex is the ability to easily create new built-in Python types, called extension types. This is a major topic in itself, so there is a  separate page devoted to it.


Sharing Declarations Between Pyrex Modules

Pyrex 0.8 introduces a substantial new set of facilities allowing a Pyrex module to easily import and use C declarations and extension types from another Pyrex module. You can now create a set of co-operating Pyrex modules just as easily as you can create a set of co-operating Python modules. There is a separate page devoted to this topic.


Limitations

Unsupported Python features

Pyrex is not quite a full superset of Python. The following restrictions apply:
  • Function definitions (whether using def or cdef) cannot be nested within other function definitions.
  • Class definitions can only appear at the top level of a module, not inside a function.
  • The import * form of import is not allowed anywhere (other forms of the import statement are fine, though).
  • Generators cannot be defined in Pyrex.
  • The globals() and locals() functions cannot be used.

    The above restrictions will most likely remain, since removing them would be difficult and they're not really needed for Pyrex's intended applications.

    There are also some temporary limitations which may eventually be lifted:

  • Class and function definitions cannot be placed inside control structures.
  • In-place arithmetic operators (+=, etc.) are not yet supported.
  • List comprehensions are not yet supported.
  • There is no support for Unicode.
  • Special methods of extension types cannot have functioning docstrings.
  • The use of string literals as comments is not recommended at present, because Pyrex doesn't optimize them away, and won't even accept them in places where executable statements are not allowed.

    There are probably also some other gaps which I can't think of at the moment.

    Semantic differences between Python and Pyrex

    Behaviour of class scopes

    In Python, referring to a method of a class inside the class definition, i.e. while the class is being defined, yields a plain function object, but in Pyrex it yields an unbound method (see footnote 2). A consequence of this is that the usual idiom for using the classmethod and staticmethod functions, e.g.
    class Spam:
      def method(cls):
        ...
      method = classmethod(method)
    will not work in Pyrex. This can be worked around by defining the function outside the class, and then assigning the result of classmethod or staticmethod inside the class, i.e.
    def Spam_method(cls):
      ...
    class Spam:
      method = classmethod(Spam_method)


    Footnotes

    1. A problem with const could arise if you have something like
    cdef extern from "grail.h":
      char *nun
    where grail.h actually contains
    extern const char *nun;
    and you do
    cdef void languissement(char *s):
      #something that doesn't change s
    ...
    languissement(nun)
    which will cause the C compiler to complain. You can work around it by casting away the constness:
    languissement(<char *>nun)

    2. The reason for the different behaviour of class scopes is that Pyrex-defined Python functions are PyCFunction objects, not PyFunction objects, and are not recognised by the machinery that creates a bound or unbound method when a function is extracted from a class. To get around this, Pyrex wraps each method in an unbound method object itself before storing it in the class's dictionary.
     

    \ No newline at end of file Added: lxml/pyrex/Doc/primes.c ============================================================================== --- (empty file) +++ lxml/pyrex/Doc/primes.c Fri Mar 10 13:58:37 2006 @@ -0,0 +1 @@ +#include "Python.h" static PyObject *__Pyx_UnpackItem(PyObject *, int); static int __Pyx_EndUnpack(PyObject *, int); static int __Pyx_PrintItem(PyObject *); static int __Pyx_PrintNewline(void); static void __Pyx_ReRaise(void); static void __Pyx_RaiseWithTraceback(PyObject *, PyObject *, PyObject *); static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list); static PyObject *__Pyx_GetExcValue(void); static PyObject *__Pyx_GetName(PyObject *dict, char *name); static PyObject *__pyx_m; static PyObject *__pyx_d; static PyObject *__pyx_b; PyObject *__pyx_f_primes(PyObject *__pyx_self, PyObject *__pyx_args); /*proto*/ PyObject *__pyx_f_primes(PyObject *__pyx_self, PyObject *__pyx_args) { int __pyx_v_kmax; int __pyx_v_n; int __pyx_v_k; int __pyx_v_i; int (__pyx_v_p[1000]); PyObject *__pyx_v_result; PyObject *__pyx_r; PyObject *__pyx_1 = 0; int __pyx_2; int __pyx_3; int __pyx_4; PyObject *__pyx_5 = 0; PyObject *__pyx_6 = 0; if (!PyArg_ParseTuple(__pyx_args, "i", &__pyx_v_kmax)) return 0; __pyx_v_result = Py_None; Py_INCREF(__pyx_v_result); /* "ProjectsA:Python:Pyrex:Demos:primes.pyx":2 */ /* "ProjectsA:Python:Pyrex:Demos:primes.pyx":3 */ /* "ProjectsA:Python:Pyrex:Demos:primes.pyx":4 */ __pyx_1 = PyList_New(0); if (!__pyx_1) goto __pyx_L1; Py_DECREF(__pyx_v_result); __pyx_v_result = __pyx_1; __pyx_1 = 0; /* "ProjectsA:Python:Pyrex:Demos:primes.pyx":5 */ __pyx_2 = (__pyx_v_kmax > 1000); if (__pyx_2) { /* "ProjectsA:Python:Pyrex:Demos:primes.pyx":6 */ __pyx_v_kmax = 1000; goto __pyx_L2; } __pyx_L2:; /* "ProjectsA:Python:Pyrex:Demos:primes.pyx":7 */ __pyx_v_k = 0; /* "ProjectsA:Python:Pyrex:Demos:primes.pyx":8 */ __pyx_v_n = 2; /* "ProjectsA:Python:Pyrex:Demos:primes.pyx":9 */ while (1) { __pyx_L3:; __pyx_2 = (__pyx_v_k < __pyx_v_kmax); if 
(!__pyx_2) break; /* "ProjectsA:Python:Pyrex:Demos:primes.pyx":10 */ __pyx_v_i = 0; /* "ProjectsA:Python:Pyrex:Demos:primes.pyx":11 */ while (1) { __pyx_L5:; if (__pyx_3 = (__pyx_v_i < __pyx_v_k)) { __pyx_3 = ((__pyx_v_n % (__pyx_v_p[__pyx_v_i])) != 0); } if (!__pyx_3) break; /* "ProjectsA:Python:Pyrex:Demos:primes.pyx":12 */ __pyx_v_i = (__pyx_v_i + 1); } __pyx_L6:; /* "ProjectsA:Python:Pyrex:Demos:primes.pyx":13 */ __pyx_4 = (__pyx_v_i == __pyx_v_k); if (__pyx_4) { /* "ProjectsA:Python:Pyrex:Demos:primes.pyx":14 */ (__pyx_v_p[__pyx_v_k]) = __pyx_v_n; /* "ProjectsA:Python:Pyrex:Demos:primes.pyx":15 */ __pyx_v_k = (__pyx_v_k + 1); /* "ProjectsA:Python:Pyrex:Demos:primes.pyx":16 */ __pyx_1 = PyObject_GetAttrString(__pyx_v_result, "append"); if (!__pyx_1) goto __pyx_L1; __pyx_5 = PyInt_FromLong(__pyx_v_n); if (!__pyx_5) goto __pyx_L1; __pyx_6 = PyTuple_New(1); if (!__pyx_6) goto __pyx_L1; PyTuple_SET_ITEM(__pyx_6, 0, __pyx_5); __pyx_5 = 0; __pyx_5 = PyObject_CallObject(__pyx_1, __pyx_6); if (!__pyx_5) goto __pyx_L1; Py_DECREF(__pyx_6); __pyx_6 = 0; Py_DECREF(__pyx_5); __pyx_5 = 0; goto __pyx_L7; } __pyx_L7:; /* "ProjectsA:Python:Pyrex:Demos:primes.pyx":17 */ __pyx_v_n = (__pyx_v_n + 1); } __pyx_L4:; /* "ProjectsA:Python:Pyrex:Demos:primes.pyx":18 */ Py_INCREF(__pyx_v_result); __pyx_r = __pyx_v_result; goto __pyx_L0; __pyx_r = Py_None; Py_INCREF(__pyx_r); goto __pyx_L0; __pyx_L1:; Py_XDECREF(__pyx_1); Py_XDECREF(__pyx_5); Py_XDECREF(__pyx_6); __pyx_r = 0; __pyx_L0:; Py_DECREF(__pyx_v_result); return __pyx_r; } static struct PyMethodDef __pyx_methods[] = { {"primes", (PyCFunction)__pyx_f_primes, METH_VARARGS, 0}, {0, 0, 0, 0} }; void initprimes(void); /*proto*/ void initprimes(void) { __pyx_m = Py_InitModule4("primes", __pyx_methods, 0, 0, PYTHON_API_VERSION); __pyx_d = PyModule_GetDict(__pyx_m); __pyx_b = PyImport_AddModule("__builtin__"); PyDict_SetItemString(__pyx_d, "__builtins__", __pyx_b); } /* Runtime support code */ \ No newline at end of file Added: 
lxml/pyrex/Doc/sharing.html ============================================================================== --- (empty file) +++ lxml/pyrex/Doc/sharing.html Fri Mar 10 13:58:37 2006 @@ -0,0 +1 @@ + Sharing Declarations Between Pyrex Modules


    Sharing Declarations Between Pyrex Modules

    This section describes a new set of facilities introduced in Pyrex 0.8 for making C declarations and extension types in one Pyrex module available for use in another Pyrex module. These facilities are closely modelled on the Python import mechanism, and can be thought of as a compile-time version of it.

    Contents

    Definition and Implementation files

    A Pyrex module can be split into two parts: a definition file with a .pxd suffix, containing C declarations that are to be available to other Pyrex modules, and an implementation file with a .pyx suffix, containing everything else. When a module wants to use something declared in another module's definition file, it imports it using the cimport statement.

    What a Definition File contains

    A definition file can contain:
    • Any kind of C type declaration.
    • extern C function or variable declarations.
    • The definition part of an extension type (see below).
    It cannot currently contain any non-extern C function or variable declarations (although this may be possible in a future version).

    It cannot contain the implementations of any C or Python functions, or any Python class definitions, or any executable statements.

    NOTE: You don't need to (and shouldn't) declare anything in a definition file public in order to make it available to other Pyrex modules; its mere presence in a definition file does that. You only need a public declaration if you want to make something available to external C code.

    What an Implementation File contains

    An implementation file can contain any kind of Pyrex statement, although there are some restrictions on the implementation part of an extension type if the corresponding definition file also defines that type (see below).

    The cimport statement

    The cimport statement is used in a definition or implementation file to gain access to names declared in another definition file. Its syntax exactly parallels that of the normal Python import statement:
    cimport module [, module...]
    from module cimport name [as name] [, name [as name] ...]
    Here is an example. The first file, dishes.pxd, is a definition file which exports a C data type. The second file, restaurant.pyx, is an implementation file which imports and uses it.

    dishes.pxd:
    cdef enum otherstuff:
        sausage, eggs, lettuce

    cdef struct spamdish:
        int oz_of_spam
        otherstuff filler

    restaurant.pyx:
    cimport dishes
    from dishes cimport spamdish

    cdef void prepare(spamdish *d):
        d.oz_of_spam = 42
        d.filler = dishes.sausage

    def serve():
        cdef spamdish d
        prepare(&d)
        print "%d oz spam, filler no. %d" % \
             (d.oz_of_spam, d.filler)

    It is important to understand that the cimport statement can only be used to import C data types, external C functions and variables, and extension types. It cannot be used to import any Python objects, and (with one exception) it doesn't imply any Python import at run time. If you want to refer to any Python names from a module that you have cimported, you will have to include a regular import statement for it as well.

    The exception is that when you use cimport to import an extension type, its type object is imported at run time and made available by the name under which you imported it. Using cimport to import extension types is covered in more detail below.

    Search paths for definition files

    When you cimport a module called modulename, the Pyrex compiler searches for a file called modulename.pxd along the search path for include files, as specified by -I command line options.

    Also, whenever you compile a file modulename.pyx, the corresponding definition file modulename.pxd is first searched for along the same path, and if found, it is processed before processing the .pyx file.

    Using cimport to resolve naming conflicts

    The cimport mechanism provides a clean and simple way to solve the problem of wrapping external C functions with Python functions of the same name. All you need to do is put the extern C declarations into a .pxd file for an imaginary module, and cimport that module. You can then refer to the C functions by qualifying them with the name of the module. Here's an example:
     
    c_lunch.pxd:
    cdef extern from "lunch.h":
        void eject_tomato(float)

    lunch.pyx:
    cimport c_lunch

    def eject_tomato(float speed):
        c_lunch.eject_tomato(speed)

    You don't need any c_lunch.pyx file, because the only things defined in c_lunch.pxd are extern C entities. There won't be any actual c_lunch module at run time, but that doesn't matter -- c_lunch has done its job of providing an additional namespace at compile time.

    Sharing Extension Types

    An extension type declaration can also be split into two parts, one in a definition file and the other in the corresponding implementation file.

    The definition part of the extension type can only declare C attributes and C methods, not Python methods, and it must declare all of that type's C attributes and C methods.

    The implementation part must implement all of the C methods declared in the definition part, and may not add any further C attributes. It may also define Python methods.

    Here is an example of a module which defines and exports an extension type, and another module which uses it.
     
    Shrubbing.pxd:
    cdef class Shrubbery:
        cdef int width
        cdef int length

    Shrubbing.pyx:
    cdef class Shrubbery:
        def __new__(self, int w, int l):
            self.width = w
            self.length = l

    def standard_shrubbery():
        return Shrubbery(3, 7)

    Landscaping.pyx
    cimport Shrubbing
    import Shrubbing

    cdef Shrubbing.Shrubbery sh
    sh = Shrubbing.standard_shrubbery()
    print "Shrubbery size is %d x %d" % (sh.width, sh.length)
     

    Some things to note about this example:

    • There is a cdef class Shrubbery declaration in both Shrubbing.pxd and Shrubbing.pyx. When the Shrubbing module is compiled, these two declarations are combined into one.

    • In Landscaping.pyx, the cimport Shrubbing declaration allows us to refer to the Shrubbery type as Shrubbing.Shrubbery. But it doesn't bind the name Shrubbing in Landscaping's module namespace at run time, so to access Shrubbing.standard_shrubbery we also need to import Shrubbing.

    Back to the Language Overview

    \ No newline at end of file Added: lxml/pyrex/Doc/special_methods.html ============================================================================== --- (empty file) +++ lxml/pyrex/Doc/special_methods.html Fri Mar 10 13:58:37 2006 @@ -0,0 +1 @@ + Special Methods of Extension Types


    Special Methods of Extension Types

    This page describes the special methods currently supported by Pyrex extension types. A complete list of all the special methods appears in the table at the bottom. Some of these methods behave differently from their Python counterparts or have no direct Python counterparts, and require special mention.

    Note: Everything said on this page applies only to extension types, defined with the cdef class statement. It doesn't apply  to classes defined with the Python class statement, where the normal Python rules apply.

    Docstrings

    Currently, docstrings are not fully supported in special methods of extension types. You can place a docstring in the source to serve as a comment, but it won't show up in the corresponding __doc__ attribute at run time. (This is a Python limitation -- there's nowhere in the PyTypeObject data structure to put such docstrings.)

    Initialisation methods: __new__ and __init__

    There are two methods concerned with initialising the object.

    The __new__ method is where you should perform basic C-level initialisation of the object, including allocation of any C data structures that your object will own. You need to be careful what you do in the __new__ method, because the object may not yet be a valid Python object when it is called. Therefore, you must not invoke any Python operations which might touch the object; in particular, do not try to call any of its methods.

    Unlike the corresponding method in Python, your __new__ method is not responsible for creating the object. By the time your __new__ method is called, memory has been allocated for the object and any C attributes it has have been initialised to 0 or null. (Any Python attributes have also been initialised to None, but you probably shouldn't rely on that.) Your __new__ method is guaranteed to be called exactly once.

    If your extension type has a base type, the __new__ method of the base type is automatically called before your __new__ method is called; you cannot explicitly call the inherited __new__ method. If you need to pass a modified argument list to the base type, you will have to do the relevant part of the initialisation in the __init__ method instead (where the normal rules for calling inherited methods apply).

    Note that the first parameter of the __new__ method is the object to be initialised, not the class of the object as it is in Python.

    Any initialisation which cannot safely be done in the __new__ method should be done in the __init__ method. By the time __init__ is called, the object is a fully valid Python object and all operations are safe. Under some circumstances it is possible for __init__ to be called more than once or not to be called at all, so your other methods should be designed to be robust in such situations.

    Keep in mind that any arguments passed to the constructor will be passed to the __new__ method as well as the __init__ method. If you anticipate subclassing your extension type in Python, you may find it useful to give the __new__ method * and ** arguments so that it can accept and ignore extra arguments. Otherwise, any Python subclass which has an __init__ with a different signature will have to override __new__ as well as __init__, which the writer of a Python class wouldn't expect to have to do.

    Finalization method: __dealloc__

    The counterpart to the __new__ method is the __dealloc__ method, which should perform the inverse of the __new__ method. Any C data structures that you allocated in your __new__ method should be freed in your __dealloc__ method.

    You need to be careful what you do in a __dealloc__ method. By the time your __dealloc__ method is called, the object may already have been partially destroyed and may not be in a valid state as far as Python is concerned, so you should avoid invoking any Python operations which might touch the object. In particular, don't call any other methods of the object or do anything which might cause the object to be resurrected. It's best if you stick to just deallocating C data.

    You don't need to worry about deallocating Python attributes of your object, because that will be done for you by Pyrex after your __dealloc__ method returns.

    Note: There is no __del__ method for extension types. (Earlier versions of the Pyrex documentation stated that there was, but this turned out to be incorrect.)

    Arithmetic methods

    Arithmetic operator methods, such as __add__, behave differently from their Python counterparts. There are no separate "reversed" versions of these methods (__radd__, etc.) Instead, if the first operand cannot perform the operation, the same method of the second operand is called, with the operands in the same order.

    This means that you can't rely on the first parameter of these methods being "self", and you should test the types of both operands before deciding what to do. If you can't handle the combination of types you've been given, you should return NotImplemented.

    This also applies to the in-place arithmetic method __ipow__. It doesn't apply to any of the other in-place methods (__iadd__, etc.) which always take self as the first argument.

    Rich comparisons

    There are no separate methods for the individual rich comparison operations (__eq__, __le__, etc.). Instead there is a single method __richcmp__ which takes an integer indicating which operation is to be performed, as follows:

        <    0        ==   2        >    4
        <=   1        !=   3        >=   5

    The __next__ method

    Extension types wishing to implement the iterator interface should define a method called __next__, not next. The Python system will automatically supply a next method which calls your __next__. Do NOT explicitly give your type a next method, or bad things could happen (see note 3).

    Special Method Table

    This table lists all of the special methods together with their parameter and return types. A parameter named self is of the type the method belongs to. Other untyped parameters are generic Python objects.

    You don't have to declare your method as taking these parameter types. If you declare different types, conversions will be performed as necessary.
     
    Name Parameters Return type Description
    General
    __new__ self, ...   Basic initialisation (no direct Python equivalent)
    __init__ self, ...   Further initialisation
    __dealloc__ self   Basic deallocation (no direct Python equivalent)
    __cmp__ x, y int 3-way comparison
    __richcmp__ x, y, int op object Rich comparison (no direct Python equivalent)
    __str__ self object str(self)
    __repr__ self object repr(self)
    __hash__ self int Hash function
    __call__ self, ... object self(...)
    __iter__ self object Return iterator for sequence
    __getattr__ self, name object Get attribute
    __setattr__ self, name, val   Set attribute
    __delattr__ self, name   Delete attribute
    Arithmetic operators
    __add__ x, y object binary + operator
    __sub__ x, y object binary - operator
    __mul__ x, y object * operator
    __div__ x, y object /  operator for old-style division
    __floordiv__ x, y object //  operator
    __truediv__ x, y object /  operator for new-style division
    __mod__ x, y object % operator
    __divmod__ x, y object combined div and mod
    __pow__ x, y, z object ** operator or pow(x, y, z)
    __neg__ self object unary - operator
    __pos__ self object unary + operator
    __abs__ self object absolute value
    __nonzero__ self int convert to boolean
    __invert__ self object ~ operator
    __lshift__ x, y object << operator
    __rshift__ x, y object >> operator
    __and__ x, y object & operator
    __or__ x, y object | operator
    __xor__ x, y object ^ operator
    Numeric conversions
    __int__ self object Convert to integer
    __long__ self object Convert to long integer
    __float__ self object Convert to float
    __oct__ self object Convert to octal
    __hex__ self object Convert to hexadecimal
    In-place arithmetic operators
    __iadd__ self, x object += operator
    __isub__ self, x object -= operator
    __imul__ self, x object *= operator
    __idiv__ self, x object /= operator for old-style division
    __ifloordiv__ self, x object //= operator
    __itruediv__ self, x object /= operator for new-style division
    __imod__ self, x object %= operator
    __ipow__ x, y, z object **= operator
    __ilshift__ self, x object <<= operator
    __irshift__ self, x object >>= operator
    __iand__ self, x object &= operator
    __ior__ self, x object |= operator
    __ixor__ self, x object ^= operator
    Sequences and mappings
    __len__ self int len(self)
    __getitem__ self, x object self[x]
    __setitem__ self, x, y   self[x] = y
    __delitem__ self, x   del self[x]
    __getslice__ self, int i, int j object self[i:j]
    __setslice__ self, int i, int j, x   self[i:j] = x
    __delslice__ self, int i, int j   del self[i:j]
    __contains__ self, x int x in self
    Iterators
    __next__ self object Get next item (called next in Python)
    Buffer interface  (no Python equivalents - see note 1)
    __getreadbuffer__ self, int i, void **p    
    __getwritebuffer__ self, int i, void **p    
    __getsegcount__ self, int *p    
    __getcharbuffer__ self, int i, char **p    
    Descriptor objects  (no Python equivalents - see note 2)
    __get__ self, instance, class object Get value of attribute
    __set__ self, instance, value   Set value of attribute
    __delete__ self, instance   Delete attribute

    Note 1: The buffer interface is intended for use by C code and is not directly accessible from Python. It is described in the Python/C API Reference Manual under sections 6.6 and 10.6.

    Note 2: Descriptor objects are part of the support mechanism for new-style Python classes. See the discussion of descriptors in the Python documentation. See also PEP 252, "Making Types Look More Like Classes", and PEP 253, "Subtyping Built-In Types".

    Note 3: If your type defines a __next__ method, any method called next that you define will be overwritten with the system-supplied next at module import time.



    \ No newline at end of file Added: lxml/pyrex/INSTALL.txt ============================================================================== --- (empty file) +++ lxml/pyrex/INSTALL.txt Fri Mar 10 13:58:37 2006 @@ -0,0 +1,22 @@ +Pyrex - Installation Instructions +================================= + +You have two installation options: + +(1) Run the setup.py script in this directory + as follows: + + python setup.py install + + This will install the Pyrex package + into your Python system. + +OR + +(2) If you prefer not to modify your Python + installation, arrange for the directory + containing this file (INSTALL.txt) to be in + your PYTHONPATH. On unix, also put the bin + directory on your PATH. + +See README.txt for pointers to other documentation. Added: lxml/pyrex/MANIFEST ============================================================================== --- (empty file) +++ lxml/pyrex/MANIFEST Fri Mar 10 13:58:37 2006 @@ -0,0 +1,71 @@ +CHANGES.txt +INSTALL.txt +MANIFEST.in +README.txt +ToDo.txt +USAGE.txt +pyrexc.py +setup.py +Demos/Makefile +Demos/Makefile.nodistutils +Demos/Setup.py +Demos/numeric_demo.pyx +Demos/primes.pyx +Demos/pyprimes.py +Demos/run_numeric_demo.py +Demos/run_primes.py +Demos/run_spam.py +Demos/spam.pyx +Doc/About.html +Doc/FAQ.html +Doc/extension_types.html +Doc/index.html +Doc/overview.html +Doc/primes.c +Doc/sharing.html +Doc/special_methods.html +Pyrex/Debugging.py +Pyrex/Utils.py +Pyrex/__init__.py +Pyrex/Compiler/CmdLine.py +Pyrex/Compiler/Code.py +Pyrex/Compiler/DebugFlags.py +Pyrex/Compiler/Errors.py +Pyrex/Compiler/ExprNodes.py +Pyrex/Compiler/Lexicon.pickle +Pyrex/Compiler/Lexicon.py +Pyrex/Compiler/Main.py +Pyrex/Compiler/Naming.py +Pyrex/Compiler/Nodes.py +Pyrex/Compiler/Options.py +Pyrex/Compiler/Parsing.py +Pyrex/Compiler/PyrexTypes.py +Pyrex/Compiler/Scanning.py +Pyrex/Compiler/Symtab.py +Pyrex/Compiler/TypeSlots.py +Pyrex/Compiler/Version.py +Pyrex/Compiler/__init__.py +Pyrex/Distutils/__init__.py +Pyrex/Distutils/build_ext.py 
+Pyrex/Mac/DarwinSystem.py +Pyrex/Mac/Finder_Std_Suite.py +Pyrex/Mac/MPW_Misc_Suite.py +Pyrex/Mac/MacSystem.py +Pyrex/Mac/MacUtils.py +Pyrex/Mac/PS_Misc_Suite.py +Pyrex/Mac/PyServerMain.py +Pyrex/Mac/TS_Misc_Suite.py +Pyrex/Mac/__init__.py +Pyrex/Plex/Actions.py +Pyrex/Plex/DFA.py +Pyrex/Plex/Errors.py +Pyrex/Plex/Lexicons.py +Pyrex/Plex/Machines.py +Pyrex/Plex/Regexps.py +Pyrex/Plex/Scanners.py +Pyrex/Plex/Timing.py +Pyrex/Plex/Traditional.py +Pyrex/Plex/Transitions.py +Pyrex/Plex/__init__.py +Pyrex/Plex/test_tm.py +bin/pyrexc Added: lxml/pyrex/MANIFEST.in ============================================================================== --- (empty file) +++ lxml/pyrex/MANIFEST.in Fri Mar 10 13:58:37 2006 @@ -0,0 +1,7 @@ +include MANIFEST.in README.txt INSTALL.txt CHANGES.txt ToDo.txt USAGE.txt +include setup.py +include bin/pyrexc +include pyrexc.py +include Pyrex/Compiler/Lexicon.pickle +include Doc/* +include Demos/* Added: lxml/pyrex/PKG-INFO ============================================================================== --- (empty file) +++ lxml/pyrex/PKG-INFO Fri Mar 10 13:58:37 2006 @@ -0,0 +1,10 @@ +Metadata-Version: 1.0 +Name: Pyrex +Version: 0.9.3.1 +Summary: UNKNOWN +Home-page: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ +Author: Greg Ewing +Author-email: greg at cosc.canterbury.ac.nz +License: UNKNOWN +Description: UNKNOWN +Platform: UNKNOWN Added: lxml/pyrex/Pyrex/Compiler/CmdLine.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Compiler/CmdLine.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,65 @@ +# +# Pyrex - Command Line Parsing +# + +import sys + +usage = """\ +Usage: pyrexc [options] sourcefile... 
+Options: + -v, --version Display version number of pyrex compiler + -l, --create-listing Write error messages to a listing file + -I, --include-dir Search for include files in named directory + -o, --output-file Specify name of generated C file""" + +def bad_usage(): + print >>sys.stderr, usage + sys.exit(1) + +def parse_command_line(args): + from Pyrex.Compiler.Main import \ + CompilationOptions, default_options + + def pop_arg(): + if args: + return args.pop(0) + else: + bad_usage() + + def get_param(option): + tail = option[2:] + if tail: + return tail + else: + return pop_arg() + + options = CompilationOptions(default_options) + sources = [] + while args: + if args[0].startswith("-"): + option = pop_arg() + if option in ("-v", "--version"): + options.show_version = 1 + elif option in ("-l", "--create-listing"): + options.use_listing_file = 1 + elif option in ("-C", "--compile"): + options.c_only = 0 + elif option in ("-X", "--link"): + options.c_only = 0 + options.obj_only = 0 + elif option.startswith("-I"): + options.include_path.append(get_param(option)) + elif option == "--include-dir": + options.include_path.append(pop_arg()) + elif option in ("-o", "--output-file"): + options.output_file = pop_arg() + else: + bad_usage() + else: + sources.append(pop_arg()) + if options.use_listing_file and len(sources) > 1: + print >>sys.stderr, \ + "pyrexc: Only one source file allowed when using -o" + sys.exit(1) + return options, sources + Added: lxml/pyrex/Pyrex/Compiler/Code.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Compiler/Code.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,282 @@ +# +# Pyrex - Code output module +# + +import Naming +from Pyrex.Utils import open_new_file + +class CCodeWriter: + # f file output file + # level int indentation level + # bol bool beginning of line? 
+ # marker string comment to emit before next line + # return_label string function return point label + # error_label string error catch point label + # continue_label string loop continue point label + # break_label string loop break point label + # label_counter integer counter for naming labels + # in_try_finally boolean inside try of try...finally + # filename_table {string : int} for finding filename table indexes + # filename_list [string] filenames in filename table order + + in_try_finally = 0 + + def __init__(self, outfile_name): + self.f = open_new_file(outfile_name) + self.level = 0 + self.bol = 1 + self.marker = None + self.label_counter = 1 + self.error_label = None + self.filename_table = {} + self.filename_list = [] + + def putln(self, code = ""): + if self.marker and self.bol: + self.emit_marker() + if code: + self.put(code) + self.f.write("\n"); + self.bol = 1 + + def emit_marker(self): + self.f.write("\n"); + self.indent() + self.f.write("/* %s */\n" % self.marker) + self.marker = None + + def put(self, code): + dl = code.count("{") - code.count("}") + if dl < 0: + self.level += dl + if self.bol: + self.indent() + self.f.write(code) + self.bol = 0 + if dl > 0: + self.level += dl + + def increase_indent(self): + self.level = self.level + 1 + + def decrease_indent(self): + self.level = self.level - 1 + + def begin_block(self): + self.putln("{") + self.increase_indent() + + def end_block(self): + self.decrease_indent() + self.putln("}") + + def indent(self): + self.f.write(" " * self.level) + + def mark_pos(self, pos): + file, line, col = pos + self.marker = '"%s":%s' % (file, line) + + def init_labels(self): + self.label_counter = 0 + self.return_label = self.new_label() + self.new_error_label() + self.continue_label = None + self.break_label = None + + def new_label(self): + n = self.label_counter + self.label_counter = n + 1 + return "%s%d" % (Naming.label_prefix, n) + + def new_error_label(self): + old_err_lbl = self.error_label + 
self.error_label = self.new_label() + return old_err_lbl + + def get_loop_labels(self): + return ( + self.continue_label, + self.break_label) + + def set_loop_labels(self, labels): + (self.continue_label, + self.break_label) = labels + + def new_loop_labels(self): + old_labels = self.get_loop_labels() + self.set_loop_labels( + (self.new_label(), + self.new_label())) + return old_labels + + def get_all_labels(self): + return ( + self.continue_label, + self.break_label, + self.return_label, + self.error_label) + + def set_all_labels(self, labels): + (self.continue_label, + self.break_label, + self.return_label, + self.error_label) = labels + + def all_new_labels(self): + old_labels = self.get_all_labels() + new_labels = [] + for old_label in old_labels: + if old_label: + new_labels.append(self.new_label()) + else: + new_labels.append(old_label) + self.set_all_labels(new_labels) + return old_labels + + def put_label(self, lbl): + self.putln("%s:;" % lbl) + + def put_var_declarations(self, entries, static = 0, dll_linkage = None): + for entry in entries: + if not entry.in_cinclude: + self.put_var_declaration(entry, static, dll_linkage) + + def put_var_declaration(self, entry, static = 0, dll_linkage = None): + if entry.visibility == 'extern': + self.put("extern ") + elif static and entry.visibility <> 'public': + self.put("static ") + if entry.visibility <> 'public': + dll_linkage = None + self.put(entry.type.declaration_code(entry.cname, + dll_linkage = dll_linkage)) + if entry.init is not None: + self.put(" = %s" % entry.type.literal_code(entry.init)) + self.putln(";") + + def entry_as_pyobject(self, entry): + type = entry.type + if (not entry.is_self_arg and not entry.type.is_complete()) \ + or (entry.type.is_extension_type and entry.type.base_type): + return "(PyObject *)" + entry.cname + else: + return entry.cname + + def as_pyobject(self, cname, type): + if type.is_extension_type and type.base_type: + return "(PyObject *)" + cname + else: + return cname + + def 
put_incref(self, cname, type): + self.putln("Py_INCREF(%s);" % self.as_pyobject(cname, type)) + + def put_decref(self, cname, type): + self.putln("Py_DECREF(%s);" % self.as_pyobject(cname, type)) + + def put_var_incref(self, entry): + if entry.type.is_pyobject: + self.putln("Py_INCREF(%s);" % self.entry_as_pyobject(entry)) + + def put_decref_clear(self, cname, type): + self.putln("Py_DECREF(%s); %s = 0;" % ( + self.as_pyobject(cname, type), cname)) + + def put_xdecref(self, cname, type): + self.putln("Py_XDECREF(%s);" % self.as_pyobject(cname, type)) + + def put_xdecref_clear(self, cname, type): + self.putln("Py_XDECREF(%s); %s = 0;" % ( + self.as_pyobject(cname, type), cname)) + + def put_var_decref(self, entry): + if entry.type.is_pyobject: + self.putln("Py_DECREF(%s);" % self.entry_as_pyobject(entry)) + + def put_var_xdecref(self, entry): + if entry.type.is_pyobject: + self.putln("Py_XDECREF(%s);" % self.entry_as_pyobject(entry)) + + def put_var_xdecref_clear(self, entry): + if entry.type.is_pyobject: + self.putln("Py_XDECREF(%s); %s = 0;" % ( + self.entry_as_pyobject(entry), entry.cname)) + + def put_var_decrefs(self, entries): + for entry in entries: + if entry.xdecref_cleanup: + self.put_var_xdecref(entry) + else: + self.put_var_decref(entry) + + def put_var_xdecrefs(self, entries): + for entry in entries: + self.put_var_xdecref(entry) + + def put_var_xdecrefs_clear(self, entries): + for entry in entries: + self.put_var_xdecref_clear(entry) + + def put_init_to_py_none(self, cast, cname): + if cast: + self.putln("%s = (void *)Py_None; Py_INCREF(%s %s);" % (cname, cast, cname)) + else: + self.putln("%s = Py_None; Py_INCREF(%s);" % (cname, cname)) + + def put_init_var_to_py_none(self, entry, template = "%s"): + code = template % entry.cname + if entry.type.is_extension_type: + cast = "(PyObject *)" + else: + cast = None + self.put_init_to_py_none(cast, code) + + def put_pymethoddef(self, entry, term): + if entry.doc: + doc_code = entry.doc_cname + else: + 
doc_code = 0 + self.putln( + '{"%s", (PyCFunction)%s, METH_VARARGS|METH_KEYWORDS, %s}%s' % ( + entry.name, + entry.func_cname, + doc_code, + term)) + + def error_goto(self, pos): + return "{%s = %s[%s]; %s = %s; goto %s;}" % ( + Naming.filename_cname, + Naming.filetable_cname, + self.lookup_filename(pos[0]), + Naming.lineno_cname, + pos[1], + self.error_label) + + def lookup_filename(self, filename): + try: + index = self.filename_table[filename] + except KeyError: + index = len(self.filename_list) + self.filename_list.append(filename) + self.filename_table[filename] = index + return index + + +class PyrexCodeWriter: + # f file output file + # level int indentation level + + def __init__(self, outfile_name): + self.f = open_new_file(outfile_name) + self.level = 0 + + def putln(self, code): + self.f.write("%s%s\n" % (" " * self.level, code)) + + def indent(self): + self.level += 1 + + def dedent(self): + self.level -= 1 + Added: lxml/pyrex/Pyrex/Compiler/Code.pyc ============================================================================== Binary file. No diff available. Added: lxml/pyrex/Pyrex/Compiler/DebugFlags.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Compiler/DebugFlags.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,4 @@ +debug_disposal_code = 0 +debug_temp_alloc = 0 +debug_coercion = 0 + Added: lxml/pyrex/Pyrex/Compiler/DebugFlags.pyc ============================================================================== Binary file. No diff available. 
Added: lxml/pyrex/Pyrex/Compiler/Errors.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Compiler/Errors.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,67 @@ +# +# Pyrex - Errors +# + +import sys +from Pyrex.Utils import open_new_file + + +class PyrexError(Exception): + pass + + +class CompileError(PyrexError): + + def __init__(self, position = None, message = ""): + self.position = position + self.message = message + if position: + pos_str = "%s:%d:%d: " % position + else: + pos_str = "" + Exception.__init__(self, pos_str + message) + + +class InternalError(Exception): + # If this is ever raised, there is a bug in the compiler. + + def __init__(self, message): + Exception.__init__(self, "Internal compiler error: %s" + % message) + + +listing_file = None +num_errors = 0 +echo_file = None + +def open_listing_file(path, echo_to_stderr = 1): + # Begin a new error listing. If path is None, no file + # is opened, the error counter is just reset. + global listing_file, num_errors, echo_file + if path is not None: + listing_file = open_new_file(path) + else: + listing_file = None + if echo_to_stderr: + echo_file = sys.stderr + else: + echo_file = None + num_errors = 0 + +def close_listing_file(): + global listing_file + if listing_file: + listing_file.close() + listing_file = None + +def error(position, message): + #print "Errors.error:", repr(position), repr(message) ### + global num_errors + err = CompileError(position, message) + line = "%s\n" % err + if listing_file: + listing_file.write(line) + if echo_file: + echo_file.write(line) + num_errors = num_errors + 1 + return err Added: lxml/pyrex/Pyrex/Compiler/Errors.pyc ============================================================================== Binary file. No diff available. 
Added: lxml/pyrex/Pyrex/Compiler/ExprNodes.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Compiler/ExprNodes.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,3147 @@ +# +# Pyrex - Parse tree nodes for expressions +# + +from string import join + +from Errors import error, InternalError +import Naming +from Nodes import Node +import PyrexTypes +from PyrexTypes import py_object_type +import Symtab +import Options + +from Pyrex.Debugging import print_call_chain +from DebugFlags import debug_disposal_code, debug_temp_alloc, \ + debug_coercion + +class ExprNode(Node): + # subexprs [string] Class var holding names of subexpr node attrs + # type PyrexType Type of the result + # result string C code fragment + # is_temp boolean Result is in a temporary variable + # is_sequence_constructor + # boolean Is a list or tuple constructor expression + # saved_subexpr_nodes + # [ExprNode or [ExprNode or None] or None] + # Cached result of subexpr_nodes() + + # The Analyse Expressions phase for expressions is split + # into two sub-phases: + # + # Analyse Types + # Determines the result type of the expression based + # on the types of its sub-expressions, and inserts + # coercion nodes into the expression tree where needed. + # Marks nodes which will need to have temporary variables + # allocated. + # + # Allocate Temps + # Allocates temporary variables where needed, and fills + # in the result field of each node. + # + # ExprNode provides some convenience routines which + # perform both of the above phases. These should only + # be called from statement nodes, and only when no + # coercion nodes need to be added around the expression + # being analysed. In that case, the above two phases + # should be invoked separately. + # + # Framework code in ExprNode provides much of the common + # processing for the various phases. 
It makes use of the + # 'subexprs' class attribute of ExprNodes, which should + # contain a list of the names of attributes which can + # hold sub-nodes or sequences of sub-nodes. + # + # The framework makes use of a number of abstract methods. + # Their responsibilities are as follows. + # + # Declaration Analysis phase + # + # analyse_target_declaration + # Called during the Analyse Declarations phase to analyse + # the LHS of an assignment or argument of a del statement. + # Nodes which cannot be the LHS of an assignment need not + # implement it. + # + # Expression Analysis phase + # + # analyse_types + # - Call analyse_types on all sub-expressions. + # - Check operand types, and wrap coercion nodes around + # sub-expressions where needed. + # - Set the type of this node. + # - If a temporary variable will be required for the + # result, set the is_temp flag of this node. + # + # analyse_target_types + # Called during the Analyse Types phase to analyse + # the LHS of an assignment or argument of a del + # statement. Similar responsibilities to analyse_types. + # + # allocate_temps + # - Call allocate_temps for all sub-nodes. + # - Call allocate_temp for this node. + # - If a temporary was allocated, call release_temp on + # all sub-expressions. + # + # A default implementation of allocate_temps is + # provided which uses the following abstract method: + # + # result_code + # - Return a C code fragment evaluating to + # the result. This is only called when the + # result is not a temporary. + # + # check_const + # - Check that this node and its subnodes form a + # legal constant expression. If so, do nothing, + # otherwise call not_const. + # + # The default implementation of check_const + # assumes that the expression is not constant. + # + # check_const_addr + # - Same as check_const, except check that the + # expression is a C lvalue whose address is + # constant. Otherwise, call addr_not_const. 
+ # + # The default implementation of calc_const_addr + # assumes that the expression is not a constant + # lvalue. + # + # Code Generation phase + # + # generate_evaluation_code + # - Call generate_evaluation_code for sub-expressions. + # - Perform the functions of generate_result_code + # (see below). + # - If result is temporary, call generate_disposal_code + # on all sub-expressions. + # + # A default implementation of generate_evaluation_code + # is provided which uses the following abstract method: + # + # generate_result_code + # - Generate any C statements necessary to calculate + # the result of this node from the results of its + # sub-expressions. + # + # generate_assignment_code + # Called on the LHS of an assignment. + # - Call generate_evaluation_code for sub-expressions. + # - Generate code to perform the assignment. + # - If the assignment absorbed a reference, call + # generate_post_assignment_code on the RHS, + # otherwise call generate_disposal_code on it. + # + # generate_deletion_code + # Called on an argument of a del statement. + # - Call generate_evaluation_code for sub-expressions. + # - Generate code to perform the deletion. + # - Call generate_disposal_code on all sub-expressions. + # + # result_as_extension_type + # Normally, the results of all nodes whose type + # is a Python object, either generic or an extension + # type, are returned as a generic Python object, so + # that they can be passed directly to Python/C API + # routines. This method is called to obtain the + # result as the actual type of the node. It is only + # called when the type is known to actually be an + # extension type, and nodes whose result can never + # be an extension type need not implement it. 
+ # + + is_sequence_constructor = 0 + is_attribute = 0 + + saved_subexpr_nodes = None + is_temp = 0 + + def not_implemented(self, method_name): + print_call_chain(method_name, "not implemented") ### + raise InternalError( + "%s.%s not implemented" % + (self.__class__.__name__, method_name)) + + def is_lvalue(self): + return 0 + + def is_ephemeral(self): + # An ephemeral node is one whose result is in + # a Python temporary and we suspect there are no + # other references to it. Certain operations are + # disallowed on such values, since they are + # likely to result in a dangling pointer. + return self.type.is_pyobject and self.is_temp + + def subexpr_nodes(self): + # Extract a list of subexpression nodes based + # on the contents of the subexprs class attribute. + if self.saved_subexpr_nodes is None: + nodes = [] + for name in self.subexprs: + item = getattr(self, name) + if item: + if isinstance(item, ExprNode): + nodes.append(item) + else: + nodes.extend(item) + self.saved_subexpr_nodes = nodes + return self.saved_subexpr_nodes + + # ------------- Declaration Analysis ---------------- + + def analyse_target_declaration(self, env): + error(self.pos, "Cannot assign to or delete this") + + # ------------- Expression Analysis ---------------- + + def analyse_const_expression(self, env): + # Called during the analyse_declarations phase of a + # constant expression. Analyses the expression's type, + # checks whether it is a legal const expression, + # and determines its value. + self.analyse_types(env) + self.allocate_temps(env) + self.check_const() + + def analyse_expressions(self, env): + # Convenience routine performing both the Type + # Analysis and Temp Allocation phases for a whole + # expression. + self.analyse_types(env) + self.allocate_temps(env) + + def analyse_target_expression(self, env): + # Convenience routine performing both the Type + # Analysis and Temp Allocation phases for the LHS of + # an assignment. 
+ self.analyse_target_types(env) + self.allocate_target_temps(env) + + def analyse_boolean_expression(self, env): + # Analyse expression and coerce to a boolean. + self.analyse_types(env) + bool = self.coerce_to_boolean(env) + bool.allocate_temps(env) + return bool + + def analyse_temp_boolean_expression(self, env): + # Analyse boolean expression and coerce result into + # a temporary. This is used when a branch is to be + # performed on the result and we won't have an + # opportunity to ensure disposal code is executed + # afterwards. By forcing the result into a temporary, + # we ensure that all disposal has been done by the + # time we get the result. + self.analyse_types(env) + bool = self.coerce_to_boolean(env) + temp_bool = bool.coerce_to_temp(env) + temp_bool.allocate_temps(env) + return temp_bool + + # --------------- Type Analysis ------------------ + + def analyse_as_module(self, env): + # If this node can be interpreted as a reference to a + # cimported module, return its scope, else None. + return None + + def analyse_as_extension_type(self, env): + # If this node can be interpreted as a reference to an + # extension type, return its type, else None. + return None + + def analyse_types(self, env): + self.not_implemented("analyse_types") + + def analyse_target_types(self, env): + self.analyse_types(env) + + def check_const(self): + self.not_const() + + def not_const(self): + error(self.pos, "Not allowed in a constant expression") + + def check_const_addr(self): + self.addr_not_const() + + def addr_not_const(self): + error(self.pos, "Address is not constant") + + # ----------------- Result Allocation ----------------- + + def result_in_temp(self): + # Return true if result is in a temporary owned by + # this node or one of its subexpressions. Overridden + # by certain nodes which can share the result of + # a subnode. + return self.is_temp + + def allocate_target_temps(self, env): + # Perform allocate_temps for the LHS of an assignment. 
+ if debug_temp_alloc: + print self, "Allocating target temps" + self.allocate_subexpr_temps(env) + self.result = self.target_code() + + def allocate_temps(self, env, result = None): + # Allocate temporary variables for this node and + # all its sub-expressions. If a result is specified, + # this must be a temp node and the specified variable + # is used as the result instead of allocating a new + # one. + if debug_temp_alloc: + print self, "Allocating temps" + self.allocate_subexpr_temps(env) + self.allocate_temp(env, result) + if self.is_temp: + self.release_subexpr_temps(env) + + def allocate_subexpr_temps(self, env): + # Allocate temporary variables for all sub-expressions + # of this node. + if debug_temp_alloc: + print self, "Allocating temps for:", self.subexprs + for node in self.subexpr_nodes(): + if node: + if debug_temp_alloc: + print self, "Allocating temps for", node + node.allocate_temps(env) + + def allocate_temp(self, env, result = None): + # If this node requires a temporary variable for its + # result, allocate one, otherwise set the result to + # a C code fragment. If a result is specified, + # this must be a temp node and the specified variable + # is used as the result instead of allocating a new + # one. + if debug_temp_alloc: + print self, "Allocating temp" + if result: + if not self.is_temp: + raise InternalError("Result forced on non-temp node") + self.result = result + elif self.is_temp: + type = self.type + if not type.is_void: + if type.is_pyobject: + type = PyrexTypes.py_object_type + self.result = env.allocate_temp(type) + else: + self.result = None + if debug_temp_alloc: + print self, "Allocated result", self.result + #print_call_chain(self, "allocated temp", self.result) + else: + self.result = self.result_code() + + def target_code(self): + # Return code fragment for use as LHS of a C assignment. 
+ return self.result_code() + + def result_code(self): + self.not_implemented("result_code") + + def release_target_temp(self, env): + # Release temporaries used by LHS of an assignment. + self.release_subexpr_temps(env) + + def release_temp(self, env): + # If this node owns a temporary result, release it, + # otherwise release results of its sub-expressions. + if self.is_temp: + if debug_temp_alloc: + print self, "Releasing result", self.result + env.release_temp(self.result) + else: + self.release_subexpr_temps(env) + + def release_subexpr_temps(self, env): + # Release the results of all sub-expressions of + # this node. + for node in self.subexpr_nodes(): + if node: + node.release_temp(env) + + # ---------------- Code Generation ----------------- + + def make_owned_reference(self, code): + # If result is a pyobject, make sure we own + # a reference to it. + #if self.type.is_pyobject and not self.is_temp: + if self.type.is_pyobject and not self.result_in_temp(): + #code.put_incref(self.result, self.type) + code.put_incref(self.result, py_object_type) + + def generate_evaluation_code(self, code): + # Generate code to evaluate this node and + # its sub-expressions, and dispose of any + # temporary results of its sub-expressions. + self.generate_subexpr_evaluation_code(code) + self.generate_result_code(code) + if self.is_temp: + self.generate_subexpr_disposal_code(code) + + def generate_subexpr_evaluation_code(self, code): + for node in self.subexpr_nodes(): + node.generate_evaluation_code(code) + + def generate_result_code(self, code): + self.not_implemented("generate_result_code") + + def generate_disposal_code(self, code): + # If necessary, generate code to dispose of + # temporary Python reference. 
+ if self.is_temp: + if self.type.is_pyobject: + code.put_decref_clear(self.result, self.type) + else: + self.generate_subexpr_disposal_code(code) + + def generate_subexpr_disposal_code(self, code): + # Generate code to dispose of temporary results + # of all sub-expressions. + for node in self.subexpr_nodes(): + node.generate_disposal_code(code) + + def generate_post_assignment_code(self, code): + # Same as generate_disposal_code except that + # assignment will have absorbed a reference to + # the result if it is a Python object. + if self.is_temp: + if self.type.is_pyobject: + code.putln("%s = 0;" % self.result) + else: + self.generate_subexpr_disposal_code(code) + + def generate_assignment_code(self, rhs, code): + # Stub method for nodes which are not legal as + # the LHS of an assignment. An error will have + # been reported earlier. + pass + + def generate_deletion_code(self, code): + # Stub method for nodes that are not legal as + # the argument of a del statement. An error + # will have been reported earlier. + pass + + # ----------------- Coercion ---------------------- + + def coerce_to(self, dst_type, env): + # Coerce the result so that it can be assigned to + # something of type dst_type. If processing is necessary, + # wraps this node in a coercion node and returns that. + # Otherwise, returns this node unchanged. + # + # This method is called during the analyse_expressions + # phase of the src_node's processing. 
+ src = self + src_type = self.type + src_is_py_type = src_type.is_pyobject + dst_is_py_type = dst_type.is_pyobject + + if dst_type.is_pyobject: + if not src.type.is_pyobject: + src = CoerceToPyTypeNode(src, env) + if not src.type.subtype_of(dst_type): + src = PyTypeTestNode(src, dst_type, env) + elif src.type.is_pyobject: + src = CoerceFromPyTypeNode(dst_type, src, env) + else: # neither src nor dst are py types + if not dst_type.assignable_from(src_type): + error(self.pos, "Cannot assign type '%s' to '%s'" % + (src.type, dst_type)) + return src + + def coerce_to_pyobject(self, env): + return self.coerce_to(PyrexTypes.py_object_type, env) + + def coerce_to_boolean(self, env): + # Coerce result to something acceptable as + # a boolean value. + type = self.type + if type.is_pyobject or type.is_ptr or type.is_float: + return CoerceToBooleanNode(self, env) + else: + if not type.is_int: + error(self.pos, + "Type '%s' not acceptable as a boolean" % type) + return self + + def coerce_to_integer(self, env): + # If not already some C integer type, coerce to longint. + if self.type.is_int: + return self + else: + return self.coerce_to(PyrexTypes.c_long_type, env) + + def coerce_to_temp(self, env): + # Ensure that the result is in a temporary. + if self.result_in_temp(): + return self + else: + return CoerceToTempNode(self, env) + + def coerce_to_simple(self, env): + # Ensure that the result is simple (see is_simple). + if self.is_simple(): + return self + else: + return self.coerce_to_temp(env) + + def is_simple(self): + # A node is simple if its result is something that can + # be referred to without performing any operations, e.g. + # a constant, local var, C global var, struct member + # reference, or temporary. + return self.result_in_temp() + + +class AtomicExprNode(ExprNode): + # Abstract base class for expression nodes which have + # no sub-expressions. + + subexprs = [] + + +class PyConstNode(AtomicExprNode): + # Abstract base class for constant Python values. 
+ + def is_simple(self): + return 1 + + def analyse_types(self, env): + self.type = PyrexTypes.py_object_type + + def result_code(self): + return self.value + + def generate_result_code(self, code): + pass + + +class NoneNode(PyConstNode): + # The constant value None + + value = "Py_None" + + +class EllipsisNode(PyConstNode): + # '...' in a subscript list. + + value = "Py_Ellipsis" + + +class ConstNode(AtomicExprNode): + # Abstract base type for literal constant nodes. + # + # value string C code fragment + + is_literal = 1 + + def is_simple(self): + return 1 + + def analyse_types(self, env): + pass # Types are held in class variables + + def check_const(self): + pass + + def result_code(self): + return str(self.value) + + def generate_result_code(self, code): + pass + + +class NullNode(ConstNode): + type = PyrexTypes.c_null_ptr_type + value = "0" + + +class CharNode(ConstNode): + type = PyrexTypes.c_char_type + + def result_code(self): + return "'%s'" % self.value + + +class IntNode(ConstNode): + type = PyrexTypes.c_long_type + + +class FloatNode(ConstNode): + type = PyrexTypes.c_double_type + + +class StringNode(ConstNode): + # entry Symtab.Entry + + type = PyrexTypes.c_char_ptr_type + + def analyse_types(self, env): + self.entry = env.add_string_const(self.value) + + def coerce_to(self, dst_type, env): + # Arrange for a Python version of the string to be pre-allocated + # when coercing to a Python type. + if dst_type.is_pyobject and not self.type.is_pyobject: + node = self.as_py_string_node(env) + else: + node = self + # We still need to perform normal coerce_to processing on the + # result, because we might be coercing to an extension type, + # in which case a type test node will be needed. + return ConstNode.coerce_to(node, dst_type, env) + + def as_py_string_node(self, env): + # Return a new StringNode with the same entry as this node + # but whose type is a Python type instead of a C type. 
+ entry = self.entry + env.add_py_string(entry) + return StringNode(self.pos, entry = entry, type = py_object_type) + + def result_code(self): + if self.type.is_pyobject: + return self.entry.pystring_cname + else: + return self.entry.cname + + +class ImagNode(AtomicExprNode): + # Imaginary number literal + # + # value float imaginary part + + def analyse_types(self, env): + self.type = PyrexTypes.py_object_type + self.is_temp = 1 + + def generate_evaluation_code(self, code): + code.putln( + "%s = PyComplex_FromDoubles(0.0, %s); if (!%s) %s" % ( + self.result, + self.value, + self.result, + code.error_goto(self.pos))) + + +class NameNode(AtomicExprNode): + # Reference to a local or global variable name. + # + # name string Python name of the variable + # entry Entry Symbol table entry + + is_name = 1 + + def analyse_as_module(self, env): + # Try to interpret this as a reference to a cimported module. + # Returns the module scope, or None. + entry = env.lookup(self.name) + if entry and entry.as_module: + return entry.as_module + return None + + def analyse_as_extension_type(self, env): + # Try to interpret this as a reference to an extension type. + # Returns the extension type, or None. + entry = env.lookup(self.name) + if entry and entry.is_type and entry.type.is_extension_type: + return entry.type + return None + + def analyse_target_declaration(self, env): + self.entry = env.lookup_here(self.name) + if not self.entry: + self.entry = env.declare_var(self.name, + PyrexTypes.py_object_type, self.pos) + + def analyse_types(self, env): + self.entry = env.lookup(self.name) + if not self.entry: + self.entry = env.declare_builtin(self.name, self.pos) + self.analyse_entry(env) + + def analyse_entry(self, env): + self.check_identifier_kind() + self.type = self.entry.type + # Reference to C array turns into pointer to first element. 
+ if self.type.is_array: + self.type = self.type.element_ptr_type() + if self.entry.is_pyglobal or self.entry.is_builtin: + assert self.type.is_pyobject, "Python global or builtin not a Python object" + self.is_temp = 1 + if Options.intern_names: + env.use_utility_code(get_name_interned_utility_code) + else: + env.use_utility_code(get_name_utility_code) + + def analyse_target_types(self, env): + self.check_identifier_kind() + if self.is_lvalue(): + self.type = self.entry.type + else: + error(self.pos, "Assignment to non-lvalue '%s'" + % self.name) + self.type = PyrexTypes.error_type + + def check_identifier_kind(self): + entry = self.entry + if not (entry.is_const or entry.is_variable + or entry.is_builtin or entry.is_cfunction): + if self.entry.as_variable: + self.entry = self.entry.as_variable + else: + error(self.pos, + "'%s' is not a constant, variable or function identifier" % self.name) + + def is_simple(self): + # If it's not a C variable, it'll be in a temp. + return 1 + + def calculate_target_results(self, env): + pass + + def check_const(self): + entry = self.entry + if not (entry.is_const or entry.is_cfunction): + self.not_const() + + def check_const_addr(self): + entry = self.entry + if not (entry.is_cglobal or entry.is_cfunction): + self.addr_not_const() + + def is_lvalue(self): + return self.entry.is_variable and \ + not self.entry.type.is_array and \ + not self.entry.is_readonly + + def is_ephemeral(self): + # Name nodes are never ephemeral, even if the + # result is in a temporary. 
+ return 0 + + def result_code(self): + if self.entry is None: + return "" # There was an error earlier + result = self.entry.cname + if self.type.is_extension_type and \ + not self.entry.is_declared_generic: + result = "((PyObject *)%s)" % result + return result + + def result_as_extension_type(self): + if self.entry is None: + return "" # There was an error earlier + #if not self.entry.is_self_arg: + if not self.entry.is_declared_generic: + return self.entry.cname + else: + return "((%s)%s)" % ( + self.type.declaration_code(""), + self.entry.cname) + + def generate_result_code(self, code): + if not hasattr(self, 'entry'): + error(self.pos, "INTERNAL ERROR: NameNode has no entry attribute during code generation") + entry = self.entry + if entry is None: + return # There was an error earlier + if entry.is_pyglobal or entry.is_builtin: + if entry.is_builtin: + namespace = Naming.builtins_cname + else: # entry.is_pyglobal + namespace = entry.namespace_cname + if Options.intern_names: + #assert entry.interned_cname is not None + code.putln( + '%s = __Pyx_GetName(%s, %s); if (!%s) %s' % ( + self.result, + namespace, + entry.interned_cname, + self.result, + code.error_goto(self.pos))) + else: + code.putln( + '%s = __Pyx_GetName(%s, "%s"); if (!%s) %s' % ( + self.result, + namespace, + self.entry.name, + self.result, + code.error_goto(self.pos))) + + def generate_assignment_code(self, rhs, code): + entry = self.entry + if entry is None: + return # There was an error earlier + if entry.is_pyglobal: + namespace = self.entry.namespace_cname + if Options.intern_names: + code.putln( + 'if (PyObject_SetAttr(%s, %s, %s) < 0) %s' % ( + namespace, + entry.interned_cname, + rhs.result, + code.error_goto(self.pos))) + else: + code.putln( + 'if (PyObject_SetAttrString(%s, "%s", %s) < 0) %s' % ( + namespace, + entry.name, + rhs.result, + code.error_goto(self.pos))) + if debug_disposal_code: + print "NameNode.generate_assignment_code:" + print "...generating disposal code for", rhs + 
rhs.generate_disposal_code(code) + else: + if self.type.is_pyobject: + rhs.make_owned_reference(code) + code.put_decref(self.result, self.type) + code.putln('%s = %s;' % (self.entry.cname, rhs.result)) + if debug_disposal_code: + print "NameNode.generate_assignment_code:" + print "...generating post-assignment code for", rhs + rhs.generate_post_assignment_code(code) + + def generate_deletion_code(self, code): + if self.entry is None: + return # There was an error earlier + if not self.entry.is_pyglobal: + error(self.pos, "Deletion of local or C global name not supported") + return + code.putln( + 'if (PyObject_DelAttrString(%s, "%s") < 0) %s' % ( + Naming.module_cname, + self.entry.name, + code.error_goto(self.pos))) + + +class BackquoteNode(ExprNode): + # `expr` + # + # arg ExprNode + + subexprs = ['arg'] + + def analyse_types(self, env): + self.arg.analyse_types(env) + self.arg = self.arg.coerce_to_pyobject(env) + self.type = PyrexTypes.py_object_type + self.is_temp = 1 + + def generate_result_code(self, code): + code.putln( + "%s = PyObject_Repr(%s); if (!%s) %s" % ( + self.result, + self.arg.result, + self.result, + code.error_goto(self.pos))) + + +class ImportNode(ExprNode): + # Used as part of import statement implementation. 
+ # Implements result = + # __import__(module_name, globals(), None, name_list) + # + # module_name StringNode dotted name of module + # name_list ListNode or None list of names to be imported + + subexprs = ['module_name', 'name_list'] + + def analyse_types(self, env): + self.module_name.analyse_types(env) + self.module_name = self.module_name.coerce_to_pyobject(env) + if self.name_list: + self.name_list.analyse_types(env) + self.type = PyrexTypes.py_object_type + self.is_temp = 1 + env.use_utility_code(import_utility_code) + + def generate_result_code(self, code): + if self.name_list: + name_list_code = self.name_list.result + else: + name_list_code = "0" + code.putln( + "%s = __Pyx_Import(%s, %s); if (!%s) %s" % ( + self.result, + self.module_name.result, + name_list_code, + self.result, + code.error_goto(self.pos))) + + +class IteratorNode(ExprNode): + # Used as part of for statement implementation. + # Implements result = iter(sequence) + # + # sequence ExprNode + + subexprs = ['sequence'] + + def analyse_types(self, env): + self.sequence.analyse_types(env) + self.sequence = self.sequence.coerce_to_pyobject(env) + self.type = PyrexTypes.py_object_type + self.is_temp = 1 + + def generate_result_code(self, code): + code.putln( + "%s = PyObject_GetIter(%s); if (!%s) %s" % ( + self.result, + self.sequence.result, + self.result, + code.error_goto(self.pos))) + + +class NextNode(AtomicExprNode): + # Used as part of for statement implementation. + # Implements result = iterator.next() + # Created during analyse_types phase. + # The iterator is not owned by this node. 
+ # + # iterator ExprNode + + def __init__(self, iterator, env): + self.pos = iterator.pos + self.iterator = iterator + self.type = PyrexTypes.py_object_type + self.is_temp = 1 + + def generate_result_code(self, code): + code.putln( + "%s = PyIter_Next(%s);" % ( + self.result, + self.iterator.result)) + code.putln( + "if (!%s) {" % + self.result) + code.putln( + "if (PyErr_Occurred()) %s" % + code.error_goto(self.pos)) + code.putln( + "break;") + code.putln( + "}") + + +class ExcValueNode(AtomicExprNode): + # Node created during analyse_types phase + # of an ExceptClauseNode to fetch the current + # exception value. + + def __init__(self, pos, env): + ExprNode.__init__(self, pos) + self.type = PyrexTypes.py_object_type + self.is_temp = 1 + env.use_utility_code(get_exception_utility_code) + + def generate_result_code(self, code): + code.putln( + "%s = __Pyx_GetExcValue(); if (!%s) %s" % ( + self.result, + self.result, + code.error_goto(self.pos))) + + +class TempNode(AtomicExprNode): + # Node created during analyse_types phase + # of some nodes to hold a temporary value. + + def __init__(self, pos, type, env): + ExprNode.__init__(self, pos) + self.type = type + self.is_temp = 1 + + def generate_result_code(self, code): + pass + + +class PyTempNode(TempNode): + # TempNode holding a Python value. + + def __init__(self, pos, env): + TempNode.__init__(self, pos, PyrexTypes.py_object_type, env) + + +#------------------------------------------------------------------- +# +# Trailer nodes +# +#------------------------------------------------------------------- + +class IndexNode(ExprNode): + # Sequence indexing. 
+ # + # base ExprNode + # index ExprNode + + subexprs = ['base', 'index'] + + def analyse_target_declaration(self, env): + pass + + def analyse_types(self, env): + self.base.analyse_types(env) + self.index.analyse_types(env) + if self.base.type.is_pyobject: + self.index = self.index.coerce_to_pyobject(env) + self.type = PyrexTypes.py_object_type + self.is_temp = 1 + else: + if self.base.type.is_ptr or self.base.type.is_array: + self.type = self.base.type.base_type + else: + error(self.pos, + "Attempting to index non-array type '%s'" % + self.base.type) + self.type = PyrexTypes.error_type + if self.index.type.is_pyobject: + self.index = self.index.coerce_to( + PyrexTypes.c_int_type, env) + if not self.index.type.is_int: + error(self.pos, + "Invalid index type '%s'" % + self.index.type) + + def check_const_addr(self): + self.base.check_const_addr() + self.index.check_const() + + def is_lvalue(self): + return 1 + + def result_code(self): + return "(%s[%s])" % ( + self.base.result, self.index.result) + + def generate_result_code(self, code): + if self.type.is_pyobject: + code.putln( + "%s = PyObject_GetItem(%s, %s); if (!%s) %s" % ( + self.result, + self.base.result, + self.index.result, + self.result, + code.error_goto(self.pos))) + + def generate_assignment_code(self, rhs, code): + self.generate_subexpr_evaluation_code(code) + if self.type.is_pyobject: + code.putln( + "if (PyObject_SetItem(%s, %s, %s) < 0) %s" % ( + self.base.result, + self.index.result, + rhs.result, + code.error_goto(self.pos))) + self.generate_subexpr_disposal_code(code) + else: + code.putln( + "%s = %s;" % ( + self.result, rhs.result)) + rhs.generate_disposal_code(code) + + def generate_deletion_code(self, code): + self.generate_subexpr_evaluation_code(code) + code.putln( + "if (PyObject_DelItem(%s, %s) < 0) %s" % ( + self.base.result, + self.index.result, + code.error_goto(self.pos))) + self.generate_subexpr_disposal_code(code) + + +class SliceIndexNode(ExprNode): + # 2-element slice indexing + 
# + # base ExprNode + # start ExprNode or None + # stop ExprNode or None + + subexprs = ['base', 'start', 'stop'] + + def analyse_target_declaration(self, env): + pass + + def analyse_types(self, env): + self.base.analyse_types(env) + if self.start: + self.start.analyse_types(env) + if self.stop: + self.stop.analyse_types(env) + self.base = self.base.coerce_to_pyobject(env) + c_int = PyrexTypes.c_int_type + if self.start: + self.start = self.start.coerce_to(c_int, env) + if self.stop: + self.stop = self.stop.coerce_to(c_int, env) + self.type = PyrexTypes.py_object_type + self.is_temp = 1 + + def generate_result_code(self, code): + code.putln( + "%s = PySequence_GetSlice(%s, %s, %s); if (!%s) %s" % ( + self.result, + self.base.result, + self.start_code(), + self.stop_code(), + self.result, + code.error_goto(self.pos))) + + def generate_assignment_code(self, rhs, code): + self.generate_subexpr_evaluation_code(code) + code.putln( + "if (PySequence_SetSlice(%s, %s, %s, %s) < 0) %s" % ( + self.base.result, + self.start_code(), + self.stop_code(), + rhs.result, + code.error_goto(self.pos))) + self.generate_subexpr_disposal_code(code) + rhs.generate_disposal_code(code) + + def generate_deletion_code(self, code): + self.generate_subexpr_evaluation_code(code) + code.putln( + "if (PySequence_DelSlice(%s, %s, %s) < 0) %s" % ( + self.base.result, + self.start_code(), + self.stop_code(), + code.error_goto(self.pos))) + self.generate_subexpr_disposal_code(code) + + def start_code(self): + if self.start: + return self.start.result + else: + return "0" + + def stop_code(self): + if self.stop: + return self.stop.result + else: + return "0x7fffffff" + + def result_code(self): + # self.result is not used, but this method must exist + return "" + + +class SliceNode(ExprNode): + # start:stop:step in subscript list + # + # start ExprNode + # stop ExprNode + # step ExprNode + + subexprs = ['start', 'stop', 'step'] + + def analyse_types(self, env): + self.start.analyse_types(env) + 
self.stop.analyse_types(env) + self.step.analyse_types(env) + self.start = self.start.coerce_to_pyobject(env) + self.stop = self.stop.coerce_to_pyobject(env) + self.step = self.step.coerce_to_pyobject(env) + self.type = PyrexTypes.py_object_type + self.is_temp = 1 + + def generate_result_code(self, code): + code.putln( + "%s = PySlice_New(%s, %s, %s); if (!%s) %s" % ( + self.result, + self.start.result, + self.stop.result, + self.step.result, + self.result, + code.error_goto(self.pos))) + + +class SimpleCallNode(ExprNode): + # Function call without keyword, * or ** args. + # + # function ExprNode + # args [ExprNode] + # arg_tuple ExprNode or None used internally + # self ExprNode or None used internally + # coerced_self ExprNode or None used internally + + subexprs = ['self', 'coerced_self', 'function', 'args', 'arg_tuple'] + + self = None + coerced_self = None + arg_tuple = None + + def analyse_types(self, env): + function = self.function + function.is_called = 1 + self.function.analyse_types(env) + if function.is_attribute and function.entry and function.entry.is_cmethod: + # Take ownership of the object from which the attribute + # was obtained, because we need to pass it as 'self'. + self.self = function.obj + function.obj = CloneNode(self.self) + if self.function.type.is_pyobject: + self.arg_tuple = TupleNode(self.pos, args = self.args) + self.args = None + self.arg_tuple.analyse_types(env) + self.type = PyrexTypes.py_object_type + self.is_temp = 1 + else: + for arg in self.args: + arg.analyse_types(env) + if self.self and self.function.type.args: + # Coerce 'self' to the type expected by the method. + expected_type = self.function.type.args[0].type + self.coerced_self = CloneNode(self.self).coerce_to( + expected_type, env) + # Insert coerced 'self' argument into argument list. 
+ self.args.insert(0, self.coerced_self) + self.analyse_c_function_call(env) + + def analyse_c_function_call(self, env): + func_type = self.function.type + # Coerce function pointer to function + if func_type.is_ptr: + func_type = func_type.base_type + self.function.type = func_type + # Check function type + if not func_type.is_cfunction: + if not func_type.is_error: + error(self.pos, "Calling non-function type '%s'" % + func_type) + self.type = PyrexTypes.error_type + self.result = "" + return + # Check no. of args + expected_nargs = len(func_type.args) + actual_nargs = len(self.args) + if actual_nargs < expected_nargs \ + or (not func_type.has_varargs and actual_nargs > expected_nargs): + expected_str = str(expected_nargs) + if func_type.has_varargs: + expected_str = "at least " + expected_str + error(self.pos, + "Call with wrong number of arguments (expected %s, got %s)" + % (expected_str, actual_nargs)) + self.args = None + self.type = PyrexTypes.error_type + self.result = "" + return + # Coerce arguments + for i in range(expected_nargs): + formal_type = func_type.args[i].type + self.args[i] = self.args[i].coerce_to(formal_type, env) + for i in range(expected_nargs, actual_nargs): + if self.args[i].type.is_pyobject: + error(self.args[i].pos, + "Python object cannot be passed as a varargs parameter") + # Calc result type and code fragment + self.type = func_type.return_type + if self.type.is_pyobject \ + or func_type.exception_value is not None \ + or func_type.exception_check: + self.is_temp = 1 + + def result_code(self): + return self.c_call_code(as_extension_type = 0) + + def result_as_extension_type(self): + return self.c_call_code(as_extension_type = 1) + + def c_call_code(self, as_extension_type): + if self.args is None or not self.function.type.is_cfunction: + return "" + formal_args = self.function.type.args + arg_list_code = [] + for (formal_arg, actual_arg) in \ + zip(formal_args, self.args): + if formal_arg.type.is_extension_type: + arg_code = 
actual_arg.result_as_extension_type() + if not formal_arg.type.same_as(actual_arg.type): + arg_code = "((%s)%s)" % ( + formal_arg.type.declaration_code(""), + arg_code) + else: + arg_code = actual_arg.result + arg_list_code.append(arg_code) + for actual_arg in self.args[len(formal_args):]: + arg_list_code.append(actual_arg.result) + result = "%s(%s)" % (self.function.result, + join(arg_list_code, ",")) + if self.type.is_extension_type and not as_extension_type: + result = "((PyObject *)%s)" % result + return result + + def generate_result_code(self, code): + #print_call_chain("SimpleCallNode.generate_result_code") ### + if self.function.type.is_pyobject: + code.putln( + "%s = PyObject_CallObject(%s, %s); if (!%s) %s" % ( + self.result, + self.function.result, + self.arg_tuple.result, + self.result, + code.error_goto(self.pos))) + elif self.function.type.is_cfunction: + exc_checks = [] + if self.type.is_pyobject: + exc_checks.append("!%s" % self.result) + else: + exc_val = self.function.type.exception_value + exc_check = self.function.type.exception_check + if exc_val is not None: + exc_checks.append("%s == %s" % (self.result, exc_val)) + if exc_check: + exc_checks.append("PyErr_Occurred()") + if self.is_temp or exc_checks: + if self.result: + lhs = "%s = " % self.result + else: + lhs = "" + code.putln( + "%s%s; if (%s) %s" % ( + lhs, + self.c_call_code(as_extension_type = 0), + " && ".join(exc_checks), + code.error_goto(self.pos))) + + +class GeneralCallNode(ExprNode): + # General Python function call, including keyword, + # * and ** arguments. 
+ # + # function ExprNode + # positional_args ExprNode Tuple of positional arguments + # keyword_args ExprNode or None Dict of keyword arguments + # starstar_arg ExprNode or None Dict of extra keyword args + + subexprs = ['function', 'positional_args', 'keyword_args', 'starstar_arg'] + + def analyse_types(self, env): + self.function.analyse_types(env) + self.positional_args.analyse_types(env) + if self.keyword_args: + self.keyword_args.analyse_types(env) + if self.starstar_arg: + self.starstar_arg.analyse_types(env) + self.function = self.function.coerce_to_pyobject(env) + self.positional_args = \ + self.positional_args.coerce_to_pyobject(env) + if self.starstar_arg: + self.starstar_arg = \ + self.starstar_arg.coerce_to_pyobject(env) + self.type = PyrexTypes.py_object_type + self.is_temp = 1 + + def generate_result_code(self, code): + if self.keyword_args and self.starstar_arg: + code.putln( + "if (PyDict_Update(%s, %s) < 0) %s" % ( + self.keyword_args.result, + self.starstar_arg.result, + code.error_goto(self.pos))) + keyword_code = self.keyword_args.result + elif self.keyword_args: + keyword_code = self.keyword_args.result + elif self.starstar_arg: + keyword_code = self.starstar_arg.result + else: + keyword_code = None + if not keyword_code: + call_code = "PyObject_CallObject(%s, %s)" % ( + self.function.result, + self.positional_args.result) + else: + call_code = "PyEval_CallObjectWithKeywords(%s, %s, %s)" % ( + self.function.result, + self.positional_args.result, + keyword_code) + code.putln( + "%s = %s; if (!%s) %s" % ( + self.result, + call_code, + self.result, + code.error_goto(self.pos))) + + +class AsTupleNode(ExprNode): + # Convert argument to tuple. Used for normalising + # the * argument of a function call. 
+ # + # arg ExprNode + + subexprs = ['arg'] + + def analyse_types(self, env): + self.arg.analyse_types(env) + self.arg = self.arg.coerce_to_pyobject(env) + self.type = PyrexTypes.py_object_type + self.is_temp = 1 + + def generate_result_code(self, code): + code.putln( + "%s = PySequence_Tuple(%s); if (!%s) %s" % ( + self.result, + self.arg.result, + self.result, + code.error_goto(self.pos))) + + +class AttributeNode(ExprNode): + # obj.attribute + # + # obj ExprNode + # attribute string + # + # Used internally: + # + # is_py_attr boolean Is a Python getattr operation + # member string C name of struct member + # is_called boolean Function call is being done on result + # entry Entry Symbol table entry of attribute + # interned_attr_cname string C name of interned attribute name + + is_attribute = 1 + subexprs = ['obj'] + + type = PyrexTypes.error_type + result = "" + entry = None + is_called = 0 + + def analyse_target_declaration(self, env): + pass + + def analyse_target_types(self, env): + self.analyse_types(env, target = 1) + + def analyse_types(self, env, target = 0): + if self.analyse_as_cimported_attribute(env, target): + return + if not target and self.analyse_as_unbound_cmethod(env): + return + self.analyse_as_ordinary_attribute(env, target) + + def analyse_as_cimported_attribute(self, env, target): + # Try to interpret this as a reference to an imported + # C const, type, var or function. If successful, mutates + # this node into a NameNode and returns 1, otherwise + # returns 0. + module_scope = self.obj.analyse_as_module(env) + if module_scope: + entry = module_scope.lookup_here(self.attribute) + if entry and ( + entry.is_cglobal or entry.is_cfunction + or entry.is_type or entry.is_const): + self.mutate_into_name_node(env, entry, target) + return 1 + return 0 + + def analyse_as_unbound_cmethod(self, env): + # Try to interpret this as a reference to an unbound + # C method of an extension type. 
If successful, mutates + # this node into a NameNode and returns 1, otherwise + # returns 0. + type = self.obj.analyse_as_extension_type(env) + if type: + entry = type.scope.lookup_here(self.attribute) + if entry and entry.is_cmethod: + # Create a temporary entry describing the C method + # as an ordinary function. + ubcm_entry = Symtab.Entry(entry.name, + "%s->%s" % (type.vtabptr_cname, entry.cname), + entry.type) + ubcm_entry.is_cfunction = 1 + ubcm_entry.func_cname = entry.func_cname + self.mutate_into_name_node(env, ubcm_entry, None) + return 1 + return 0 + + def analyse_as_extension_type(self, env): + # Try to interpret this as a reference to an extension type + # in a cimported module. Returns the extension type, or None. + module_scope = self.obj.analyse_as_module(env) + if module_scope: + entry = module_scope.lookup_here(self.attribute) + if entry and entry.is_type and entry.type.is_extension_type: + return entry.type + return None + + def analyse_as_module(self, env): + # Try to interpret this as a reference to a cimported module + # in another cimported module. Returns the module scope, or None. + module_scope = self.obj.analyse_as_module(env) + if module_scope: + entry = module_scope.lookup_here(self.attribute) + if entry and entry.as_module: + return entry.as_module + return None + + def mutate_into_name_node(self, env, entry, target): + # Mutate this node into a NameNode and complete the + # analyse_types phase. + self.__class__ = NameNode + self.name = self.attribute + self.entry = entry + del self.obj + del self.attribute + if target: + NameNode.analyse_target_types(self, env) + else: + NameNode.analyse_entry(self, env) + + def analyse_as_ordinary_attribute(self, env, target): + self.obj.analyse_types(env) + self.analyse_attribute(env) + if self.entry and self.entry.is_cmethod and not self.is_called: + error(self.pos, "C method can only be called") + # Reference to C array turns into pointer to first element. 
+ if self.type.is_array: + self.type = self.type.element_ptr_type() + if self.is_py_attr: + if not target: + self.is_temp = 1 + + def analyse_attribute(self, env): + # Look up attribute and set self.type and self.member. + self.is_py_attr = 0 + self.member = self.attribute + if self.obj.type.is_string: + self.obj = self.obj.coerce_to_pyobject(env) + obj_type = self.obj.type + if obj_type.is_ptr: + obj_type = obj_type.base_type + self.op = "->" + elif obj_type.is_extension_type: + self.op = "->" + else: + self.op = "." + if obj_type.has_attributes: + entry = None + if obj_type.attributes_known(): + entry = obj_type.scope.lookup_here(self.attribute) + else: + error(self.pos, + "Cannot select attribute of incomplete type '%s'" + % obj_type) + obj_type = PyrexTypes.error_type + self.entry = entry + if entry: + if entry.is_variable or entry.is_cmethod: + self.type = entry.type + self.member = entry.cname + return + else: + # If it's not a variable or C method, it must be a Python + # method of an extension type, so we treat it like a Python + # attribute. + pass + # If we get here, the base object is not a struct/union/extension + # type, or it is an extension type and the attribute is either not + # declared or is declared as a Python method. Treat it as a Python + # attribute reference. 
+ if obj_type.is_pyobject: + self.type = PyrexTypes.py_object_type + self.is_py_attr = 1 + if Options.intern_names: + self.interned_attr_cname = env.intern(self.attribute) + else: + if not obj_type.is_error: + error(self.pos, + "Object of type '%s' has no attribute '%s'" % + (obj_type, self.attribute)) + + def is_simple(self): + if self.obj: + return self.result_in_temp() or self.obj.is_simple() + else: + return NameNode.is_simple(self) + + def is_lvalue(self): + if self.obj: + return 1 + else: + return NameNode.is_lvalue(self) + + def is_ephemeral(self): + if self.obj: + return ExprNode.is_ephemeral(self) + else: + return NameNode.is_ephemeral(self) + + def result_code(self): + return self.select_code()[0] + + def result_as_extension_type(self): + return self.uncast_select_code() + + def select_code(self): + orig_code = self.uncast_select_code() + if self.type.is_extension_type: + code = "((PyObject *)%s)" % orig_code + else: + code = orig_code + return code, orig_code + + def uncast_select_code(self): + obj_type = self.obj.type + if obj_type.is_extension_type: + obj_code = self.obj.result_as_extension_type() + else: + obj_code = self.obj.result + if self.entry and self.entry.is_cmethod: + return "((struct %s *)%s%s%s)->%s" % ( + obj_type.vtabstruct_cname, obj_code, self.op, + obj_type.vtabslot_cname, self.member) + else: + return "%s%s%s" % (obj_code, self.op, self.member) + + def generate_result_code(self, code): + if self.is_py_attr: + if Options.intern_names: + code.putln( + '%s = PyObject_GetAttr(%s, %s); if (!%s) %s' % ( + self.result, + self.obj.result, + self.interned_attr_cname, + self.result, + code.error_goto(self.pos))) + else: + code.putln( + '%s = PyObject_GetAttrString(%s, "%s"); if (!%s) %s' % ( + self.result, + self.obj.result, + self.attribute, + self.result, + code.error_goto(self.pos))) + + def generate_assignment_code(self, rhs, code): + self.obj.generate_evaluation_code(code) + if self.is_py_attr: + if Options.intern_names: + code.putln( + 
'if (PyObject_SetAttr(%s, %s, %s) < 0) %s' % ( + self.obj.result, + self.interned_attr_cname, + rhs.result, + code.error_goto(self.pos))) + else: + code.putln( + 'if (PyObject_SetAttrString(%s, "%s", %s) < 0) %s' % ( + self.obj.result, + self.attribute, + rhs.result, + code.error_goto(self.pos))) + rhs.generate_disposal_code(code) + else: + select_code, orig_code = self.select_code() + if self.type.is_pyobject: + rhs.make_owned_reference(code) + code.put_decref(select_code, self.type) + code.putln( + "%s = %s;" % ( + orig_code, + rhs.result)) + rhs.generate_post_assignment_code(code) + self.obj.generate_disposal_code(code) + + def generate_deletion_code(self, code): + self.obj.generate_evaluation_code(code) + if self.is_py_attr: + code.putln( + 'if (PyObject_DelAttrString(%s, "%s") < 0) %s' % ( + self.obj.result, + self.attribute, + code.error_goto(self.pos))) + else: + error(self.pos, "Cannot delete C attribute of extension type") + self.obj.generate_disposal_code(code) + +#------------------------------------------------------------------- +# +# Constructor nodes +# +#------------------------------------------------------------------- + +class SequenceNode(ExprNode): + # Base class for list and tuple constructor nodes. + # Contains common code for performing sequence unpacking. 
+ # + # args [ExprNode] + # unpacked_items [ExprNode] or None + # coerced_unpacked_items [ExprNode] or None + + subexprs = ['args'] + + is_sequence_constructor = 1 + unpacked_items = None + + def analyse_target_declaration(self, env): + for arg in self.args: + arg.analyse_target_declaration(env) + + def analyse_types(self, env): + for i in range(len(self.args)): + arg = self.args[i] + arg.analyse_types(env) + self.args[i] = arg.coerce_to_pyobject(env) + self.type = PyrexTypes.py_object_type + self.is_temp = 1 + + def analyse_target_types(self, env): + self.unpacked_items = [] # PyTempNode(self.pos, env) + self.coerced_unpacked_items = [] + for arg in self.args: + arg.analyse_target_types(env) + #node = CloneNode(self.unpacked_item) + unpacked_item = PyTempNode(self.pos, env) + coerced_unpacked_item = unpacked_item.coerce_to(arg.type, env) + self.unpacked_items.append(unpacked_item) + self.coerced_unpacked_items.append(coerced_unpacked_item) + self.type = PyrexTypes.py_object_type + env.use_utility_code(unpacking_utility_code) + + def allocate_target_temps(self, env): + for arg in self.args: + arg.allocate_target_temps(env) + for node in self.coerced_unpacked_items: + node.allocate_temps(env) + + def release_target_temp(self, env): + for arg in self.args: + arg.release_target_temp(env) + for node in self.coerced_unpacked_items: + node.release_temp(env) + + def generate_result_code(self, code): + self.generate_operation_code(code) + + def generate_assignment_code(self, rhs, code): + for i in range(len(self.args)): + unpack_result = self.unpacked_items[i].result + code.putln( + "%s = __Pyx_UnpackItem(%s, %s); if (!%s) %s" % ( + unpack_result, + rhs.result, + i, + unpack_result, + code.error_goto(self.pos))) + value_node = self.coerced_unpacked_items[i] + value_node.generate_evaluation_code(code) + self.args[i].generate_assignment_code(value_node, code) + code.putln( + "if (__Pyx_EndUnpack(%s, %s) < 0) %s" % ( + rhs.result, + len(self.args), + 
code.error_goto(self.pos))) + if debug_disposal_code: + print "UnpackNode.generate_assignment_code:" + print "...generating disposal code for", rhs + rhs.generate_disposal_code(code) + + +class TupleNode(SequenceNode): + # Tuple constructor. + + def generate_operation_code(self, code): + code.putln( + "%s = PyTuple_New(%s); if (!%s) %s" % ( + self.result, + len(self.args), + self.result, + code.error_goto(self.pos))) + for i in range(len(self.args)): + arg = self.args[i] + if not arg.result_in_temp(): + code.put_incref(arg.result, arg.type) + code.putln( + "PyTuple_SET_ITEM(%s, %s, %s);" % ( + self.result, + i, + arg.result)) + + def generate_subexpr_disposal_code(self, code): + # We call generate_post_assignment_code here instead + # of generate_disposal_code, because values were stored + # in the tuple using a reference-stealing operation. + for arg in self.args: + arg.generate_post_assignment_code(code) + + +class ListNode(SequenceNode): + # List constructor. + + def generate_operation_code(self, code): + code.putln("%s = PyList_New(%s); if (!%s) %s" % + (self.result, + len(self.args), + self.result, + code.error_goto(self.pos))) + for i in range(len(self.args)): + arg = self.args[i] + #if not arg.is_temp: + if not arg.result_in_temp(): + code.put_incref(arg.result, arg.type) + code.putln("PyList_SET_ITEM(%s, %s, %s);" % + (self.result, + i, + arg.result)) + + def generate_subexpr_disposal_code(self, code): + # We call generate_post_assignment_code here instead + # of generate_disposal_code, because values were stored + # in the list using a reference-stealing operation. + for arg in self.args: + arg.generate_post_assignment_code(code) + + +class DictNode(ExprNode): + # Dictionary constructor. 
+ # + # key_value_pairs [(ExprNode, ExprNode)] + + def analyse_types(self, env): + new_pairs = [] + for key, value in self.key_value_pairs: + key.analyse_types(env) + value.analyse_types(env) + key = key.coerce_to_pyobject(env) + value = value.coerce_to_pyobject(env) + new_pairs.append((key, value)) + self.key_value_pairs = new_pairs + self.type = PyrexTypes.py_object_type + self.is_temp = 1 + + def allocate_temps(self, env): + # Custom method used here because key-value + # pairs are evaluated and used one at a time. + self.allocate_temp(env) + for key, value in self.key_value_pairs: + key.allocate_temps(env) + value.allocate_temps(env) + key.release_temp(env) + value.release_temp(env) + + def generate_evaluation_code(self, code): + # Custom method used here because key-value + # pairs are evaluated and used one at a time. + code.putln( + "%s = PyDict_New(); if (!%s) %s" % ( + self.result, + self.result, + code.error_goto(self.pos))) + for key, value in self.key_value_pairs: + key.generate_evaluation_code(code) + value.generate_evaluation_code(code) + code.putln( + "if (PyDict_SetItem(%s, %s, %s) < 0) %s" % ( + self.result, + key.result, + value.result, + code.error_goto(self.pos))) + key.generate_disposal_code(code) + value.generate_disposal_code(code) + + +class ClassNode(ExprNode): + # Helper class used in the implementation of Python + # class definitions. Constructs a class object given + # a name, tuple of bases and class dictionary. 
+ # + # name ExprNode Name of the class + # bases ExprNode Base class tuple + # dict ExprNode Class dict (not owned by this node) + # doc ExprNode or None Doc string + # module_name string Name of defining module + + subexprs = ['name', 'bases', 'doc'] + + def analyse_types(self, env): + self.name.analyse_types(env) + self.name = self.name.coerce_to_pyobject(env) + self.bases.analyse_types(env) + if self.doc: + self.doc.analyse_types(env) + self.doc = self.doc.coerce_to_pyobject(env) + self.module_name = env.global_scope().module_name + self.type = PyrexTypes.py_object_type + self.is_temp = 1 + env.use_utility_code(create_class_utility_code); + + def generate_result_code(self, code): + if self.doc: + code.putln( + 'if (PyDict_SetItemString(%s, "__doc__", %s) < 0) %s' % ( + self.dict.result, + self.doc.result, + code.error_goto(self.pos))) +## code.putln( +## '%s = PyClass_New(%s, %s, %s); if (!%s) %s' % ( +## self.result, +## self.bases.result, +## self.dict.result, +## self.name.result, +## self.result, +## code.error_goto(self.pos))) + code.putln( + '%s = __Pyx_CreateClass(%s, %s, %s, "%s"); if (!%s) %s' % ( + self.result, + self.bases.result, + self.dict.result, + self.name.result, + self.module_name, + self.result, + code.error_goto(self.pos))) + + +class UnboundMethodNode(ExprNode): + # Helper class used in the implementation of Python + # class definitions. Constructs an unbound method + # object from a class and a function. 
+ # + # class_cname string C var holding the class object + # function ExprNode Function object + + subexprs = ['function'] + + def analyse_types(self, env): + self.function.analyse_types(env) + self.type = PyrexTypes.py_object_type + self.is_temp = 1 + + def generate_result_code(self, code): + code.putln( + "%s = PyMethod_New(%s, 0, %s); if (!%s) %s" % ( + self.result, + self.function.result, + self.class_cname, + self.result, + code.error_goto(self.pos))) + + +class PyCFunctionNode(AtomicExprNode): + # Helper class used in the implementation of Python + # class definitions. Constructs a PyCFunction object + # from a PyMethodDef struct. + # + # pymethdef_cname string PyMethodDef structure + + def analyse_types(self, env): + self.type = PyrexTypes.py_object_type + self.is_temp = 1 + + def generate_result_code(self, code): + code.putln( + "%s = PyCFunction_New(&%s, 0); if (!%s) %s" % ( + self.result, + self.pymethdef_cname, + self.result, + code.error_goto(self.pos))) + +#------------------------------------------------------------------- +# +# Unary operator nodes +# +#------------------------------------------------------------------- + +class UnopNode(ExprNode): + # operator string + # operand ExprNode + # + # Processing during analyse_expressions phase: + # + # analyse_c_operation + # Called when the operand is not a pyobject. + # - Check operand type and coerce if needed. + # - Determine result type and result code fragment. + # - Allocate temporary for result if needed. 
+ + subexprs = ['operand'] + + def analyse_types(self, env): + self.operand.analyse_types(env) + if self.is_py_operation(): + self.coerce_operand_to_pyobject(env) + self.type = PyrexTypes.py_object_type + self.is_temp = 1 + else: + self.analyse_c_operation(env) + + def check_const(self): + self.operand.check_const() + + def is_py_operation(self): + return self.operand.type.is_pyobject + + def coerce_operand_to_pyobject(self, env): + self.operand = self.operand.coerce_to_pyobject(env) + + def generate_result_code(self, code): + if self.operand.type.is_pyobject: + self.generate_py_operation_code(code) + else: + if self.is_temp: + self.generate_c_operation_code(code) + + def generate_py_operation_code(self, code): + function = self.py_operation_function() + code.putln( + "%s = %s(%s); if (!%s) %s" % ( + self.result, + function, + self.operand.result, + self.result, + code.error_goto(self.pos))) + + def type_error(self): + if not self.operand.type.is_error: + error(self.pos, "Invalid operand type for '%s' (%s)" % + (self.operator, self.operand.type)) + self.type = PyrexTypes.error_type + + +class NotNode(ExprNode): + # 'not' operator + # + # operand ExprNode + + subexprs = ['operand'] + + def analyse_types(self, env): + self.operand.analyse_types(env) + self.operand = self.operand.coerce_to_boolean(env) + self.type = PyrexTypes.c_int_type + + def result_code(self): + return "(!%s)" % self.operand.result + + def generate_result_code(self, code): + pass + + +class UnaryPlusNode(UnopNode): + # unary '+' operator + + operator = '+' + + def analyse_c_operation(self, env): + self.type = self.operand.type + + def py_operation_function(self): + return "PyNumber_Positive" + + def result_code(self): + return self.operand.result + + +class UnaryMinusNode(UnopNode): + # unary '-' operator + + operator = '-' + + def analyse_c_operation(self, env): + if self.operand.type.is_numeric: + self.type = self.operand.type + else: + self.type_error() + + def py_operation_function(self): + 
return "PyNumber_Negative" + + def result_code(self): + return "(-%s)" % self.operand.result + + +class TildeNode(UnopNode): + # unary '~' operator + + def analyse_c_operation(self, env): + if self.operand.type.is_int: + self.type = self.operand.type + else: + self.type_error() + + def py_operation_function(self): + return "PyNumber_Invert" + + def result_code(self): + return "(~%s)" % self.operand.result + + +class AmpersandNode(ExprNode): + # The C address-of operator. + # + # operand ExprNode + + subexprs = ['operand'] + + def analyse_types(self, env): + self.operand.analyse_types(env) + argtype = self.operand.type + if not (argtype.is_cfunction or self.operand.is_lvalue()): + self.error("Taking address of non-lvalue") + return + if argtype.is_pyobject: + self.error("Cannot take address of Python variable") + return + self.type = PyrexTypes.c_ptr_type(argtype) + + def check_const(self): + self.operand.check_const_addr() + + def error(self, mess): + error(self.pos, mess) + self.type = PyrexTypes.error_type + self.result = "" + + def result_code(self): + return "(&%s)" % self.operand.result + + def generate_result_code(self, code): + pass + + +unop_node_classes = { + "+": UnaryPlusNode, + "-": UnaryMinusNode, + "~": TildeNode, +} + +def unop_node(pos, operator, operand): + # Construct unnop node of appropriate class for + # given operator. 
+ return unop_node_classes[operator](pos, + operator = operator, + operand = operand) + + +class TypecastNode(ExprNode): + # C type cast + # + # base_type CBaseTypeNode + # declarator CDeclaratorNode + # operand ExprNode + + subexprs = ['operand'] + + def analyse_types(self, env): + base_type = self.base_type.analyse(env) + _, self.type = self.declarator.analyse(base_type, env) + self.operand.analyse_types(env) + to_py = self.type.is_pyobject + from_py = self.operand.type.is_pyobject + if from_py and not to_py and self.operand.is_ephemeral(): + error(self.pos, "Casting temporary Python object to non-Python type") + #if to_py: + if to_py and not from_py: + self.is_temp = 1 + + def check_const(self): + self.operand.check_const() + + def result_code(self): + if self.type.is_pyobject: + cast = "PyObject *" + else: + cast = self.type.declaration_code("") + return "((%s)%s)" % (cast, self.operand.result) + + def result_as_extension_type(self): + return "((%s)%s)" % ( + self.type.declaration_code(""), + self.operand.result) + + def generate_result_code(self, code): + if self.is_temp: + code.putln( + "%s = (PyObject *)%s;" % ( + self.result, + #self.type.declaration_code(""), + self.operand.result)) + code.put_incref(self.result, self.type) + + +class SizeofNode(ExprNode): + # Abstract base class for sizeof(x) expression nodes. 
+ + def check_const(self): + pass + + def generate_result_code(self, code): + pass + + +class SizeofTypeNode(SizeofNode): + # C sizeof function applied to a type + # + # base_type CBaseTypeNode + # declarator CDeclaratorNode + + subexprs = [] + + def analyse_types(self, env): + base_type = self.base_type.analyse(env) + _, arg_type = self.declarator.analyse(base_type, env) + self.arg_type = arg_type + if arg_type.is_pyobject: + error(self.pos, "Cannot take sizeof Python object") + elif arg_type.is_void: + error(self.pos, "Cannot take sizeof void") + elif not arg_type.is_complete(): + error(self.pos, "Cannot take sizeof incomplete type '%s'" % arg_code) + self.type = PyrexTypes.c_int_type + + def result_code(self): + arg_code = self.arg_type.declaration_code("") + return "(sizeof(%s))" % arg_code + + +class SizeofVarNode(SizeofNode): + # C sizeof function applied to a variable + # + # operand ExprNode + + subexprs = ['operand'] + + def analyse_types(self, env): + self.operand.analyse_types(env) + self.type = PyrexTypes.c_int_type + + def result_code(self): + return "(sizeof(%s))" % self.operand.result + + def generate_result_code(self, code): + pass + + +#------------------------------------------------------------------- +# +# Binary operator nodes +# +#------------------------------------------------------------------- + +class BinopNode(ExprNode): + # operator string + # operand1 ExprNode + # operand2 ExprNode + # + # Processing during analyse_expressions phase: + # + # analyse_c_operation + # Called when neither operand is a pyobject. + # - Check operand types and coerce if needed. + # - Determine result type and result code fragment. + # - Allocate temporary for result if needed. 
+ + subexprs = ['operand1', 'operand2'] + + def analyse_types(self, env): + self.operand1.analyse_types(env) + self.operand2.analyse_types(env) + if self.is_py_operation(): + self.coerce_operands_to_pyobjects(env) + self.type = PyrexTypes.py_object_type + self.is_temp = 1 + else: + self.analyse_c_operation(env) + + def is_py_operation(self): + return (self.operand1.type.is_pyobject + or self.operand2.type.is_pyobject) + + def coerce_operands_to_pyobjects(self, env): + self.operand1 = self.operand1.coerce_to_pyobject(env) + self.operand2 = self.operand2.coerce_to_pyobject(env) + + def check_const(self): + self.operand1.check_const() + self.operand2.check_const() + + def generate_result_code(self, code): + if self.operand1.type.is_pyobject: + function = self.py_operation_function() + if function == "PyNumber_Power": + extra_args = ", Py_None" + else: + extra_args = "" + code.putln( + "%s = %s(%s, %s%s); if (!%s) %s" % ( + self.result, + function, + self.operand1.result, + self.operand2.result, + extra_args, + self.result, + code.error_goto(self.pos))) + else: + if self.is_temp: + self.generate_c_operation_code(code) + + def type_error(self): + if not (self.operand1.type.is_error + or self.operand2.type.is_error): + error(self.pos, "Invalid operand types for '%s' (%s; %s)" % + (self.operator, self.operand1.type, + self.operand2.type)) + self.type = PyrexTypes.error_type + + +class NumBinopNode(BinopNode): + # Binary operation taking numeric arguments. 
+ + def analyse_c_operation(self, env): + type1 = self.operand1.type + type2 = self.operand2.type + self.type = self.compute_c_result_type(type1, type2) + if not self.type: + self.type_error() + + def compute_c_result_type(self, type1, type2): + if self.c_types_okay(type1, type2): + return PyrexTypes.widest_numeric_type(type1, type2) + else: + return None + + def c_types_okay(self, type1, type2): + return type1.is_numeric and type2.is_numeric + + def result_code(self): + return "(%s %s %s)" % ( + self.operand1.result, + self.operator, + self.operand2.result) + + def py_operation_function(self): + return self.py_functions[self.operator] + + py_functions = { + "|": "PyNumber_Or", + "^": "PyNumber_Xor", + "&": "PyNumber_And", + "<<": "PyNumber_Lshift", + ">>": "PyNumber_Rshift", + "+": "PyNumber_Add", + "-": "PyNumber_Subtract", + "*": "PyNumber_Multiply", + "/": "PyNumber_Divide", + "%": "PyNumber_Remainder", + "**": "PyNumber_Power" + } + + +class IntBinopNode(NumBinopNode): + # Binary operation taking integer arguments. + + def c_types_okay(self, type1, type2): + return type1.is_int and type2.is_int + + +class AddNode(NumBinopNode): + # '+' operator. + + def is_py_operation(self): + if self.operand1.type.is_string \ + and self.operand2.type.is_string: + return 1 + else: + return NumBinopNode.is_py_operation(self) + + def compute_c_result_type(self, type1, type2): + if type1.is_ptr and type2.is_int: + return type1 + elif type1.is_int and type2.is_ptr: + return type2 + else: + return NumBinopNode.compute_c_result_type( + self, type1, type2) + + +class SubNode(NumBinopNode): + # '-' operator. + + def compute_c_result_type(self, type1, type2): + if type1.is_ptr and type2.is_int: + return type1 + elif type1.is_ptr and type2.is_ptr: + return PyrexTypes.c_int_type + else: + return NumBinopNode.compute_c_result_type( + self, type1, type2) + + +class MulNode(NumBinopNode): + # '*' operator. 
+ + def is_py_operation(self): + type1 = self.operand1.type + type2 = self.operand2.type + if (type1.is_string and type2.is_int) \ + or (type2.is_string and type1.is_int): + return 1 + else: + return NumBinopNode.is_py_operation(self) + + +class ModNode(IntBinopNode): + # '%' operator. + + def is_py_operation(self): + return (self.operand1.type.is_string + or self.operand2.type.is_string + or IntBinopNode.is_py_operation(self)) + + +class PowNode(NumBinopNode): + # '**' operator. + + def analyse_types(self, env): + env.pow_function_used = 1 + NumBinopNode.analyse_types(self, env) + + def compute_c_result_type(self, type1, type2): + if self.c_types_okay(type1, type2): + return PyrexTypes.c_double_type + else: + return None + + def result_code(self): + return "pow(%s, %s)" % ( + self.operand1.result, self.operand2.result) + + +class BoolBinopNode(ExprNode): + # Short-circuiting boolean operation. + # + # operator string + # operand1 ExprNode + # operand2 ExprNode + # temp_bool ExprNode used internally + + temp_bool = None + + subexprs = ['operand1', 'operand2', 'temp_bool'] + + def analyse_types(self, env): + self.operand1.analyse_types(env) + self.operand2.analyse_types(env) + if self.operand1.type.is_pyobject or \ + self.operand2.type.is_pyobject: + self.operand1 = self.operand1.coerce_to_pyobject(env) + self.operand2 = self.operand2.coerce_to_pyobject(env) + self.temp_bool = TempNode(self.pos, + PyrexTypes.c_int_type, env) + self.type = PyrexTypes.py_object_type + else: + self.operand1 = self.operand1.coerce_to_boolean(env) + self.operand2 = self.operand2.coerce_to_boolean(env) + self.type = PyrexTypes.c_int_type + # For what we're about to do, it's vital that + # both operands be temp nodes. 
+ self.operand1 = self.operand1.coerce_to_temp(env) #CTT + self.operand2 = self.operand2.coerce_to_temp(env) + # coerce_to_simple does not seem to be sufficient + #self.operand1 = self.operand1.coerce_to_simple(env) + #self.operand2 = self.operand2.coerce_to_simple(env) + self.is_temp = 1 + + def allocate_temps(self, env, result = None): + # We don't need both operands at the same time, and + # one of the operands will also be our result. So we + # use an allocation strategy here which results in + # this node and both its operands sharing the same + # result variable. This allows us to avoid some + # assignments and increfs/decrefs that would otherwise + # be necessary. + self.allocate_temp(env, result) + self.operand1.allocate_temps(env, self.result) + if self.temp_bool: + self.temp_bool.allocate_temp(env) + self.temp_bool.release_temp(env) + self.operand2.allocate_temps(env, self.result) + # We haven't called release_temp on either operand, + # because although they are temp nodes, they don't own + # their result variable. And because they are temp + # nodes, any temps in their subnodes will have been + # released before their allocate_temps returned. + # Therefore, they contain no temp vars that need to + # be released. + + def check_const(self): + self.operand1.check_const() + self.operand2.check_const() + + def result_code(self): + return "(%s %s %s)" % ( + self.operand1.result, + self.py_to_c_op[self.operator], + self.operand2.result) + + py_to_c_op = {'and': "&&", 'or': "||"} + + def generate_evaluation_code(self, code): + self.operand1.generate_evaluation_code(code) + test_result = self.generate_operand1_test(code) + if self.operator == 'and': + sense = "" + else: + sense = "!" + code.putln( + "if (%s%s) {" % ( + sense, + test_result)) + self.operand1.generate_disposal_code(code) + self.operand2.generate_evaluation_code(code) + code.putln( + "}") + + def generate_operand1_test(self, code): + # Generate code to test the truth of the first operand. 
+ if self.type.is_pyobject: + test_result = self.temp_bool.result + code.putln( + "%s = PyObject_IsTrue(%s); if (%s < 0) %s" % ( + test_result, + self.operand1.result, + test_result, + code.error_goto(self.pos))) + else: + test_result = self.operand1.result + return test_result + + +class CmpNode: + # Mixin class containing code common to PrimaryCmpNodes + # and CascadedCmpNodes. + + def is_python_comparison(self): + return (self.has_python_operands() + or (self.cascade and self.cascade.is_python_comparison()) + or self.operator in ('in', 'not_in')) + + def check_types(self, env, operand1, op, operand2): + if not self.types_okay(operand1, op, operand2): + error(self.pos, "Invalid types for '%s' (%s, %s)" % + (self.operator, operand1.type, operand2.type)) + + def types_okay(self, operand1, op, operand2): + type1 = operand1.type + type2 = operand2.type + if type1.is_error or type2.is_error: + return 1 + if type1.is_pyobject: # type2 will be, too + return 1 + elif type1.is_ptr: + return type1.is_null_ptr or type2.is_null_ptr \ + or type1.same_as(type2) + elif (type1.is_numeric and type2.is_numeric + and op not in ('is', 'is_not')): + return 1 + else: + return 0 + + def generate_operation_code(self, code, result, + operand1, op , operand2): + if op == 'in' or op == 'not_in': + code.putln( + "%s = PySequence_Contains(%s, %s); if (%s < 0) %s" % ( + result, + operand2.result, + operand1.result, + result, + code.error_goto(self.pos))) + if op == 'not_in': + code.putln( + "%s = !%s;" % ( + result, result)) + elif (operand1.type.is_pyobject + and op not in ('is', 'is_not')): + code.putln( + "if (PyObject_Cmp(%s, %s, &%s) < 0) %s" % ( + operand1.result, + operand2.result, + result, + code.error_goto(self.pos))) + code.putln( + "%s = %s %s 0;" % ( + result, result, op)) + else: + code.putln("%s = %s %s %s;" % ( + result, + operand1.result, + self.c_operator(op), + operand2.result)) + + def c_operator(self, op): + if op == 'is': + return "==" + elif op == 'is_not': + return 
"!=" + else: + return op + + +class PrimaryCmpNode(ExprNode, CmpNode): + # Non-cascaded comparison or first comparison of + # a cascaded sequence. + # + # operator string + # operand1 ExprNode + # operand2 ExprNode + # cascade CascadedCmpNode + + # We don't use the subexprs mechanism, because + # things here are too complicated for it to handle. + # Instead, we override all the framework methods + # which use it. + + cascade = None + + def analyse_types(self, env): + self.operand1.analyse_types(env) + self.operand2.analyse_types(env) + if self.cascade: + self.cascade.analyse_types(env, self.operand2) + self.is_pycmp = self.is_python_comparison() + if self.is_pycmp: + self.coerce_operands_to_pyobjects(env) + if self.cascade: + #self.operand2 = self.operand2.coerce_to_temp(env) #CTT + self.operand2 = self.operand2.coerce_to_simple(env) + self.cascade.coerce_cascaded_operands_to_temp(env) + self.check_operand_types(env) + self.type = PyrexTypes.c_int_type + if self.is_pycmp or self.cascade: + self.is_temp = 1 + + def check_operand_types(self, env): + self.check_types(env, + self.operand1, self.operator, self.operand2) + if self.cascade: + self.cascade.check_operand_types(env, self.operand2) + + def has_python_operands(self): + return (self.operand1.type.is_pyobject + or self.operand2.type.is_pyobject) + + def coerce_operands_to_pyobjects(self, env): + self.operand1 = self.operand1.coerce_to_pyobject(env) + self.operand2 = self.operand2.coerce_to_pyobject(env) + if self.cascade: + self.cascade.coerce_operands_to_pyobjects(env) + + def allocate_subexpr_temps(self, env): + self.operand1.allocate_temps(env) + self.operand2.allocate_temps(env) + if self.cascade: + self.cascade.allocate_subexpr_temps(env) + + def release_subexpr_temps(self, env): + self.operand1.release_temp(env) + self.operand2.release_temp(env) + if self.cascade: + self.cascade.release_subexpr_temps(env) + + def check_const(self): + self.operand1.check_const() + self.operand2.check_const() + if 
self.cascade: + self.not_const() + + def result_code(self): + return "(%s %s %s)" % ( + self.operand1.result, + self.c_operator(self.operator), + self.operand2.result) + + def generate_evaluation_code(self, code): + self.operand1.generate_evaluation_code(code) + self.operand2.generate_evaluation_code(code) + if self.is_temp: + self.generate_operation_code(code, self.result, + self.operand1, self.operator, self.operand2) + if self.cascade: + self.cascade.generate_evaluation_code(code, + self.result, self.operand2) + self.operand1.generate_disposal_code(code) + self.operand2.generate_disposal_code(code) + + def generate_subexpr_disposal_code(self, code): + # If this is called, it is a non-cascaded cmp, + # so only need to dispose of the two main operands. + self.operand1.generate_disposal_code(code) + self.operand2.generate_disposal_code(code) + + +class CascadedCmpNode(Node, CmpNode): + # A CascadedCmpNode is not a complete expression node. It + # hangs off the side of another comparison node, shares + # its left operand with that node, and shares its result + # with the PrimaryCmpNode at the head of the chain. 
+ # + # operator string + # operand2 ExprNode + # cascade CascadedCmpNode + + cascade = None + + def analyse_types(self, env, operand1): + self.operand2.analyse_types(env) + if self.cascade: + self.cascade.analyse_types(env, self.operand2) + + def check_operand_types(self, env, operand1): + self.check_types(env, + operand1, self.operator, self.operand2) + if self.cascade: + self.cascade.check_operand_types(env, self.operand2) + + def has_python_operands(self): + return self.operand2.type.is_pyobject + + def coerce_operands_to_pyobjects(self, env): + self.operand2 = self.operand2.coerce_to_pyobject(env) + if self.cascade: + self.cascade.coerce_operands_to_pyobjects(env) + + def coerce_cascaded_operands_to_temp(self, env): + if self.cascade: + #self.operand2 = self.operand2.coerce_to_temp(env) #CTT + self.operand2 = self.operand2.coerce_to_simple(env) + self.cascade.coerce_cascaded_operands_to_temp(env) + + def allocate_subexpr_temps(self, env): + self.operand2.allocate_temps(env) + if self.cascade: + self.cascade.allocate_subexpr_temps(env) + + def release_subexpr_temps(self, env): + self.operand2.release_temp(env) + if self.cascade: + self.cascade.release_subexpr_temps(env) + + def generate_evaluation_code(self, code, result, operand1): + code.putln("if (%s) {" % result) + self.operand2.generate_evaluation_code(code) + self.generate_operation_code(code, result, + operand1, self.operator, self.operand2) + if self.cascade: + self.cascade.generate_evaluation_code( + code, result, self.operand2) + # Cascaded cmp result is always temp + self.operand2.generate_disposal_code(code) + code.putln("}") + + +binop_node_classes = { + "or": BoolBinopNode, + "and": BoolBinopNode, + "|": IntBinopNode, + "^": IntBinopNode, + "&": IntBinopNode, + "<<": IntBinopNode, + ">>": IntBinopNode, + "+": AddNode, + "-": SubNode, + "*": MulNode, + "/": NumBinopNode, + "%": ModNode, + "**": PowNode +} + +def binop_node(pos, operator, operand1, operand2): + # Construct binop node of appropriate 
class for + # given operator. + return binop_node_classes[operator](pos, + operator = operator, + operand1 = operand1, + operand2 = operand2) + +#------------------------------------------------------------------- +# +# Coercion nodes +# +# Coercion nodes are special in that they are created during +# the analyse_types phase of parse tree processing. +# Their __init__ methods consequently incorporate some aspects +# of that phase. +# +#------------------------------------------------------------------- + +class CoercionNode(ExprNode): + # Abstract base class for coercion nodes. + # + # arg ExprNode node being coerced + + subexprs = ['arg'] + + def __init__(self, arg): + self.pos = arg.pos + self.arg = arg + if debug_coercion: + print self, "Coercing", self.arg + + +class CastNode(CoercionNode): + # Wrap a node in a C type cast. + + def __init__(self, arg, new_type): + CoercionNode.__init__(self, arg) + self.type = new_type + + def result_code(self): + return "((%s)%s)" % ( + self.type.declaration_code(""), + self.arg.result) + + def result_as_extension_type(self): + return self.result + + def generate_result_code(self, code): + self.arg.generate_result_code(code) + + +class PyTypeTestNode(CoercionNode): + # This node is used to check that a generic Python + # object is an instance of a particular extension type. + # This node borrows the result of its argument node. + + def __init__(self, arg, dst_type, env): + # The arg is know to be a Python object, and + # the dst_type is known to be an extension type. 
+ assert dst_type.is_extension_type, "PyTypeTest on non extension type" + CoercionNode.__init__(self, arg) + self.type = dst_type + env.use_utility_code(type_test_utility_code) + + def result_in_temp(self): + return self.arg.result_in_temp() + + def is_ephemeral(self): + return self.arg.is_ephemeral() + + def result_code(self): + return self.arg.result + + def result_as_extension_type(self): + return "((%s)%s)" % ( + self.type.declaration_code(""), + self.arg.result) + + def generate_result_code(self, code): + if self.type.typeobj_is_available(): + code.putln( + "if (!__Pyx_TypeTest(%s, %s)) %s" % ( + self.result, + self.type.typeptr_cname, + code.error_goto(self.pos))) + else: + error(self.pos, "Cannot test type of extern C class " + "without type object name specification") + + def generate_post_assignment_code(self, code): + self.arg.generate_post_assignment_code(code) + + +class CoerceToPyTypeNode(CoercionNode): + # This node is used to convert a C data type + # to a Python object. + + def __init__(self, arg, env): + CoercionNode.__init__(self, arg) + self.type = PyrexTypes.py_object_type + if not arg.type.to_py_function: + error(arg.pos, + "Cannot convert '%s' to Python object" % arg.type) + self.is_temp = 1 + + def generate_result_code(self, code): + function = self.arg.type.to_py_function + code.putln('%s = %s(%s); if (!%s) %s' % ( + self.result, + function, + self.arg.result, + self.result, + code.error_goto(self.pos))) + + +class CoerceFromPyTypeNode(CoercionNode): + # This node is used to convert a Python object + # to a C data type. 
+ + def __init__(self, result_type, arg, env): + CoercionNode.__init__(self, arg) + self.type = result_type + if not result_type.from_py_function: + error(arg.pos, + "Cannot convert Python object to '%s'" % result_type) + if self.type.is_string and self.arg.is_ephemeral(): + error(arg.pos, + "Obtaining char * from temporary Python value") + self.is_temp = 1 + + def generate_result_code(self, code): + opnd = self.arg.result + function = self.type.from_py_function + code.putln('%s = %s(%s); if (PyErr_Occurred()) %s' % ( + self.result, + function, + self.arg.result, + code.error_goto(self.pos))) + + +class CoerceToBooleanNode(CoercionNode): + # This node is used when a result needs to be used + # in a boolean context. + + def __init__(self, arg, env): + CoercionNode.__init__(self, arg) + self.type = PyrexTypes.c_int_type + if arg.type.is_pyobject: + self.is_temp = 1 + + def check_const(self): + if self.is_temp: + self.not_const() + self.arg.check_const() + + def result_code(self): + return "(%s != 0)" % self.arg.result + + def generate_result_code(self, code): + if self.arg.type.is_pyobject: + code.putln( + "%s = PyObject_IsTrue(%s); if (%s < 0) %s" % ( + self.result, + self.arg.result, + self.result, + code.error_goto(self.pos))) + + +class CoerceToTempNode(CoercionNode): + # This node is used to force the result of another node + # to be stored in a temporary. It is only used if the + # argument node's result is not already in a temporary. + + def __init__(self, arg, env): + CoercionNode.__init__(self, arg) + self.type = self.arg.type + self.is_temp = 1 + + def generate_result_code(self, code): + #self.arg.generate_evaluation_code(code) # Already done + # by generic generate_subexpr_evaluation_code! + code.putln("%s = %s;" % ( + self.result, self.arg.result)) + if self.type.is_pyobject: + code.put_incref(self.result, self.type) + + +class CloneNode(CoercionNode): + # This node is employed when the result of another node needs + # to be used multiple times. 
The argument node's result must + # be in a temporary. This node "borrows" the result from the + # argument node, and does not generate any evaluation or + # disposal code for it. The original owner of the argument + # node is responsible for doing those things. + + subexprs = [] # Arg is not considered a subexpr + + def __init__(self, arg): + CoercionNode.__init__(self, arg) + self.type = arg.type + + def result_code(self): + return self.arg.result + + def result_as_extension_type(self): + return self.arg.result_as_extension_type() + + def generate_evaluation_code(self, code): + pass + + def generate_result_code(self, code): + pass + +#------------------------------------------------------------------------------------ +# +# Runtime support code +# +#------------------------------------------------------------------------------------ + +get_name_utility_code = \ +""" +static PyObject *__Pyx_GetName(PyObject *dict, char *name) { + PyObject *result; + result = PyObject_GetAttrString(dict, name); + if (!result) + PyErr_SetString(PyExc_NameError, name); + return result; +} +""" + +get_name_interned_utility_code = \ +""" +static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name) { + PyObject *result; + result = PyObject_GetAttr(dict, name); + if (!result) + PyErr_SetObject(PyExc_NameError, name); + return result; +} +""" + +#------------------------------------------------------------------------------------ + +import_utility_code = \ +""" +static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list) { + PyObject *__import__ = 0; + PyObject *empty_list = 0; + PyObject *module = 0; + PyObject *global_dict = 0; + PyObject *empty_dict = 0; + PyObject *list; + __import__ = PyObject_GetAttrString(%(BUILTINS)s, "__import__"); + if (!__import__) + goto bad; + if (from_list) + list = from_list; + else { + empty_list = PyList_New(0); + if (!empty_list) + goto bad; + list = empty_list; + } + global_dict = PyModule_GetDict(%(GLOBALS)s); + if (!global_dict) + goto bad; 
+ empty_dict = PyDict_New(); + if (!empty_dict) + goto bad; + module = PyObject_CallFunction(__import__, "OOOO", + name, global_dict, empty_dict, list); +bad: + Py_XDECREF(empty_list); + Py_XDECREF(__import__); + Py_XDECREF(empty_dict); + return module; +} +""" % { + "BUILTINS": Naming.builtins_cname, + "GLOBALS": Naming.module_cname, +} + +#------------------------------------------------------------------------------------ + +get_exception_utility_code = \ +""" +static PyObject *__Pyx_GetExcValue(void) { + PyObject *type = 0, *value = 0, *tb = 0; + PyObject *result = 0; + PyThreadState *tstate = PyThreadState_Get(); + PyErr_Fetch(&type, &value, &tb); + PyErr_NormalizeException(&type, &value, &tb); + if (PyErr_Occurred()) + goto bad; + if (!value) { + value = Py_None; + Py_INCREF(value); + } + Py_XDECREF(tstate->exc_type); + Py_XDECREF(tstate->exc_value); + Py_XDECREF(tstate->exc_traceback); + tstate->exc_type = type; + tstate->exc_value = value; + tstate->exc_traceback = tb; + result = value; + Py_XINCREF(result); + type = 0; + value = 0; + tb = 0; +bad: + Py_XDECREF(type); + Py_XDECREF(value); + Py_XDECREF(tb); + return result; +} +""" + +#------------------------------------------------------------------------------------ + +unpacking_utility_code = \ +""" +static void __Pyx_UnpackError(void) { + PyErr_SetString(PyExc_ValueError, "unpack sequence of wrong size"); +} + +static PyObject *__Pyx_UnpackItem(PyObject *seq, int i) { + PyObject *item; + if (!(item = PySequence_GetItem(seq, i))) { + if (PyErr_ExceptionMatches(PyExc_IndexError)) + __Pyx_UnpackError(); + } + return item; +} + +static int __Pyx_EndUnpack(PyObject *seq, int i) { + PyObject *item; + if (item = PySequence_GetItem(seq, i)) { + Py_DECREF(item); + __Pyx_UnpackError(); + return -1; + } + PyErr_Clear(); + return 0; +} +""" + +#------------------------------------------------------------------------------------ + +type_test_utility_code = \ +""" +static int __Pyx_TypeTest(PyObject *obj, 
PyTypeObject *type) { + if (!type) { + PyErr_Format(PyExc_SystemError, "Missing type object"); + return 0; + } + if (obj == Py_None || PyObject_TypeCheck(obj, type)) + return 1; + PyErr_Format(PyExc_TypeError, "Cannot convert %s to %s", + obj->ob_type->tp_name, type->tp_name); + return 0; +} +""" + +#------------------------------------------------------------------------------------ + +create_class_utility_code = \ +""" +static PyObject *__Pyx_CreateClass( + PyObject *bases, PyObject *dict, PyObject *name, char *modname) +{ + PyObject *py_modname; + PyObject *result = 0; + + py_modname = PyString_FromString(modname); + if (!py_modname) + goto bad; + if (PyDict_SetItemString(dict, "__module__", py_modname) < 0) + goto bad; + result = PyClass_New(bases, dict, name); +bad: + Py_XDECREF(py_modname); + return result; +} +""" + +#------------------------------------------------------------------------------------ Added: lxml/pyrex/Pyrex/Compiler/ExprNodes.pyc ============================================================================== Binary file. No diff available. Added: lxml/pyrex/Pyrex/Compiler/Lexicon.pickle ============================================================================== Binary file. No diff available. Added: lxml/pyrex/Pyrex/Compiler/Lexicon.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Compiler/Lexicon.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,143 @@ +# +# Pyrex Scanner - Lexical Definitions +# +# Changing anything in this file will cause Lexicon.pickle +# to be rebuilt next time pyrexc is run. 
+# + +string_prefixes = "cCrR" + +def make_lexicon(): + from Pyrex.Plex import \ + Str, Any, AnyBut, AnyChar, Rep, Rep1, Opt, Bol, Eol, Eof, \ + TEXT, IGNORE, State, Lexicon + from Scanning import Method + + letter = Any("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_") + digit = Any("0123456789") + octdigit = Any("01234567") + hexdigit = Any("0123456789ABCDEFabcdef") + indentation = Bol + Rep(Any(" \t")) + + #resword = apply(Str, reserved_words) + decimal = Rep1(digit) + dot = Str(".") + exponent = Any("Ee") + Opt(Any("+-")) + decimal + decimal_fract = (decimal + dot + Opt(decimal)) | (dot + decimal) + + name = letter + Rep(letter | digit) + intconst = decimal | (Str("0x") + Rep1(hexdigit)) + fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent) + imagconst = (intconst | fltconst) + Any("jJ") + + sq_string = ( + Str("'") + + Rep(AnyBut("\\\n'") | (Str("\\") + AnyChar)) + + Str("'") + ) + + dq_string = ( + Str('"') + + Rep(AnyBut('\\\n"') | (Str("\\") + AnyChar)) + + Str('"') + ) + + non_sq = AnyBut("'") | (Str('\\') + AnyChar) + tsq_string = ( + Str("'''") + + Rep(non_sq | (Str("'") + non_sq) | (Str("''") + non_sq)) + + Str("'''") + ) + + non_dq = AnyBut('"') | (Str('\\') + AnyChar) + tdq_string = ( + Str('"""') + + Rep(non_dq | (Str('"') + non_dq) | (Str('""') + non_dq)) + + Str('"""') + ) + stringlit = Opt(Any(string_prefixes)) + (sq_string | dq_string | tsq_string| tdq_string) + + beginstring = Opt(Any(string_prefixes)) + (Str("'") | Str('"') | Str("'''") | Str('"""')) + two_oct = octdigit + octdigit + three_oct = octdigit + octdigit + octdigit + two_hex = hexdigit + hexdigit + escapeseq = Str("\\") + (two_oct | three_oct | two_hex | AnyChar) + + bra = Any("([{") + ket = Any(")]}") + punct = Any(":,;+-*/|&<>=.%`~^?") + diphthong = Str("==", "<>", "!=", "<=", ">=", "<<", ">>", "**") + spaces = Rep1(Any(" \t\f")) + comment = Str("#") + Rep(AnyBut("\n")) + escaped_newline = Str("\\\n") + lineterm = Eol + Opt(Str("\n")) + + return Lexicon([ + 
#(resword, TEXT), + (name, 'IDENT'), + (intconst, 'INT'), + (fltconst, 'FLOAT'), + (imagconst, 'IMAG'), + (punct | diphthong, TEXT), + + (bra, Method('open_bracket_action')), + (ket, Method('close_bracket_action')), + (lineterm, Method('newline_action')), + + #(stringlit, 'STRING'), + (beginstring, Method('begin_string_action')), + + (comment, IGNORE), + (spaces, IGNORE), + (escaped_newline, IGNORE), + + State('INDENT', [ + (Opt(spaces) + Opt(comment) + lineterm, IGNORE), + (indentation, Method('indentation_action')), + (Eof, Method('eof_action')) + ]), + + State('SQ_STRING', [ + (escapeseq, 'ESCAPE'), + (Rep1(AnyBut("'\"\n\\")), 'CHARS'), + (Str('"'), 'CHARS'), + (Str("\n"), Method('unclosed_string_action')), + (Str("'"), Method('end_string_action')), + (Eof, 'EOF') + ]), + + State('DQ_STRING', [ + (escapeseq, 'ESCAPE'), + (Rep1(AnyBut('"\n\\')), 'CHARS'), + (Str("'"), 'CHARS'), + (Str("\n"), Method('unclosed_string_action')), + (Str('"'), Method('end_string_action')), + (Eof, 'EOF') + ]), + + State('TSQ_STRING', [ + (escapeseq, 'ESCAPE'), + (Rep1(AnyBut("'\"\n\\")), 'CHARS'), + (Any("'\""), 'CHARS'), + (Str("\n"), 'NEWLINE'), + (Str("'''"), Method('end_string_action')), + (Eof, 'EOF') + ]), + + State('TDQ_STRING', [ + (escapeseq, 'ESCAPE'), + (Rep1(AnyBut('"\'\n\\')), 'CHARS'), + (Any("'\""), 'CHARS'), + (Str("\n"), 'NEWLINE'), + (Str('"""'), Method('end_string_action')), + (Eof, 'EOF') + ]), + + (Eof, Method('eof_action')) + ], + + # FIXME: Plex 1.9 needs different args here from Plex 1.1.4 + #debug_flags = scanner_debug_flags, + #debug_file = scanner_dump_file + ) + Added: lxml/pyrex/Pyrex/Compiler/Lexicon.pyc ============================================================================== Binary file. No diff available. 
Added: lxml/pyrex/Pyrex/Compiler/Main.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Compiler/Main.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,303 @@ +# +# Pyrex Top Level +# + +import sys +if sys.version_info[:2] < (2, 2): + print >>sys.stderr, "Sorry, Pyrex requires Python 2.2 or later" + sys.exit(1) + +import os +from time import time +import Version +from Scanning import PyrexScanner +import Errors +from Errors import PyrexError, CompileError, error +import Parsing +from Symtab import BuiltinScope, ModuleScope +import Code +from Pyrex.Utils import replace_suffix + +verbose = 0 + +class Context: + # This class encapsulates the context needed for compiling + # one or more Pyrex implementation files along with their + # associated and imported declaration files. It includes + # the root of the module import namespace and the list + # of directories to search for include files. + # + # modules {string : ModuleScope} + # include_directories [string] + + def __init__(self, include_directories): + self.modules = {"__builtin__" : BuiltinScope()} + self.include_directories = include_directories + + def find_module(self, module_name, + relative_to = None, pos = None, need_pxd = 1): + # Finds and returns the module scope corresponding to + # the given relative or absolute module name. If this + # is the first time the module has been requested, finds + # the corresponding .pxd file and process it. + # If relative_to is not None, it must be a module scope, + # and the module will first be searched for relative to + # that module, provided its name is not a dotted name. + debug_find_module = 0 + if debug_find_module: + print "Context.find_module: module_name =", module_name, \ + "relative_to =", relative_to, "pos =", pos, "need_pxd =", need_pxd + scope = None + pxd_pathname = None + if "." 
not in module_name and relative_to: + if debug_find_module: + print "...trying relative import" + scope = relative_to.lookup_submodule(module_name) + if not scope: + qualified_name = relative_to.qualify_name(module_name) + pxd_pathname = self.find_pxd_file(qualified_name, pos) + if pxd_pathname: + scope = relative_to.find_submodule(module_name) + if not scope: + if debug_find_module: + print "...trying absolute import" + scope = self + for name in module_name.split("."): + scope = scope.find_submodule(name) + if debug_find_module: + print "...scope =", scope + if not scope.pxd_file_loaded: + if debug_find_module: + print "...pxd not loaded" + scope.pxd_file_loaded = 1 + if not pxd_pathname: + if debug_find_module: + print "...looking for pxd file" + pxd_pathname = self.find_pxd_file(module_name, pos) + if debug_find_module: + print "......found ", pxd_pathname + if not pxd_pathname and need_pxd: + error(pos, "'%s.pxd' not found" % module_name) + if pxd_pathname: + try: + if debug_find_module: + print "Context.find_module: Parsing", pxd_pathname + pxd_tree = self.parse(pxd_pathname, scope.type_names, pxd = 1) + pxd_tree.analyse_declarations(scope) + except CompileError: + pass + return scope + + def find_pxd_file(self, module_name, pos): + # Search include directories for the .pxd file + # corresponding to the given (full) module name. + pxd_filename = "%s.pxd" % module_name + return self.search_include_directories(pxd_filename, pos) + + def find_include_file(self, filename, pos): + # Search list of include directories for filename. + # Reports an error and returns None if not found. + path = self.search_include_directories(filename, pos) + if not path: + error(pos, "'%s' not found" % filename) + return path + + def search_include_directories(self, filename, pos): + # Search the list of include directories for the given + # file name. If a source file position is given, first + # searches the directory containing that file. 
Returns + # None if not found, but does not report an error. + dirs = self.include_directories + if pos: + here_dir = os.path.dirname(pos[0]) + dirs = [here_dir] + dirs + for dir in dirs: + path = os.path.join(dir, filename) + if os.path.exists(path): + return path + return None + + def lookup_submodule(self, name): + # Look up a top-level module. Returns None if not found. + return self.modules.get(name, None) + + def find_submodule(self, name): + # Find a top-level module, creating a new one if needed. + scope = self.lookup_submodule(name) + if not scope: + scope = ModuleScope(name, + parent_module = None, context = self) + self.modules[name] = scope + return scope + + def parse(self, source_filename, type_names, pxd): + # Parse the given source file and return a parse tree. + f = open(source_filename, "r") + s = PyrexScanner(f, source_filename, + type_names = type_names, context = self) + try: + tree = Parsing.p_module(s, pxd) + finally: + f.close() + if Errors.num_errors > 0: + raise CompileError + return tree + + def extract_module_name(self, path): + # Get the module name out of a source file pathname. + _, tail = os.path.split(path) + name, _ = os.path.splitext(tail) + return name + + def compile(self, source, options = None): + # Compile a Pyrex implementation file in this context + # and return a CompilationResult. 
+ if not options: + options = default_options + result = CompilationResult() + cwd = os.getcwd() + source = os.path.join(cwd, source) + if options.use_listing_file: + result.listing_file = replace_suffix(source, ".lis") + Errors.open_listing_file(result.listing_file, + echo_to_stderr = options.errors_to_stderr) + else: + Errors.open_listing_file(None) + if options.output_file: + result.c_file = os.path.join(cwd, options.output_file) + else: + result.c_file = replace_suffix(source, ".c") + module_name = self.extract_module_name(source) + initial_pos = (source, 1, 0) + scope = self.find_module(module_name, pos = initial_pos, need_pxd = 0) + try: + tree = self.parse(source, scope.type_names, pxd = 0) + tree.process_implementation(scope, result) + except CompileError: + result.c_file = None + Errors.close_listing_file() + result.num_errors = Errors.num_errors + if result.num_errors > 0: + result.c_file = None + if result.c_file and not options.c_only and c_compile: + result.object_file = c_compile(result.c_file) + if not options.obj_only and c_link: + result.extension_file = c_link(result.object_file) + return result + +#------------------------------------------------------------------------ +# +# Main Python entry point +# +#------------------------------------------------------------------------ + +class CompilationOptions: + """ + Options to the Pyrex compiler: + + show_version boolean Display version number + use_listing_file boolean Generate a .lis file + errors_to_stderr boolean Echo errors to stderr when using .lis + include_path [string] Directories to search for include files + output_file string Name of generated .c file + """ + + def __init__(self, defaults = None, **kw): + self.include_path = [] + if defaults: + self.__dict__.update(defaults.__dict__) + self.__dict__.update(kw) + + +class CompilationResult: + """ + Results from the Pyrex compiler: + + c_file string or None The generated C source file + h_file string or None The generated C header file + 
i_file string or None The generated .pxi file + listing_file string or None File of error messages + object_file string or None Result of compiling the C file + extension_file string or None Result of linking the object file + num_errors integer Number of compilation errors + """ + + def __init__(self): + self.c_file = None + self.h_file = None + self.i_file = None + self.listing_file = None + self.object_file = None + self.extension_file = None + + +def compile(source, options = None, c_compile = 0, c_link = 0): + """ + compile(source, options = default_options) + + Compile the given Pyrex implementation file and return + a CompilationResult object describing what was produced. + """ + if not options: + options = default_options + options = CompilationOptions(defaults = options) + if c_compile: + options.c_only = 0 + if c_link: + options.obj_only = 0 + context = Context(options.include_path) + return context.compile(source, options) + +#------------------------------------------------------------------------ +# +# Main command-line entry point +# +#------------------------------------------------------------------------ + +def main(command_line = 0): + args = sys.argv[1:] + any_failures = 0 + if command_line: + from CmdLine import parse_command_line + options, sources = parse_command_line(args) + else: + options = default_options + sources = args + if options.show_version: + print >>sys.stderr, "Pyrex version %s" % Version.version + context = Context(options.include_path) + for source in sources: + try: + result = context.compile(source, options) + if result.num_errors > 0: + any_failures = 1 + except PyrexError, e: + print >>sys.stderr, e + any_failures = 1 + if any_failures: + sys.exit(1) + +#------------------------------------------------------------------------ +# +# Set the default options depending on the platform +# +#------------------------------------------------------------------------ + +default_options = CompilationOptions( + show_version = 0, + 
use_listing_file = 0, + errors_to_stderr = 1, + c_only = 1, + obj_only = 1, + output_file = None) + +if sys.platform == "mac": + from Pyrex.Mac.MacSystem import c_compile, c_link, CCompilerError + default_options.use_listing_file = 1 +elif sys.platform == "darwin": + from Pyrex.Mac.DarwinSystem import c_compile, c_link, CCompilerError +else: + c_compile = None + c_link = None + + Added: lxml/pyrex/Pyrex/Compiler/Main.pyc ============================================================================== Binary file. No diff available. Added: lxml/pyrex/Pyrex/Compiler/Naming.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Compiler/Naming.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,51 @@ +# +# Pyrex - C naming conventions +# +# +# Prefixes for generating C names. +# Collected here to facilitate ensuring uniqueness. +# + +pyrex_prefix = "__pyx_" + +arg_prefix = pyrex_prefix + "arg_" +funcdoc_prefix = pyrex_prefix + "doc_" +enum_prefix = pyrex_prefix + "e_" +func_prefix = pyrex_prefix + "f_" +gstab_prefix = pyrex_prefix + "getsets_" +prop_get_prefix = pyrex_prefix + "getprop_" +const_prefix = pyrex_prefix + "k" +label_prefix = pyrex_prefix + "L" +pymethdef_prefix = pyrex_prefix + "mdef_" +methtab_prefix = pyrex_prefix + "methods_" +memtab_prefix = pyrex_prefix + "members_" +interned_prefix = pyrex_prefix + "n_" +objstruct_prefix = pyrex_prefix + "obj_" +typeptr_prefix = pyrex_prefix + "ptype_" +prop_set_prefix = pyrex_prefix + "setprop_" +type_prefix = pyrex_prefix + "t_" +typeobj_prefix = pyrex_prefix + "type_" +var_prefix = pyrex_prefix + "v_" +vtable_prefix = pyrex_prefix + "vtable_" +vtabptr_prefix = pyrex_prefix + "vtabptr_" +vtabstruct_prefix = pyrex_prefix + "vtabstruct_" + +args_cname = pyrex_prefix + "args" +kwdlist_cname = pyrex_prefix + "argnames" +obj_base_cname = pyrex_prefix + "base" +builtins_cname = pyrex_prefix + "b" +moddict_cname = pyrex_prefix + "d" +dummy_cname = pyrex_prefix + "dummy" 
+filename_cname = pyrex_prefix + "filename" +filetable_cname = pyrex_prefix + "f" +filenames_cname = pyrex_prefix + "filenames" +intern_tab_cname = pyrex_prefix + "intern_tab" +kwds_cname = pyrex_prefix + "kwds" +lineno_cname = pyrex_prefix + "lineno" +module_cname = pyrex_prefix + "m" +moddoc_cname = pyrex_prefix + "mdoc" +methtable_cname = pyrex_prefix + "methods" +retval_cname = pyrex_prefix + "r" +self_cname = pyrex_prefix + "self" +stringtab_cname = pyrex_prefix + "string_tab" +vtabslot_cname = pyrex_prefix + "vtab" Added: lxml/pyrex/Pyrex/Compiler/Naming.pyc ============================================================================== Binary file. No diff available. Added: lxml/pyrex/Pyrex/Compiler/Nodes.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Compiler/Nodes.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,3992 @@ +# +# Pyrex - Parse tree nodes +# + +import os, string, sys, time + +import Code +from Errors import error, InternalError +import Naming +import PyrexTypes +from PyrexTypes import py_object_type, error_type, CTypedefType +from Symtab import ModuleScope, LocalScope, \ + StructOrUnionScope, PyClassScope, CClassScope +import TypeSlots +import Version +from Pyrex.Utils import open_new_file, replace_suffix +import Options + +from DebugFlags import debug_disposal_code + +class Node: + # pos (string, int, int) Source file position + # is_name boolean Is a NameNode + # is_literal boolean Is a ConstNode + + is_name = 0 + is_literal = 0 + + def __init__(self, pos, **kw): + self.pos = pos + self.__dict__.update(kw) + + # + # There are 3 phases of parse tree processing, applied in order to + # all the statements in a given scope-block: + # + # (1) analyse_declarations + # Make symbol table entries for all declarations at the current + # level, both explicit (def, cdef, etc.) and implicit (assignment + # to an otherwise undeclared name). 
+ # + # (2) analyse_expressions + # Determine the result types of expressions and fill in the + # 'type' attribute of each ExprNode. Insert coercion nodes into the + # tree where needed to convert to and from Python objects. + # Allocate temporary locals for intermediate results. Fill + # in the 'result' attribute of each ExprNode with a C code + # fragment. + # + # (3) generate_code + # Emit C code for all declarations, statements and expressions. + # Recursively applies the 3 processing phases to the bodies of + # functions. + # + + def analyse_declarations(self, env): + pass + + def analyse_expressions(self, env): + raise InternalError("analyse_expressions not implemented for %s" % \ + self.__class__.__name__) + + def generate_code(self, code): + raise InternalError("generate_code not implemented for %s" % \ + self.__class__.__name__) + + +class BlockNode: + # Mixin class for nodes representing a declaration block. + + def generate_const_definitions(self, env, code): + if env.const_entries: + code.putln("") + #code.put_var_declarations(env.const_entries, static = 1) + for entry in env.const_entries: + if not entry.is_interned: + code.put_var_declaration(entry, static = 1) + + def generate_interned_name_decls(self, env, code): + # Flush accumulated interned names from the global scope + # and generate declarations for them. 
+ genv = env.global_scope() + intern_map = genv.intern_map + names = genv.interned_names + if names: + code.putln("") + for name in names: + code.putln( + "static PyObject *%s;" % intern_map[name]) + del names[:] + + def generate_py_string_decls(self, env, code): + entries = env.pystring_entries + if entries: + code.putln("") + for entry in entries: + code.putln( + "static PyObject *%s;" % entry.pystring_cname) + + +class ModuleNode(Node, BlockNode): + # doc string or None + # body StatListNode + + def analyse_declarations(self, env): + env.doc = self.doc + self.body.analyse_declarations(env) + + def process_implementation(self, env, result): + self.analyse_declarations(env) + env.check_c_classes() + self.body.analyse_expressions(env) + env.return_type = PyrexTypes.c_void_type + self.generate_c_code(env, result) + self.generate_h_code(env, result) + + def generate_h_code(self, env, result): + public_vars_and_funcs = [] + public_extension_types = [] + for entry in env.var_entries: + if entry.visibility == 'public': + public_vars_and_funcs.append(entry) + for entry in env.cfunc_entries: + if entry.visibility == 'public': + public_vars_and_funcs.append(entry) + for entry in env.c_class_entries: + if entry.visibility == 'public': + public_extension_types.append(entry) + if public_vars_and_funcs or public_extension_types: + #import os + #outname_base, _ = os.path.splitext(result.c_file) + #result.h_file = outname_base + ".h" + #result.i_file = outname_base + ".pxi" + result.h_file = replace_suffix(result.c_file, ".h") + result.i_file = replace_suffix(result.c_file, ".pxi") + h_code = Code.CCodeWriter(result.h_file) + i_code = Code.PyrexCodeWriter(result.i_file) + for entry in public_vars_and_funcs: + h_code.putln("extern %s;" % + entry.type.declaration_code( + entry.cname, dll_linkage = "DL_IMPORT")) + i_code.putln("cdef extern %s" % + entry.type.declaration_code(entry.cname, pyrex = 1)) + for entry in public_extension_types: + 
self.generate_cclass_header_code(entry.type, h_code) + self.generate_cclass_include_code(entry.type, i_code) + h_code.putln("extern DL_IMPORT(void) init%s(void);" % env.module_name) + #result.h_file_generated = 1 + #result.i_file_generated = 1 + + def generate_cclass_header_code(self, type, h_code): + h_code.putln("extern DL_IMPORT(PyTypeObject) %s;" % type.typeobj_cname) + self.generate_obj_struct_definition(type, h_code) + + def generate_cclass_include_code(self, type, i_code): + i_code.putln("cdef extern class %s.%s:" % ( + type.module_name, type.name)) + i_code.indent() + for entry in type.scope.var_entries: + i_code.putln("cdef %s" % + entry.type.declaration_code(entry.cname, pyrex = 1)) + i_code.dedent() + + def generate_c_code(self, env, result): + modules = [] + self.find_referenced_modules(env, modules, {}) + code = Code.CCodeWriter(result.c_file) + code.init_labels() + self.generate_module_preamble(env, modules, code) + for module in modules: + self.generate_declarations_for_module(module, code) + code.putln("") + code.putln("/* Implementation of %s */" % env.qualified_name) + self.generate_const_definitions(env, code) + self.generate_interned_name_decls(env, code) + self.generate_py_string_decls(env, code) + self.body.generate_function_definitions(env, code) + self.generate_interned_name_table(env, code) + self.generate_py_string_table(env, code) + self.generate_typeobj_definitions(env, code) + self.generate_method_table(env, code) + self.generate_module_init_func(modules[:-1], env, code) + self.generate_filename_table(code) + self.generate_utility_functions(env, code) + result.c_file_generated = 1 + + def find_referenced_modules(self, env, module_list, modules_seen): + for imported_module in env.cimported_modules: + if imported_module not in modules_seen: + modules_seen[imported_module] = 1 + self.find_referenced_modules(imported_module, module_list, modules_seen) + module_list.append(env) + + def generate_module_preamble(self, env, cimported_modules, 
code): + code.putln('/* Generated by Pyrex %s on %s */' % ( + Version.version, time.asctime())) + code.putln('') + for filename in env.python_include_files: + code.putln('#include "%s"' % filename) + code.putln("#ifndef PY_LONG_LONG") + code.putln(" #define PY_LONG_LONG LONG_LONG") + code.putln("#endif") + self.generate_includes(env, cimported_modules, code) + #for filename in env.include_files: + # code.putln('#include "%s"' % filename) + code.putln('') + code.put(utility_function_predeclarations) + if Options.intern_names: + code.putln(get_name_interned_predeclaration) + else: + code.putln(get_name_predeclaration) + code.putln('') + code.putln('static PyObject *%s;' % env.module_cname) + code.putln('static PyObject *%s;' % Naming.builtins_cname) + code.putln('static int %s;' % Naming.lineno_cname) + code.putln('static char *%s;' % Naming.filename_cname) + code.putln('staticforward char **%s;' % Naming.filetable_cname) + if env.doc: + code.putln('') + code.putln('static char %s[] = "%s";' % (env.doc_cname, env.doc)) + + def generate_includes(self, env, cimported_modules, code): + includes = env.include_files[:] + for module in cimported_modules: + for filename in module.include_files: + if filename not in includes: + includes.append(filename) + for filename in includes: + code.putln('#include "%s"' % filename) + + def generate_filename_table(self, code): + code.putln("") + code.putln("static char *%s[] = {" % Naming.filenames_cname) + if code.filename_list: + for filename in code.filename_list: + filename = os.path.basename(filename) + escaped_filename = filename.replace("\\", "\\\\").replace('"', r'\"') + code.putln('"%s",' % + escaped_filename) + else: + # Some C compilers don't like an empty array + code.putln("0") + code.putln("};") + code.putln("statichere char **%s = %s;" % + (Naming.filetable_cname, Naming.filenames_cname)) + + def generate_declarations_for_module(self, env, code): + code.putln("") + code.putln("/* Declarations from %s */" % 
env.qualified_name) + self.generate_type_predeclarations(env, code) + self.generate_type_definitions(env, code) + self.generate_global_declarations(env, code) + self.generate_cfunction_predeclarations(env, code) + + def generate_type_predeclarations(self, env, code): + pass + + def generate_type_definitions(self, env, code): + # Generate definitions of structs/unions/enums. + for entry in env.sue_entries: + if not entry.in_cinclude: + type = entry.type + if type.is_struct_or_union: + self.generate_struct_union_definition(entry, code) + else: + self.generate_enum_definition(entry, code) + # Generate extension type object struct definitions. + for entry in env.c_class_entries: + if not entry.in_cinclude: + self.generate_typeobject_predeclaration(entry, code) + self.generate_obj_struct_definition(entry.type, code) + self.generate_exttype_vtable_struct(entry, code) + self.generate_exttype_vtabptr_declaration(entry, code) + + def sue_header_footer(self, type, kind, name): + if type.typedef_flag: + header = "typedef %s {" % kind + footer = "} %s;" % name + else: + header = "%s %s {" % (kind, name) + footer = "};" + return header, footer + + def generate_struct_union_definition(self, entry, code): + type = entry.type + scope = type.scope + if scope: + header, footer = \ + self.sue_header_footer(type, type.kind, type.cname) + code.putln("") + code.putln(header) + var_entries = scope.var_entries + if not var_entries: + error(entry.pos, + "Empty struct or union definition not allowed outside a" + " 'cdef extern from' block") + for attr in var_entries: + code.putln( + "%s;" % + attr.type.declaration_code(attr.cname)) + code.putln(footer) + + def generate_enum_definition(self, entry, code): + type = entry.type + name = entry.cname or entry.name or "" + header, footer = \ + self.sue_header_footer(type, "enum", name) + code.putln("") + code.putln(header) + enum_values = entry.enum_values + if not enum_values: + error(entry.pos, + "Empty enum definition not allowed outside a" + " 
'cdef extern from' block") + for value_entry in enum_values: + if value_entry.value == value_entry.name: + code.putln( + "%s," % + value_entry.cname) + else: + code.putln( + "%s = %s," % ( + value_entry.cname, + value_entry.value)) + code.putln(footer) + + def generate_typeobject_predeclaration(self, entry, code): + code.putln("") + name = entry.type.typeobj_cname + if name: + if entry.visibility == 'extern' and not entry.in_cinclude: + code.putln("extern DL_IMPORT(PyTypeObject) %s;" % name) + elif entry.visibility == 'public': + code.putln("DL_EXPORT(PyTypeObject) %s;" % name) + else: + code.putln("staticforward PyTypeObject %s;" % name) + + def generate_exttype_vtable_struct(self, entry, code): + # Generate struct declaration for an extension type's vtable. + type = entry.type + scope = type.scope + if type.vtabstruct_cname: + code.putln("") + code.putln( + "struct %s {" % + type.vtabstruct_cname) + if type.base_type and type.base_type.vtabstruct_cname: + code.putln("struct %s %s;" % ( + type.base_type.vtabstruct_cname, + Naming.obj_base_cname)) + for method_entry in scope.cfunc_entries: + if not method_entry.is_inherited: + code.putln( + "%s;" % method_entry.type.declaration_code("(*%s)" % method_entry.name)) + code.putln( + "};") + + def generate_exttype_vtabptr_declaration(self, entry, code): + # Generate declaration of pointer to an extension type's vtable. + type = entry.type + if type.vtabptr_cname: + code.putln("static struct %s *%s;" % ( + type.vtabstruct_cname, + type.vtabptr_cname)) + + def generate_obj_struct_definition(self, type, code): + # Generate object struct definition for an + # extension type. 
+ if not type.scope: + return # Forward declared but never defined + header, footer = \ + self.sue_header_footer(type, "struct", type.objstruct_cname) + code.putln("") + code.putln(header) + base_type = type.base_type + if base_type: + code.putln( + "%s%s %s;" % ( + ("struct ", "")[base_type.typedef_flag], + base_type.objstruct_cname, + Naming.obj_base_cname)) + else: + code.putln( + "PyObject_HEAD") + if type.vtabslot_cname and not (type.base_type and type.base_type.vtabslot_cname): + code.putln( + "struct %s *%s;" % ( + type.vtabstruct_cname, + type.vtabslot_cname)) + for attr in type.scope.var_entries: + code.putln( + "%s;" % + attr.type.declaration_code(attr.cname)) + code.putln(footer) + + def generate_global_declarations(self, env, code): + code.putln("") + for entry in env.c_class_entries: + code.putln("static PyTypeObject *%s = 0;" % + entry.type.typeptr_cname) + code.put_var_declarations(env.var_entries, static = 1, + dll_linkage = "DL_EXPORT") + code.put_var_declarations(env.default_entries, static = 1) + + def generate_cfunction_predeclarations(self, env, code): + for entry in env.cfunc_entries: + if not entry.in_cinclude: + if entry.visibility == 'public': + dll_linkage = "DL_EXPORT" + else: + dll_linkage = None + header = entry.type.declaration_code(entry.cname, + dll_linkage = dll_linkage) + if entry.visibility <> 'private': + storage_class = "" + else: + storage_class = "static " + code.putln("%s%s; /*proto*/" % ( + storage_class, + header)) + + def generate_typeobj_definitions(self, env, code): + modname = env.module_name + for entry in env.c_class_entries: + #print "generate_typeobj_definitions:", entry.name + #print "...visibility =", entry.visibility + if entry.visibility <> 'extern': + type = entry.type + scope = type.scope + if scope: # could be None if there was an error + self.generate_exttype_vtable(scope, code) + self.generate_new_function(scope, code) + self.generate_dealloc_function(scope, code) + self.generate_traverse_function(scope, 
code) + self.generate_clear_function(scope, code) + if scope.defines_any(["__getitem__"]): + self.generate_getitem_int_function(scope, code) + if scope.defines_any(["__setitem__", "__delitem__"]): + self.generate_ass_subscript_function(scope, code) + if scope.defines_any(["__setslice__", "__delslice__"]): + self.generate_ass_slice_function(scope, code) + if scope.defines_any(["__getattr__"]): + self.generate_getattro_function(scope, code) + if scope.defines_any(["__setattr__", "__delattr__"]): + self.generate_setattro_function(scope, code) + if scope.defines_any(["__get__"]): + self.generate_descr_get_function(scope, code) + if scope.defines_any(["__set__", "__delete__"]): + self.generate_descr_set_function(scope, code) + self.generate_property_accessors(scope, code) + self.generate_method_table(scope, code) + self.generate_member_table(scope, code) + self.generate_getset_table(scope, code) + self.generate_typeobj_definition(modname, entry, code) + + def generate_exttype_vtable(self, scope, code): + # Generate the definition of an extension type's vtable. 
+ type = scope.parent_type + if type.vtable_cname: + code.putln("static struct %s %s;" % ( + type.vtabstruct_cname, + type.vtable_cname)) + + def generate_self_cast(self, scope, code): + type = scope.parent_type + code.putln( + "%s = (%s)o;" % ( + type.declaration_code("p"), + type.declaration_code(""))) + + def generate_new_function(self, scope, code): + base_type = scope.parent_type.base_type + code.putln("") + code.putln( + "static PyObject *%s(PyTypeObject *t, PyObject *a, PyObject *k) {" + % scope.mangle_internal("tp_new")) + if base_type: + code.putln( + "PyObject *o = %s->tp_new(t, a, k);" % + base_type.typeptr_cname) + else: + code.putln( + "PyObject *o = (*t->tp_alloc)(t, 0);") + self.generate_self_cast(scope, code) + type = scope.parent_type + if type.vtabslot_cname: + code.putln("p->%s = (struct %s *)%s;" % ( + type.vtabslot_cname, + type.vtabstruct_cname, + type.vtabptr_cname)) + for entry in scope.var_entries: + if entry.type.is_pyobject: + code.put_init_var_to_py_none(entry, "p->%s") + entry = scope.lookup_here("__new__") + if entry: + code.putln( + "if (%s(o, a, k) < 0) {" % + entry.func_cname) + code.put_decref_clear("o", py_object_type); + code.putln( + "}") + code.putln( + "return o;") + code.putln( + "}") + + def generate_dealloc_function(self, scope, code): + base_type = scope.parent_type.base_type + code.putln("") + code.putln( + "static void %s(PyObject *o) {" + % scope.mangle_internal("tp_dealloc")) + self.generate_self_cast(scope, code) + self.generate_usr_dealloc_call(scope, code) + for entry in scope.var_entries: + if entry.type.is_pyobject: + code.put_xdecref("p->%s" % entry.cname, entry.type) + if base_type: + code.putln( + "%s->tp_dealloc(o);" % + base_type.typeptr_cname) + else: + code.putln( + "(*o->ob_type->tp_free)(o);") + code.putln( + "}") + + def generate_usr_dealloc_call(self, scope, code): + entry = scope.lookup_here("__dealloc__") + if entry: + code.putln( + "{") + code.putln( + "PyObject *etype, *eval, *etb;") + code.putln( + 
"PyErr_Fetch(&etype, &eval, &etb);") + code.putln( + "++o->ob_refcnt;") + code.putln( + "%s(o);" % + entry.func_cname) + code.putln( + "if (PyErr_Occurred()) PyErr_WriteUnraisable(o);") + code.putln( + "--o->ob_refcnt;") + code.putln( + "PyErr_Restore(etype, eval, etb);") + code.putln( + "}") + + def generate_traverse_function(self, scope, code): + base_type = scope.parent_type.base_type + code.putln("") + code.putln( + "static int %s(PyObject *o, visitproc v, void *a) {" + % scope.mangle_internal("tp_traverse")) + code.putln( + "int e;") + self.generate_self_cast(scope, code) + if base_type: + code.putln( + "%s->tp_traverse(o, v, a);" % + base_type.typeptr_cname) + for entry in scope.var_entries: + if entry.type.is_pyobject: + var_code = "p->%s" % entry.cname + code.putln( + "if (%s) {" + % var_code) + if entry.type.is_extension_type: + var_code = "((PyObject*)%s)" % var_code + code.putln( + "e = (*v)(%s, a); if (e) return e;" + % var_code) + code.putln( + "}") + code.putln( + "return 0;") + code.putln( + "}") + + def generate_clear_function(self, scope, code): + base_type = scope.parent_type.base_type + code.putln("") + code.putln( + "static int %s(PyObject *o) {" + % scope.mangle_internal("tp_clear")) + self.generate_self_cast(scope, code) + if base_type: + code.putln( + "%s->tp_clear(o);" % + base_type.typeptr_cname) + for entry in scope.var_entries: + if entry.type.is_pyobject: + name = "p->%s" % entry.cname + code.put_xdecref(name, entry.type) + #code.put_init_to_py_none(name) + code.put_init_var_to_py_none(entry, "p->%s") + code.putln( + "return 0;") + code.putln( + "}") + + def generate_getitem_int_function(self, scope, code): + # This function is put into the sq_item slot when + # a __getitem__ method is present. It converts its + # argument to a Python integer and calls mp_subscript. 
+ code.putln( + "static PyObject *%s(PyObject *o, int i) {" % + scope.mangle_internal("sq_item")) + code.putln( + "PyObject *r;") + code.putln( + "PyObject *x = PyInt_FromLong(i); if(!x) return 0;") + code.putln( + "r = o->ob_type->tp_as_mapping->mp_subscript(o, x);") + code.putln( + "Py_DECREF(x);") + code.putln( + "return r;") + code.putln( + "}") + + def generate_ass_subscript_function(self, scope, code): + # Setting and deleting an item are both done through + # the ass_subscript method, so we dispatch to user's __setitem__ + # or __delitem__, or raise an exception. + base_type = scope.parent_type.base_type + set_entry = scope.lookup_here("__setitem__") + del_entry = scope.lookup_here("__delitem__") + code.putln("") + code.putln( + "static int %s(PyObject *o, PyObject *i, PyObject *v) {" % + scope.mangle_internal("mp_ass_subscript")) + code.putln( + "if (v) {") + if set_entry: + code.putln( + "return %s(o, i, v);" % + set_entry.func_cname) + else: + self.generate_guarded_basetype_call( + base_type, "tp_as_mapping", "mp_ass_subscript", "o, i, v", code) + code.putln( + "PyErr_Format(PyExc_NotImplementedError,") + code.putln( + ' "Subscript assignment not supported by %s", o->ob_type->tp_name);') + code.putln( + "return -1;") + code.putln( + "}") + code.putln( + "else {") + if del_entry: + code.putln( + "return %s(o, i);" % + del_entry.func_cname) + else: + self.generate_guarded_basetype_call( + base_type, "tp_as_mapping", "mp_ass_subscript", "o, i, v", code) + code.putln( + "PyErr_Format(PyExc_NotImplementedError,") + code.putln( + ' "Subscript deletion not supported by %s", o->ob_type->tp_name);') + code.putln( + "return -1;") + code.putln( + "}") + code.putln( + "}") + + def generate_guarded_basetype_call( + self, base_type, substructure, slot, args, code): + if base_type: + base_tpname = base_type.typeptr_cname + if substructure: + code.putln( + "if (%s->%s && %s->%s->%s)" % ( + base_tpname, substructure, base_tpname, substructure, slot)) + code.putln( + " 
return %s->%s->%s(%s);" % ( + base_tpname, substructure, slot, args)) + else: + code.putln( + "if (%s->%s)" % ( + base_tpname, slot)) + code.putln( + " return %s->%s(%s);" % ( + base_tpname, slot, args)) + + def generate_ass_slice_function(self, scope, code): + # Setting and deleting a slice are both done through + # the ass_slice method, so we dispatch to user's __setslice__ + # or __delslice__, or raise an exception. + base_type = scope.parent_type.base_type + set_entry = scope.lookup_here("__setslice__") + del_entry = scope.lookup_here("__delslice__") + code.putln("") + code.putln( + "static int %s(PyObject *o, int i, int j, PyObject *v) {" % + scope.mangle_internal("sq_ass_slice")) + code.putln( + "if (v) {") + if set_entry: + code.putln( + "return %s(o, i, j, v);" % + set_entry.func_cname) + else: + self.generate_guarded_basetype_call( + base_type, "tp_as_sequence", "sq_ass_slice", "o, i, j, v", code) + code.putln( + "PyErr_Format(PyExc_NotImplementedError,") + code.putln( + ' "2-element slice assignment not supported by %s", o->ob_type->tp_name);') + code.putln( + "return -1;") + code.putln( + "}") + code.putln( + "else {") + if del_entry: + code.putln( + "return %s(o, i, j);" % + del_entry.func_cname) + else: + self.generate_guarded_basetype_call( + base_type, "tp_as_sequence", "sq_ass_slice", "o, i, j, v", code) + code.putln( + "PyErr_Format(PyExc_NotImplementedError,") + code.putln( + ' "2-element slice deletion not supported by %s", o->ob_type->tp_name);') + code.putln( + "return -1;") + code.putln( + "}") + code.putln( + "}") + + def generate_getattro_function(self, scope, code): + # First try to get the attribute using PyObject_GenericGetAttr. + # If that raises an AttributeError, call the user's __getattr__ + # method. 
+ entry = scope.lookup_here("__getattr__") + code.putln("") + code.putln( + "static PyObject *%s(PyObject *o, PyObject *n) {" + % scope.mangle_internal("tp_getattro")) + code.putln( + "PyObject *v = PyObject_GenericGetAttr(o, n);") + code.putln( + "if (!v && PyErr_ExceptionMatches(PyExc_AttributeError)) {") + code.putln( + "PyErr_Clear();") + code.putln( + "v = %s(o, n);" % + entry.func_cname) + code.putln( + "}") + code.putln( + "return v;") + code.putln( + "}") + + def generate_setattro_function(self, scope, code): + # Setting and deleting an attribute are both done through + # the setattro method, so we dispatch to user's __setattr__ + # or __delattr__ or fall back on PyObject_GenericSetAttr. + base_type = scope.parent_type.base_type + set_entry = scope.lookup_here("__setattr__") + del_entry = scope.lookup_here("__delattr__") + code.putln("") + code.putln( + "static int %s(PyObject *o, PyObject *n, PyObject *v) {" % + scope.mangle_internal("tp_setattro")) + code.putln( + "if (v) {") + if set_entry: + code.putln( + "return %s(o, n, v);" % + set_entry.func_cname) + else: + self.generate_guarded_basetype_call( + base_type, None, "tp_setattro", "o, n, v", code) + code.putln( + "return PyObject_GenericSetAttr(o, n, v);") + code.putln( + "}") + code.putln( + "else {") + if del_entry: + code.putln( + "return %s(o, n);" % + del_entry.func_cname) + else: + self.generate_guarded_basetype_call( + base_type, None, "tp_setattro", "o, n, v", code) + code.putln( + "return PyObject_GenericSetAttr(o, n, 0);") + code.putln( + "}") + code.putln( + "}") + + def generate_descr_get_function(self, scope, code): + # The __get__ function of a descriptor object can be + # called with NULL for the second or third arguments + # under some circumstances, so we replace them with + # None in that case. 
+ user_get_entry = scope.lookup_here("__get__") + code.putln("") + code.putln( + "static PyObject *%s(PyObject *o, PyObject *i, PyObject *c) {" % + scope.mangle_internal("tp_descr_get")) + code.putln( + "PyObject *r = 0;") + code.putln( + "if (!i) i = Py_None;") + code.putln( + "if (!c) c = Py_None;") + #code.put_incref("i", py_object_type) + #code.put_incref("c", py_object_type) + code.putln( + "r = %s(o, i, c);" % + user_get_entry.func_cname) + #code.put_decref("i", py_object_type) + #code.put_decref("c", py_object_type) + code.putln( + "return r;") + code.putln( + "}") + + def generate_descr_set_function(self, scope, code): + # Setting and deleting are both done through the __set__ + # method of a descriptor, so we dispatch to user's __set__ + # or __delete__ or raise an exception. + base_type = scope.parent_type.base_type + user_set_entry = scope.lookup_here("__set__") + user_del_entry = scope.lookup_here("__delete__") + code.putln("") + code.putln( + "static int %s(PyObject *o, PyObject *i, PyObject *v) {" % + scope.mangle_internal("tp_descr_set")) + code.putln( + "if (v) {") + if user_set_entry: + code.putln( + "return %s(o, i, v);" % + user_set_entry.func_cname) + else: + self.generate_guarded_basetype_call( + base_type, None, "tp_descr_set", "o, i, v", code) + code.putln( + 'PyErr_SetString(PyExc_NotImplementedError, "__set__");') + code.putln( + "return -1;") + code.putln( + "}") + code.putln( + "else {") + if user_del_entry: + code.putln( + "return %s(o, i);" % + user_del_entry.func_cname) + else: + self.generate_guarded_basetype_call( + base_type, None, "tp_descr_set", "o, i, v", code) + code.putln( + 'PyErr_SetString(PyExc_NotImplementedError, "__delete__");') + code.putln( + "return -1;") + code.putln( + "}") + code.putln( + "}") + + def generate_property_accessors(self, cclass_scope, code): + for entry in cclass_scope.property_entries: + property_scope = entry.scope + if property_scope.defines_any(["__get__"]): + 
self.generate_property_get_function(entry, code) + if property_scope.defines_any(["__set__", "__del__"]): + self.generate_property_set_function(entry, code) + + def generate_property_get_function(self, property_entry, code): + property_scope = property_entry.scope + property_entry.getter_cname = property_scope.parent_scope.mangle( + Naming.prop_get_prefix, property_entry.name) + get_entry = property_scope.lookup_here("__get__") + code.putln("") + code.putln( + "static PyObject *%s(PyObject *o, void *x) {" % + property_entry.getter_cname) + code.putln( + "return %s(o);" % + get_entry.func_cname) + code.putln( + "}") + + def generate_property_set_function(self, property_entry, code): + property_scope = property_entry.scope + property_entry.setter_cname = property_scope.parent_scope.mangle( + Naming.prop_set_prefix, property_entry.name) + set_entry = property_scope.lookup_here("__set__") + del_entry = property_scope.lookup_here("__del__") + code.putln("") + code.putln( + "static int %s(PyObject *o, PyObject *v, void *x) {" % + property_entry.setter_cname) + code.putln( + "if (v) {") + if set_entry: + code.putln( + "return %s(o, v);" % + set_entry.func_cname) + else: + code.putln( + 'PyErr_SetString(PyExc_NotImplementedError, "__set__");') + code.putln( + "return -1;") + code.putln( + "}") + code.putln( + "else {") + if del_entry: + code.putln( + "return %s(o);" % + del_entry.func_cname) + else: + code.putln( + 'PyErr_SetString(PyExc_NotImplementedError, "__del__");') + code.putln( + "return -1;") + code.putln( + "}") + code.putln( + "}") + + def generate_typeobj_definition(self, modname, entry, code): + type = entry.type + scope = type.scope + for suite in TypeSlots.substructures: + suite.generate_substructure(scope, code) + code.putln("") + if entry.visibility == 'public': + header = "DL_EXPORT(PyTypeObject) %s = {" + else: + header = "statichere PyTypeObject %s = {" + #code.putln(header % scope.parent_type.typeobj_cname) + code.putln(header % type.typeobj_cname) + 
code.putln( + "PyObject_HEAD_INIT(0)") + code.putln( + "0, /*ob_size*/") + code.putln( + '"%s.%s", /*tp_name*/' % ( + modname, scope.class_name)) + if type.typedef_flag: + objstruct = type.objstruct_cname + else: + #objstruct = "struct %s" % scope.parent_type.objstruct_cname + objstruct = "struct %s" % type.objstruct_cname + code.putln( + "sizeof(%s), /*tp_basicsize*/" % + objstruct) + code.putln( + "0, /*tp_itemsize*/") + for slot in TypeSlots.slot_table: + slot.generate(scope, code) + code.putln( + "};") + + def generate_method_table(self, env, code): + code.putln("") + code.putln( + "static struct PyMethodDef %s[] = {" % + env.method_table_cname) + for entry in env.pyfunc_entries: + code.put_pymethoddef(entry, ",") + code.putln( + "{0, 0, 0, 0}") + code.putln( + "};") + + def generate_member_table(self, env, code): + #print "ModuleNode.generate_member_table: scope =", env ### + if env.public_attr_entries: + code.putln("") + code.putln( + "static struct PyMemberDef %s[] = {" % + env.member_table_cname) + type = env.parent_type + if type.typedef_flag: + objstruct = type.objstruct_cname + else: + objstruct = "struct %s" % type.objstruct_cname + for entry in env.public_attr_entries: + type_code = entry.type.pymemberdef_typecode + if entry.visibility == 'readonly': + flags = "READONLY" + else: + flags = "0" + code.putln('{"%s", %s, %s, %s, 0},' % ( + entry.name, + type_code, + "offsetof(%s, %s)" % (objstruct, entry.name), + flags)) + code.putln( + "{0, 0, 0, 0, 0}") + code.putln( + "};") + + def generate_getset_table(self, env, code): + if env.property_entries: + code.putln("") + code.putln( + "static struct PyGetSetDef %s[] = {" % + env.getset_table_cname) + for entry in env.property_entries: + code.putln( + '{"%s", %s, %s, %s, 0},' % ( + entry.name, + entry.getter_cname or "0", + entry.setter_cname or "0", + entry.doc_cname or "0")) + code.putln( + "{0, 0, 0, 0, 0}") + code.putln( + "};") + + def generate_interned_name_table(self, env, code): + items = 
env.intern_map.items() + if items: + items.sort() + code.putln("") + code.putln( + "static __Pyx_InternTabEntry %s[] = {" % + Naming.intern_tab_cname) + for (name, cname) in items: + code.putln( + '{&%s, "%s"},' % ( + cname, + name)) + code.putln( + "{0, 0}") + code.putln( + "};") + + def generate_py_string_table(self, env, code): + entries = env.all_pystring_entries + if entries: + code.putln("") + code.putln( + "static __Pyx_StringTabEntry %s[] = {" % + Naming.stringtab_cname) + for entry in entries: + code.putln( + "{&%s, %s, sizeof(%s)}," % ( + entry.pystring_cname, + entry.cname, + entry.cname)) + code.putln( + "{0, 0, 0}") + code.putln( + "};") + + def generate_module_init_func(self, imported_modules, env, code): + code.putln("") + header = "DL_EXPORT(void) init%s(void)" % env.module_name + code.putln("%s; /*proto*/" % header) + code.putln("%s {" % header) + code.put_var_declarations(env.temp_entries) + env.generate_library_function_declarations(code) + self.generate_module_creation_code(env, code) + self.generate_intern_code(env, code) + self.generate_string_init_code(env, code) + self.generate_global_init_code(env, code) + for module in imported_modules: + self.generate_type_import_code_for_module(module, env, code) + self.generate_type_init_code(env, code) + self.body.generate_execution_code(code) + code.putln("return;") + code.put_label(code.error_label) + code.put_var_xdecrefs(env.temp_entries) + code.putln('__Pyx_AddTraceback("%s");' % (env.module_name)) + env.use_utility_code(traceback_utility_code) + code.putln('}') + + def generate_module_creation_code(self, env, code): + # Generate code to create the module object and + # install the builtins. 
+ if env.doc: + doc = env.doc_cname + else: + doc = "0" + code.putln( + '%s = Py_InitModule4("%s", %s, %s, 0, PYTHON_API_VERSION);' % ( + env.module_cname, + env.module_name, + env.method_table_cname, + doc)) + code.putln( + "if (!%s) %s;" % ( + env.module_cname, + code.error_goto(self.pos))); + code.putln( + '%s = PyImport_AddModule("__builtin__");' % + Naming.builtins_cname) + code.putln( + "if (!%s) %s;" % ( + Naming.builtins_cname, + code.error_goto(self.pos))); + code.putln( + 'if (PyObject_SetAttrString(%s, "__builtins__", %s) < 0) %s;' % ( + env.module_cname, + Naming.builtins_cname, + code.error_goto(self.pos))) + + def generate_intern_code(self, env, code): + if env.intern_map: + env.use_utility_code(init_intern_tab_utility_code); + code.putln( + "if (__Pyx_InternStrings(%s) < 0) %s;" % ( + Naming.intern_tab_cname, + code.error_goto(self.pos))) + + def generate_string_init_code(self, env, code): + if env.all_pystring_entries: + env.use_utility_code(init_string_tab_utility_code) + code.putln( + "if (__Pyx_InitStrings(%s) < 0) %s;" % ( + Naming.stringtab_cname, + code.error_goto(self.pos))) + + def generate_global_init_code(self, env, code): + # Generate code to initialise global PyObject * + # variables to None. + for entry in env.var_entries: + if entry.visibility <> 'extern': + if entry.type.is_pyobject: + code.put_init_var_to_py_none(entry) + + def generate_type_import_code_for_module(self, module, env, code): + # Generate type import code for all extension types in + # an imported module. + if module.c_class_entries: + for entry in module.c_class_entries: + self.generate_type_import_code(env, entry, code) + + def generate_type_init_code(self, env, code): + # Generate type import code for extern extension types + # and type ready code for non-extern ones. 
+ for entry in env.c_class_entries: + if entry.visibility == 'extern': + self.generate_type_import_code(env, entry, code) + else: + self.generate_exttype_vtable_init_code(entry, code) + self.generate_type_ready_code(env, entry, code) + self.generate_typeptr_assignment_code(entry, code) + + def use_type_import_utility_code(self, env): + import ExprNodes + env.use_utility_code(type_import_utility_code) + env.use_utility_code(ExprNodes.import_utility_code) + + def generate_type_import_code(self, env, entry, code): + # Generate code to import the typeobject of an + # extension type defined in another module, and + # extract its C method table pointer if any. + type = entry.type + if type.typedef_flag: + objstruct = type.objstruct_cname + else: + objstruct = "struct %s" % type.objstruct_cname + code.putln('%s = __Pyx_ImportType("%s", "%s", sizeof(%s)); if (!%s) %s' % ( + type.typeptr_cname, + type.module_name, + type.name, + objstruct, + type.typeptr_cname, + code.error_goto(entry.pos))) + self.use_type_import_utility_code(env) + if type.vtabptr_cname: + code.putln( + "if (__Pyx_GetVtable(%s->tp_dict, &%s) < 0) %s" % ( + type.typeptr_cname, + type.vtabptr_cname, + code.error_goto(entry.pos))) + env.use_utility_code(get_vtable_utility_code) + + def generate_type_ready_code(self, env, entry, code): + # Generate a call to PyType_Ready for an extension + # type defined in this module. 
+ type = entry.type + typeobj_cname = type.typeobj_cname + scope = type.scope + if scope: # could be None if there was an error + if entry.visibility <> 'extern': + for slot in TypeSlots.slot_table: + slot.generate_dynamic_init_code(scope, code) + code.putln( + "if (PyType_Ready(&%s) < 0) %s" % ( + typeobj_cname, + code.error_goto(entry.pos))) + if type.vtable_cname: + code.putln( + "if (__Pyx_SetVtable(%s.tp_dict, %s) < 0) %s" % ( + typeobj_cname, + type.vtabptr_cname, + code.error_goto(entry.pos))) + env.use_utility_code(set_vtable_utility_code) + code.putln( + 'if (PyObject_SetAttrString(%s, "%s", (PyObject *)&%s) < 0) %s' % ( + Naming.module_cname, + scope.class_name, + typeobj_cname, + code.error_goto(entry.pos))) + + def generate_exttype_vtable_init_code(self, entry, code): + # Generate code to initialise the C method table of an + # extension type. + type = entry.type + if type.vtable_cname: + code.putln( + "%s = &%s;" % ( + type.vtabptr_cname, + type.vtable_cname)) + if type.base_type and type.base_type.vtabptr_cname: + code.putln( + "%s.%s = *%s;" % ( + type.vtable_cname, + Naming.obj_base_cname, + type.base_type.vtabptr_cname)) + for meth_entry in type.scope.cfunc_entries: + #if not meth_entry.is_inherited: + if meth_entry.func_cname: + code.putln( + "%s.%s = (void *)%s;" % ( + type.vtable_cname, + meth_entry.cname, + meth_entry.func_cname)) + + def generate_typeptr_assignment_code(self, entry, code): + # Generate code to initialise the typeptr of an extension + # type defined in this module to point to its type object. 
+ type = entry.type + if type.typeobj_cname: + code.putln( + "%s = &%s;" % ( + type.typeptr_cname, type.typeobj_cname)) + + def generate_utility_functions(self, env, code): + code.putln("") + code.putln("/* Runtime support code */") + for utility_code in env.utility_code_used: + code.put(utility_code) + + +class StatListNode(Node): + # stats a list of StatNode + + def analyse_declarations(self, env): + #print "StatListNode.analyse_declarations" ### + for stat in self.stats: + stat.analyse_declarations(env) + + def analyse_expressions(self, env): + #print "StatListNode.analyse_expressions" ### + for stat in self.stats: + stat.analyse_expressions(env) + + def generate_function_definitions(self, env, code): + #print "StatListNode.generate_function_definitions" ### + for stat in self.stats: + stat.generate_function_definitions(env, code) + + def generate_execution_code(self, code): + #print "StatListNode.generate_execution_code" ### + for stat in self.stats: + code.mark_pos(stat.pos) + stat.generate_execution_code(code) + + +class StatNode(Node): + # + # Code generation for statements is split into the following subphases: + # + # (1) generate_function_definitions + # Emit C code for the definitions of any structs, + # unions, enums and functions defined in the current + # scope-block. + # + # (2) generate_execution_code + # Emit C code for executable statements. 
+ # + + def generate_function_definitions(self, env, code): + pass + + def generate_execution_code(self, code): + raise InternalError("generate_execution_code not implemented for %s" % \ + self.__class__.__name__) + + +class CDefExternNode(StatNode): + # include_file string or None + # body StatNode + + def analyse_declarations(self, env): + if self.include_file: + env.add_include_file(self.include_file) + old_cinclude_flag = env.in_cinclude + env.in_cinclude = 1 + self.body.analyse_declarations(env) + env.in_cinclude = old_cinclude_flag + + def analyse_expressions(self, env): + pass + + def generate_execution_code(self, code): + pass + + +class CDeclaratorNode(Node): + # Part of a C declaration. + # + # Processing during analyse_declarations phase: + # + # analyse + # Returns (name, type) pair where name is the + # CNameDeclaratorNode of the name being declared + # and type is the type it is being declared as. + # + pass + + +class CNameDeclaratorNode(CDeclaratorNode): + # name string The Pyrex name being declared + # cname string or None C name, if specified + + def analyse(self, base_type, env): + return self, base_type + + +class CPtrDeclaratorNode(CDeclaratorNode): + # base CDeclaratorNode + + def analyse(self, base_type, env): + if base_type.is_pyobject: + error(self.pos, + "Pointer base type cannot be a Python object") + ptr_type = PyrexTypes.c_ptr_type(base_type) + return self.base.analyse(ptr_type, env) + + +class CArrayDeclaratorNode(CDeclaratorNode): + # base CDeclaratorNode + # dimension ExprNode + + def analyse(self, base_type, env): + if self.dimension: + self.dimension.analyse_const_expression(env) + if not self.dimension.type.is_int: + error(self.dimension.pos, "Array dimension not integer") + #size = self.dimension.value + size = self.dimension.result + else: + size = None + if not base_type.is_complete(): + error(self.pos, + "Array element type '%s' is incomplete" % base_type) + if base_type.is_pyobject: + error(self.pos, + "Array element cannot 
be a Python object") + array_type = PyrexTypes.c_array_type(base_type, size) + return self.base.analyse(array_type, env) + + +class CFuncDeclaratorNode(CDeclaratorNode): + # base CDeclaratorNode + # args [CArgDeclNode] + # has_varargs boolean + # exception_value ConstNode + # exception_check boolean True if PyErr_Occurred check needed + + def analyse(self, return_type, env): + func_type_args = [] + for arg_node in self.args: + name_declarator, type = arg_node.analyse(env) + name = name_declarator.name + if name_declarator.cname: + error(self.pos, + "Function argument cannot have C name specification") + # Turn *[] argument into ** + if type.is_array: + type = PyrexTypes.c_ptr_type(type.base_type) + # Catch attempted C-style func(void) decl + if type.is_void: + error(arg_node.pos, "Function argument cannot be void") + func_type_args.append( + PyrexTypes.CFuncTypeArg(name, type, arg_node.pos)) + if arg_node.default: + error(arg_node.pos, "C function argument cannot have default value") + exc_val = None + exc_check = 0 + if return_type.is_pyobject \ + and (self.exception_value or self.exception_check): + error(self.pos, + "Exception clause not allowed for function returning Python object") + else: + if self.exception_value: + self.exception_value.analyse_const_expression(env) + exc_val = self.exception_value.result + if not return_type.assignable_from(self.exception_value.type): + error(self.exception_value.pos, + "Exception value incompatible with function return type") + exc_check = self.exception_check + func_type = PyrexTypes.CFuncType( + return_type, func_type_args, self.has_varargs, + exception_value = exc_val, exception_check = exc_check) + return self.base.analyse(func_type, env) + + +class CArgDeclNode(Node): + # Item in a function declaration argument list. 
+ # + # base_type CBaseTypeNode + # declarator CDeclaratorNode + # not_none boolean Tagged with 'not None' + # default ExprNode or None + # default_entry Symtab.Entry Entry for the variable holding the default value + # is_self_arg boolean Is the "self" arg of an extension type method + + is_self_arg = 0 + + def analyse(self, env): + base_type = self.base_type.analyse(env) + return self.declarator.analyse(base_type, env) + + +class CBaseTypeNode(Node): + # Abstract base class for C base type nodes. + # + # Processing during analyse_declarations phase: + # + # analyse + # Returns the type. + + pass + + +class CSimpleBaseTypeNode(CBaseTypeNode): + # name string + # module_path [string] Qualifying name components + # is_basic_c_type boolean + # signed boolean + # longness integer + # is_self_arg boolean Is self argument of C method + + def analyse(self, env): + # Return type descriptor. + type = None + if self.is_basic_c_type: + type = PyrexTypes.simple_c_type(self.signed, self.longness, self.name) + if not type: + error(self.pos, "Unrecognised type modifier combination") + elif self.name == "object" and not self.module_path: + type = py_object_type + elif self.name is None: + if self.is_self_arg and env.is_c_class_scope: + type = env.parent_type + else: + type = py_object_type + else: + scope = env + for name in self.module_path: + entry = scope.find(name, self.pos) + if entry and entry.as_module: + scope = entry.as_module + else: + if entry: + error(self.pos, "'%s' is not a cimported module" % name) + scope = None + break + if scope: + entry = scope.find(self.name, self.pos) + if entry and entry.is_type: + type = entry.type + else: + error(self.pos, "'%s' is not a type identifier" % self.name) + if type: + return type + else: + return PyrexTypes.error_type + + +class CComplexBaseTypeNode(CBaseTypeNode): + # base_type CBaseTypeNode + # declarator CDeclaratorNode + + def analyse(self, env): + base = self.base_type.analyse(env) + _, type = self.declarator.analyse(base, 
env) + return type + + +class CVarDefNode(StatNode): + # C variable definition or forward/extern function declaration. + # + # visibility 'private' or 'public' or 'extern' + # base_type CBaseTypeNode + # declarators [CDeclaratorNode] + + def analyse_declarations(self, env, dest_scope = None): + if not dest_scope: + dest_scope = env + base_type = self.base_type.analyse(env) + for declarator in self.declarators: + name_declarator, type = declarator.analyse(base_type, env) + if not type.is_complete(): + if not (self.visibility == 'extern' and type.is_array): + error(declarator.pos, + "Variable type '%s' is incomplete" % type) + if self.visibility == 'extern' and type.is_pyobject: + error(declarator.pos, + "Python object cannot be declared extern") + name = name_declarator.name + cname = name_declarator.cname + if type.is_cfunction: + dest_scope.declare_cfunction(name, type, declarator.pos, + cname = cname, visibility = self.visibility) + else: + dest_scope.declare_var(name, type, declarator.pos, + cname = cname, visibility = self.visibility, is_cdef = 1) + + def analyse_expressions(self, env): + pass + + def generate_execution_code(self, code): + pass + + +class CStructOrUnionDefNode(StatNode): + # name string + # cname string or None + # kind "struct" or "union" + # typedef_flag boolean + # attributes [CVarDefNode] or None + # entry Entry + + def analyse_declarations(self, env): + scope = None + if self.attributes is not None: + scope = StructOrUnionScope() + self.entry = env.declare_struct_or_union( + self.name, self.kind, scope, self.typedef_flag, self.pos, + self.cname) + if self.attributes is not None: + for attr in self.attributes: + attr.analyse_declarations(env, scope) + + def analyse_expressions(self, env): + pass + + def generate_execution_code(self, code): + pass + + +class CEnumDefNode(StatNode): + # name string or None + # cname string or None + # items [CEnumDefItemNode] + # typedef_flag boolean + # entry Entry + + def analyse_declarations(self, env): + 
self.entry = env.declare_enum(self.name, self.pos, + cname = self.cname, typedef_flag = self.typedef_flag) + for item in self.items: + item.analyse_declarations(env, self.entry) + + def analyse_expressions(self, env): + pass + + def generate_execution_code(self, code): + pass + + +class CEnumDefItemNode(StatNode): + # name string + # cname string or None + # value ExprNode or None + + def analyse_declarations(self, env, enum_entry): + if self.value: + self.value.analyse_const_expression(env) + value = self.value.result + else: + value = self.name + entry = env.declare_const(self.name, enum_entry.type, + value, self.pos, cname = self.cname) + enum_entry.enum_values.append(entry) + + +class CTypeDefNode(StatNode): + # base_type CBaseTypeNode + # declarator CDeclaratorNode + + def analyse_declarations(self, env): + base = self.base_type.analyse(env) + name_declarator, type = self.declarator.analyse(base, env) + name = name_declarator.name + cname = name_declarator.cname + if env.in_cinclude: + type = CTypedefType(cname or name, type) + env.declare_type(name, type, self.pos, cname = cname) + + def analyse_expressions(self, env): + pass + + def generate_execution_code(self, code): + pass + + +class FuncDefNode(StatNode, BlockNode): + # Base class for function definition nodes. 
+ # + # return_type PyrexType + # #filename string C name of filename string const + # entry Symtab.Entry + + def analyse_expressions(self, env): + pass + + def generate_function_definitions(self, env, code): + # Generate C code for header and body of function + genv = env.global_scope() + lenv = LocalScope(name = self.entry.name, outer_scope = genv) + #lenv.function_name = self.function_name() + lenv.return_type = self.return_type + #self.filename = lenv.get_filename_const(self.pos) + code.init_labels() + self.declare_arguments(lenv) + self.body.analyse_declarations(lenv) + self.body.analyse_expressions(lenv) + # Code for nested function definitions would go here + # if we supported them, which we probably won't. + # ----- Top-level constants used by this function + self.generate_interned_name_decls(lenv, code) + self.generate_py_string_decls(lenv, code) + #code.putln("") + #code.put_var_declarations(lenv.const_entries, static = 1) + self.generate_const_definitions(lenv, code) + # ----- Function header + code.putln("") + self.generate_function_header(code, + with_pymethdef = env.is_py_class_scope) + # ----- Local variable declarations + self.generate_argument_declarations(lenv, code) + code.put_var_declarations(lenv.var_entries) + init = "" + if not self.return_type.is_void: + code.putln( + "%s%s;" % + (self.return_type.declaration_code( + Naming.retval_cname), + init)) + code.put_var_declarations(lenv.temp_entries) + self.generate_keyword_list(code) + # ----- Extern library function declarations + lenv.generate_library_function_declarations(code) + # ----- Fetch arguments + self.generate_argument_parsing_code(code) + self.generate_argument_increfs(lenv, code) + #self.generate_stararg_getting_code(code) + self.generate_argument_conversion_code(code) + # ----- Initialise local variables + for entry in lenv.var_entries: + if entry.type.is_pyobject and entry.init_to_none: + code.put_init_var_to_py_none(entry) + # ----- Check types of arguments + 
self.generate_argument_type_tests(code) + # ----- Function body + self.body.generate_execution_code(code) + # ----- Default return value + code.putln("") + if self.return_type.is_pyobject: + if self.return_type.is_extension_type: + cast = "(PyObject *)" + else: + cast = None + lhs = Naming.retval_cname + code.put_init_to_py_none(cast, lhs) + else: + val = self.return_type.default_value + if val: + code.putln("%s = %s;" % (Naming.retval_cname, val)) + code.putln("goto %s;" % code.return_label) + # ----- Error cleanup + code.put_label(code.error_label) + code.put_var_xdecrefs(lenv.temp_entries) + err_val = self.error_value() + exc_check = self.caller_will_check_exceptions() + if err_val is not None or exc_check: + code.putln( + '__Pyx_AddTraceback("%s");' % + self.entry.qualified_name) + if err_val is not None: + code.putln( + "%s = %s;" % ( + Naming.retval_cname, + err_val)) + else: + code.putln( + '__Pyx_WriteUnraisable("%s");' % + self.entry.qualified_name) + env.use_utility_code(unraisable_exception_utility_code) + # ----- Return cleanup + code.put_label(code.return_label) + code.put_var_decrefs(lenv.var_entries) + code.put_var_decrefs(lenv.arg_entries) + self.put_stararg_decrefs(code) + if not self.return_type.is_void: + retval_code = Naming.retval_cname + if self.return_type.is_extension_type: + retval_code = "((%s)%s) " % ( + self.return_type.declaration_code(""), + retval_code) + code.putln("return %s;" % retval_code) + code.putln("}") + + def put_stararg_decrefs(self, code): + pass + + def declare_argument(self, env, arg): + if arg.type.is_void: + error(arg.pos, "Invalid use of 'void'") + elif not arg.type.is_complete() and not arg.type.is_array: + error(arg.pos, + "Argument type '%s' is incomplete" % arg.type) + return env.declare_arg(arg.name, arg.type, arg.pos) + + def generate_argument_increfs(self, env, code): + # Turn borrowed argument refs into owned refs. + # This is necessary, because if the argument is + # assigned to, it will be decrefed. 
+ for entry in env.arg_entries: + code.put_var_incref(entry) + + def generate_execution_code(self, code): + pass + + +class CFuncDefNode(FuncDefNode): + # C function definition. + # + # visibility 'private' or 'public' or 'extern' + # base_type CBaseTypeNode + # declarator CDeclaratorNode + # body StatListNode + # + # type CFuncType + + def unqualified_name(self): + return self.entry.name + + def analyse_declarations(self, env): + base_type = self.base_type.analyse(env) + name_declarator, type = self.declarator.analyse(base_type, env) + # Remember the actual type according to the function header + # written here, because the type in the symbol table entry + # may be different if we're overriding a C method inherited + # from the base type of an extension type. + self.type = type + if not type.is_cfunction: + error(self.pos, + "Suite attached to non-function declaration") + name = name_declarator.name + cname = name_declarator.cname + self.entry = env.declare_cfunction( + name, type, self.pos, + cname = cname, visibility = self.visibility, + defining = self.body is not None) + self.return_type = type.return_type + + def declare_arguments(self, env): + for arg in self.type.args: + if not arg.name: + error(arg.pos, "Missing argument name") + self.declare_argument(env, arg) + + def generate_function_header(self, code, with_pymethdef): + arg_decls = [] + type = self.type + for arg in type.args: + arg_decls.append(arg.declaration_code()) + if type.has_varargs: + arg_decls.append("...") + if not arg_decls: + arg_decls = ["void"] + entity = "%s(%s)" % (self.entry.func_cname, + string.join(arg_decls, ",")) + if self.visibility == 'public': + dll_linkage = "DL_EXPORT" + else: + dll_linkage = None + header = self.return_type.declaration_code(entity, + dll_linkage = dll_linkage) + if self.visibility <> 'private': + storage_class = "" + else: + storage_class = "static " + code.putln("%s%s {" % ( + storage_class, + header)) + + def generate_argument_declarations(self, env, 
code): + # Arguments already declared in function header + pass + + def generate_keyword_list(self, code): + pass + + def generate_argument_parsing_code(self, code): + pass + +# def generate_stararg_getting_code(self, code): +# pass + + def generate_argument_conversion_code(self, code): + pass + + def generate_argument_type_tests(self, code): + pass + + def error_value(self): + if self.return_type.is_pyobject: + return "0" + else: + #return None + return self.entry.type.exception_value + + def caller_will_check_exceptions(self): + return self.entry.type.exception_check + + +class PyArgDeclNode(Node): + # Argument which must be a Python object (used + # for * and ** arguments). + # + # name string + # entry Symtab.Entry + + pass + + +class DefNode(FuncDefNode): + # A Python function definition. + # + # name string the Python name of the function + # args [CArgDeclNode] formal arguments + # star_arg PyArgDeclNode or None * argument + # starstar_arg PyArgDeclNode or None ** argument + # doc string or None + # body StatListNode + # + # The following subnode is constructed internally + # when the def statement is inside a Python class definition. 
+ # + # assmt AssignmentNode Function construction/assignment + + assmt = None + + def analyse_declarations(self, env): + for arg in self.args: + base_type = arg.base_type.analyse(env) + name_declarator, type = \ + arg.declarator.analyse(base_type, env) + arg.name = name_declarator.name + if name_declarator.cname: + error(self.pos, + "Python function argument cannot have C name specification") + arg.type = type.as_argument_type() + arg.hdr_type = None + arg.needs_conversion = 0 + arg.needs_type_test = 0 + arg.is_generic = 1 + if arg.not_none and not arg.type.is_extension_type: + error(self.pos, + "Only extension type arguments can have 'not None'") + self.declare_pyfunction(env) + self.analyse_signature(env) + self.return_type = self.entry.signature.return_type() + if self.star_arg or self.starstar_arg: + env.use_utility_code(get_starargs_utility_code) + + def analyse_signature(self, env): + any_type_tests_needed = 0 + sig = self.entry.signature + nfixed = sig.num_fixed_args() + for i in range(nfixed): + if i < len(self.args): + arg = self.args[i] + arg.is_generic = 0 + if sig.is_self_arg(i): + arg.is_self_arg = 1 + arg.hdr_type = arg.type = env.parent_type + arg.needs_conversion = 0 + else: + arg.hdr_type = sig.fixed_arg_type(i) + if not arg.type.same_as(arg.hdr_type): + if arg.hdr_type.is_pyobject and arg.type.is_pyobject: + arg.needs_type_test = 1 + any_type_tests_needed = 1 + else: + arg.needs_conversion = 1 + if arg.needs_conversion: + arg.hdr_cname = Naming.arg_prefix + arg.name + else: + arg.hdr_cname = Naming.var_prefix + arg.name + else: + self.bad_signature() + return + if nfixed < len(self.args): + if not sig.has_generic_args: + self.bad_signature() + for arg in self.args: + if arg.is_generic and arg.type.is_extension_type: + arg.needs_type_test = 1 + any_type_tests_needed = 1 + if any_type_tests_needed: + env.use_utility_code(arg_type_test_utility_code) + + def bad_signature(self): + sig = self.entry.signature + expected_str = "%d" % 
sig.num_fixed_args() + if sig.has_generic_args: + expected_str = expected_str + " or more" + name = self.name + if name.startswith("__") and name.endswith("__"): + desc = "Special method" + else: + desc = "Method" + error(self.pos, + "%s %s has wrong number of arguments " + "(%d declared, %s expected)" % ( + desc, self.name, len(self.args), expected_str)) + + def declare_pyfunction(self, env): + self.entry = env.declare_pyfunction(self.name, self.pos) + self.entry.doc = self.doc + self.entry.func_cname = \ + Naming.func_prefix + env.scope_prefix + self.name + self.entry.doc_cname = \ + Naming.funcdoc_prefix + env.scope_prefix + self.name + self.entry.pymethdef_cname = \ + Naming.pymethdef_prefix + env.scope_prefix + self.name + + def declare_arguments(self, env): + for arg in self.args: + if not arg.name: + error(arg.pos, "Missing argument name") + if arg.needs_conversion: + arg.entry = env.declare_var(arg.name, arg.type, arg.pos) + if arg.type.is_pyobject: + arg.entry.init = "0" + arg.entry.init_to_none = 0 + else: + arg.entry = self.declare_argument(env, arg) + arg.entry.is_self_arg = arg.is_self_arg + if arg.hdr_type: + if arg.is_self_arg or \ + (arg.type.is_extension_type and not arg.hdr_type.is_extension_type): + arg.entry.is_declared_generic = 1 + self.declare_python_arg(env, self.star_arg) + self.declare_python_arg(env, self.starstar_arg) + + def declare_python_arg(self, env, arg): + if arg: + arg.entry = env.declare_var(arg.name, + PyrexTypes.py_object_type, arg.pos) + arg.entry.init = "0" + arg.entry.init_to_none = 0 + arg.entry.xdecref_cleanup = 1 + + def analyse_expressions(self, env): + self.analyse_default_values(env) + if env.is_py_class_scope: + self.synthesize_assignment_node(env) + + def analyse_default_values(self, env): + for arg in self.args: + if arg.default: + if arg.is_generic: + arg.default.analyse_types(env) + arg.default = arg.default.coerce_to(arg.type, env) + arg.default.allocate_temps(env) + arg.default_entry = 
env.add_default_value(arg.type) + else: + error(arg.pos, + "This argument cannot have a default value") + arg.default = None + + def synthesize_assignment_node(self, env): + import ExprNodes + self.assmt = SingleAssignmentNode(self.pos, + lhs = ExprNodes.NameNode(self.pos, name = self.name), + rhs = ExprNodes.UnboundMethodNode(self.pos, + class_cname = env.class_obj_cname, + function = ExprNodes.PyCFunctionNode(self.pos, + pymethdef_cname = self.entry.pymethdef_cname))) + self.assmt.analyse_declarations(env) + self.assmt.analyse_expressions(env) + + def generate_function_header(self, code, with_pymethdef): + arg_code_list = [] + sig = self.entry.signature + if sig.has_dummy_arg: + arg_code_list.append( + "PyObject *%s" % Naming.self_cname) + for arg in self.args: + if not arg.is_generic: + if arg.is_self_arg: + arg_code_list.append("PyObject *%s" % arg.hdr_cname) + else: + arg_code_list.append( + arg.hdr_type.declaration_code(arg.hdr_cname)) + if sig.has_generic_args: + arg_code_list.append( + "PyObject *%s, PyObject *%s" + % (Naming.args_cname, Naming.kwds_cname)) + arg_code = ", ".join(arg_code_list) + dc = self.return_type.declaration_code(self.entry.func_cname) + header = "static %s(%s)" % (dc, arg_code) + code.putln("%s; /*proto*/" % header) + if self.entry.doc: + code.putln( + 'static char %s[] = "%s";' % ( + self.entry.doc_cname, + self.entry.doc)) + if with_pymethdef: + code.put( + "static PyMethodDef %s = " % + self.entry.pymethdef_cname) + code.put_pymethoddef(self.entry, ";") + code.putln("%s {" % header) + + def generate_argument_declarations(self, env, code): + for arg in self.args: + if arg.is_generic: # or arg.needs_conversion: + code.put_var_declaration(arg.entry) + + def generate_keyword_list(self, code): + if self.entry.signature.has_generic_args: + code.put( + "static char *%s[] = {" % + Naming.kwdlist_cname) + for arg in self.args: + if arg.is_generic: + code.put( + '"%s",' % + arg.name) + code.putln( + "0};") + + def 
generate_argument_parsing_code(self, code): + # Generate PyArg_ParseTuple call for generic + # arguments, if any. + if self.entry.signature.has_generic_args: + arg_addrs = [] + arg_formats = [] + default_seen = 0 + for arg in self.args: + arg_entry = arg.entry + if arg.is_generic: + if arg.default: + code.putln( + "%s = %s;" % ( + arg_entry.cname, + arg.default_entry.cname)) + if not default_seen: + arg_formats.append("|") + default_seen = 1 + elif default_seen: + error(arg.pos, "Non-default argument following default argument") + arg_addrs.append("&" + arg_entry.cname) + format = arg_entry.type.parsetuple_format + if format: + arg_formats.append(format) + else: + error(arg.pos, + "Cannot convert Python object argument to type '%s'" + % arg.type) + argformat = '"%s"' % string.join(arg_formats, "") + has_starargs = self.star_arg is not None or self.starstar_arg is not None + if has_starargs: + self.generate_stararg_getting_code(code) + pt_arglist = [Naming.args_cname, Naming.kwds_cname, argformat, + Naming.kwdlist_cname] + arg_addrs + pt_argstring = string.join(pt_arglist, ", ") + code.put( + 'if (!PyArg_ParseTupleAndKeywords(%s)) ' % + pt_argstring) + error_return_code = "return %s;" % self.error_value() + if has_starargs: + code.putln("{") + code.put_xdecref(Naming.args_cname, py_object_type) + code.put_xdecref(Naming.kwds_cname, py_object_type) + self.generate_arg_xdecref(self.star_arg, code) + self.generate_arg_xdecref(self.starstar_arg, code) + code.putln(error_return_code) + code.putln("}") + else: + code.putln(error_return_code) + + def put_stararg_decrefs(self, code): + if self.star_arg or self.starstar_arg: + code.put_xdecref(Naming.args_cname, py_object_type) + code.put_xdecref(Naming.kwds_cname, py_object_type) + + def generate_arg_xdecref(self, arg, code): + if arg: + code.put_var_xdecref(arg.entry) + + def arg_address(self, arg): + if arg: + return "&%s" % arg.entry.cname + else: + return 0 + + def generate_stararg_getting_code(self, code): + if 
self.star_arg or self.starstar_arg: + if not self.entry.signature.has_generic_args: + error(self.pos, "This method cannot have * or ** arguments") + star_arg_addr = self.arg_address(self.star_arg) + starstar_arg_addr = self.arg_address(self.starstar_arg) + code.putln( + "if (__Pyx_GetStarArgs(&%s, &%s, %s, %s, %s, %s) < 0) return %s;" % ( + Naming.args_cname, + Naming.kwds_cname, + Naming.kwdlist_cname, + len(self.args) - self.entry.signature.num_fixed_args(), + star_arg_addr, + starstar_arg_addr, + self.error_value())) + + def generate_argument_conversion_code(self, code): + # Generate code to convert arguments from + # signature type to declared type, if needed. + for arg in self.args: + if arg.needs_conversion: + self.generate_arg_conversion(arg, code) + + def generate_arg_conversion(self, arg, code): + # Generate conversion code for one argument. + old_type = arg.hdr_type + new_type = arg.type + if old_type.is_pyobject: + self.generate_arg_conversion_from_pyobject(arg, code) + elif new_type.is_pyobject: + self.generate_arg_conversion_to_pyobject(arg, code) + else: + if new_type.assignable_from(old_type): + code.putln( + "%s = %s;" % (arg.entry.cname, arg.hdr_cname)) + else: + error(arg.pos, + "Cannot convert argument from '%s' to '%s'" % + (old_type, new_type)) + + def generate_arg_conversion_from_pyobject(self, arg, code): + new_type = arg.type + func = new_type.from_py_function + if func: + code.putln("%s = %s(%s); if (PyErr_Occurred()) %s" % ( + arg.entry.cname, + func, + arg.hdr_cname, + code.error_goto(arg.pos))) + else: + error(arg.pos, + "Cannot convert Python object argument to type '%s'" + % new_type) + + def generate_arg_conversion_to_pyobject(self, arg, code): + old_type = arg.hdr_type + func = old_type.to_py_function + if func: + code.putln("%s = %s(%s); if (!%s) %s" % ( + arg.entry.cname, + func, + arg.hdr_cname, + arg.entry.cname, + code.error_goto(arg.pos))) + else: + error(arg.pos, + "Cannot convert argument of type '%s' to Python object" + % 
old_type) + + def generate_argument_type_tests(self, code): + # Generate type tests for args whose signature + # type is PyObject * and whose declared type is + # a subtype thereof. + for arg in self.args: + if arg.needs_type_test: + self.generate_arg_type_test(arg, code) + + def generate_arg_type_test(self, arg, code): + # Generate type test for one argument. + if arg.type.typeobj_is_available(): + typeptr_cname = arg.type.typeptr_cname + arg_code = "((PyObject *)%s)" % arg.entry.cname + code.putln( + 'if (!__Pyx_ArgTypeTest(%s, %s, %d, "%s")) %s' % ( + arg_code, + typeptr_cname, + not arg.not_none, + arg.name, + code.error_goto(arg.pos))) + else: + error(arg.pos, "Cannot test type of extern C class " + "without type object name specification") + + def generate_execution_code(self, code): + # Evaluate and store argument default values + for arg in self.args: + default = arg.default + if default: + default.generate_evaluation_code(code) + default.make_owned_reference(code) + code.putln( + "%s = %s;" % ( + arg.default_entry.cname, + default.result)) + if default.is_temp and default.type.is_pyobject: + code.putln( + "%s = 0;" % + default.result) + # For Python class methods, create and store function object + if self.assmt: + self.assmt.generate_execution_code(code) + + def error_value(self): + return self.entry.signature.error_value + + def caller_will_check_exceptions(self): + return 1 + + +class PyClassDefNode(StatNode, BlockNode): + # A Python class definition. 
+ # + # name string Name of the class + # doc string or None + # body StatNode Attribute definition code + # entry Symtab.Entry + # scope PyClassScope + # + # The following subnodes are constructed internally: + # + # dict DictNode Class dictionary + # classobj ClassNode Class object + # target NameNode Variable to assign class object to + + def __init__(self, pos, name, bases, doc, body): + StatNode.__init__(self, pos) + self.name = name + self.doc = doc + self.body = body + import ExprNodes + self.dict = ExprNodes.DictNode(pos, key_value_pairs = []) + if self.doc: + doc_node = ExprNodes.StringNode(pos, value = self.doc) + else: + doc_node = None + self.classobj = ExprNodes.ClassNode(pos, + name = ExprNodes.StringNode(pos, value = name), + bases = bases, dict = self.dict, doc = doc_node) + self.target = ExprNodes.NameNode(pos, name = name) + + def analyse_declarations(self, env): + self.target.analyse_target_declaration(env) + + def analyse_expressions(self, env): + self.dict.analyse_expressions(env) + self.classobj.analyse_expressions(env) + genv = env.global_scope() + cenv = PyClassScope(name = self.name, outer_scope = genv) + cenv.class_dict_cname = self.dict.result + cenv.class_obj_cname = self.classobj.result + self.scope = cenv + self.body.analyse_declarations(cenv) + self.body.analyse_expressions(cenv) + self.target.analyse_target_expression(env) + self.dict.release_temp(env) + self.classobj.release_temp(env) + self.target.release_target_temp(env) + env.recycle_pending_temps() + + def generate_function_definitions(self, env, code): + self.generate_py_string_decls(self.scope, code) + self.body.generate_function_definitions( + self.scope, code) + + def generate_execution_code(self, code): + self.dict.generate_evaluation_code(code) + self.classobj.generate_evaluation_code(code) + self.body.generate_execution_code(code) + self.target.generate_assignment_code(self.classobj, code) + self.dict.generate_disposal_code(code) + + +class CClassDefNode(StatNode): + # An 
extension type definition. + # + # visibility 'private' or 'public' or 'extern' + # typedef_flag boolean + # module_name string or None For import of extern type objects + # class_name string Unqualified name of class + # as_name string or None Name to declare as in this scope + # base_class_module string or None Module containing the base class + # base_class_name string or None Name of the base class + # objstruct_name string or None Specified C name of object struct + # typeobj_name string or None Specified C name of type object + # in_pxd boolean Is in a .pxd file + # doc string or None + # body StatNode or None + # entry Symtab.Entry + # base_type PyExtensionType or None + + def analyse_declarations(self, env): + #print "CClassDefNode.analyse_declarations:", self.class_name + #print "...visibility =", self.visibility + #print "...module_name =", self.module_name + if env.in_cinclude and not self.objstruct_name: + error(self.pos, "Object struct name specification required for " + "C class defined in 'extern from' block") + self.base_type = None + if self.base_class_name: + if self.base_class_module: + base_class_scope = env.find_module(self.base_class_module, self.pos) + else: + base_class_scope = env + if base_class_scope: + base_class_entry = base_class_scope.find(self.base_class_name, self.pos) + if base_class_entry: + if not base_class_entry.is_type: + error(self.pos, "'%s' is not a type name" % self.base_class_name) + elif not base_class_entry.type.is_extension_type: + error(self.pos, "'%s' is not an extension type" % self.base_class_name) + elif not base_class_entry.type.is_complete(): + error(self.pos, "Base class '%s' is incomplete" % self.base_class_name) + else: + self.base_type = base_class_entry.type + has_body = self.body is not None + self.entry = env.declare_c_class( + name = self.class_name, + pos = self.pos, + defining = has_body and self.in_pxd, + implementing = has_body and not self.in_pxd, + module_name = self.module_name, + base_type = 
self.base_type, + objstruct_cname = self.objstruct_name, + typeobj_cname = self.typeobj_name, + visibility = self.visibility, + typedef_flag = self.typedef_flag) + scope = self.entry.type.scope + if self.doc: + scope.doc = self.doc + if has_body: + self.body.analyse_declarations(scope) + if self.in_pxd: + scope.defined = 1 + else: + scope.implemented = 1 + env.allocate_vtable_names(self.entry) + + def analyse_expressions(self, env): + if self.body: + self.body.analyse_expressions(env) + + def generate_function_definitions(self, env, code): + if self.body: + self.body.generate_function_definitions( + self.entry.type.scope, code) + + def generate_execution_code(self, code): + # This is needed to generate evaluation code for + # default values of method arguments. + if self.body: + self.body.generate_execution_code(code) + + +class PropertyNode(StatNode): + # Definition of a property in an extension type. + # + # name string + # doc string or None Doc string + # body StatListNode + + def analyse_declarations(self, env): + entry = env.declare_property(self.name, self.doc, self.pos) + if entry: + if self.doc: + doc_entry = env.get_string_const(self.doc) + entry.doc_cname = doc_entry.cname + self.body.analyse_declarations(entry.scope) + + def analyse_expressions(self, env): + self.body.analyse_expressions(env) + + def generate_function_definitions(self, env, code): + self.body.generate_function_definitions(env, code) + + def generate_execution_code(self, code): + pass + + +class GlobalNode(StatNode): + # Global variable declaration. + # + # names [string] + + def analyse_declarations(self, env): + for name in self.names: + env.declare_global(name, self.pos) + + def analyse_expressions(self, env): + pass + + def generate_execution_code(self, code): + pass + + +class ExprStatNode(StatNode): + # Expression used as a statement. 
+ # + # expr ExprNode + + def analyse_expressions(self, env): + self.expr.analyse_expressions(env) + self.expr.release_temp(env) + env.recycle_pending_temps() # TEMPORARY + + def generate_execution_code(self, code): + self.expr.generate_evaluation_code(code) + if not self.expr.is_temp and self.expr.result: + code.putln("%s;" % self.expr.result) + self.expr.generate_disposal_code(code) + + +class AssignmentNode(StatNode): + # Abstract base class for assignment nodes. + # + # The analyse_expressions and generate_execution_code + # phases of assignments are split into two sub-phases + # each, to enable all the right hand sides of a + # parallel assignment to be evaluated before assigning + # to any of the left hand sides. + + def analyse_expressions(self, env): + self.analyse_expressions_1(env) + self.analyse_expressions_2(env) + + def generate_execution_code(self, code): + self.generate_rhs_evaluation_code(code) + self.generate_assignment_code(code) + + +class SingleAssignmentNode(AssignmentNode): + # The simplest case: + # + # a = b + # + # lhs ExprNode Left hand side + # rhs ExprNode Right hand side + + def analyse_declarations(self, env): + self.lhs.analyse_target_declaration(env) + + def analyse_expressions_1(self, env, use_temp = 0): + self.rhs.analyse_types(env) + self.lhs.analyse_target_types(env) + self.rhs = self.rhs.coerce_to(self.lhs.type, env) + if use_temp: + self.rhs = self.rhs.coerce_to_temp(env) + self.rhs.allocate_temps(env) + + def analyse_expressions_2(self, env): + self.lhs.allocate_target_temps(env) + self.lhs.release_target_temp(env) + self.rhs.release_temp(env) + +# def analyse_assignment(self, env, lhs, rhs): +# # Returns coerced RHS. 
+# rhs.analyse_types(env) +# lhs.analyse_target_types(env) +# rhs = rhs.coerce_to(lhs.type, env) +# rhs.allocate_temps(env) +# lhs.allocate_target_temps(env) +# return rhs + + def generate_rhs_evaluation_code(self, code): + self.rhs.generate_evaluation_code(code) + + def generate_assignment_code(self, code): + self.lhs.generate_assignment_code(self.rhs, code) + + +class CascadedAssignmentNode(AssignmentNode): + # An assignment with multiple left hand sides: + # + # a = b = c + # + # lhs_list [ExprNode] Left hand sides + # rhs ExprNode Right hand sides + # + # Used internally: + # + # coerced_rhs_list [ExprNode] RHS coerced to type of each LHS + + def analyse_declarations(self, env): + for lhs in self.lhs_list: + lhs.analyse_target_declaration(env) + +# def analyse_expressions(self, env): +# import ExprNodes +# self.rhs.analyse_types(env) +# self.rhs = self.rhs.coerce_to_temp(env) +# self.rhs.allocate_temps(env) +# self.coerced_rhs_list = [] +# for lhs in self.lhs_list: +# lhs.analyse_target_types(env) +# coerced_rhs = ExprNodes.CloneNode(self.rhs).coerce_to(lhs.type, env) +# self.coerced_rhs_list.append(coerced_rhs) +# coerced_rhs.allocate_temps(env) +# lhs.allocate_target_temps(env) +# coerced_rhs.release_temp(env) +# lhs.release_target_temp(env) +# self.rhs.release_temp(env) + + def analyse_expressions_1(self, env, use_temp = 0): + self.rhs.analyse_types(env) + if use_temp: + self.rhs = self.rhs.coerce_to_temp(env) + else: + self.rhs = self.rhs.coerce_to_simple(env) + self.rhs.allocate_temps(env) + + def analyse_expressions_2(self, env): + from ExprNodes import CloneNode + self.coerced_rhs_list = [] + for lhs in self.lhs_list: + lhs.analyse_target_types(env) + rhs = CloneNode(self.rhs) + rhs = rhs.coerce_to(lhs.type, env) + self.coerced_rhs_list.append(rhs) + rhs.allocate_temps(env) + lhs.allocate_target_temps(env) + lhs.release_target_temp(env) + rhs.release_temp(env) + self.rhs.release_temp(env) + +# def generate_execution_code(self, code): +# 
self.rhs.generate_evaluation_code(code) +# for i in range(len(self.lhs_list)): +# lhs = self.lhs_list[i] +# rhs = self.coerced_rhs_list[i] +# rhs.generate_evaluation_code(code) +# lhs.generate_assignment_code(rhs, code) +# # Assignment has already disposed of the cloned RHS +# self.rhs.generate_disposal_code(code) + + def generate_rhs_evaluation_code(self, code): + self.rhs.generate_evaluation_code(code) + + def generate_assignment_code(self, code): + for i in range(len(self.lhs_list)): + lhs = self.lhs_list[i] + rhs = self.coerced_rhs_list[i] + rhs.generate_evaluation_code(code) + lhs.generate_assignment_code(rhs, code) + # Assignment has disposed of the cloned RHS + self.rhs.generate_disposal_code(code) + +class ParallelAssignmentNode(AssignmentNode): + # A combined packing/unpacking assignment: + # + # a, b, c = d, e, f + # + # This has been rearranged by the parser into + # + # a = d ; b = e ; c = f + # + # but we must evaluate all the right hand sides + # before assigning to any of the left hand sides. 
+ # + # stats [AssignmentNode] The constituent assignments + + def analyse_declarations(self, env): + for stat in self.stats: + stat.analyse_declarations(env) + + def analyse_expressions(self, env): + for stat in self.stats: + stat.analyse_expressions_1(env, use_temp = 1) + for stat in self.stats: + stat.analyse_expressions_2(env) + + def generate_execution_code(self, code): + for stat in self.stats: + stat.generate_rhs_evaluation_code(code) + for stat in self.stats: + stat.generate_assignment_code(code) + + +class PrintStatNode(StatNode): + # print statement + # + # args [ExprNode] + # ends_with_comma boolean + + def analyse_expressions(self, env): + for i in range(len(self.args)): + arg = self.args[i] + arg.analyse_types(env) + arg = arg.coerce_to_pyobject(env) + arg.allocate_temps(env) + arg.release_temp(env) + self.args[i] = arg + env.recycle_pending_temps() # TEMPORARY + env.use_utility_code(printing_utility_code) + + def generate_execution_code(self, code): + for arg in self.args: + arg.generate_evaluation_code(code) + code.putln( + "if (__Pyx_PrintItem(%s) < 0) %s" % ( + arg.result, + code.error_goto(self.pos))) + arg.generate_disposal_code(code) + if not self.ends_with_comma: + code.putln( + "if (__Pyx_PrintNewline() < 0) %s" % + code.error_goto(self.pos)) + + +class DelStatNode(StatNode): + # del statement + # + # args [ExprNode] + + def analyse_declarations(self, env): + for arg in self.args: + arg.analyse_target_declaration(env) + + def analyse_expressions(self, env): + for arg in self.args: + arg.analyse_target_expression(env) + if not arg.type.is_pyobject: + error(arg.pos, "Deletion of non-Python object") + env.recycle_pending_temps() # TEMPORARY + + def generate_execution_code(self, code): + for arg in self.args: + if arg.type.is_pyobject: + arg.generate_deletion_code(code) + # else error reported earlier + + +class PassStatNode(StatNode): + # pass statement + + def analyse_expressions(self, env): + pass + + def generate_execution_code(self, code): + 
pass + + +class BreakStatNode(StatNode): + + def analyse_expressions(self, env): + pass + + def generate_execution_code(self, code): + if not code.break_label: + error(self.pos, "break statement not inside loop") + else: + code.putln( + "goto %s;" % + code.break_label) + + +class ContinueStatNode(StatNode): + + def analyse_expressions(self, env): + pass + + def generate_execution_code(self, code): + if code.in_try_finally: + error(self.pos, "continue statement inside try of try...finally") + elif not code.continue_label: + error(self.pos, "continue statement not inside loop") + else: + code.putln( + "goto %s;" % + code.continue_label) + + +class ReturnStatNode(StatNode): + # return statement + # + # value ExprNode or None + # return_type PyrexType + + def analyse_expressions(self, env): + return_type = env.return_type + self.return_type = return_type + if not return_type: + error(self.pos, "Return not inside a function body") + return + if self.value: + self.value.analyse_types(env) + if return_type.is_void or return_type.is_returncode: + error(self.value.pos, + "Return with value in void function") + else: + self.value = self.value.coerce_to(env.return_type, env) + self.value.allocate_temps(env) + self.value.release_temp(env) + else: + if (not return_type.is_void + and not return_type.is_pyobject + and not return_type.is_returncode): + error(self.pos, "Return value required") + + def generate_execution_code(self, code): + if not self.return_type: + # error reported earlier + return + if self.value: + self.value.generate_evaluation_code(code) + if self.value.type.is_pyobject and not self.value.is_temp: + code.put_incref(self.value.result, self.value.type) + if self.return_type.is_extension_type: + cast = "(%s)" % self.return_type.declaration_code("") + else: + cast = "" + code.putln( + "%s = %s%s;" % ( + Naming.retval_cname, + cast, + self.value.result)) + self.value.generate_post_assignment_code(code) + else: + if self.return_type.is_pyobject: + code.putln( + "%s 
= Py_None; Py_INCREF(%s);" % ( + Naming.retval_cname, + Naming.retval_cname)) + elif self.return_type.is_returncode: + code.putln( + "%s = %s;" % ( + Naming.retval_cname, + self.return_type.default_value)) + code.putln( + "goto %s;" % + code.return_label) + + +class RaiseStatNode(StatNode): + # raise statement + # + # exc_type ExprNode or None + # exc_value ExprNode or None + # exc_tb ExprNode or None + + def analyse_expressions(self, env): + if self.exc_type: + self.exc_type.analyse_types(env) + self.exc_type = self.exc_type.coerce_to_pyobject(env) + self.exc_type.allocate_temps(env) + if self.exc_value: + self.exc_value.analyse_types(env) + self.exc_value = self.exc_value.coerce_to_pyobject(env) + self.exc_value.allocate_temps(env) + if self.exc_tb: + self.exc_tb.analyse_types(env) + self.exc_tb = self.exc_tb.coerce_to_pyobject(env) + self.exc_tb.allocate_temps(env) + if self.exc_type: + self.exc_type.release_temp(env) + if self.exc_value: + self.exc_value.release_temp(env) + if self.exc_tb: + self.exc_tb.release_temp(env) + env.recycle_pending_temps() # TEMPORARY + if not (self.exc_type or self.exc_value or self.exc_tb): + env.use_utility_code(reraise_utility_code) + else: + env.use_utility_code(raise_utility_code) + + def generate_execution_code(self, code): + if self.exc_type: + self.exc_type.generate_evaluation_code(code) + type_code = self.exc_type.result + else: + type_code = 0 + if self.exc_value: + self.exc_value.generate_evaluation_code(code) + value_code = self.exc_value.result + else: + value_code = "0" + if self.exc_tb: + self.exc_tb.generate_evaluation_code(code) + tb_code = self.exc_tb.result + else: + tb_code = "0" + if self.exc_type or self.exc_value or self.exc_tb: + code.putln( + "__Pyx_Raise(%s, %s, %s);" % ( + type_code, + value_code, + tb_code)) + else: + code.putln( + "__Pyx_ReRaise();") + if self.exc_type: + self.exc_type.generate_disposal_code(code) + if self.exc_value: + self.exc_value.generate_disposal_code(code) + if self.exc_tb: + 
self.exc_tb.generate_disposal_code(code) + code.putln( + code.error_goto(self.pos)) + + +class AssertStatNode(StatNode): + # assert statement + # + # cond ExprNode + # value ExprNode or None + + def analyse_expressions(self, env): + self.cond = self.cond.analyse_boolean_expression(env) + if self.value: + self.value.analyse_types(env) + self.value = self.value.coerce_to_pyobject(env) + self.value.allocate_temps(env) + self.cond.release_temp(env) + if self.value: + self.value.release_temp(env) + env.recycle_pending_temps() # TEMPORARY + + def generate_execution_code(self, code): + self.cond.generate_evaluation_code(code) + if self.value: + self.value.generate_evaluation_code(code) + code.putln( + "if (!%s) {" % + self.cond.result) + if self.value: + code.putln( + "PyErr_SetObject(PyExc_AssertionError, %s);" % + self.value.result) + else: + code.putln( + "PyErr_SetNone(PyExc_AssertionError);") + code.putln( + code.error_goto(self.pos)) + code.putln( + "}") + self.cond.generate_disposal_code(code) + if self.value: + self.value.generate_disposal_code(code) + + +class IfStatNode(StatNode): + # if statement + # + # if_clauses [IfClauseNode] + # else_clause StatNode or None + + def analyse_declarations(self, env): + for if_clause in self.if_clauses: + if_clause.analyse_declarations(env) + if self.else_clause: + self.else_clause.analyse_declarations(env) + + def analyse_expressions(self, env): + for if_clause in self.if_clauses: + if_clause.analyse_expressions(env) + if self.else_clause: + self.else_clause.analyse_expressions(env) + + def generate_execution_code(self, code): + end_label = code.new_label() + for if_clause in self.if_clauses: + if_clause.generate_execution_code(code, end_label) + if self.else_clause: + code.putln("/*else*/ {") + self.else_clause.generate_execution_code(code) + code.putln("}") + code.put_label(end_label) + + +class IfClauseNode(Node): + # if or elif clause in an if statement + # + # condition ExprNode + # body StatNode + + def 
analyse_declarations(self, env): + self.condition.analyse_declarations(env) + self.body.analyse_declarations(env) + + def analyse_expressions(self, env): + self.condition = \ + self.condition.analyse_temp_boolean_expression(env) + self.condition.release_temp(env) + env.recycle_pending_temps() # TEMPORARY + self.body.analyse_expressions(env) + + def generate_execution_code(self, code, end_label): + self.condition.generate_evaluation_code(code) + code.putln( + "if (%s) {" % + self.condition.result) + self.body.generate_execution_code(code) + code.putln( + "goto %s;" % + end_label) + code.putln("}") + + +class WhileStatNode(StatNode): + # while statement + # + # condition ExprNode + # body StatNode + # else_clause StatNode + + def analyse_declarations(self, env): + self.body.analyse_declarations(env) + if self.else_clause: + self.else_clause.analyse_declarations(env) + + def analyse_expressions(self, env): + self.condition = \ + self.condition.analyse_temp_boolean_expression(env) + self.condition.release_temp(env) + env.recycle_pending_temps() # TEMPORARY + self.body.analyse_expressions(env) + if self.else_clause: + self.else_clause.analyse_expressions(env) + + def generate_execution_code(self, code): + old_loop_labels = code.new_loop_labels() + code.putln( + "while (1) {") + code.put_label(code.continue_label) + self.condition.generate_evaluation_code(code) + code.putln( + "if (!%s) break;" % + self.condition.result) + self.body.generate_execution_code(code) + code.putln("}") + break_label = code.break_label + code.set_loop_labels(old_loop_labels) + if self.else_clause: + code.putln("/*else*/ {") + self.else_clause.generate_execution_code(code) + code.putln("}") + code.put_label(break_label) + + +class ForInStatNode(StatNode): + # for statement + # + # target ExprNode + # iterator IteratorNode + # body StatNode + # else_clause StatNode + # item NextNode used internally + + def analyse_declarations(self, env): + self.target.analyse_target_declaration(env) + 
self.body.analyse_declarations(env) + if self.else_clause: + self.else_clause.analyse_declarations(env) + + def analyse_expressions(self, env): + import ExprNodes + self.iterator.analyse_expressions(env) + self.target.analyse_target_types(env) + self.item = ExprNodes.NextNode(self.iterator, env) + self.item = self.item.coerce_to(self.target.type, env) + self.item.allocate_temps(env) + self.target.allocate_target_temps(env) + self.item.release_temp(env) + self.target.release_target_temp(env) + env.recycle_pending_temps() # TEMPORARY + self.body.analyse_expressions(env) + self.iterator.release_temp(env) + env.recycle_pending_temps() # TEMPORARY + if self.else_clause: + self.else_clause.analyse_expressions(env) + + def generate_execution_code(self, code): + old_loop_labels = code.new_loop_labels() + self.iterator.generate_evaluation_code(code) + code.putln( + "for (;;) {") + code.put_label(code.continue_label) + self.item.generate_evaluation_code(code) + self.target.generate_assignment_code(self.item, code) + self.body.generate_execution_code(code) + code.putln( + "}") + break_label = code.break_label + code.set_loop_labels(old_loop_labels) + if self.else_clause: + code.putln("/*else*/ {") + self.else_clause.generate_execution_code(code) + code.putln("}") + code.put_label(break_label) + self.iterator.generate_disposal_code(code) + + +class ForFromStatNode(StatNode): + # for name from expr rel name rel expr + # + # target NameNode + # bound1 ExprNode + # relation1 string + # relation2 string + # bound2 ExprNode + # body StatNode + # else_clause StatNode or None + # + # Used internally: + # + # loopvar_name string + # py_loopvar_node PyTempNode or None + + def analyse_declarations(self, env): + self.target.analyse_target_declaration(env) + self.body.analyse_declarations(env) + if self.else_clause: + self.else_clause.analyse_declarations(env) + + def analyse_expressions(self, env): + import ExprNodes + self.target.analyse_target_types(env) + 
self.bound1.analyse_types(env) + self.bound2.analyse_types(env) + self.bound1 = self.bound1.coerce_to_integer(env) + self.bound2 = self.bound2.coerce_to_integer(env) + if not (self.bound2.is_name or self.bound2.is_literal): + self.bound2 = self.bound2.coerce_to_temp(env) + target_type = self.target.type + if not (target_type.is_pyobject + or target_type.assignable_from(PyrexTypes.c_int_type)): + error(self.target.pos, + "Cannot assign integer to variable of type '%s'" % target_type) + if target_type.is_int: + self.loopvar_name = self.target.entry.cname + self.py_loopvar_node = None + else: + c_loopvar_node = ExprNodes.TempNode(self.pos, + PyrexTypes.c_long_type, env) + c_loopvar_node.allocate_temps(env) + self.loopvar_name = c_loopvar_node.result + self.py_loopvar_node = \ + ExprNodes.CloneNode(c_loopvar_node).coerce_to_pyobject(env) + self.bound1.allocate_temps(env) + self.bound2.allocate_temps(env) + if self.py_loopvar_node: + self.py_loopvar_node.allocate_temps(env) + self.target.allocate_target_temps(env) + self.target.release_target_temp(env) + if self.py_loopvar_node: + self.py_loopvar_node.release_temp(env) + self.body.analyse_expressions(env) + if self.py_loopvar_node: + c_loopvar_node.release_temp(env) + if self.else_clause: + self.else_clause.analyse_expressions(env) + self.bound1.release_temp(env) + self.bound2.release_temp(env) + env.recycle_pending_temps() # TEMPORARY + + def generate_execution_code(self, code): + old_loop_labels = code.new_loop_labels() + self.bound1.generate_evaluation_code(code) + self.bound2.generate_evaluation_code(code) + offset, incop = self.relation_table[self.relation1] + code.putln( + "for (%s = %s%s; %s %s %s; %s%s) {" % ( + self.loopvar_name, + self.bound1.result, offset, + self.loopvar_name, self.relation2, self.bound2.result, + incop, self.loopvar_name)) + if self.py_loopvar_node: + self.py_loopvar_node.generate_evaluation_code(code) + self.target.generate_assignment_code(self.py_loopvar_node, code) + 
self.body.generate_execution_code(code) + code.put_label(code.continue_label) + code.putln("}") + break_label = code.break_label + code.set_loop_labels(old_loop_labels) + if self.else_clause: + code.putln("/*else*/ {") + self.else_clause.generate_execution_code(code) + code.putln("}") + code.put_label(break_label) + self.bound1.generate_disposal_code(code) + self.bound2.generate_disposal_code(code) + + relation_table = { + # {relop : (initial offset, increment op)} + '<=': ("", "++"), + '<' : ("+1", "++"), + '>=': ("", "--"), + '>' : ("-1", "--") + } + + +class TryExceptStatNode(StatNode): + # try .. except statement + # + # body StatNode + # except_clauses [ExceptClauseNode] + # else_clause StatNode or None + # cleanup_list [Entry] temps to clean up on error + + def analyse_declarations(self, env): + self.body.analyse_declarations(env) + for except_clause in self.except_clauses: + except_clause.analyse_declarations(env) + if self.else_clause: + self.else_clause.analyse_declarations(env) + + def analyse_expressions(self, env): + self.body.analyse_expressions(env) + self.cleanup_list = env.free_temp_entries[:] + for except_clause in self.except_clauses: + except_clause.analyse_expressions(env) + if self.else_clause: + self.else_clause.analyse_expressions(env) + + def generate_execution_code(self, code): + old_error_label = code.new_error_label() + our_error_label = code.error_label + end_label = code.new_label() + code.putln( + "/*try:*/ {") + self.body.generate_execution_code(code) + code.putln( + "}") + code.error_label = old_error_label + if self.else_clause: + code.putln( + "/*else:*/ {") + self.else_clause.generate_execution_code(code) + code.putln( + "}") + code.putln( + "goto %s;" % + end_label) + code.put_label(our_error_label) + code.put_var_xdecrefs_clear(self.cleanup_list) + default_clause_seen = 0 + for except_clause in self.except_clauses: + if not except_clause.pattern: + default_clause_seen = 1 + else: + if default_clause_seen: + 
error(except_clause.pos, "Default except clause not last") + except_clause.generate_handling_code(code, end_label) + if not default_clause_seen: + code.putln( + "goto %s;" % + code.error_label) + code.put_label(end_label) + + +class ExceptClauseNode(Node): + # Part of try ... except statement. + # + # pattern ExprNode + # target ExprNode or None + # body StatNode + # match_flag string result of exception match + # exc_value ExcValueNode used internally + # function_name string qualified name of enclosing function + + def analyse_declarations(self, env): + if self.target: + self.target.analyse_target_declaration(env) + self.body.analyse_declarations(env) + + def analyse_expressions(self, env): + import ExprNodes + genv = env.global_scope() + self.function_name = env.qualified_name + if self.pattern: + self.pattern.analyse_expressions(env) + self.pattern = self.pattern.coerce_to_pyobject(env) + self.match_flag = env.allocate_temp(PyrexTypes.c_int_type) + self.pattern.release_temp(env) + env.release_temp(self.match_flag) + self.exc_value = ExprNodes.ExcValueNode(self.pos, env) + self.exc_value.allocate_temps(env) + if self.target: + self.target.analyse_target_expression(env) + self.exc_value.release_temp(env) + if self.target: + self.target.release_target_temp(env) + env.recycle_pending_temps() # TEMPORARY + self.body.analyse_expressions(env) + + def generate_handling_code(self, code, end_label): + code.mark_pos(self.pos) + if self.pattern: + self.pattern.generate_evaluation_code(code) + code.putln( + "%s = PyErr_ExceptionMatches(%s);" % ( + self.match_flag, + self.pattern.result)) + self.pattern.generate_disposal_code(code) + code.putln( + "if (%s) {" % + self.match_flag) + else: + code.putln( + "/*except:*/ {") + code.putln( + '__Pyx_AddTraceback("%s");' % (self.function_name)) + # We always have to fetch the exception value even if + # there is no target, because this also normalises the + # exception and stores it in the thread state. 
+ self.exc_value.generate_evaluation_code(code) + if self.target: + self.target.generate_assignment_code(self.exc_value, code) + else: + self.exc_value.generate_disposal_code(code) + self.body.generate_execution_code(code) + code.putln( + "goto %s;" + % end_label) + code.putln( + "}") + + +class TryFinallyStatNode(StatNode): + # try ... finally statement + # + # body StatNode + # finally_clause StatNode + # cleanup_list [Entry] temps to clean up on error + # exc_vars 3*(string,) temps to hold saved exception + # + # The plan is that we funnel all continue, break + # return and error gotos into the beginning of the + # finally block, setting a variable to remember which + # one we're doing. At the end of the finally block, we + # switch on the variable to figure out where to go. + # In addition, if we're doing an error, we save the + # exception on entry to the finally block and restore + # it on exit. + + disallow_continue_in_try_finally = 0 + # There doesn't seem to be any point in disallowing + # continue in the try block, since we have no problem + # handling it. 
+ + def analyse_declarations(self, env): + self.body.analyse_declarations(env) + self.finally_clause.analyse_declarations(env) + + def analyse_expressions(self, env): + self.body.analyse_expressions(env) + self.cleanup_list = env.free_temp_entries[:] + self.exc_vars = ( + env.allocate_temp(PyrexTypes.py_object_type), + env.allocate_temp(PyrexTypes.py_object_type), + env.allocate_temp(PyrexTypes.py_object_type)) + self.lineno_var = \ + env.allocate_temp(PyrexTypes.c_int_type) + self.finally_clause.analyse_expressions(env) + for var in self.exc_vars: + env.release_temp(var) + + def generate_execution_code(self, code): + old_error_label = code.error_label + old_labels = code.all_new_labels() + new_labels = code.get_all_labels() + new_error_label = code.error_label + catch_label = code.new_label() + code.putln( + "/*try:*/ {") + if self.disallow_continue_in_try_finally: + was_in_try_finally = code.in_try_finally + code.in_try_finally = 1 + self.body.generate_execution_code(code) + if self.disallow_continue_in_try_finally: + code.in_try_finally = was_in_try_finally + code.putln( + "}") + code.putln( + "/*finally:*/ {") + code.putln( + "int __pyx_why;") + #code.putln( + # "PyObject *%s, *%s, *%s;" % + # self.exc_vars) + #code.putln( + # "int %s;" % + # self.lineno_var) + code.putln( + "__pyx_why = 0; goto %s;" % + catch_label) + for i in range(len(new_labels)): + if new_labels[i] and new_labels[i] <> "": + if new_labels[i] == new_error_label: + self.put_error_catcher(code, + new_error_label, i+1, catch_label) + else: + code.putln( + "%s: __pyx_why = %s; goto %s;" % ( + new_labels[i], + i+1, + catch_label)) + code.put_label(catch_label) + code.set_all_labels(old_labels) + self.finally_clause.generate_execution_code(code) + code.putln( + "switch (__pyx_why) {") + for i in range(len(old_labels)): + if old_labels[i]: + if old_labels[i] == old_error_label: + self.put_error_uncatcher(code, i+1, old_error_label) + else: + code.putln( + "case %s: goto %s;" % ( + i+1, + 
old_labels[i])) + code.putln( + "}") + code.putln( + "}") + + def put_error_catcher(self, code, error_label, i, catch_label): + code.putln( + "%s: {" % + error_label) + code.putln( + "__pyx_why = %s;" % + i) + code.put_var_xdecrefs_clear(self.cleanup_list) + code.putln( + "PyErr_Fetch(&%s, &%s, &%s);" % + self.exc_vars) + code.putln( + "%s = %s;" % ( + self.lineno_var, Naming.lineno_cname)) + code.putln( + "goto %s;" % + catch_label) + code.putln( + "}") + + def put_error_uncatcher(self, code, i, error_label): + code.putln( + "case %s: {" % + i) + code.putln( + "PyErr_Restore(%s, %s, %s);" % + self.exc_vars) + code.putln( + "%s = %s;" % ( + Naming.lineno_cname, self.lineno_var)) + for var in self.exc_vars: + code.putln( + "%s = 0;" % + var) + code.putln( + "goto %s;" % + error_label) + code.putln( + "}") + + +class CImportStatNode(StatNode): + # cimport statement + # + # module_name string Qualified name of module being imported + # as_name string or None Name specified in "as" clause, if any + + def analyse_declarations(self, env): + module_scope = env.find_module(self.module_name, self.pos) + if "." in self.module_name: + names = self.module_name.split(".") + top_name = names[0] + top_module_scope = env.context.find_submodule(top_name) + module_scope = top_module_scope + for name in names[1:]: + submodule_scope = module_scope.find_submodule(name) + module_scope.declare_module(name, submodule_scope, self.pos) + module_scope = submodule_scope + if self.as_name: + env.declare_module(self.as_name, module_scope, self.pos) + else: + env.declare_module(top_name, top_module_scope, self.pos) + else: + name = self.as_name or self.module_name + env.declare_module(name, module_scope, self.pos) + + def analyse_expressions(self, env): + pass + + def generate_execution_code(self, code): + pass + + +class FromCImportStatNode(StatNode): + # from ... 
cimport statement + # + # module_name string Qualified name of module + # imported_names [(pos, name, as_name)] Names to be imported + + def analyse_declarations(self, env): + module_scope = env.find_module(self.module_name, self.pos) + env.add_imported_module(module_scope) + for pos, name, as_name in self.imported_names: + entry = module_scope.find(name, pos) + if entry: + local_name = as_name or name + env.add_imported_entry(local_name, entry, pos) + + def analyse_expressions(self, env): + pass + + def generate_execution_code(self, code): + pass + + +class FromImportStatNode(StatNode): + # from ... import statement + # + # module ImportNode + # items [(string, NameNode)] + # interned_items [(string, NameNode)] + # item PyTempNode used internally + + def analyse_declarations(self, env): + for _, target in self.items: + target.analyse_target_declaration(env) + + def analyse_expressions(self, env): + import ExprNodes + self.module.analyse_expressions(env) + self.item = ExprNodes.PyTempNode(self.pos, env) + self.item.allocate_temp(env) + self.interned_items = [] + for name, target in self.items: + if Options.intern_names: + self.interned_items.append((env.intern(name), target)) + target.analyse_target_expression(env) + target.release_temp(env) + self.module.release_temp(env) + self.item.release_temp(env) + env.recycle_pending_temps() # TEMPORARY + + def generate_execution_code(self, code): + self.module.generate_evaluation_code(code) + if Options.intern_names: + for cname, target in self.interned_items: + code.putln( + '%s = PyObject_GetAttr(%s, %s); if (!%s) %s' % ( + self.item.result, + self.module.result, + cname, + self.item.result, + code.error_goto(self.pos))) + target.generate_assignment_code(self.item, code) + else: + for name, target in self.items: + code.putln( + '%s = PyObject_GetAttrString(%s, "%s"); if (!%s) %s' % ( + self.item.result, + self.module.result, + name, + self.item.result, + code.error_goto(self.pos))) + 
target.generate_assignment_code(self.item, code) + self.module.generate_disposal_code(code) + +#------------------------------------------------------------------------------------ +# +# Runtime support code +# +#------------------------------------------------------------------------------------ + +utility_function_predeclarations = \ +""" +typedef struct {PyObject **p; char *s;} __Pyx_InternTabEntry; /*proto*/ +typedef struct {PyObject **p; char *s; long n;} __Pyx_StringTabEntry; /*proto*/ +static PyObject *__Pyx_UnpackItem(PyObject *, int); /*proto*/ +static int __Pyx_EndUnpack(PyObject *, int); /*proto*/ +static int __Pyx_PrintItem(PyObject *); /*proto*/ +static int __Pyx_PrintNewline(void); /*proto*/ +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb); /*proto*/ +static void __Pyx_ReRaise(void); /*proto*/ +static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list); /*proto*/ +static PyObject *__Pyx_GetExcValue(void); /*proto*/ +static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, char *name); /*proto*/ +static int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); /*proto*/ +static int __Pyx_GetStarArgs(PyObject **args, PyObject **kwds,\ + char *kwd_list[], int nargs, PyObject **args2, PyObject **kwds2); /*proto*/ +static void __Pyx_WriteUnraisable(char *name); /*proto*/ +static void __Pyx_AddTraceback(char *funcname); /*proto*/ +static PyTypeObject *__Pyx_ImportType(char *module_name, char *class_name, long size); /*proto*/ +static int __Pyx_SetVtable(PyObject *dict, void *vtable); /*proto*/ +static int __Pyx_GetVtable(PyObject *dict, void *vtabptr); /*proto*/ +static PyObject *__Pyx_CreateClass(PyObject *bases, PyObject *dict, PyObject *name, char *modname); /*proto*/ +static int __Pyx_InternStrings(__Pyx_InternTabEntry *t); /*proto*/ +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/ +""" + +get_name_predeclaration = \ +"static PyObject *__Pyx_GetName(PyObject *dict, char *name); 
/*proto*/" + +get_name_interned_predeclaration = \ +"static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name); /*proto*/" + +#------------------------------------------------------------------------------------ + +printing_utility_code = \ +r""" +static PyObject *__Pyx_GetStdout(void) { + PyObject *f = PySys_GetObject("stdout"); + if (!f) { + PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout"); + } + return f; +} + +static int __Pyx_PrintItem(PyObject *v) { + PyObject *f; + + if (!(f = __Pyx_GetStdout())) + return -1; + if (PyFile_SoftSpace(f, 1)) { + if (PyFile_WriteString(" ", f) < 0) + return -1; + } + if (PyFile_WriteObject(v, f, Py_PRINT_RAW) < 0) + return -1; + if (PyString_Check(v)) { + char *s = PyString_AsString(v); + int len = PyString_Size(v); + if (len > 0 && + isspace(Py_CHARMASK(s[len-1])) && + s[len-1] != ' ') + PyFile_SoftSpace(f, 0); + } + return 0; +} + +static int __Pyx_PrintNewline(void) { + PyObject *f; + + if (!(f = __Pyx_GetStdout())) + return -1; + if (PyFile_WriteString("\n", f) < 0) + return -1; + PyFile_SoftSpace(f, 0); + return 0; +} +""" + +#------------------------------------------------------------------------------------ + +# The following function is based on do_raise() from ceval.c. + +raise_utility_code = \ +""" +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb) { + Py_XINCREF(type); + Py_XINCREF(value); + Py_XINCREF(tb); + /* First, check the traceback argument, replacing None with NULL. 
*/ + if (tb == Py_None) { + Py_DECREF(tb); + tb = 0; + } + else if (tb != NULL && !PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "raise: arg 3 must be a traceback or None"); + goto raise_error; + } + /* Next, replace a missing value with None */ + if (value == NULL) { + value = Py_None; + Py_INCREF(value); + } + /* Next, repeatedly, replace a tuple exception with its first item */ + while (PyTuple_Check(type) && PyTuple_Size(type) > 0) { + PyObject *tmp = type; + type = PyTuple_GET_ITEM(type, 0); + Py_INCREF(type); + Py_DECREF(tmp); + } + if (PyString_Check(type)) + ; + else if (PyClass_Check(type)) + ; /*PyErr_NormalizeException(&type, &value, &tb);*/ + else if (PyInstance_Check(type)) { + /* Raising an instance. The value should be a dummy. */ + if (value != Py_None) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto raise_error; + } + else { + /* Normalize to raise , */ + Py_DECREF(value); + value = type; + type = (PyObject*) ((PyInstanceObject*)type)->in_class; + Py_INCREF(type); + } + } + else { + /* Not something you can raise. 
You get an exception + anyway, just not what you specified :-) */ + PyErr_Format(PyExc_TypeError, + "exceptions must be strings, classes, or " + "instances, not %s", type->ob_type->tp_name); + goto raise_error; + } + PyErr_Restore(type, value, tb); + return; +raise_error: + Py_XDECREF(value); + Py_XDECREF(type); + Py_XDECREF(tb); + return; +} +""" + +#------------------------------------------------------------------------------------ + +reraise_utility_code = \ +""" +static void __Pyx_ReRaise(void) { + PyThreadState *tstate = PyThreadState_Get(); + PyObject *type = tstate->exc_type; + PyObject *value = tstate->exc_value; + PyObject *tb = tstate->exc_traceback; + Py_XINCREF(type); + Py_XINCREF(value); + Py_XINCREF(tb); + PyErr_Restore(type, value, tb); +} +""" + +#------------------------------------------------------------------------------------ + +arg_type_test_utility_code = \ +""" +static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, char *name) { + if (!type) { + PyErr_Format(PyExc_SystemError, "Missing type object"); + return 0; + } + if ((none_allowed && obj == Py_None) || PyObject_TypeCheck(obj, type)) + return 1; + PyErr_Format(PyExc_TypeError, + "Argument '%s' has incorrect type (expected %s, got %s)", + name, type->tp_name, obj->ob_type->tp_name); + return 0; +} +""" + +#------------------------------------------------------------------------------------ +# +# __Pyx_GetStarArgs splits the args tuple and kwds dict into two parts +# each, one part suitable for passing to PyArg_ParseTupleAndKeywords, +# and the other containing any extra arguments. On success, replaces +# the borrowed references *args and *kwds with references to a new +# tuple and dict, and passes back new references in *args2 and *kwds2. +# Does not touch any of its arguments on failure. +# +# Any of *kwds, args2 and kwds2 may be 0 (but not args or kwds). If +# *kwds == 0, it is not changed. 
If kwds2 == 0 and *kwds != 0, a new +# reference to the same dictionary is passed back in *kwds. +# + +get_starargs_utility_code = \ +""" +static int __Pyx_GetStarArgs( + PyObject **args, + PyObject **kwds, + char *kwd_list[], + int nargs, + PyObject **args2, + PyObject **kwds2) +{ + PyObject *x = 0, *args1 = 0, *kwds1 = 0; + + if (args2) + *args2 = 0; + if (kwds2) + *kwds2 = 0; + + if (args2) { + args1 = PyTuple_GetSlice(*args, 0, nargs); + if (!args1) + goto bad; + *args2 = PyTuple_GetSlice(*args, nargs, PyTuple_Size(*args)); + if (!*args2) + goto bad; + } + else { + args1 = *args; + Py_INCREF(args1); + } + + if (kwds2) { + if (*kwds) { + char **p; + kwds1 = PyDict_New(); + if (!kwds) + goto bad; + *kwds2 = PyDict_Copy(*kwds); + if (!*kwds2) + goto bad; + for (p = kwd_list; *p; p++) { + x = PyDict_GetItemString(*kwds, *p); + if (x) { + if (PyDict_SetItemString(kwds1, *p, x) < 0) + goto bad; + if (PyDict_DelItemString(*kwds2, *p) < 0) + goto bad; + } + } + } + else { + *kwds2 = PyDict_New(); + if (!*kwds2) + goto bad; + } + } + else { + kwds1 = *kwds; + Py_XINCREF(kwds1); + } + + *args = args1; + *kwds = kwds1; + return 0; +bad: + Py_XDECREF(args1); + Py_XDECREF(kwds1); + if (*args2) + Py_XDECREF(*args2); + if (*kwds2) + Py_XDECREF(*kwds2); + return -1; +} +""" + +#------------------------------------------------------------------------------------ + +unraisable_exception_utility_code = \ +""" +static void __Pyx_WriteUnraisable(char *name) { + PyObject *old_exc, *old_val, *old_tb; + PyObject *ctx; + PyErr_Fetch(&old_exc, &old_val, &old_tb); + ctx = PyString_FromString(name); + PyErr_Restore(old_exc, old_val, old_tb); + if (!ctx) + ctx = Py_None; + PyErr_WriteUnraisable(ctx); +} +""" + +#------------------------------------------------------------------------------------ + +traceback_utility_code = \ +""" +#include "compile.h" +#include "frameobject.h" +#include "traceback.h" + +static void __Pyx_AddTraceback(char *funcname) { + PyObject *py_srcfile = 0; + PyObject 
*py_funcname = 0; + PyObject *py_globals = 0; + PyObject *empty_tuple = 0; + PyObject *empty_string = 0; + PyCodeObject *py_code = 0; + PyFrameObject *py_frame = 0; + + py_srcfile = PyString_FromString(%(FILENAME)s); + if (!py_srcfile) goto bad; + py_funcname = PyString_FromString(funcname); + if (!py_funcname) goto bad; + py_globals = PyModule_GetDict(%(GLOBALS)s); + if (!py_globals) goto bad; + empty_tuple = PyTuple_New(0); + if (!empty_tuple) goto bad; + empty_string = PyString_FromString(""); + if (!empty_string) goto bad; + py_code = PyCode_New( + 0, /*int argcount,*/ + 0, /*int nlocals,*/ + 0, /*int stacksize,*/ + 0, /*int flags,*/ + empty_string, /*PyObject *code,*/ + empty_tuple, /*PyObject *consts,*/ + empty_tuple, /*PyObject *names,*/ + empty_tuple, /*PyObject *varnames,*/ + empty_tuple, /*PyObject *freevars,*/ + empty_tuple, /*PyObject *cellvars,*/ + py_srcfile, /*PyObject *filename,*/ + py_funcname, /*PyObject *name,*/ + %(LINENO)s, /*int firstlineno,*/ + empty_string /*PyObject *lnotab*/ + ); + if (!py_code) goto bad; + py_frame = PyFrame_New( + PyThreadState_Get(), /*PyThreadState *tstate,*/ + py_code, /*PyCodeObject *code,*/ + py_globals, /*PyObject *globals,*/ + 0 /*PyObject *locals*/ + ); + if (!py_frame) goto bad; + py_frame->f_lineno = %(LINENO)s; + PyTraceBack_Here(py_frame); +bad: + Py_XDECREF(py_srcfile); + Py_XDECREF(py_funcname); + Py_XDECREF(empty_tuple); + Py_XDECREF(empty_string); + Py_XDECREF(py_code); + Py_XDECREF(py_frame); +} +""" % { + 'FILENAME': Naming.filename_cname, + 'LINENO': Naming.lineno_cname, + 'GLOBALS': Naming.module_cname +} + +#------------------------------------------------------------------------------------ + +type_import_utility_code = \ +""" +static PyTypeObject *__Pyx_ImportType(char *module_name, char *class_name, + long size) +{ + PyObject *py_module_name = 0; + PyObject *py_class_name = 0; + PyObject *py_name_list = 0; + PyObject *py_module = 0; + PyObject *result = 0; + + py_module_name = 
PyString_FromString(module_name); + if (!py_module_name) + goto bad; + py_class_name = PyString_FromString(class_name); + if (!py_class_name) + goto bad; + py_name_list = PyList_New(1); + if (!py_name_list) + goto bad; + Py_INCREF(py_class_name); + if (PyList_SetItem(py_name_list, 0, py_class_name) < 0) + goto bad; + py_module = __Pyx_Import(py_module_name, py_name_list); + if (!py_module) + goto bad; + result = PyObject_GetAttr(py_module, py_class_name); + if (!result) + goto bad; + if (!PyType_Check(result)) { + PyErr_Format(PyExc_TypeError, + "%s.%s is not a type object", + module_name, class_name); + goto bad; + } + if (((PyTypeObject *)result)->tp_basicsize != size) { + PyErr_Format(PyExc_ValueError, + "%s.%s does not appear to be the correct type object", + module_name, class_name); + goto bad; + } + goto done; +bad: + Py_XDECREF(result); + result = 0; +done: + Py_XDECREF(py_module_name); + Py_XDECREF(py_class_name); + Py_XDECREF(py_name_list); + return (PyTypeObject *)result; +} +""" + +#------------------------------------------------------------------------------------ + +set_vtable_utility_code = \ +""" +static int __Pyx_SetVtable(PyObject *dict, void *vtable) { + PyObject *pycobj = 0; + int result; + + pycobj = PyCObject_FromVoidPtr(vtable, 0); + if (!pycobj) + goto bad; + if (PyDict_SetItemString(dict, "__pyx_vtable__", pycobj) < 0) + goto bad; + result = 0; + goto done; + +bad: + result = -1; +done: + Py_XDECREF(pycobj); + return result; +} +""" + +#------------------------------------------------------------------------------------ + +get_vtable_utility_code = \ +r""" +static int __Pyx_GetVtable(PyObject *dict, void *vtabptr) { + int result; + PyObject *pycobj; + + pycobj = PyMapping_GetItemString(dict, "__pyx_vtable__"); + if (!pycobj) + goto bad; + *(void **)vtabptr = PyCObject_AsVoidPtr(pycobj); + if (!*(void **)vtabptr) + goto bad; + result = 0; + goto done; + +bad: + result = -1; +done: + Py_XDECREF(pycobj); + return result; +} +""" + 
+#------------------------------------------------------------------------------------ + +init_intern_tab_utility_code = \ +""" +static int __Pyx_InternStrings(__Pyx_InternTabEntry *t) { + while (t->p) { + *t->p = PyString_InternFromString(t->s); + if (!*t->p) + return -1; + ++t; + } + return 0; +} +"""; + +#------------------------------------------------------------------------------------ + +init_string_tab_utility_code = \ +""" +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { + while (t->p) { + *t->p = PyString_FromStringAndSize(t->s, t->n - 1); + if (!*t->p) + return -1; + ++t; + } + return 0; +} +"""; + +#------------------------------------------------------------------------------------ Added: lxml/pyrex/Pyrex/Compiler/Nodes.pyc ============================================================================== Binary file. No diff available. Added: lxml/pyrex/Pyrex/Compiler/Options.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Compiler/Options.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,5 @@ +# +# Pyrex - Compilation-wide options +# + +intern_names = 1 # Intern global variable and attribute names Added: lxml/pyrex/Pyrex/Compiler/Options.pyc ============================================================================== Binary file. No diff available. 
Added: lxml/pyrex/Pyrex/Compiler/Parsing.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Compiler/Parsing.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,1813 @@ +# +# Pyrex Parser +# + +import os, re +from string import join, replace +from types import ListType, TupleType +from Scanning import PyrexScanner +import Nodes +import ExprNodes +from Errors import error, InternalError + +def p_ident(s, message = "Expected an identifier"): + if s.sy == 'IDENT': + name = s.systring + s.next() + return name + else: + s.error(message) + +def p_ident_list(s): + names = [] + while s.sy == 'IDENT': + names.append(s.systring) + s.next() + if s.sy <> ',': + break + s.next() + return names + +#------------------------------------------ +# +# Expressions +# +#------------------------------------------ + +def p_binop_expr(s, ops, p_sub_expr): + #print "p_binop_expr:", ops, p_sub_expr ### + n1 = p_sub_expr(s) + #print "p_binop_expr(%s):" % p_sub_expr, s.sy ### + while s.sy in ops: + op = s.sy + pos = s.position() + s.next() + n2 = p_sub_expr(s) + n1 = ExprNodes.binop_node(pos, op, n1, n2) + return n1 + +#test: and_test ('or' and_test)* | lambdef + +def p_simple_expr(s): + #return p_binop_expr(s, ('or',), p_and_test) + return p_rassoc_binop_expr(s, ('or',), p_and_test) + +def p_rassoc_binop_expr(s, ops, p_subexpr): + n1 = p_subexpr(s) + if s.sy in ops: + pos = s.position() + op = s.sy + s.next() + n2 = p_rassoc_binop_expr(s, ops, p_subexpr) + n1 = ExprNodes.binop_node(pos, op, n1, n2) + return n1 + +#and_test: not_test ('and' not_test)* + +def p_and_test(s): + #return p_binop_expr(s, ('and',), p_not_test) + return p_rassoc_binop_expr(s, ('and',), p_not_test) + +#not_test: 'not' not_test | comparison + +def p_not_test(s): + if s.sy == 'not': + pos = s.position() + s.next() + return ExprNodes.NotNode(pos, operand = p_not_test(s)) + else: + return p_comparison(s) + +#comparison: expr (comp_op expr)* +#comp_op: 
'<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' + +def p_comparison(s): + n1 = p_bit_expr(s) + if s.sy in comparison_ops: + pos = s.position() + op = p_cmp_op(s) + n2 = p_bit_expr(s) + n1 = ExprNodes.PrimaryCmpNode(pos, + operator = op, operand1 = n1, operand2 = n2) + if s.sy in comparison_ops: + n1.cascade = p_cascaded_cmp(s) + return n1 + +def p_cascaded_cmp(s): + pos = s.position() + op = p_cmp_op(s) + n2 = p_bit_expr(s) + result = ExprNodes.CascadedCmpNode(pos, + operator = op, operand2 = n2) + if s.sy in comparison_ops: + result.cascade = p_cascaded_cmp(s) + return result + +def p_cmp_op(s): + if s.sy == 'not': + s.next() + s.expect('in') + op = 'not_in' + elif s.sy == 'is': + s.next() + if s.sy == 'not': + s.next() + op = 'is_not' + else: + op = 'is' + else: + op = s.sy + s.next() + if op == '<>': + op = '!=' + return op + +comparison_ops = ( + '<', '>', '==', '>=', '<=', '<>', '!=', + 'in', 'is', 'not' +) + +#expr: xor_expr ('|' xor_expr)* + +def p_bit_expr(s): + return p_binop_expr(s, ('|',), p_xor_expr) + +#xor_expr: and_expr ('^' and_expr)* + +def p_xor_expr(s): + return p_binop_expr(s, ('^',), p_and_expr) + +#and_expr: shift_expr ('&' shift_expr)* + +def p_and_expr(s): + return p_binop_expr(s, ('&',), p_shift_expr) + +#shift_expr: arith_expr (('<<'|'>>') arith_expr)* + +def p_shift_expr(s): + return p_binop_expr(s, ('<<', '>>'), p_arith_expr) + +#arith_expr: term (('+'|'-') term)* + +def p_arith_expr(s): + return p_binop_expr(s, ('+', '-'), p_term) + +#term: factor (('*'|'/'|'%') factor)* + +def p_term(s): + return p_binop_expr(s, ('*', '/', '%'), p_factor) + +#factor: ('+'|'-'|'~'|'&'|typecast|sizeof) factor | power + +def p_factor(s): + sy = s.sy + if sy in ('+', '-', '~'): + op = s.sy + pos = s.position() + s.next() + return ExprNodes.unop_node(pos, op, p_factor(s)) + elif sy == '&': + pos = s.position() + s.next() + arg = p_factor(s) + return ExprNodes.AmpersandNode(pos, operand = arg) + elif sy == "<": + return p_typecast(s) + elif 
sy == 'IDENT' and s.systring == "sizeof": + return p_sizeof(s) + else: + return p_power(s) + +def p_typecast(s): + # s.sy == "<" + pos = s.position() + s.next() + base_type = p_c_base_type(s) + declarator = p_c_declarator(s, empty = 1) + s.expect(">") + operand = p_factor(s) + return ExprNodes.TypecastNode(pos, + base_type = base_type, + declarator = declarator, + operand = operand) + +def p_sizeof(s): + # s.sy == ident "sizeof" + pos = s.position() + s.next() + s.expect('(') + if looking_at_type(s): + base_type = p_c_base_type(s) + declarator = p_c_declarator(s, empty = 1) + node = ExprNodes.SizeofTypeNode(pos, + base_type = base_type, declarator = declarator) + else: + operand = p_simple_expr(s) + node = ExprNodes.SizeofVarNode(pos, operand = operand) + s.expect(')') + return node + +#power: atom trailer* ('**' factor)* + +def p_power(s): + n1 = p_atom(s) + while s.sy in ('(', '[', '.'): + n1 = p_trailer(s, n1) + if s.sy == '**': + pos = s.position() + s.next() + n2 = p_factor(s) + n1 = ExprNodes.binop_node(pos, '**', n1, n2) + return n1 + +#trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME + +def p_trailer(s, node1): + pos = s.position() + if s.sy == '(': + return p_call(s, node1) + elif s.sy == '[': + return p_index(s, node1) + else: # s.sy == '.' 
+ s.next() + name = p_ident(s) + return ExprNodes.AttributeNode(pos, + obj = node1, attribute = name) + +# arglist: argument (',' argument)* [','] +# argument: [test '='] test # Really [keyword '='] test + +def p_call(s, function): + # s.sy == '(' + pos = s.position() + s.next() + positional_args = [] + keyword_args = [] + star_arg = None + starstar_arg = None + while s.sy not in ('*', '**', ')'): + arg = p_simple_expr(s) + if s.sy == '=': + s.next() + if not arg.is_name: + s.error("Expected an identifier before '='", + pos = arg.pos) + keyword = ExprNodes.StringNode(arg.pos, + value = arg.name) + arg = p_simple_expr(s) + keyword_args.append((keyword, arg)) + else: + if keyword_args: + s.error("Non-keyword arg following keyword arg", + pos = arg.pos) + positional_args.append(arg) + if s.sy <> ',': + break + s.next() + if s.sy == '*': + s.next() + star_arg = p_simple_expr(s) + if s.sy == ',': + s.next() + if s.sy == '**': + s.next() + starstar_arg = p_simple_expr(s) + if s.sy == ',': + s.next() + s.expect(')') + if not (keyword_args or star_arg or starstar_arg): + return ExprNodes.SimpleCallNode(pos, + function = function, + args = positional_args) + else: + arg_tuple = None + keyword_dict = None + if positional_args or not star_arg: + arg_tuple = ExprNodes.TupleNode(pos, + args = positional_args) + if star_arg: + star_arg_tuple = ExprNodes.AsTupleNode(pos, arg = star_arg) + if arg_tuple: + arg_tuple = ExprNodes.binop_node(pos, + operator = '+', operand1 = arg_tuple, + operand2 = star_arg_tuple) + else: + arg_tuple = star_arg_tuple + if keyword_args: + keyword_dict = ExprNodes.DictNode(pos, + key_value_pairs = keyword_args) + return ExprNodes.GeneralCallNode(pos, + function = function, + positional_args = arg_tuple, + keyword_args = keyword_dict, + starstar_arg = starstar_arg) + +#lambdef: 'lambda' [varargslist] ':' test + +#subscriptlist: subscript (',' subscript)* [','] + +def p_index(s, base): + # s.sy == '[' + pos = s.position() + s.next() + subscripts = 
p_subscript_list(s) + if len(subscripts) == 1 and len(subscripts[0]) == 2: + start, stop = subscripts[0] + result = ExprNodes.SliceIndexNode(pos, + base = base, start = start, stop = stop) + else: + indexes = make_slice_nodes(pos, subscripts) + if len(indexes) == 1: + index = indexes[0] + else: + index = ExprNodes.TupleNode(pos, args = indexes) + result = ExprNodes.IndexNode(pos, + base = base, index = index) + s.expect(']') + return result + +def p_subscript_list(s): + items = [p_subscript(s)] + while s.sy == ',': + s.next() + if s.sy == ']': + break + items.append(p_subscript(s)) + return items + +#subscript: '.' '.' '.' | test | [test] ':' [test] [':' [test]] + +def p_subscript(s): + # Parse a subscript and return a list of + # 1, 2 or 3 ExprNodes, depending on how + # many slice elements were encountered. + pos = s.position() + if s.sy == '.': + expect_ellipsis(s) + return [ExprNodes.EllipsisNode(pos)] + else: + start = p_slice_element(s, (':',)) + if s.sy <> ':': + return [start] + s.next() + stop = p_slice_element(s, (':', ',', ']')) + if s.sy <> ':': + return [start, stop] + s.next() + step = p_slice_element(s, (':', ',', ']')) + return [start, stop, step] + +def p_slice_element(s, follow_set): + # Simple expression which may be missing iff + # it is followed by something in follow_set. + if s.sy not in follow_set: + return p_simple_expr(s) + else: + return None + +def expect_ellipsis(s): + s.expect('.') + s.expect('.') + s.expect('.') + +def make_slice_nodes(pos, subscripts): + # Convert a list of subscripts as returned + # by p_subscript_list into a list of ExprNodes, + # creating SliceNodes for elements with 2 or + # more components. 
+ result = [] + for subscript in subscripts: + if len(subscript) == 1: + result.append(subscript[0]) + else: + result.append(make_slice_node(pos, *subscript)) + return result + +def make_slice_node(pos, start, stop = None, step = None): + if not start: + start = ExprNodes.NoneNode(pos) + if not stop: + stop = ExprNodes.NoneNode(pos) + if not step: + step = ExprNodes.NoneNode(pos) + return ExprNodes.SliceNode(pos, + start = start, stop = stop, step = step) + +#atom: '(' [testlist] ')' | '[' [listmaker] ']' | '{' [dictmaker] '}' | '`' testlist '`' | NAME | NUMBER | STRING+ + +def p_atom(s): + pos = s.position() + sy = s.sy + if sy == '(': + s.next() + if s.sy == ')': + result = ExprNodes.TupleNode(pos, args = []) + else: + result = p_expr(s) + s.expect(')') + return result + elif sy == '[': + return p_list_maker(s) + elif sy == '{': + return p_dict_maker(s) + elif sy == '`': + return p_backquote_expr(s) + elif sy == 'INT': + digits = s.systring + if digits[:2] == "0x": + value = long(digits[2:], 16) + elif digits[:1] == "0": + value = int(digits, 8) + else: + value = int(s.systring) + s.next() + return ExprNodes.IntNode(pos, value = value) + elif sy == 'FLOAT': + #value = float(s.systring) + value = s.systring + s.next() + return ExprNodes.FloatNode(pos, value = value) + elif sy == 'IMAG': + #value = float(s.systring[:-1]) + value = s.systring[:-1] + s.next() + return ExprNodes.ImagNode(pos, value = value) + elif sy == 'STRING' or sy == 'BEGIN_STRING': + kind, value = p_cat_string_literal(s) + if kind == 'c': + return ExprNodes.CharNode(pos, value = value) + else: + return ExprNodes.StringNode(pos, value = value) + elif sy == 'IDENT': + name = s.systring + s.next() + if name == "None": + return ExprNodes.NoneNode(pos) + else: + return ExprNodes.NameNode(pos, name=name) + elif sy == 'NULL': + s.next() + return ExprNodes.NullNode(pos) + else: + s.error("Expected an identifier or literal") + +def p_cat_string_literal(s): + # A sequence of one or more adjacent string 
literals. + # Returns (kind, value) where kind in ('', 'c', 'r') + kind, value = p_string_literal(s) + if kind <> 'c': + strings = [value] + while s.sy == 'STRING' or s.sy == 'BEGIN_STRING': + next_kind, next_value = p_string_literal(s) + if next_kind == 'c': + self.error( + "Cannot concatenate char literal with another string or char literal") + strings.append(next_value) + value = ''.join(strings) + return kind, value + +def p_opt_string_literal(s): + if s.sy == 'STRING' or s.sy == 'BEGIN_STRING': + return p_string_literal(s) + else: + return None + +def p_string_literal(s): + # A single string or char literal. + # Returns (kind, value) where kind in ('', 'c', 'r') + if s.sy == 'STRING': + value = unquote(s.systring) + s.next() + return value + # s.sy == 'BEGIN_STRING' + pos = s.position() + #is_raw = s.systring[:1].lower() == "r" + kind = s.systring[:1].lower() + if kind not in "cr": + kind = '' + chars = [] + while 1: + s.next() + sy = s.sy + #print "p_string_literal: sy =", sy, repr(s.systring) ### + if sy == 'CHARS': + systr = s.systring + if len(systr) == 1 and systr in "'\"\n": + chars.append('\\') + chars.append(systr) + elif sy == 'ESCAPE': + systr = s.systring + if kind == 'r': + if systr == '\\\n': + chars.append(r'\\\n') + else: + chars.append('\\' + systr) + else: + c = systr[1] + if c in "'\"\\abfnrtv01234567": + chars.append(systr) + elif c == 'x': + chars.append('\\x0' + systr[2:]) + elif c == '\n': + pass + else: + chars.append(systr[1:]) + elif sy == 'NEWLINE': + chars.append(r'\n') + elif sy == 'END_STRING': + break + elif sy == 'EOF': + s.error("Unclosed string literal", pos = pos) + else: + s.error( + "Unexpected token %r:%r in string literal" % + (sy, s.systring)) + s.next() + value = join(chars, '') + #print "p_string_literal: value =", repr(value) ### + return kind, value + +def unquote(s): + is_raw = 0 + if s[:1].lower() == "r": + is_raw = 1 + s = s[1:] + q = s[:3] + if q == '"""' or q == "'''": + s = s[3:-3] + else: + s = s[1:-1] + if 
is_raw: + s = s.replace('\\', '\\\\') + s = s.replace('\n', '\\\n') + else: + # Split into double quotes, newlines, escape sequences + # and spans of regular chars + l1 = re.split(r'((?:\\[0-7]{1,3})|(?:\\x[0-9A-Fa-f]{2})|(?:\\.)|(?:\\\n)|(?:\n)|")', s) + print "unquote: l1 =", l1 ### + l2 = [] + for item in l1: + if item == '"' or item == '\n': + l2.append('\\' + item) + elif item == '\\\n': + pass + elif item[:1] == '\\': + if len(item) == 2: + if item[1] in '"\\abfnrtv': + l2.append(item) + else: + l2.append(item[1]) + elif item[1:2] == 'x': + l2.append('\\x0' + item[2:]) + else: + # octal escape + l2.append(item) + else: + l2.append(item) + s = "".join(l2) + return s + +def p_list_maker(s): + # s.sy == '[' + pos = s.position() + s.next() + exprs = p_simple_expr_list(s) + s.expect(']') + return ExprNodes.ListNode(pos, args = exprs) + +#dictmaker: test ':' test (',' test ':' test)* [','] + +def p_dict_maker(s): + # s.sy == '{' + pos = s.position() + s.next() + items = [] + while s.sy <> '}': + key = p_simple_expr(s) + s.expect(':') + value = p_simple_expr(s) + items.append((key, value)) + if s.sy <> ',': + break + s.next() + s.expect('}') + return ExprNodes.DictNode(pos, key_value_pairs = items) + +def p_backquote_expr(s): + # s.sy == '`' + pos = s.position() + s.next() + arg = p_expr(s) + s.expect('`') + return ExprNodes.BackquoteNode(pos, arg = arg) + +#testlist: test (',' test)* [','] + +def p_simple_expr_list(s): + exprs = [] + while s.sy not in expr_terminators: + exprs.append(p_simple_expr(s)) + if s.sy <> ',': + break + s.next() + return exprs + +def p_expr(s): + pos = s.position() + expr = p_simple_expr(s) + if s.sy == ',': + s.next() + exprs = [expr] + p_simple_expr_list(s) + return ExprNodes.TupleNode(pos, args = exprs) + else: + return expr + +expr_terminators = (')', ']', '}', ':', '=', 'NEWLINE') + +#------------------------------------------------------- +# +# Statements +# +#------------------------------------------------------- + +def 
p_global_statement(s): + # assume s.sy == 'global' + pos = s.position() + s.next() + names = p_ident_list(s) + return Nodes.GlobalNode(pos, names = names) + +def p_expression_or_assignment(s): + expr_list = [p_expr(s)] + while s.sy == '=': + s.next() + expr_list.append(p_expr(s)) + if len(expr_list) == 1: + expr = expr_list[0] + return Nodes.ExprStatNode(expr.pos, expr = expr) + else: + expr_list_list = [] + flatten_parallel_assignments(expr_list, expr_list_list) + nodes = [] + for expr_list in expr_list_list: + lhs_list = expr_list[:-1] + rhs = expr_list[-1] + if len(lhs_list) == 1: + node = Nodes.SingleAssignmentNode(rhs.pos, + lhs = lhs_list[0], rhs = rhs) + else: + node = Nodes.CascadedAssignmentNode(rhs.pos, + lhs_list = lhs_list, rhs = rhs) + nodes.append(node) + if len(nodes) == 1: + return nodes[0] + else: + #return Nodes.StatListNode(nodes[0].pos, stats = nodes) + return Nodes.ParallelAssignmentNode(nodes[0].pos, stats = nodes) + +def flatten_parallel_assignments(input, output): + # The input is a list of expression nodes, representing + # the LHSs and RHS of one (possibly cascaded) assignment + # statement. If they are all sequence constructors with + # the same number of arguments, rearranges them into a + # list of equivalent assignments between the individual + # elements. This transformation is applied recursively. + size = find_parallel_assignment_size(input) + if size >= 0: + for i in range(size): + new_exprs = [expr.args[i] for expr in input] + flatten_parallel_assignments(new_exprs, output) + else: + output.append(input) + +def find_parallel_assignment_size(input): + # The input is a list of expression nodes. If + # they are all sequence constructors with the same number + # of arguments, return that number, else return -1. + # Produces an error message if they are all sequence + # constructors but not all the same size. 
+ for expr in input: + if not expr.is_sequence_constructor: + return -1 + rhs = input[-1] + rhs_size = len(rhs.args) + for lhs in input[:-1]: + lhs_size = len(lhs.args) + if lhs_size <> rhs_size: + error(lhs.pos, "Unpacking sequence of wrong size (expected %d, got %d)" + % (lhs_size, rhs_size)) + return -1 + return rhs_size + +def p_print_statement(s): + # s.sy == 'print' + pos = s.position() + s.next() + if s.sy == '>>': + s.error("'print >>' not yet implemented") + args = [] + ewc = 0 + if s.sy not in ('NEWLINE', 'EOF'): + args.append(p_simple_expr(s)) + while s.sy == ',': + s.next() + if s.sy in ('NEWLINE', 'EOF'): + ewc = 1 + break + args.append(p_simple_expr(s)) + return Nodes.PrintStatNode(pos, + args = args, ends_with_comma = ewc) + +def p_del_statement(s): + # s.sy == 'del' + pos = s.position() + s.next() + args = p_simple_expr_list(s) + return Nodes.DelStatNode(pos, args = args) + +def p_pass_statement(s, with_newline = 0): + pos = s.position() + s.expect('pass') + if with_newline: + s.expect_newline("Expected a newline") + return Nodes.PassStatNode(pos) + +def p_break_statement(s): + # s.sy == 'break' + pos = s.position() + s.next() + return Nodes.BreakStatNode(pos) + +def p_continue_statement(s): + # s.sy == 'continue' + pos = s.position() + s.next() + return Nodes.ContinueStatNode(pos) + +def p_return_statement(s): + # s.sy == 'return' + pos = s.position() + s.next() + if s.sy not in statement_terminators: + value = p_expr(s) + else: + value = None + return Nodes.ReturnStatNode(pos, value = value) + +def p_raise_statement(s): + # s.sy == 'raise' + pos = s.position() + s.next() + exc_type = None + exc_value = None + exc_tb = None + if s.sy not in statement_terminators: + exc_type = p_simple_expr(s) + if s.sy == ',': + s.next() + exc_value = p_simple_expr(s) + if s.sy == ',': + s.next() + exc_tb = p_simple_expr(s) + return Nodes.RaiseStatNode(pos, + exc_type = exc_type, + exc_value = exc_value, + exc_tb = exc_tb) + +def p_import_statement(s): + # s.sy in 
('import', 'cimport') + pos = s.position() + kind = s.sy + s.next() + items = [p_dotted_name(s, as_allowed = 1)] + while s.sy == ',': + s.next() + items.append(p_dotted_name(s, as_allowed = 1)) + stats = [] + for pos, target_name, dotted_name, as_name in items: + if kind == 'cimport': + stat = Nodes.CImportStatNode(pos, + module_name = dotted_name, + as_name = as_name) + else: + stat = Nodes.SingleAssignmentNode(pos, + lhs = ExprNodes.NameNode(pos, + name = as_name or target_name), + rhs = ExprNodes.ImportNode(pos, + module_name = ExprNodes.StringNode(pos, + value = dotted_name), + name_list = None)) + stats.append(stat) + return Nodes.StatListNode(pos, stats = stats) + +def p_from_import_statement(s): + # s.sy == 'from' + pos = s.position() + s.next() + (dotted_name_pos, _, dotted_name, _) = \ + p_dotted_name(s, as_allowed = 0) + if s.sy in ('import', 'cimport'): + kind = s.sy + s.next() + else: + s.error("Expected 'import' or 'cimport'") + if s.sy == '*': + s.error("'import *' not supported") + imported_names = [p_imported_name(s)] + while s.sy == ',': + s.next() + imported_names.append(p_imported_name(s)) + if kind == 'cimport': + for (name_pos, name, as_name) in imported_names: + local_name = as_name or name + s.add_type_name(local_name) + return Nodes.FromCImportStatNode(pos, + module_name = dotted_name, + imported_names = imported_names) + else: + imported_name_strings = [] + items = [] + for (name_pos, name, as_name) in imported_names: + imported_name_strings.append( + ExprNodes.StringNode(name_pos, value = name)) + items.append( + (name, + ExprNodes.NameNode(name_pos, + name = as_name or name))) + import_list = ExprNodes.ListNode( + imported_names[0][0], args = imported_name_strings) + return Nodes.FromImportStatNode(pos, + module = ExprNodes.ImportNode(dotted_name_pos, + module_name = ExprNodes.StringNode(dotted_name_pos, + value = dotted_name), + name_list = import_list), + items = items) + +def p_imported_name(s): + pos = s.position() + name = p_ident(s) 
+ as_name = p_as_name(s) + return (pos, name, as_name) + +def p_dotted_name(s, as_allowed): + pos = s.position() + target_name = p_ident(s) + as_name = None + names = [target_name] + while s.sy == '.': + s.next() + names.append(p_ident(s)) + if as_allowed: + as_name = p_as_name(s) + else: + as_name = None + return (pos, target_name, join(names, "."), as_name) + +def p_as_name(s): + if s.sy == 'IDENT' and s.systring == 'as': + s.next() + return p_ident(s) + else: + return None + +def p_assert_statement(s): + # s.sy == 'assert' + pos = s.position() + s.next() + cond = p_simple_expr(s) + if s.sy == ',': + s.next() + value = p_simple_expr(s) + else: + value = None + return Nodes.AssertStatNode(pos, cond = cond, value = value) + +statement_terminators = (';', 'NEWLINE', 'EOF') + +def p_if_statement(s): + # s.sy == 'if' + pos = s.position() + s.next() + if_clauses = [p_if_clause(s)] + while s.sy == 'elif': + s.next() + if_clauses.append(p_if_clause(s)) + else_clause = p_else_clause(s) + return Nodes.IfStatNode(pos, + if_clauses = if_clauses, else_clause = else_clause) + +def p_if_clause(s): + pos = s.position() + test = p_simple_expr(s) + body = p_suite(s) + return Nodes.IfClauseNode(pos, + condition = test, body = body) + +def p_else_clause(s): + if s.sy == 'else': + s.next() + return p_suite(s) + else: + return None + +def p_while_statement(s): + # s.sy == 'while' + pos = s.position() + s.next() + test = p_simple_expr(s) + body = p_suite(s) + else_clause = p_else_clause(s) + return Nodes.WhileStatNode(pos, + condition = test, body = body, + else_clause = else_clause) + +def p_for_statement(s): + # s.sy == 'for' + pos = s.position() + s.next() + target = p_for_target(s) + if s.sy == 'in': + s.next() + iterator = p_for_iterator(s) + body = p_suite(s) + else_clause = p_else_clause(s) + return Nodes.ForInStatNode(pos, + target = target, + iterator = iterator, + body = body, + else_clause = else_clause) + elif s.sy == 'from': + s.next() + bound1 = p_bit_expr(s) + rel1 = 
p_for_from_relation(s) + name2_pos = s.position() + name2 = p_ident(s) + rel2_pos = s.position() + rel2 = p_for_from_relation(s) + bound2 = p_bit_expr(s) + if not target.is_name: + error(target.pos, + "Target of for-from statement must be a variable name") + elif name2 <> target.name: + error(name2_pos, + "Variable name in for-from range does not match target") + if rel1[0] <> rel2[0]: + error(rel2_pos, + "Relation directions in for-from do not match") + body = p_suite(s) + else_clause = p_else_clause(s) + return Nodes.ForFromStatNode(pos, + target = target, + bound1 = bound1, + relation1 = rel1, + relation2 = rel2, + bound2 = bound2, + body = body, + else_clause = else_clause) + +def p_for_from_relation(s): + if s.sy in inequality_relations: + op = s.sy + s.next() + return op + else: + s.error("Expected one of '<', '<=', '>' '>='") + +inequality_relations = ('<', '<=', '>', '>=') + +def p_for_target(s): + pos = s.position() + expr = p_bit_expr(s) + if s.sy == ',': + s.next() + exprs = [expr] + while s.sy <> 'in': + exprs.append(p_bit_expr(s)) + if s.sy <> ',': + break + s.next() + return ExprNodes.TupleNode(pos, args = exprs) + else: + return expr + +def p_for_iterator(s): + pos = s.position() + expr = p_expr(s) + return ExprNodes.IteratorNode(pos, sequence = expr) + +def p_try_statement(s): + # s.sy == 'try' + pos = s.position() + s.next() + body = p_suite(s) + except_clauses = [] + else_clause = None + if s.sy in ('except', 'else'): + while s.sy == 'except': + except_clauses.append(p_except_clause(s)) + if s.sy == 'else': + s.next() + else_clause = p_suite(s) + return Nodes.TryExceptStatNode(pos, + body = body, except_clauses = except_clauses, + else_clause = else_clause) + elif s.sy == 'finally': + s.next() + finally_clause = p_suite(s) + return Nodes.TryFinallyStatNode(pos, + body = body, finally_clause = finally_clause) + else: + s.error("Expected 'except' or 'finally'") + +def p_except_clause(s): + # s.sy == 'except' + pos = s.position() + s.next() + 
exc_type = None + exc_value = None + if s.sy <> ':': + exc_type = p_simple_expr(s) + if s.sy == ',': + s.next() + exc_value = p_simple_expr(s) + body = p_suite(s) + return Nodes.ExceptClauseNode(pos, + pattern = exc_type, target = exc_value, body = body) + +def p_include_statement(s, level): + pos = s.position() + s.next() # 'include' + _, include_file_name = p_string_literal(s) + s.expect_newline("Syntax error in include statement") + include_file_path = s.context.find_include_file(include_file_name, pos) + if include_file_path: + f = open(include_file_path, "r") + s2 = PyrexScanner(f, include_file_path, s) + try: + tree = p_statement_list(s2, level) + finally: + f.close() + return tree + else: + return None + +def p_simple_statement(s): + #print "p_simple_statement:", s.sy, s.systring ### + if s.sy == 'global': + node = p_global_statement(s) + elif s.sy == 'print': + node = p_print_statement(s) + elif s.sy == 'del': + node = p_del_statement(s) + elif s.sy == 'break': + node = p_break_statement(s) + elif s.sy == 'continue': + node = p_continue_statement(s) + elif s.sy == 'return': + node = p_return_statement(s) + elif s.sy == 'raise': + node = p_raise_statement(s) + elif s.sy in ('import', 'cimport'): + node = p_import_statement(s) + elif s.sy == 'from': + node = p_from_import_statement(s) + elif s.sy == 'assert': + node = p_assert_statement(s) + elif s.sy == 'pass': + node = p_pass_statement(s) + else: + node = p_expression_or_assignment(s) + return node + +def p_simple_statement_list(s): + # Parse a series of simple statements on one line + # separated by semicolons. 
+ stat = p_simple_statement(s) + if s.sy == ';': + stats = [stat] + while s.sy == ';': + #print "p_simple_statement_list: maybe more to follow" ### + s.next() + if s.sy in ('NEWLINE', 'EOF'): + break + stats.append(p_simple_statement(s)) + stat = Nodes.StatListNode(stats[0].pos, stats = stats) + s.expect_newline("Syntax error in simple statement list") + return stat + +def p_statement(s, level, cdef_flag = 0, visibility = 'private'): + #print "p_statement:", s.sy, s.systring ### + if s.sy == 'ctypedef': + if level not in ('module', 'module_pxd'): + s.error("ctypedef statement not allowed here") + return p_ctypedef_statement(s, level, visibility) + if s.sy == 'cdef': + cdef_flag = 1 + s.next() + if cdef_flag: + if level not in ('module', 'module_pxd', 'function', 'c_class', 'c_class_pxd'): + s.error('cdef statement not allowed here') + return p_cdef_statement(s, level, visibility) + elif s.sy == 'def': + if level not in ('module', 'class', 'c_class', 'property'): + s.error('def statement not allowed here') + return p_def_statement(s) + elif s.sy == 'class': + if level <> 'module': + s.error("class definition not allowed here") + return p_class_statement(s) + elif s.sy == 'include': + if level not in ('module', 'module_pxd'): + s.error("include statement not allowed here") + return p_include_statement(s, level) + elif level == 'c_class' and s.sy == 'IDENT' and s.systring == 'property': + return p_property_decl(s) + else: + if level in ('c_class', 'c_class_pxd'): + if s.sy == 'pass': + return p_pass_statement(s, with_newline = 1) + else: + s.error("Executable statement not allowed here") + if s.sy == 'if': + return p_if_statement(s) + elif s.sy == 'while': + return p_while_statement(s) + elif s.sy == 'for': + return p_for_statement(s) + elif s.sy == 'try': + return p_try_statement(s) + else: + return p_simple_statement_list(s) + +def p_statement_list(s, level, + cdef_flag = 0, visibility = 'private'): + # Parse a series of statements separated by newlines. 
+ #print "p_statement_list:", s.sy, s.systring ### + pos = s.position() + stats = [] + while s.sy not in ('DEDENT', 'EOF'): + stats.append(p_statement(s, level, + cdef_flag = cdef_flag, visibility = visibility)) + return Nodes.StatListNode(pos, stats = stats) + +def p_suite(s, level = 'other', cdef_flag = 0, + visibility = 'private', with_doc = 0): + pos = s.position() + s.expect(':') + doc = None + stmts = [] + if s.sy == 'NEWLINE': + s.next() + s.expect_indent() + if with_doc: + doc = p_doc_string(s) + body = p_statement_list(s, + level = level, + cdef_flag = cdef_flag, + visibility = visibility) + s.expect_dedent() + else: + if level in ('module', 'class', 'function', 'other'): + body = p_simple_statement_list(s) + else: + body = p_pass_statement(s) + s.expect_newline("Syntax error in declarations") + if with_doc: + return doc, body + else: + return body + +def p_c_base_type(s, self_flag = 0): + # If self_flag is true, this is the base type for the + # self argument of a C method of an extension type. 
+ if s.sy == '(': + return p_c_complex_base_type(s) + else: + return p_c_simple_base_type(s, self_flag) + +def p_c_complex_base_type(s): + # s.sy == '(' + pos = s.position() + s.next() + base_type = p_c_base_type(s) + declarator = p_c_declarator(s, empty = 1) + s.expect(')') + return Nodes.CComplexBaseTypeNode(pos, + base_type = base_type, declarator = declarator) + +def p_c_simple_base_type(s, self_flag): + #print "p_c_simple_base_type: self_flag =", self_flag + is_basic = 0 + signed = 1 + longness = 0 + pos = s.position() + module_path = [] + if looking_at_base_type(s): + #print "p_c_simple_base_type: looking_at_base_type at", s.position() + is_basic = 1 + #signed = p_signed_or_unsigned(s) + #longness = p_short_or_long(s) + signed, longness = p_sign_and_longness(s) + if s.sy == 'IDENT' and s.systring in basic_c_type_names: + name = s.systring + s.next() + else: + name = 'int' + elif s.looking_at_type_name() or looking_at_dotted_name(s): + #print "p_c_simple_base_type: looking_at_type_name at", s.position() + name = s.systring + s.next() + while s.sy == '.': + module_path.append(name) + s.next() + name = p_ident(s) + else: + #print "p_c_simple_base_type: not looking at type at", s.position() + name = None + return Nodes.CSimpleBaseTypeNode(pos, + name = name, module_path = module_path, + is_basic_c_type = is_basic, signed = signed, + longness = longness, is_self_arg = self_flag) + +def looking_at_type(s): + return looking_at_base_type(s) or s.looking_at_type_name() + +def looking_at_base_type(s): + #print "looking_at_base_type?", s.sy, s.systring, s.position() + return s.sy == 'IDENT' and s.systring in base_type_start_words + +def looking_at_dotted_name(s): + if s.sy == 'IDENT': + name = s.systring + s.next() + result = s.sy == '.' 
+ s.put_back('IDENT', name) + return result + else: + return 0 + +#base_type_start_words = ( +# "char", "short", "int", "long", "float", "double", +# "void", "signed", "unsigned" +#) + +basic_c_type_names = ("void", "char", "int", "float", "double") + +sign_and_longness_words = ("short", "long", "signed", "unsigned") + +base_type_start_words = basic_c_type_names + sign_and_longness_words + +def p_sign_and_longness(s): + signed = 1 + longness = 0 + while s.sy == 'IDENT' and s.systring in sign_and_longness_words: + if s.systring == 'unsigned': + signed = 0 + elif s.systring == 'short': + longness = -1 + elif s.systring == 'long': + longness += 1 + s.next() + return signed, longness + +#def p_signed_or_unsigned(s): +# signed = 1 +# if s.sy == 'IDENT': +# if s.systring == 'signed': +# s.next() +# elif s.systring == 'unsigned': +# signed = 0 +# s.next() +# return signed +# +#def p_short_or_long(s): +# longness = 0 +# if s.sy == 'IDENT' and s.systring == 'short': +# longness = -1 +# s.next() +# else: +# while s.sy == 'IDENT' and s.systring == 'long': +# longness += 1 +# s.next() +# return longness + +def p_opt_cname(s): + literal = p_opt_string_literal(s) + if literal: + _, cname = literal + else: + cname = None + return cname + +def p_c_declarator(s, empty = 0, is_type = 0, cmethod_flag = 0): + # If empty is true, the declarator must be + # empty, otherwise we don't care. + # If cmethod_flag is true, then if this declarator declares + # a function, it's a C method of an extension type. 
+ pos = s.position() + if s.sy == '*': + s.next() + base = p_c_declarator(s, empty, is_type, cmethod_flag) + result = Nodes.CPtrDeclaratorNode(pos, + base = base) + elif s.sy == '**': # scanner returns this as a single token + s.next() + base = p_c_declarator(s, empty, is_type, cmethod_flag) + result = Nodes.CPtrDeclaratorNode(pos, + base = Nodes.CPtrDeclaratorNode(pos, + base = base)) + else: + if s.sy == '(': + s.next() + result = p_c_declarator(s, empty, is_type, cmethod_flag) + s.expect(')') + else: + if s.sy == 'IDENT': + name = s.systring + if is_type: + s.add_type_name(name) + if empty: + error(s.position(), "Declarator should be empty") + s.next() + cname = p_opt_cname(s) + else: + name = "" + cname = None + result = Nodes.CNameDeclaratorNode(pos, + name = name, cname = cname) + while s.sy in ('[', '('): + if s.sy == '[': + s.next() + if s.sy <> ']': + dim = p_expr(s) + else: + dim = None + s.expect(']') + result = Nodes.CArrayDeclaratorNode(pos, + base = result, dimension = dim) + else: # sy == '(' + s.next() + args = p_c_arg_list(s, in_pyfunc = 0, cmethod_flag = cmethod_flag) + ellipsis = p_optional_ellipsis(s) + s.expect(')') + exc_val, exc_check = p_exception_value_clause(s) + result = Nodes.CFuncDeclaratorNode(pos, + base = result, args = args, has_varargs = ellipsis, + exception_value = exc_val, exception_check = exc_check) + cmethod_flag = 0 + return result + +def p_exception_value_clause(s): + exc_val = None + exc_check = 0 + if s.sy == 'except': + s.next() + if s.sy == '*': + exc_check = 1 + s.next() + else: + if s.sy == '?': + exc_check = 1 + s.next() + exc_val = p_exception_value(s) + return exc_val, exc_check + +def p_exception_value(s): + sign = "" + if s.sy == "-": + sign = "-" + s.next() + if s.sy in ('INT', 'FLOAT', 'NULL'): + s.systring = sign + s.systring + return p_atom(s) + else: + s.error("Exception value must be an int or float literal or NULL") + +c_arg_list_terminators = ('*', '**', '.', ')') +c_arg_list_trailers = ('.', '*', '**') + 
+def p_c_arg_list(s, in_pyfunc, cmethod_flag = 0): + args = [] + if s.sy not in c_arg_list_terminators: + args.append(p_c_arg_decl(s, in_pyfunc, cmethod_flag)) + while s.sy == ',': + s.next() + if s.sy in c_arg_list_trailers: + break + args.append(p_c_arg_decl(s, in_pyfunc)) + return args + +def p_optional_ellipsis(s): + if s.sy == '.': + expect_ellipsis(s) + return 1 + else: + return 0 + +def p_c_arg_decl(s, in_pyfunc, cmethod_flag = 0): + pos = s.position() + not_none = 0 + default = None + base_type = p_c_base_type(s, cmethod_flag) + declarator = p_c_declarator(s) + if s.sy == 'not': + s.next() + if s.sy == 'IDENT' and s.systring == 'None': + s.next() + else: + s.error("Expected 'None'") + if not in_pyfunc: + error(pos, "'not None' only allowed in Python functions") + not_none = 1 + if s.sy == '=': + s.next() + default = p_simple_expr(s) + return Nodes.CArgDeclNode(pos, + base_type = base_type, + declarator = declarator, + not_none = not_none, + default = default) + +def p_cdef_statement(s, level, visibility = 'private'): + pos = s.position() + visibility = p_visibility(s, visibility) + if visibility == 'extern' and s.sy in ('from' ,':'): + return p_cdef_extern_block(s, level, pos) + elif s.sy == 'class': + if level not in ('module', 'module_pxd'): + error(pos, "Extension type definition not allowed here") + return p_c_class_definition(s, level, pos, visibility = visibility) + elif s.sy == 'IDENT' and s.systring in struct_union_or_enum: + if level not in ('module', 'module_pxd'): + error(pos, "C struct/union/enum definition not allowed here") + if visibility == 'public': + error(pos, "Public struct/union/enum definition not implemented") + if s.systring == "enum": + return p_c_enum_definition(s, pos) + else: + return p_c_struct_or_union_definition(s, pos) + elif s.sy == 'pass': + node = p_pass_statement(s) + s.expect_newline('Expected a newline') + return node + else: + return p_c_func_or_var_declaration(s, level, pos, visibility) + +def p_cdef_extern_block(s, 
level, pos): + include_file = None + s.expect('from') + if s.sy == '*': + s.next() + else: + _, include_file = p_string_literal(s) + body = p_suite(s, level, cdef_flag = 1, visibility = 'extern') + return Nodes.CDefExternNode(pos, + include_file = include_file, + body = body) + +struct_union_or_enum = ( + "struct", "union", "enum" +) + +def p_c_enum_definition(s, pos, typedef_flag = 0): + # s.sy == ident 'enum' + s.next() + if s.sy == 'IDENT': + name = s.systring + s.next() + s.add_type_name(name) + cname = p_opt_cname(s) + else: + name = None + cname = None + items = None + s.expect(':') + items = [] + if s.sy <> 'NEWLINE': + p_c_enum_line(s, items) + else: + s.next() # 'NEWLINE' + s.expect_indent() + while s.sy not in ('DEDENT', 'EOF'): + p_c_enum_line(s, items) + s.expect_dedent() + return Nodes.CEnumDefNode(pos, name = name, cname = cname, + items = items, typedef_flag = typedef_flag) + +def p_c_enum_line(s, items): + if s.sy <> 'pass': + p_c_enum_item(s, items) + while s.sy == ',': + s.next() + if s.sy in ('NEWLINE', 'EOF'): + break + p_c_enum_item(s, items) + else: + s.next() + s.expect_newline("Syntax error in enum item list") + +def p_c_enum_item(s, items): + pos = s.position() + name = p_ident(s) + cname = p_opt_cname(s) + value = None + if s.sy == '=': + s.next() + value = p_simple_expr(s) + items.append(Nodes.CEnumDefItemNode(pos, + name = name, cname = cname, value = value)) + +def p_c_struct_or_union_definition(s, pos, typedef_flag = 0): + # s.sy == ident 'struct' or 'union' + kind = s.systring + s.next() + name = p_ident(s) + cname = p_opt_cname(s) + s.add_type_name(name) + attributes = None + if s.sy == ':': + s.next() + s.expect('NEWLINE') + s.expect_indent() + attributes = [] + while s.sy <> 'DEDENT': + if s.sy <> 'pass': + attributes.append( + p_c_func_or_var_declaration(s, level = 'other', pos = s.position())) + else: + s.next() + s.expect_newline("Expected a newline") + s.expect_dedent() + else: + s.expect_newline("Syntax error in struct or 
union definition") + return Nodes.CStructOrUnionDefNode(pos, + name = name, cname = cname, kind = kind, attributes = attributes, + typedef_flag = typedef_flag) + +def p_visibility(s, prev_visibility): + pos = s.position() + visibility = prev_visibility + if s.sy == 'IDENT' and s.systring in ('extern', 'public', 'readonly'): + visibility = s.systring + if prev_visibility <> 'private' and visibility <> prev_visibility: + s.error("Conflicting visibility options '%s' and '%s'" + % (prev_visibility, visibility)) + s.next() + return visibility + +def p_c_func_or_var_declaration(s, level, pos, visibility = 'private'): + cmethod_flag = level in ('c_class', 'c_class_pxd') + base_type = p_c_base_type(s) + declarator = p_c_declarator(s, cmethod_flag = cmethod_flag) + if s.sy == ':': + if level not in ('module', 'c_class'): + s.error("C function definition not allowed here") + suite = p_suite(s, 'function') + result = Nodes.CFuncDefNode(pos, + visibility = visibility, + base_type = base_type, + declarator = declarator, + body = suite) + else: + if level == 'module_pxd' and visibility <> 'extern': + error(pos, + "Only 'extern' C function or variable declaration allowed in .pxd file") + declarators = [declarator] + while s.sy == ',': + s.next() + declarator = p_c_declarator(s, cmethod_flag = cmethod_flag) + declarators.append(declarator) + s.expect_newline("Syntax error in C variable declaration") + result = Nodes.CVarDefNode(pos, + visibility = visibility, + base_type = base_type, + declarators = declarators) + return result + +def p_ctypedef_statement(s, level, visibility = 'private'): + # s.sy == 'ctypedef' + pos = s.position() + s.next() + visibility = p_visibility(s, visibility) + if s.sy == 'class': + return p_c_class_definition(s, level, pos, + visibility = visibility, + typedef_flag = 1) + elif s.sy == 'IDENT' and s.systring in ('struct', 'union', 'enum'): + if s.systring == 'enum': + return p_c_enum_definition(s, pos, typedef_flag = 1) + else: + return 
p_c_struct_or_union_definition(s, pos, typedef_flag = 1) + else: + base_type = p_c_base_type(s) + declarator = p_c_declarator(s, is_type = 1) + s.expect_newline("Syntax error in ctypedef statement") + return Nodes.CTypeDefNode(pos, + base_type = base_type, declarator = declarator) + +def p_def_statement(s): + # s.sy == 'def' + pos = s.position() + s.next() + name = p_ident(s) + args = [] + s.expect('('); + args = p_c_arg_list(s, in_pyfunc = 1) + star_arg = None + starstar_arg = None + if s.sy == '*': + s.next() + star_arg = p_py_arg_decl(s) + if s.sy == ',': + s.next() + if s.sy == '**': + s.next() + starstar_arg = p_py_arg_decl(s) + elif s.sy == '**': + s.next() + starstar_arg = p_py_arg_decl(s) + s.expect(')') + doc, body = p_suite(s, 'function', with_doc = 1) + return Nodes.DefNode(pos, name = name, args = args, + star_arg = star_arg, starstar_arg = starstar_arg, + doc = doc, body = body) + +def p_py_arg_decl(s): + pos = s.position() + name = p_ident(s) + return Nodes.PyArgDeclNode(pos, name = name) + +def p_class_statement(s): + # s.sy == 'class' + pos = s.position() + s.next() + class_name = p_ident(s) + if s.sy == '(': + s.next() + base_list = p_simple_expr_list(s) + s.expect(')') + else: + base_list = [] + doc, body = p_suite(s, 'class', with_doc = 1) + return Nodes.PyClassDefNode(pos, + name = class_name, + bases = ExprNodes.TupleNode(pos, args = base_list), + doc = doc, body = body) + +def p_c_class_definition(s, level, pos, + visibility = 'private', typedef_flag = 0): + # s.sy == 'class' + s.next() + module_path = [] + class_name = p_ident(s) + while s.sy == '.': + s.next() + module_path.append(class_name) + class_name = p_ident(s) + if module_path and visibility <> 'extern': + error(pos, "Qualified class name only allowed for 'extern' C class") + if module_path and s.sy == 'IDENT' and s.systring == 'as': + s.next() + as_name = p_ident(s) + else: + as_name = class_name + s.add_type_name(as_name) + objstruct_name = None + typeobj_name = None + 
base_class_module = None + base_class_name = None + if s.sy == '(': + s.next() + base_class_path = [p_ident(s)] + while s.sy == '.': + s.next() + base_class_path.append(p_ident(s)) + if s.sy == ',': + s.error("C class may only have one base class") + s.expect(')') + base_class_module = ".".join(base_class_path[:-1]) + base_class_name = base_class_path[-1] + if s.sy == '[': + if visibility not in ('public', 'extern'): + error(s.position(), "Name options only allowed for 'public' or 'extern' C class") + objstruct_name, typeobj_name = p_c_class_options(s) + if s.sy == ':': + if level == 'module_pxd': + body_level = 'c_class_pxd' + else: + body_level = 'c_class' + doc, body = p_suite(s, body_level, with_doc = 1) + else: + s.expect_newline("Syntax error in C class definition") + doc = None + body = None + if visibility == 'extern': + if not module_path: + error(pos, "Module name required for 'extern' C class") + if typeobj_name: + error(pos, "Type object name specification not allowed for 'extern' C class") + elif visibility == 'public': + if not objstruct_name: + error(pos, "Object struct name specification required for 'public' C class") + if not typeobj_name: + error(pos, "Type object name specification required for 'public' C class") + return Nodes.CClassDefNode(pos, + visibility = visibility, + typedef_flag = typedef_flag, + module_name = ".".join(module_path), + class_name = class_name, + as_name = as_name, + base_class_module = base_class_module, + base_class_name = base_class_name, + objstruct_name = objstruct_name, + typeobj_name = typeobj_name, + in_pxd = level == 'module_pxd', + doc = doc, + body = body) + +def p_c_class_options(s): + objstruct_name = None + typeobj_name = None + s.expect('[') + while 1: + if s.sy <> 'IDENT': + break + if s.systring == 'object': + s.next() + objstruct_name = p_ident(s) + elif s.systring == 'type': + s.next() + typeobj_name = p_ident(s) + if s.sy <> ',': + break + s.next() + s.expect(']', "Expected 'object' or 'type'") + 
return objstruct_name, typeobj_name + +def p_property_decl(s): + pos = s.position() + s.next() # 'property' + name = p_ident(s) + doc, body = p_suite(s, 'property', with_doc = 1) + return Nodes.PropertyNode(pos, name = name, doc = doc, body = body) + +def p_doc_string(s): + if s.sy == 'STRING' or s.sy == 'BEGIN_STRING': + _, result = p_cat_string_literal(s) + if s.sy <> 'EOF': + s.expect_newline("Syntax error in doc string") + return result + else: + return None + +def p_module(s, pxd): + s.add_type_name("object") + pos = s.position() + doc = p_doc_string(s) + if pxd: + level = 'module_pxd' + else: + level = 'module' + body = p_statement_list(s, level) + if s.sy <> 'EOF': + s.error("Syntax error in statement [%s,%s]" % ( + repr(s.sy), repr(s.systring))) + return Nodes.ModuleNode(pos, doc = doc, body = body) + +#---------------------------------------------- +# +# Debugging +# +#---------------------------------------------- + +def print_parse_tree(f, node, level, key = None): + ind = " " * level + if node: + f.write(ind) + if key: + f.write("%s: " % key) + t = type(node) + if t == TupleType: + f.write("(%s @ %s\n" % (node[0], node[1])) + for i in xrange(2, len(node)): + print_parse_tree(f, node[i], level+1) + f.write("%s)\n" % ind) + return + elif isinstance(node, Node): + try: + tag = node.tag + except AttributeError: + tag = node.__class__.__name__ + f.write("%s @ %s\n" % (tag, node.pos)) + for name, value in node.__dict__.items(): + if name <> 'tag' and name <> 'pos': + print_parse_tree(f, value, level+1, name) + return + elif t == ListType: + f.write("[\n") + for i in xrange(len(node)): + print_parse_tree(f, node[i], level+1) + f.write("%s]\n" % ind) + return + f.write("%s%s\n" % (ind, node)) + Added: lxml/pyrex/Pyrex/Compiler/Parsing.pyc ============================================================================== Binary file. No diff available. 
Added: lxml/pyrex/Pyrex/Compiler/PyrexTypes.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Compiler/PyrexTypes.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,790 @@ +# +# Pyrex - Types +# + +import string +import Naming + +class PyrexType: + # + # Base class for all Pyrex types. + # + # is_pyobject boolean Is a Python object type + # is_extension_type boolean Is a Python extension type + # is_numeric boolean Is a C numeric type + # is_int boolean Is a C integer type + # is_float boolean Is a C floating point type + # is_void boolean Is the C void type + # is_array boolean Is a C array type + # is_ptr boolean Is a C pointer type + # is_null_ptr boolean Is the type of NULL + # is_cfunction boolean Is a C function type + # is_struct_or_union boolean Is a C struct or union type + # is_enum boolean Is a C enum type + # is_string boolean Is a C char * type + # is_returncode boolean Is used only to signal exceptions + # is_error boolean Is the dummy error type + # has_attributes boolean Has C dot-selectable attributes + # default_value string Initial value + # parsetuple_format string Format char for PyArg_ParseTuple + # pymemberdef_typecode string Type code for PyMemberDef struct + # + # declaration_code(entity_code, + # for_display = 0, dll_linkage = None, pyrex = 0) + # Returns a code fragment for the declaration of an entity + # of this type, given a code fragment for the entity. + # * If for_display, this is for reading by a human in an error + # message; otherwise it must be valid C code. + # * If dll_linkage is not None, it must be 'DL_IMPORT' or + # 'DL_EXPORT', and will be added to the base type part of + # the declaration. + # * If pyrex = 1, this is for use in a 'cdef extern' + # statement of a Pyrex include file. + # + # assignable_from(src_type) + # Tests whether a variable of this type can be + # assigned a value of type src_type. 
+ # + # same_as(other_type) + # Tests whether this type represents the same type + # as other_type. + # + # as_argument_type(): + # Coerces array type into pointer type for use as + # a formal argument type. + # + + is_pyobject = 0 + is_extension_type = 0 + is_numeric = 0 + is_int = 0 + is_float = 0 + is_void = 0 + is_array = 0 + is_ptr = 0 + is_null_ptr = 0 + is_cfunction = 0 + is_struct_or_union = 0 + is_enum = 0 + is_string = 0 + is_returncode = 0 + is_error = 0 + has_attributes = 0 + default_value = "" + parsetuple_format = "" + pymemberdef_typecode = None + + def resolve(self): + # If a typedef, returns the base type. + return self + + def literal_code(self, value): + # Returns a C code fragment representing a literal + # value of this type. + return str(value) + + def __str__(self): + return string.strip(self.declaration_code("", for_display = 1)) + + def same_as(self, other_type, **kwds): + return self.same_as_resolved_type(other_type.resolve(), **kwds) + + def same_as_resolved_type(self, other_type): + return self is other_type or other_type is error_type + + def subtype_of(self, other_type): + return self.subtype_of_resolved_type(other_type.resolve()) + + def subtype_of_resolved_type(self, other_type): + return self.same_as(other_type) + + def assignable_from(self, src_type): + return self.assignable_from_resolved_type(src_type.resolve()) + + def assignable_from_resolved_type(self, src_type): + return self.same_as(src_type) + + def as_argument_type(self): + return self + + def is_complete(self): + # A type is incomplete if it is an unsized array, + # a struct whose attributes are not defined, etc. + return 1 + + +class CTypedefType: + # + # Type defined with a ctypedef statement in a + # 'cdef extern from' block. Delegates most attribute + # lookups to the base type. 
+ # + + def __init__(self, cname, base_type): + self.typedef_cname = cname + self.typedef_base_type = base_type + + def resolve(self): + return self.typedef_base_type.resolve() + + def declaration_code(self, entity_code, + for_display = 0, dll_linkage = None, pyrex = 0): + return "%s %s" % (self.typedef_cname, entity_code) + + def __str__(self): + return self.typedef_cname + + def __getattr__(self, name): + return getattr(self.typedef_base_type, name) + + +class PyObjectType(PyrexType): + # + # Base class for all Python object types (reference-counted). + # + + is_pyobject = 1 + default_value = "0" + parsetuple_format = "O" + pymemberdef_typecode = "T_OBJECT" + + def __str__(self): + return "Python object" + + def __repr__(self): + return "PyObjectType" + + def assignable_from(self, src_type): + return 1 # Conversion will be attempted + + def declaration_code(self, entity_code, + for_display = 0, dll_linkage = None, pyrex = 0): + if pyrex: + return "object %s" % entity_code + else: + return "%s *%s" % (public_decl("PyObject", dll_linkage), entity_code) + + +class PyExtensionType(PyObjectType): + # + # A Python extension type. 
+ # + # name string + # scope CClassScope Attribute namespace + # visibility string + # typedef_flag boolean + # base_type PyExtensionType or None + # module_name string or None Qualified name of defining module + # objstruct_cname string Name of PyObject struct + # typeobj_cname string or None C code fragment referring to type object + # typeptr_cname string or None Name of pointer to external type object + # vtabslot_cname string Name of C method table member + # vtabstruct_cname string Name of C method table struct + # vtabptr_cname string Name of pointer to C method table + # vtable_cname string Name of C method table definition + + is_extension_type = 1 + has_attributes = 1 + + def __init__(self, name, typedef_flag, base_type): + self.name = name + self.scope = None + self.typedef_flag = typedef_flag + self.base_type = base_type + self.module_name = None + self.objstruct_cname = None + self.typeobj_cname = None + self.typeptr_cname = None + self.vtabslot_cname = None + self.vtabstruct_cname = None + self.vtabptr_cname = None + self.vtable_cname = None + + def set_scope(self, scope): + self.scope = scope + if scope: + scope.parent_type = self + + def subtype_of_resolved_type(self, other_type): + if other_type.is_extension_type: + return self is other_type or ( + self.base_type and self.base_type.subtype_of(other_type)) + else: + return other_type is py_object_type + + def typeobj_is_available(self): + # Do we have a pointer to the type object? + return self.typeptr_cname + + def typeobj_is_imported(self): + # If we don't know the C name of the type object but we do + # know which module it's defined in, it will be imported. 
+ return self.typeobj_cname is None and self.module_name is not None + + def declaration_code(self, entity_code, + for_display = 0, dll_linkage = None, pyrex = 0): + if pyrex: + return "%s %s" % (self.name, entity_code) + else: + if self.typedef_flag: + base_format = "%s" + else: + base_format = "struct %s" + base = public_decl(base_format % self.objstruct_cname, dll_linkage) + return "%s *%s" % (base, entity_code) + + def attributes_known(self): + return self.scope is not None + + def __str__(self): + return self.name + + def __repr__(self): + return "PyExtensionType(%s%s)" % (self.scope.class_name, + ("", ".typedef_flag=1")[self.typedef_flag]) + + +class CType(PyrexType): + # + # Base class for all C types (non-reference-counted). + # + # to_py_function string C function for converting to Python object + # from_py_function string C function for constructing from Python object + # + + to_py_function = None + from_py_function = None + + +class CSimpleType(CType): + # + # Base class for all unstructured C types. + # + pass + + +class CVoidType(CSimpleType): + is_void = 1 + + def __repr__(self): + return "<CVoidType>" + + def declaration_code(self, entity_code, + for_display = 0, dll_linkage = None, pyrex = 0): + base = public_decl("void", dll_linkage) + return "%s %s" % (base, entity_code) + + def is_complete(self): + return 0 + + +class CNumericType(CType): + # + # Base class for all C numeric types. + # + # rank integer Relative size + # signed boolean + # + + is_numeric = 1 + default_value = "0" + + parsetuple_formats = "chilLfd?" 
# rank -> format + + def __init__(self, rank, pymemberdef_typecode = None): + self.rank = rank + ptf = self.parsetuple_formats[rank] + if ptf == '?': + ptf = None + self.parsetuple_format = ptf + self.pymemberdef_typecode = pymemberdef_typecode + + def __repr__(self): + if self.signed: + u = "" + else: + u = "unsigned " + return "<CNumericType %s%s>" % (u, rank_to_type_name[self.rank]) + + def assignable_from_resolved_type(self, src_type): + return src_type.is_numeric or src_type is error_type + + def declaration_code(self, entity_code, + for_display = 0, dll_linkage = None, pyrex = 0): + if self.signed: + u = "" + else: + u = "unsigned " + base = public_decl(u + rank_to_type_name[self.rank], dll_linkage) + return "%s %s" % (base, entity_code) + +# return "%s%s %s" % (u, rank_to_type_name[self.rank], entity_code) + + +class CIntType(CNumericType): + + is_int = 1 + typedef_flag = 0 + to_py_function = "PyInt_FromLong" + from_py_function = "PyInt_AsLong" + + def __init__(self, rank, signed, pymemberdef_typecode = None, is_returncode = 0): + CNumericType.__init__(self, rank, pymemberdef_typecode) + self.signed = signed + self.is_returncode = is_returncode + + +class CULongType(CIntType): + + to_py_function = "PyLong_FromUnsignedLong" + from_py_function = "PyLong_AsUnsignedLong" + + +class CLongLongType(CIntType): + + to_py_function = "PyLong_FromLongLong" + from_py_function = "PyLong_AsLongLong" + + +class CULongLongType(CIntType): + + to_py_function = "PyLong_FromUnsignedLongLong" + from_py_function = "PyLong_AsUnsignedLongLong" + + +class CFloatType(CNumericType): + + is_float = 1 + signed = 1 + to_py_function = "PyFloat_FromDouble" + from_py_function = "PyFloat_AsDouble" + + +class CArrayType(CType): + # base_type CType Element type + # size integer or None Number of elements + + is_array = 1 + + def __init__(self, base_type, size): + self.base_type = base_type + self.size = size + if base_type is c_char_type: + self.is_string = 1 + + def __repr__(self): + return "CArrayType(%s,%s)" % 
(self.size, repr(self.base_type)) + + def same_as_resolved_type(self, other_type): + return ((other_type.is_array and + self.base_type.same_as(other_type.base_type)) + or other_type is error_type) + + def assignable_from_resolved_type(self, src_type): + # Can't assign to a variable of an array type + return 0 + + def element_ptr_type(self): + return c_ptr_type(self.base_type) + + def declaration_code(self, entity_code, + for_display = 0, dll_linkage = None, pyrex = 0): + if self.size is not None: + dimension_code = self.size + else: + dimension_code = "" + return self.base_type.declaration_code( + "(%s[%s])" % (entity_code, dimension_code), + for_display, dll_linkage, pyrex) + + def as_argument_type(self): + return c_ptr_type(self.base_type) + + def is_complete(self): + return self.size is not None + + +class CPtrType(CType): + # base_type CType Referenced type + + is_ptr = 1 + default_value = 0 + + def __init__(self, base_type): + self.base_type = base_type + + def __repr__(self): + return "CPtrType(%s)" % repr(self.base_type) + + def same_as_resolved_type(self, other_type): + return ((other_type.is_ptr and + self.base_type.same_as(other_type.base_type)) + or other_type is error_type) + + def declaration_code(self, entity_code, + for_display = 0, dll_linkage = None, pyrex = 0): + return self.base_type.declaration_code( + "(*%s)" % entity_code, + for_display, dll_linkage, pyrex) + + def assignable_from_resolved_type(self, other_type): + if other_type is error_type: + return 1 + elif self.base_type.is_cfunction and other_type.is_cfunction: + return self.base_type.same_as(other_type) + elif not other_type.is_ptr: + return 0 + elif self.base_type.is_void: + return 1 + elif other_type.is_null_ptr: + return 1 + else: + return self.base_type.same_as(other_type.base_type) + + +class CNullPtrType(CPtrType): + + is_null_ptr = 1 + + +class CFuncType(CType): + # return_type CType + # args [CFuncTypeArg] + # has_varargs boolean + # exception_value string + # exception_check 
boolean True if PyErr_Occurred check needed + + is_cfunction = 1 + + def __init__(self, return_type, args, has_varargs, + exception_value = None, exception_check = 0): + self.return_type = return_type + self.args = args + self.has_varargs = has_varargs + self.exception_value = exception_value + self.exception_check = exception_check + + def __repr__(self): + arg_reprs = map(repr, self.args) + if self.has_varargs: + arg_reprs.append("...") + return "CFuncType(%s,[%s])" % ( + repr(self.return_type), + string.join(arg_reprs, ",")) + + def same_c_signature_as(self, other_type, as_cmethod = 0): + return self.same_c_signature_as_resolved_type( + other_type.resolve(), as_cmethod) + + def same_c_signature_as_resolved_type(self, other_type, as_cmethod): + if other_type is error_type: + return 1 + if not other_type.is_cfunction: + return 0 + nargs = len(self.args) + if nargs <> len(other_type.args): + return 0 + # When comparing C method signatures, the first argument + # is exempt from compatibility checking (the proper check + # is performed elsewhere). 
+ for i in range(as_cmethod, nargs): + if not self.args[i].type.same_as( + other_type.args[i].type): + return 0 + if self.has_varargs <> other_type.has_varargs: + return 0 + if not self.return_type.same_as(other_type.return_type): + return 0 + return 1 + + def same_exception_signature_as(self, other_type): + return self.same_exception_signature_as_resolved_type( + other_type.resolve()) + + def same_exception_signature_as_resolved_type(self, other_type): + return self.exception_value == other_type.exception_value \ + and self.exception_check == other_type.exception_check + + def same_as_resolved_type(self, other_type, as_cmethod = 0): + return self.same_c_signature_as_resolved_type(other_type, as_cmethod) \ + and self.same_exception_signature_as_resolved_type(other_type) + + def declaration_code(self, entity_code, + for_display = 0, dll_linkage = None, pyrex = 0): + arg_decl_list = [] + for arg in self.args: + arg_decl_list.append( + arg.type.declaration_code("", for_display, pyrex = pyrex)) + if self.has_varargs: + arg_decl_list.append("...") + arg_decl_code = string.join(arg_decl_list, ",") + if not arg_decl_code and not pyrex: + arg_decl_code = "void" + exc_clause = "" + if for_display: + if self.exception_value and self.exception_check: + exc_clause = " except? 
%s" % self.exception_value + elif self.exception_value: + exc_clause = " except %s" % self.exception_value + elif self.exception_check: + exc_clause = " except *" + return self.return_type.declaration_code( + "(%s(%s)%s)" % (entity_code, arg_decl_code, exc_clause), + for_display, dll_linkage, pyrex) + + +class CFuncTypeArg: + # name string + # cname string + # type PyrexType + # pos source file position + + def __init__(self, name, type, pos): + self.name = name + self.cname = Naming.var_prefix + name + self.type = type + self.pos = pos + + def __repr__(self): + return "%s:%s" % (self.name, repr(self.type)) + + def declaration_code(self, for_display = 0): + return self.type.declaration_code(self.cname, for_display) + + +class CStructOrUnionType(CType): + # name string + # cname string + # kind string "struct" or "union" + # scope StructOrUnionScope, or None if incomplete + # typedef_flag boolean + + is_struct_or_union = 1 + has_attributes = 1 + + def __init__(self, name, kind, scope, typedef_flag, cname): + self.name = name + self.cname = cname + self.kind = kind + self.scope = scope + self.typedef_flag = typedef_flag + + def __repr__(self): + return "CStructOrUnionType(%s,%s%s)" % (self.name, self.cname, + ("", ",typedef_flag=1")[self.typedef_flag]) + + def declaration_code(self, entity_code, + for_display = 0, dll_linkage = None, pyrex = 0): + if pyrex: + return "%s %s" % (self.name, entity_code) + else: + if for_display: + base = self.name + elif self.typedef_flag: + base = self.cname + else: + base = "%s %s" % (self.kind, self.cname) + return "%s %s" % (public_decl(base, dll_linkage), entity_code) + + def is_complete(self): + return self.scope is not None + + def attributes_known(self): + return self.is_complete() + + +class CEnumType(CIntType): + # name string + # cname string or None + # typedef_flag boolean + + is_enum = 1 + signed = 1 + rank = 2 + + def __init__(self, name, cname, typedef_flag): + self.name = name + self.cname = cname + self.values = [] + 
self.typedef_flag = typedef_flag + + def __repr__(self): + return "CEnumType(%s,%s%s)" % (self.name, self.cname, + ("", ",typedef_flag=1")[self.typedef_flag]) + + def declaration_code(self, entity_code, + for_display = 0, dll_linkage = None, pyrex = 0): + if pyrex: + return "%s %s" % (self.cname, entity_code) + else: + if self.typedef_flag: + base = self.cname + else: + base = "enum %s" % self.cname + return "%s %s" % (public_decl(base, dll_linkage), entity_code) + + +class CStringType: + # Mixin class for C string types. + + is_string = 1 + + to_py_function = "PyString_FromString" + from_py_function = "PyString_AsString" + + def literal_code(self, value): + return '"%s"' % value + + +class CCharArrayType(CStringType, CArrayType): + # C 'char []' type. + + parsetuple_format = "s" + pymemberdef_typecode = "T_STRING_INPLACE" + + def __init__(self, size): + CArrayType.__init__(self, c_char_type, size) + + +class CCharPtrType(CStringType, CPtrType): + # C 'char *' type. + + parsetuple_format = "s" + pymemberdef_typecode = "T_STRING" + + def __init__(self): + CPtrType.__init__(self, c_char_type) + + +class ErrorType(PyrexType): + # Used to prevent propagation of error messages. 
+ + is_error = 1 + exception_value = "0" + exception_check = 0 + to_py_function = "dummy" + from_py_function = "dummy" + + def declaration_code(self, entity_code, + for_display = 0, dll_linkage = None, pyrex = 0): + return "" + + def same_as_resolved_type(self, other_type): + return 1 + + +py_object_type = PyObjectType() + +c_void_type = CVoidType() +c_void_ptr_type = CPtrType(c_void_type) +c_void_ptr_ptr_type = CPtrType(c_void_ptr_type) + +c_char_type = CIntType(0, 1, "T_CHAR") +c_short_type = CIntType(1, 1, "T_SHORT") +c_int_type = CIntType(2, 1, "T_INT") +c_long_type = CIntType(3, 1, "T_LONG") +c_longlong_type = CLongLongType(4, 1) + +c_uchar_type = CIntType(0, 0, "T_UBYTE") +c_ushort_type = CIntType(1, 0, "T_USHORT") +c_uint_type = CIntType(2, 0, "T_UINT") +c_ulong_type = CULongType(3, 0, "T_ULONG") +c_ulonglong_type = CULongLongType(4, 0) + +c_float_type = CFloatType(5, "T_FLOAT") +c_double_type = CFloatType(6, "T_DOUBLE") +c_longdouble_type = CFloatType(7) + +c_null_ptr_type = CNullPtrType(c_void_type) +c_char_array_type = CCharArrayType(None) +c_char_ptr_type = CCharPtrType() +c_char_ptr_ptr_type = CPtrType(c_char_ptr_type) +c_int_ptr_type = CPtrType(c_int_type) + +c_returncode_type = CIntType(2, 1, "T_INT", is_returncode = 1) + +error_type = ErrorType() + +lowest_float_rank = 5 + +rank_to_type_name = ( + "char", # 0 + "short", # 1 + "int", # 2 + "long", # 3 + "PY_LONG_LONG", # 4 + "float", # 5 + "double", # 6 + "long double", # 7 +) + +sign_and_rank_to_type = { + #(signed, rank) + (0, 0, ): c_uchar_type, + (0, 1): c_ushort_type, + (0, 2): c_uint_type, + (0, 3): c_ulong_type, + (0, 4): c_ulonglong_type, + (1, 0): c_char_type, + (1, 1): c_short_type, + (1, 2): c_int_type, + (1, 3): c_long_type, + (1, 4): c_longlong_type, + (1, 5): c_float_type, + (1, 6): c_double_type, + (1, 7): c_longdouble_type, +} + +modifiers_and_name_to_type = { + #(signed, longness, name) + (0, 0, "char"): c_uchar_type, + (0, -1, "int"): c_ushort_type, + (0, 0, "int"): c_uint_type, + 
(0, 1, "int"): c_ulong_type, + (0, 2, "int"): c_ulonglong_type, + (1, 0, "void"): c_void_type, + (1, 0, "char"): c_char_type, + (1, -1, "int"): c_short_type, + (1, 0, "int"): c_int_type, + (1, 1, "int"): c_long_type, + (1, 2, "int"): c_longlong_type, + (1, 0, "float"): c_float_type, + (1, 0, "double"): c_double_type, + (1, 1, "double"): c_longdouble_type, + (1, 0, "object"): py_object_type, +} + +def widest_numeric_type(type1, type2): + # Given two numeric types, return the narrowest type + # encompassing both of them. + signed = type1.signed + rank = max(type1.rank, type2.rank) + if rank >= lowest_float_rank: + signed = 1 + return sign_and_rank_to_type[signed, rank] + +def simple_c_type(signed, longness, name): + # Find type descriptor for simple type given name and modifiers. + # Returns None if arguments don't make sense. + return modifiers_and_name_to_type.get((signed, longness, name)) + +def c_array_type(base_type, size): + # Construct a C array type. + if base_type is c_char_type: + return CCharArrayType(size) + else: + return CArrayType(base_type, size) + +def c_ptr_type(base_type): + # Construct a C pointer type. + if base_type is c_char_type: + return c_char_ptr_type + else: + return CPtrType(base_type) + +def public_decl(base, dll_linkage): + if dll_linkage: + return "%s(%s)" % (dll_linkage, base) + else: + return base + Added: lxml/pyrex/Pyrex/Compiler/PyrexTypes.pyc ============================================================================== Binary file. No diff available. 
Added: lxml/pyrex/Pyrex/Compiler/Scanning.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Compiler/Scanning.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,328 @@ +# +# Pyrex Scanner +# + +#import pickle +import cPickle as pickle + +import os +import stat +import sys +from time import time + +from Pyrex import Plex +from Pyrex.Plex import Scanner +from Pyrex.Plex.Errors import UnrecognizedInput +from Errors import CompileError, error +from Lexicon import string_prefixes, make_lexicon + +plex_version = getattr(Plex, '_version', None) +#print "Plex version:", plex_version ### + +debug_scanner = 0 +trace_scanner = 0 +#scanner_dump_file = open("Lexicon_dump.txt", "w") +scanner_debug_flags = 0 +scanner_dump_file = None +binary_lexicon_pickle = 1 +notify_lexicon_unpickling = 0 +notify_lexicon_pickling = 1 + +lexicon = None + +#----------------------------------------------------------------- + +def hash_source_file(path): + # Try to calculate a hash code for the given source file. + # Returns an empty string if the file cannot be accessed. + #print "Hashing", path ### + import md5 + try: + try: + f = open(path) + text = f.read() + except IOError, e: + print "Unable to hash scanner source file (%s)" % e + return "" + finally: + f.close() + # Normalise spaces/tabs. We don't know what sort of + # space-tab substitution the file may have been + # through, so we replace all spans of spaces and + # tabs by a single space. + import re + text = re.sub("[ \t]+", " ", text) + hash = md5.new(text).hexdigest() + return hash + +def open_pickled_lexicon(expected_hash): + # Try to open pickled lexicon file and verify that + # it matches the source file. Returns the opened + # file if successful, otherwise None. ??? 
+ f = None + result = None + if os.path.exists(lexicon_pickle): + try: + f = open(lexicon_pickle, "rb") + actual_hash = pickle.load(f) + if actual_hash == expected_hash: + result = f + f = None + else: + print "Lexicon hash mismatch:" ### + print " expected", expected_hash ### + print " got ", actual_hash ### + except IOError, e: + print "Warning: Unable to read pickled lexicon", lexicon_pickle + print e + if f: + f.close() + return result + +def try_to_unpickle_lexicon(): + global lexicon, lexicon_pickle, lexicon_hash + dir = os.path.dirname(__file__) + source_file = os.path.join(dir, "Lexicon.py") + lexicon_hash = hash_source_file(source_file) + lexicon_pickle = os.path.join(dir, "Lexicon.pickle") + f = open_pickled_lexicon(expected_hash = lexicon_hash) + if f: + if notify_lexicon_unpickling: + t0 = time() + print "Unpickling lexicon..." + lexicon = pickle.load(f) + f.close() + if notify_lexicon_unpickling: + t1 = time() + print "Done (%.2f seconds)" % (t1 - t0) + +def create_new_lexicon(): + global lexicon + t0 = time() + print "Creating lexicon..." + lexicon = make_lexicon() + t1 = time() + print "Done (%.2f seconds)" % (t1 - t0) + +def pickle_lexicon(): + f = None + try: + f = open(lexicon_pickle, "wb") + except IOError: + print "Warning: Unable to save pickled lexicon in", lexicon_pickle + if f: + if notify_lexicon_pickling: + t0 = time() + print "Pickling lexicon..." 
+ pickle.dump(lexicon_hash, f, binary_lexicon_pickle) + pickle.dump(lexicon, f, binary_lexicon_pickle) + f.close() + if notify_lexicon_pickling: + t1 = time() + print "Done (%.2f seconds)" % (t1 - t0) + +def get_lexicon(): + global lexicon + if not lexicon and plex_version is None: + try_to_unpickle_lexicon() + if not lexicon: + create_new_lexicon() + if plex_version is None: + pickle_lexicon() + return lexicon + +#------------------------------------------------------------------ + +reserved_words = [ + "global", "include", "ctypedef", "cdef", "def", "class", + "print", "del", "pass", "break", "continue", "return", + "raise", "import", "exec", "try", "except", "finally", + "while", "if", "elif", "else", "for", "in", "assert", + "and", "or", "not", "is", "in", "lambda", "from", + "NULL", "cimport" +] + +class Method: + + def __init__(self, name): + self.name = name + self.__name__ = name # for Plex tracing + + def __call__(self, stream, text): + return getattr(stream, self.name)(text) + + +#def make_lexicon(): +# import Lexicon +# return Lexicon.lexicon + +#------------------------------------------------------------------ + +def build_resword_dict(): + d = {} + for word in reserved_words: + d[word] = 1 + return d + +#------------------------------------------------------------------ + +class PyrexScanner(Scanner): + + resword_dict = build_resword_dict() + + def __init__(self, file, filename, parent_scanner = None, + type_names = None, context = None): + Scanner.__init__(self, get_lexicon(), file, filename) + if parent_scanner: + self.context = parent_scanner.context + self.type_names = parent_scanner.type_names + else: + self.context = context + self.type_names = type_names + self.trace = trace_scanner + self.indentation_stack = [0] + self.indentation_char = None + self.bracket_nesting_level = 0 + self.begin('INDENT') + self.sy = '' + self.next() + + def current_level(self): + return self.indentation_stack[-1] + + def open_bracket_action(self, text): + 
self.bracket_nesting_level = self.bracket_nesting_level + 1 + return text + + def close_bracket_action(self, text): + self.bracket_nesting_level = self.bracket_nesting_level - 1 + return text + + def newline_action(self, text): + if self.bracket_nesting_level == 0: + self.begin('INDENT') + self.produce('NEWLINE', '') + + string_states = { + "'": 'SQ_STRING', + '"': 'DQ_STRING', + "'''": 'TSQ_STRING', + '"""': 'TDQ_STRING' + } + + def begin_string_action(self, text): + if text[:1] in string_prefixes: + text = text[1:] + self.begin(self.string_states[text]) + self.produce('BEGIN_STRING') + + def end_string_action(self, text): + self.begin('') + self.produce('END_STRING') + + def unclosed_string_action(self, text): + self.end_string_action(text) + self.error("Unclosed string literal") + + def indentation_action(self, text): + self.begin('') + # Indentation within brackets should be ignored. + #if self.bracket_nesting_level > 0: + # return + # Check that tabs and spaces are being used consistently. 
+ if text: + c = text[0] + #print "Scanner.indentation_action: indent with", repr(c) ### + if self.indentation_char is None: + self.indentation_char = c + #print "Scanner.indentation_action: setting indent_char to", repr(c) + else: + if self.indentation_char <> c: + self.error("Mixed use of tabs and spaces") + if text.replace(c, "") <> "": + self.error("Mixed use of tabs and spaces") + # Figure out how many indents/dedents to do + current_level = self.current_level() + new_level = len(text) + #print "Changing indent level from", current_level, "to", new_level ### + if new_level == current_level: + return + elif new_level > current_level: + #print "...pushing level", new_level ### + self.indentation_stack.append(new_level) + self.produce('INDENT', '') + else: + while new_level < self.current_level(): + #print "...popping level", self.indentation_stack[-1] ### + self.indentation_stack.pop() + self.produce('DEDENT', '') + #print "...current level now", self.current_level() ### + if new_level <> self.current_level(): + self.error("Inconsistent indentation") + + def eof_action(self, text): + while len(self.indentation_stack) > 1: + self.produce('DEDENT', '') + self.indentation_stack.pop() + self.produce('EOF', '') + + def next(self): + try: + sy, systring = self.read() + except UnrecognizedInput: + self.error("Unrecognized character") + if sy == 'IDENT' and systring in self.resword_dict: + sy = systring + self.sy = sy + self.systring = systring + if debug_scanner: + _, line, col = self.position() + if not self.systring or self.sy == self.systring: + t = self.sy + else: + t = "%s %s" % (self.sy, self.systring) + print "--- %3d %2d %s" % (line, col, t) + + def put_back(self, sy, systring): + self.unread(self.sy, self.systring) + self.sy = sy + self.systring = systring + + def unread(self, token, value): + # This method should be added to Plex + self.queue.insert(0, (token, value)) + + def add_type_name(self, name): + self.type_names[name] = 1 + + def 
looking_at_type_name(self): + return self.sy == 'IDENT' and self.systring in self.type_names + + def error(self, message, pos = None): + if pos is None: + pos = self.position() + if self.sy == 'INDENT': + error(pos, "Possible inconsistent indentation") + raise error(pos, message) + + def expect(self, what, message = None): + if self.sy == what: + self.next() + else: + if message: + self.error(message) + else: + self.error("Expected '%s'" % what) + + def expect_indent(self): + self.expect('INDENT', + "Expected an increase in indentation level") + + def expect_dedent(self): + self.expect('DEDENT', + "Expected a decrease in indentation level") + + def expect_newline(self, message): + # Expect either a newline or end of file + if self.sy <> 'EOF': + self.expect('NEWLINE', message) Added: lxml/pyrex/Pyrex/Compiler/Scanning.pyc ============================================================================== Binary file. No diff available. Added: lxml/pyrex/Pyrex/Compiler/Symtab.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Compiler/Symtab.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,1054 @@ +# +# Pyrex - Symbol Table +# + +import re +from Errors import error, InternalError +import Options +import Naming +from PyrexTypes import c_int_type, \ + py_object_type, c_char_array_type, \ + CEnumType, CStructOrUnionType, PyExtensionType +from TypeSlots import \ + pyfunction_signature, pymethod_signature, \ + get_special_method_signature, get_property_accessor_signature + +identifier_pattern = re.compile(r"[A-Za-z_][A-Za-z0-9_]*$") + +class Entry: + # A symbol table entry in a Scope or ModuleNamespace. 
+ # + # name string Python name of entity + # cname string C name of entity + # type PyrexType Type of entity + # doc string Doc string + # #borrowed bool Is a borrowed reference + # init string Initial value + # visibility 'private' or 'public' or 'extern' + # is_builtin boolean Is a Python builtin name + # is_cglobal boolean Is a C global variable + # is_pyglobal boolean Is a Python module-level variable + # or class attribute during + # class construction + # is_variable boolean Is a variable + # is_cfunction boolean Is a C function + # is_cmethod boolean Is a C method of an extension type + # is_type boolean Is a type definition + # is_const boolean Is a constant + # is_property boolean Is a property of an extension type: + # doc_cname string or None C const holding the docstring + # getter_cname string C func for getting property + # setter_cname string C func for setting or deleting property + # is_self_arg boolean Is the "self" arg of an exttype method + # is_readonly boolean Can't be assigned to + # func_cname string C func implementing Python func + # pos position Source position where declared + # namespace_cname string If is_pyglobal, the C variable + # holding its home namespace + # pymethdef_cname string PyMethodDef structure + # signature Signature Arg & return types for Python func + # init_to_none boolean True if initial value should be None + # as_variable Entry Alternative interpretation of extension + # type name as a variable + # xdecref_cleanup boolean Use Py_XDECREF for error cleanup + # in_cinclude boolean Suppress C declaration code + # enum_values [Entry] For enum types, list of values + # qualified_name string "modname.funcname" or "modname.classname" + # or "modname.classname.funcname" + # is_declared_generic boolean Is declared as PyObject * even though its + # type is an extension type + # as_module None Module scope, if a cimported module + # is_inherited boolean Is an inherited attribute of an extension type + # interned_cname string 
C name of interned name string + # pystring_cname string C name of Python version of string literal + # is_interned boolean For string const entries, value is interned + + borrowed = 0 + init = "" + visibility = 'private' + is_builtin = 0 + is_cglobal = 0 + is_pyglobal = 0 + is_variable = 0 + is_cfunction = 0 + is_cmethod = 0 + is_type = 0 + is_const = 0 + is_property = 0 + doc_cname = None + getter_cname = None + setter_cname = None + is_self_arg = 0 + is_declared_generic = 0 + is_readonly = 0 + func_cname = None + doc = None + init_to_none = 0 + as_variable = None + xdecref_cleanup = 0 + in_cinclude = 0 + as_module = None + is_inherited = 0 + interned_cname = None + pystring_cname = None + is_interned = 0 + + def __init__(self, name, cname, type, pos = None, init = None): + self.name = name + self.cname = cname + self.type = type + self.pos = pos + self.init = init + + +class Scope: + # name string Unqualified name + # outer_scope Scope or None Enclosing scope + # entries {string : Entry} Python name to entry, non-types + # const_entries [Entry] Constant entries + # sue_entries [Entry] Struct/union/enum entries + # arg_entries [Entry] Function argument entries + # var_entries [Entry] User-defined variable entries + # pyfunc_entries [Entry] Python function entries + # cfunc_entries [Entry] C function entries + # c_class_entries [Entry] All extension type entries + # temp_entries [Entry] Temporary variable entries + # free_temp_entries [Entry] Temp variables currently unused + # temp_counter integer Counter for naming temp vars + # cname_to_entry {string : Entry} Temp cname to entry mapping + # pow_function_used boolean The C pow() function is used + # return_type PyrexType or None Return type of function owning scope + # is_py_class_scope boolean Is a Python class scope + # is_c_class_scope boolean Is an extension type scope + # scope_prefix string Disambiguator for C names + # in_cinclude boolean Suppress C declaration code + # qualified_name string "modname" or 
"modname.classname" + # pystring_entries [Entry] String const entries newly used as + # Python strings in this scope + + is_py_class_scope = 0 + is_c_class_scope = 0 + scope_prefix = "" + in_cinclude = 0 + + def __init__(self, name, outer_scope, parent_scope): + # The outer_scope is the next scope in the lookup chain. + # The parent_scope is used to derive the qualified name of this scope. + self.name = name + self.outer_scope = outer_scope + self.parent_scope = parent_scope + mangled_name = "%d%s_" % (len(name), name) + qual_scope = self.qualifying_scope() + if qual_scope: + self.qualified_name = qual_scope.qualify_name(name) + self.scope_prefix = qual_scope.scope_prefix + mangled_name + else: + self.qualified_name = name + self.scope_prefix = mangled_name + self.entries = {} + self.const_entries = [] + self.sue_entries = [] + self.arg_entries = [] + self.var_entries = [] + self.pyfunc_entries = [] + self.cfunc_entries = [] + self.c_class_entries = [] + self.defined_c_classes = [] + self.imported_c_classes = {} + self.temp_entries = [] + self.free_temp_entries = [] + #self.pending_temp_entries = [] # TEMPORARY + self.temp_counter = 1 + self.cname_to_entry = {} + self.pow_function_used = 0 + self.string_to_entry = {} + self.pystring_entries = [] + + def __str__(self): + return "<%s %s>" % (self.__class__.__name__, self.qualified_name) + + def intern(self, name): + return self.global_scope().intern(name) + + def qualifying_scope(self): + #return self.outer_scope + return self.parent_scope + + def mangle(self, prefix, name = None): + if name: + return "%s%s%s" % (prefix, self.scope_prefix, name) + else: + return self.parent_scope.mangle(prefix, self.name) + + def mangle_internal(self, name): + # Mangle an internal name so as not to clash with any + # user-defined name in this scope. 
+ prefix = "%s%s_" % (Naming.pyrex_prefix, name) + return self.mangle(prefix) + #return self.parent_scope.mangle(prefix, self.name) + + def global_scope(self): + # Return the module-level scope containing this scope. + return self.outer_scope.global_scope() + + def declare(self, name, cname, type, pos): + # Create new entry, and add to dictionary if + # name is not None. Reports an error if already + # declared. + dict = self.entries + if name and dict.has_key(name): + error(pos, "'%s' redeclared" % name) + entry = Entry(name, cname, type, pos = pos) + entry.in_cinclude = self.in_cinclude + if name: + entry.qualified_name = self.qualify_name(name) + dict[name] = entry + return entry + + def qualify_name(self, name): + return "%s.%s" % (self.qualified_name, name) + + #def undeclare(self, name): + # del self.entries[name] + + def declare_const(self, name, type, value, pos, cname = None): + # Add an entry for a named constant. + if not cname: + if self.in_cinclude: + cname = name + else: + cname = self.mangle(Naming.enum_prefix, name) + entry = self.declare(name, cname, type, pos) + entry.is_const = 1 + entry.value = value + return entry + + def declare_type(self, name, type, pos, + cname = None, visibility = 'private'): + # Add an entry for a type definition. + if not cname: + cname = name + entry = self.declare(name, cname, type, pos) + entry.visibility = visibility + entry.is_type = 1 + return entry + + def declare_struct_or_union(self, name, kind, scope, + typedef_flag, pos, cname = None): + # Add an entry for a struct or union definition. 
+ if not cname: + if self.in_cinclude: + cname = name + else: + cname = self.mangle(Naming.type_prefix, name) + entry = self.lookup_here(name) + if not entry: + type = CStructOrUnionType(name, kind, scope, typedef_flag, cname) + entry = self.declare_type(name, type, pos, cname) + self.sue_entries.append(entry) + else: + if not (entry.is_type and entry.type.is_struct_or_union): + error(pos, "'%s' redeclared" % name) + elif scope and entry.type.scope: + error(pos, "'%s' already defined" % name) + else: + self.check_previous_typedef_flag(entry, typedef_flag, pos) + if scope: + entry.type.scope = scope + if not scope and not entry.type.scope: + self.check_for_illegal_incomplete_ctypedef(typedef_flag, pos) + return entry + + def check_previous_typedef_flag(self, entry, typedef_flag, pos): + if typedef_flag <> entry.type.typedef_flag: + error(pos, "'%s' previously declared using '%s'" % ( + entry.name, ("cdef", "ctypedef")[entry.type.typedef_flag])) + + def declare_enum(self, name, pos, cname, typedef_flag): + if name: + if not cname: + if self.in_cinclude: + cname = name + else: + cname = self.mangle(Naming.type_prefix, name) + type = CEnumType(name, cname, typedef_flag) + else: + type = c_int_type + entry = self.declare_type(name, type, pos, cname = cname) + entry.enum_values = [] + self.sue_entries.append(entry) + return entry + + def declare_var(self, name, type, pos, + cname = None, visibility = 'private', is_cdef = 0): + # Add an entry for a variable. + if not cname: + if visibility <> 'private': + cname = name + else: + cname = self.mangle(Naming.var_prefix, name) + entry = self.declare(name, cname, type, pos) + entry.is_variable = 1 + entry.visibility = visibility + return entry + + def declare_builtin(self, name, pos): + return self.outer_scope.declare_builtin(name, pos) + + def declare_pyfunction(self, name, pos): + # Add an entry for a Python function. 
+ entry = self.declare_var(name, py_object_type, pos) + entry.signature = pyfunction_signature + self.pyfunc_entries.append(entry) + return entry + + def register_pyfunction(self, entry): + self.pyfunc_entries.append(entry) + + def declare_cfunction(self, name, type, pos, + cname = None, visibility = 'private', defining = 0): + # Add an entry for a C function. + if not cname: + if visibility <> 'private': + cname = name + else: + cname = self.mangle(Naming.func_prefix, name) + entry = self.add_cfunction(name, type, pos, cname, visibility) + entry.func_cname = cname + return entry + + def add_cfunction(self, name, type, pos, cname, visibility): + # Add a C function entry without giving it a func_cname. + entry = self.declare(name, cname, type, pos) + entry.is_cfunction = 1 + entry.visibility = visibility + self.cfunc_entries.append(entry) + return entry + + def find(self, name, pos): + # Look up name, report error if not found. + entry = self.lookup(name) + if entry: + return entry + else: + error(pos, "'%s' is not declared" % name) + + def lookup(self, name): + # Look up name in this scope or an enclosing one. + # Return None if not found. + return (self.lookup_here(name) + or (self.outer_scope and self.outer_scope.lookup(name)) + or None) + + def lookup_here(self, name): + # Look up in this scope only, return None if not found. + return self.entries.get(name, None) + + def lookup_target(self, name): + # Look up name in this scope only. Declare as Python + # variable if not found. + entry = self.lookup_here(name) + if not entry: + entry = self.declare_var(name, py_object_type, None) + return entry + + def add_string_const(self, value): + # Add an entry for a string constant. + cname = self.new_const_cname() + entry = Entry("", cname, c_char_array_type, init = value) + self.const_entries.append(entry) + return entry + + def get_string_const(self, value): + # Get entry for string constant. Returns an existing + # one if possible, otherwise creates a new one. 
+ genv = self.global_scope() + entry = genv.string_to_entry.get(value) + if not entry: + entry = self.add_string_const(value) + genv.string_to_entry[value] = entry + return entry + + def add_py_string(self, entry): + # If not already done, allocate a C name for a Python version of + # a string literal, and add it to the list of Python strings to + # be created at module init time. If the string resembles a + # Python identifier, it will be interned. + if not entry.pystring_cname: + value = entry.init + if identifier_pattern.match(value): + entry.pystring_cname = self.intern(value) + entry.is_interned = 1 + else: + entry.pystring_cname = entry.cname + "p" + self.pystring_entries.append(entry) + self.global_scope().all_pystring_entries.append(entry) + + def new_const_cname(self): + # Create a new globally-unique name for a constant. + return self.global_scope().new_const_cname() + + def allocate_temp(self, type): + # Allocate a temporary variable of the given type from the + # free list if available, otherwise create a new one. + # Returns the cname of the variable. + for entry in self.free_temp_entries: + if entry.type == type: + self.free_temp_entries.remove(entry) + return entry.cname + n = self.temp_counter + self.temp_counter = n + 1 + cname = "%s%d" % (Naming.pyrex_prefix, n) + entry = Entry("", cname, type) + if type.is_pyobject: + entry.init = "0" + self.cname_to_entry[entry.cname] = entry + self.temp_entries.append(entry) + return entry.cname + + def allocate_temp_pyobject(self): + # Allocate a temporary PyObject variable. + return self.allocate_temp(py_object_type) + + def release_temp(self, cname): + # Release a temporary variable for re-use. 
+ if not cname: # can happen when type of an expr is void + return + entry = self.cname_to_entry[cname] + if entry in self.free_temp_entries: + raise InternalError("Temporary variable %s released more than once" + % cname) + self.free_temp_entries.append(entry) + + def recycle_pending_temps(self): + # Obsolete + pass + + def use_utility_code(self, new_code): + self.global_scope().use_utility_code(new_code) + + def generate_library_function_declarations(self, code): + # Generate extern decls for C library funcs used. + if self.pow_function_used: + code.putln("extern double pow(double, double);") + + def defines_any(self, names): + # Test whether any of the given names are + # defined in this scope. + for name in names: + if name in self.entries: + return 1 + return 0 + + +class BuiltinScope(Scope): + # The builtin namespace. + + def __init__(self): + Scope.__init__(self, "__builtin__", None, None) + + def declare_builtin(self, name, pos): + entry = self.declare(name, name, py_object_type, pos) + entry.is_builtin = 1 + return entry + + +class ModuleScope(Scope): + # module_name string Python name of the module + # module_cname string C name of Python module object + # #module_dict_cname string C name of module dict object + # method_table_cname string C name of method table + # doc string Module doc string + # doc_cname string C name of module doc string + # const_counter integer Counter for naming constants + # utility_code_used [string] Utility code to be included + # default_entries [Entry] Function argument default entries + # python_include_files [string] Standard Python headers to be included + # include_files [string] Other C headers to be included + # string_to_entry {string : Entry} Map string const to entry + # context Context + # parent_module Scope Parent in the import namespace + # module_entries {string : Entry} For cimport statements + # type_names {string : 1} Set of type names (used during parsing) + # pxd_file_loaded boolean Corresponding .pxd file 
has been processed + # cimported_modules [ModuleScope] Modules imported with cimport + # intern_map {string : string} Mapping from Python names to interned strs + # interned_names [string] Interned names pending generation of declarations + # all_pystring_entries [Entry] Python string consts from all scopes + + def __init__(self, name, parent_module, context): + self.parent_module = parent_module + outer_scope = context.find_submodule("__builtin__") + Scope.__init__(self, name, outer_scope, parent_module) + self.module_name = name + self.context = context + self.module_cname = Naming.module_cname + self.module_dict_cname = Naming.moddict_cname + self.method_table_cname = Naming.methtable_cname + self.doc = "" + self.doc_cname = Naming.moddoc_cname + self.const_counter = 1 + self.utility_code_used = [] + self.default_entries = [] + self.module_entries = {} + self.python_include_files = ["Python.h", "structmember.h"] + self.include_files = [] + self.type_names = {} + self.pxd_file_loaded = 0 + self.cimported_modules = [] + self.intern_map = {} + self.interned_names = [] + self.all_pystring_entries = [] + + def qualifying_scope(self): + return self.parent_module + + def global_scope(self): + return self + + def declare_builtin(self, name, pos): + entry = Scope.declare_builtin(self, name, pos) + entry.interned_cname = self.intern(name) + return entry + + def intern(self, name): + intern_map = self.intern_map + cname = intern_map.get(name) + if not cname: + cname = Naming.interned_prefix + name + intern_map[name] = cname + self.interned_names.append(name) + return cname + + def find_module(self, module_name, pos): + # Find a module in the import namespace, interpreting + # relative imports relative to this module's parent. + # Finds and parses the module's .pxd file if the module + # has not been referenced before. 
+ return self.global_scope().context.find_module( + module_name, relative_to = self.parent_module, pos = pos) + + def find_submodule(self, name): + # Find and return scope for a submodule of this module, + # creating a new empty one if necessary. Doesn't parse .pxd. + scope = self.lookup_submodule(name) + if not scope: + scope = ModuleScope(name, + parent_module = self, context = self.context) + self.module_entries[name] = scope + return scope + + def lookup_submodule(self, name): + # Return scope for submodule of this module, or None. + return self.module_entries.get(name, None) + + def add_include_file(self, filename): + if filename not in self.python_include_files \ + and filename not in self.include_files: + self.include_files.append(filename) + + def add_imported_module(self, scope): + if scope not in self.cimported_modules: + self.cimported_modules.append(scope) + + def add_imported_entry(self, name, entry, pos): + if entry not in self.entries: + self.entries[name] = entry + else: + error(pos, "'%s' redeclared" % name) + + def declare_module(self, name, scope, pos): + # Declare a cimported module. This is represented as a + # Python module-level variable entry with a module + # scope attached to it. Reports an error and returns + # None if previously declared as something else. + entry = self.lookup_here(name) + if entry: + if not (entry.is_pyglobal and not entry.as_module): + error(pos, "'%s' redeclared" % name) + return None + else: + entry = self.declare_var(name, py_object_type, pos) + entry.as_module = scope + self.cimported_modules.append(scope) + return entry + + def declare_var(self, name, type, pos, + cname = None, visibility = 'private', is_cdef = 0): + # Add an entry for a global variable. If it is a Python + # object type, and not declared with cdef, it will live + # in the module dictionary, otherwise it will be a C + # global variable. 
+ entry = Scope.declare_var(self, name, type, pos, + cname, visibility, is_cdef) + if not visibility in ('private', 'public', 'extern'): + error(pos, "Module-level variable cannot be declared %s" % visibility) + if not is_cdef: + if not (type.is_pyobject and not type.is_extension_type): + raise InternalError( + "Non-cdef global variable is not a generic Python object") + entry.is_pyglobal = 1 + entry.namespace_cname = self.module_cname + if Options.intern_names: + entry.interned_cname = self.intern(name) + else: + entry.is_cglobal = 1 + self.var_entries.append(entry) + return entry + + def declare_global(self, name, pos): + entry = self.lookup_here(name) + if not entry: + self.declare_var(name, py_object_type, pos) + + def add_default_value(self, type): + # Add an entry for holding a function argument + # default value. + cname = self.new_const_cname() + entry = Entry("", cname, type) + self.default_entries.append(entry) + return entry + + def new_const_cname(self): + # Create a new globally-unique name for a constant. + n = self.const_counter + self.const_counter = n + 1 + return "%s%d" % (Naming.const_prefix, n) + + def use_utility_code(self, new_code): + # Add string to list of utility code to be included, + # if not already there (tested using 'is'). 
+ for old_code in self.utility_code_used: + if old_code is new_code: + return + self.utility_code_used.append(new_code) + + def declare_c_class(self, name, pos, defining, implementing, + module_name, base_type, objstruct_cname, typeobj_cname, + visibility, typedef_flag): + # + #print "declare_c_class:", name + #print "...visibility =", visibility + # + # Look for previous declaration as a type + # + entry = self.lookup_here(name) + if entry: + type = entry.type + if not (entry.is_type and type.is_extension_type): + entry = None # Will cause an error when we redeclare it + else: + self.check_previous_typedef_flag(entry, typedef_flag, pos) + if base_type <> type.base_type: + error(pos, "Base type does not match previous declaration") + # + # Make a new entry if needed + # + if not entry: + type = PyExtensionType(name, typedef_flag, base_type) + if visibility == 'extern': + type.module_name = module_name + else: + type.module_name = self.qualified_name + type.typeptr_cname = self.mangle(Naming.typeptr_prefix, name) + entry = self.declare_type(name, type, pos, visibility = visibility) + if objstruct_cname: + type.objstruct_cname = objstruct_cname + elif not entry.in_cinclude: + type.objstruct_cname = self.mangle(Naming.objstruct_prefix, name) + else: + error(entry.pos, + "Object name required for 'public' or 'extern' C class") + self.attach_var_entry_to_c_class(entry) + self.c_class_entries.append(entry) + # + # Check for re-definition and create scope if needed + # + if not type.scope: + if defining or implementing: + scope = CClassScope(name = name, outer_scope = self, + visibility = visibility) + if base_type: + scope.declare_inherited_c_attributes(base_type.scope) + type.set_scope(scope) + else: + self.check_for_illegal_incomplete_ctypedef(typedef_flag, pos) + else: + if defining and type.scope.defined: + error(pos, "C class '%s' already defined" % name) + elif implementing and type.scope.implemented: + error(pos, "C class '%s' already implemented" % name) + # + # 
Fill in options, checking for compatibility with any previous declaration + # + if implementing: # So that filenames in runtime exceptions refer to + entry.pos = pos # the .pyx file and not the .pxd file + if entry.visibility <> visibility: + error(pos, "Declaration of '%s' as '%s' conflicts with previous " + "declaration as '%s'" % (class_name, visibility, entry.visibility)) + if objstruct_cname: + if type.objstruct_cname and type.objstruct_cname <> objstruct_cname: + error(pos, "Object struct name differs from previous declaration") + type.objstruct_cname = objstruct_cname + if typeobj_cname: + if type.typeobj_cname and type.typeobj_cname <> typeobj_cname: + error(pos, "Type object name differs from previous declaration") + type.typeobj_cname = typeobj_cname + # + # Return new or existing entry + # + return entry + + def check_for_illegal_incomplete_ctypedef(self, typedef_flag, pos): + if typedef_flag and not self.in_cinclude: + error(pos, "Forward-referenced type must use 'cdef', not 'ctypedef'") + + def allocate_vtable_names(self, entry): + # If extension type has a vtable, allocate vtable struct and + # slot names for it. + type = entry.type + if type.base_type and type.base_type.vtabslot_cname: + #print "...allocating vtabslot_cname because base type has one" ### + type.vtabslot_cname = "%s.%s" % ( + Naming.obj_base_cname, type.base_type.vtabslot_cname) + elif type.scope and type.scope.cfunc_entries: + #print "...allocating vtabslot_cname because there are C methods" ### + type.vtabslot_cname = Naming.vtabslot_cname + if type.vtabslot_cname: + #print "...allocating other vtable related cnames" ### + type.vtabstruct_cname = self.mangle(Naming.vtabstruct_prefix, entry.name) + type.vtabptr_cname = self.mangle(Naming.vtabptr_prefix, entry.name) + + def check_c_classes(self): + # Performs post-analysis checking and finishing up of extension types + # being implemented in this module. 
This is called only for the main + # .pyx file scope, not for cimported .pxd scopes. + # + # Checks all extension types declared in this scope to + # make sure that: + # + # * The extension type is implemented + # * All required object and type names have been specified or generated + # * All non-inherited C methods are implemented + # + # Also allocates a name for the vtable if needed. + # + debug_check_c_classes = 0 + if debug_check_c_classes: + print "Scope.check_c_classes: checking scope", self.qualified_name + for entry in self.c_class_entries: + if debug_check_c_classes: + print "...entry", entry.name, entry + print "......type =", entry.type + print "......visibility =", entry.visibility + type = entry.type + name = entry.name + visibility = entry.visibility + # Check defined + if not type.scope: + error(entry.pos, "C class '%s' is declared but not defined" % name) + # Generate typeobj_cname + if visibility <> 'extern' and not type.typeobj_cname: + type.typeobj_cname = self.mangle(Naming.typeobj_prefix, name) + ## Generate typeptr_cname + #type.typeptr_cname = self.mangle(Naming.typeptr_prefix, name) + # Check C methods defined + if type.scope: + for method_entry in type.scope.cfunc_entries: + if not method_entry.is_inherited and not method_entry.func_cname: + error(method_entry.pos, "C method '%s' is declared but not defined" % + method_entry.name) + # Allocate vtable name if necessary + if type.vtabslot_cname: + #print "ModuleScope.check_c_classes: allocating vtable cname for", self ### + type.vtable_cname = self.mangle(Naming.vtable_prefix, entry.name) + + def attach_var_entry_to_c_class(self, entry): + # The name of an extension class has to serve as both a type + # name and a variable name holding the type object. It is + # represented in the symbol table by a type entry with a + # variable entry attached to it. For the variable entry, + # we use a read-only C global variable whose name is an + # expression that refers to the type object. 
+ var_entry = Entry(name = entry.name, + type = py_object_type, + pos = entry.pos, + cname = "((PyObject*)%s)" % entry.type.typeptr_cname) + var_entry.is_variable = 1 + var_entry.is_cglobal = 1 + var_entry.is_readonly = 1 + entry.as_variable = var_entry + + +class LocalScope(Scope): + + def __init__(self, name, outer_scope): + Scope.__init__(self, name, outer_scope, outer_scope) + + def mangle(self, prefix, name): + return prefix + name + + def declare_arg(self, name, type, pos): + # Add an entry for an argument of a function. + cname = self.mangle(Naming.var_prefix, name) + entry = self.declare(name, cname, type, pos) + entry.is_variable = 1 + if type.is_pyobject: + entry.init = "0" + #entry.borrowed = 1 # Not using borrowed arg refs for now + self.arg_entries.append(entry) + return entry + + def declare_var(self, name, type, pos, + cname = None, visibility = 'private', is_cdef = 0): + # Add an entry for a local variable. + if visibility in ('public', 'readonly'): + error(pos, "Local variable cannot be declared %s" % visibility) + entry = Scope.declare_var(self, name, type, pos, + cname, visibility, is_cdef) + entry.init_to_none = type.is_pyobject + self.var_entries.append(entry) + return entry + + def declare_global(self, name, pos): + # Pull entry from global scope into local scope. + if self.lookup_here(name): + error(pos, "'%s' redeclared") + else: + entry = self.global_scope().lookup_target(name) + self.entries[name] = entry + + +class StructOrUnionScope(Scope): + # Namespace of a C struct or union. + + def __init__(self): + Scope.__init__(self, "?", None, None) + + def declare_var(self, name, type, pos, + cname = None, visibility = 'private', is_cdef = 0): + # Add an entry for an attribute. 
+ if not cname: + cname = name + entry = self.declare(name, cname, type, pos) + entry.is_variable = 1 + self.var_entries.append(entry) + if type.is_pyobject: + error(pos, + "C struct/union member cannot be a Python object") + if visibility <> 'private': + error(pos, + "C struct/union member cannot be declared %s" % visibility) + return entry + + +class ClassScope(Scope): + # Abstract base class for namespace of + # Python class or extension type. + # + # class_name string Pyrex name of the class + # scope_prefix string Additional prefix for names + # declared in the class + # doc string or None Doc string + + def __init__(self, name, outer_scope): + Scope.__init__(self, name, outer_scope, outer_scope) + self.class_name = name + self.doc = None + + def add_string_const(self, value): + return self.outer_scope.add_string_const(value) + + +class PyClassScope(ClassScope): + # Namespace of a Python class. + # + # class_dict_cname string C variable holding class dict + # class_obj_cname string C variable holding class object + + is_py_class_scope = 1 + + def declare_var(self, name, type, pos, + cname = None, visibility = 'private', is_cdef = 0): + # Add an entry for a class attribute. + entry = Scope.declare_var(self, name, type, pos, + cname, visibility, is_cdef) + entry.is_pyglobal = 1 + entry.namespace_cname = self.class_obj_cname + if Options.intern_names: + entry.interned_cname = self.intern(name) + return entry + + def allocate_temp(self, type): + return self.outer_scope.allocate_temp(type) + + def release_temp(self, cname): + self.outer_scope.release_temp(cname) + + def recycle_pending_temps(self): + self.outer_scope.recycle_pending_temps() + + def add_default_value(self, type): + return self.outer_scope.add_default_value(type) + + +class CClassScope(ClassScope): + # Namespace of an extension type. 
+ # + # parent_type CClassType + # #typeobj_cname string or None + # #objstruct_cname string + # method_table_cname string + # member_table_cname string + # getset_table_cname string + # has_pyobject_attrs boolean Any PyObject attributes? + # public_attr_entries [Entry] public/readonly attrs + # property_entries [Entry] + # defined boolean Defined in .pxd file + # implemented boolean Defined in .pyx file + # inherited_var_entries [Entry] Adapted var entries from base class + + is_c_class_scope = 1 + + def __init__(self, name, outer_scope, visibility): + ClassScope.__init__(self, name, outer_scope) + if visibility <> 'extern': + self.method_table_cname = outer_scope.mangle(Naming.methtab_prefix, name) + self.member_table_cname = outer_scope.mangle(Naming.memtab_prefix, name) + self.getset_table_cname = outer_scope.mangle(Naming.gstab_prefix, name) + self.has_pyobject_attrs = 0 + self.public_attr_entries = [] + self.property_entries = [] + self.inherited_var_entries = [] + self.defined = 0 + self.implemented = 0 + + def needs_gc(self): + # If the type or any of its base types have Python-valued + # C attributes, then it needs to participate in GC. + return self.has_pyobject_attrs or \ + (self.parent_type.base_type and \ + self.parent_type.base_type.scope.needs_gc()) + + def declare_var(self, name, type, pos, + cname = None, visibility = 'private', is_cdef = 0): + # Add an entry for an attribute. + if self.defined: + error(pos, + "C attributes cannot be added in implementation part of" + " extension type") + if get_special_method_signature(name): + error(pos, + "The name '%s' is reserved for a special method."
+ % name) + if not cname: + cname = name + entry = self.declare(name, cname, type, pos) + entry.visibility = visibility + entry.is_variable = 1 + self.var_entries.append(entry) + if type.is_pyobject: + self.has_pyobject_attrs = 1 + if visibility not in ('private', 'public', 'readonly'): + error(pos, + "Attribute of extension type cannot be declared %s" % visibility) + if visibility in ('public', 'readonly'): + if type.pymemberdef_typecode: + self.public_attr_entries.append(entry) + else: + error(pos, + "C attribute of type '%s' cannot be accessed from Python" % type) + if visibility == 'public' and type.is_extension_type: + error(pos, + "Non-generic Python attribute cannot be exposed for writing from Python") + return entry + + def declare_pyfunction(self, name, pos): + # Add an entry for a method. + entry = self.declare(name, name, py_object_type, pos) + special_sig = get_special_method_signature(name) + if special_sig: + entry.signature = special_sig + # Special methods don't get put in the method table + else: + entry.signature = pymethod_signature + self.pyfunc_entries.append(entry) + return entry + + def declare_cfunction(self, name, type, pos, + cname = None, visibility = 'private', defining = 0): + args = type.args + if not args: + error(pos, "C method has no self argument") + elif not args[0].type.same_as(self.parent_type): + error(pos, "Self argument of C method does not match parent type") + entry = self.lookup_here(name) + if entry: + if not entry.is_cfunction: + error(pos, "'%s' redeclared" % name) + else: + if defining and entry.func_cname: + error(pos, "'%s' already defined" % name) + if not entry.type.same_as(type, as_cmethod = 1): + error(pos, "Signature does not match previous declaration") + else: + if self.defined: + error(pos, + "C method '%s' not previously declared in definition part of" + " extension type" % name) + entry = self.add_cfunction(name, type, pos, cname or name, visibility) + if defining: + entry.func_cname = 
self.mangle(Naming.func_prefix, name) + return entry + + def add_cfunction(self, name, type, pos, cname, visibility): + # Add a cfunction entry without giving it a func_cname. + entry = ClassScope.add_cfunction(self, name, type, pos, cname, visibility) + entry.is_cmethod = 1 + return entry + + def declare_property(self, name, doc, pos): + entry = self.declare(name, name, py_object_type, pos) + entry.is_property = 1 + entry.doc = doc + entry.scope = PropertyScope(name, + outer_scope = self.global_scope(), parent_scope = self) + entry.scope.parent_type = self.parent_type + self.property_entries.append(entry) + return entry + + def declare_inherited_c_attributes(self, base_scope): + # Declare entries for all the C attributes of an + # inherited type, with cnames modified appropriately + # to work with this type. + def adapt(cname): + return "%s.%s" % (Naming.obj_base_cname, base_entry.cname) + for base_entry in \ + base_scope.inherited_var_entries + base_scope.var_entries: + entry = self.declare(base_entry.name, adapt(base_entry.cname), + base_entry.type, None) + entry.is_variable = 1 + self.inherited_var_entries.append(entry) + for base_entry in base_scope.cfunc_entries: + entry = self.add_cfunction(base_entry.name, base_entry.type, None, + adapt(base_entry.cname), base_entry.visibility) + entry.is_inherited = 1 + + +class PropertyScope(Scope): + # Scope holding the __get__, __set__ and __del__ methods for + # a property of an extension type. + # + # parent_type PyExtensionType The type to which the property belongs + + def declare_pyfunction(self, name, pos): + # Add an entry for a method. 
+ signature = get_property_accessor_signature(name) + if signature: + entry = self.declare(name, name, py_object_type, pos) + entry.signature = signature + return entry + else: + error(pos, "Only __get__, __set__ and __del__ methods allowed " + "in a property declaration") + return None Added: lxml/pyrex/Pyrex/Compiler/Symtab.pyc ============================================================================== Binary file. No diff available. Added: lxml/pyrex/Pyrex/Compiler/TypeSlots.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Compiler/TypeSlots.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,574 @@ +# +# Pyrex - Tables describing slots in the type object +# and associated know-how. +# + +import Naming +import PyrexTypes + +class Signature: + # Method slot signature descriptor. + # + # has_dummy_arg boolean + # has_generic_args boolean + # fixed_arg_format string + # ret_format string + # error_value string + # + # The formats are strings made up of the following + # characters: + # + # 'O' Python object + # 'T' Python object of the type of 'self' + # 'v' void + # 'p' void * + # 'P' void ** + # 'i' int + # 'I' int * + # 'l' long + # 's' char * + # 'S' char ** + # 'r' int used only to signal exception + # '-' dummy 'self' argument (not used) + # '*' rest of args passed as generic Python + # arg tuple and kw dict (must be last + # char in format string) + + format_map = { + 'O': PyrexTypes.py_object_type, + 'v': PyrexTypes.c_void_type, + 'p': PyrexTypes.c_void_ptr_type, + 'P': PyrexTypes.c_void_ptr_ptr_type, + 'i': PyrexTypes.c_int_type, + 'I': PyrexTypes.c_int_ptr_type, + 'l': PyrexTypes.c_long_type, + 's': PyrexTypes.c_char_ptr_type, + 'S': PyrexTypes.c_char_ptr_ptr_type, + 'r': PyrexTypes.c_returncode_type, + # 'T', '-' and '*' are handled otherwise + # and are not looked up in here + } + + error_value_map = { + 'O': "0", + 'i': "-1", + 'l': "-1", + 'r': "-1", + } + + def __init__(self, arg_format, 
ret_format): + self.has_dummy_arg = 0 + self.has_generic_args = 0 + if arg_format[:1] == '-': + self.has_dummy_arg = 1 + arg_format = arg_format[1:] + if arg_format[-1:] == '*': + self.has_generic_args = 1 + arg_format = arg_format[:-1] + self.fixed_arg_format = arg_format + self.ret_format = ret_format + self.error_value = self.error_value_map.get(ret_format, None) + + def num_fixed_args(self): + return len(self.fixed_arg_format) + + def is_self_arg(self, i): + return self.fixed_arg_format[i] == 'T' + + def fixed_arg_type(self, i): + return self.format_map[self.fixed_arg_format[i]] + + def return_type(self): + return self.format_map[self.ret_format] + + +class SlotDescriptor: + # Abstract base class for type slot descriptors. + # + # slot_name string Member name of the slot in the type object + # is_initialised_dynamically Is initialised by code in the module init function + + def __init__(self, slot_name, dynamic = 0): + self.slot_name = slot_name + self.is_initialised_dynamically = dynamic + + def generate(self, scope, code): + if self.is_initialised_dynamically: + value = 0 + else: + value = self.slot_code(scope) + code.putln("%s, /*%s*/" % (value, self.slot_name)) + + # Some C implementations have trouble statically + # initialising a global with a pointer to an extern + # function, so we initialise some of the type slots + # in the module init function instead. + + def generate_dynamic_init_code(self, scope, code): + if self.is_initialised_dynamically: + value = self.slot_code(scope) + if value <> "0": + code.putln("%s.%s = %s;" % ( + scope.parent_type.typeobj_cname, + self.slot_name, + value + ) + ) + + +class FixedSlot(SlotDescriptor): + # Descriptor for a type slot with a fixed value. + # + # value string + + def __init__(self, slot_name, value): + SlotDescriptor.__init__(self, slot_name) + self.value = value + + def slot_code(self, scope): + return self.value + + +class EmptySlot(FixedSlot): + # Descriptor for a type slot whose value is always 0. 
+ + def __init__(self, slot_name): + FixedSlot.__init__(self, slot_name, "0") + + +class GCDependentSlot(SlotDescriptor): + # Descriptor for a slot whose value depends on whether + # the type participates in GC. + + def __init__(self, slot_name, no_gc_value, gc_value, dynamic = 0): + SlotDescriptor.__init__(self, slot_name, dynamic) + self.no_gc_value = no_gc_value + self.gc_value = gc_value + + def slot_code(self, scope): + if scope.has_pyobject_attrs: + return self.gc_value + else: + return self.no_gc_value + + +class MethodSlot(SlotDescriptor): + # Type slot descriptor for a user-definable method. + # + # signature Signature + # method_name string The __xxx__ name of the method + # default string or None Default value of the slot + + def __init__(self, signature, slot_name, method_name, default = None): + SlotDescriptor.__init__(self, slot_name) + self.signature = signature + self.slot_name = slot_name + self.method_name = method_name + self.default = default + method_name_to_slot[method_name] = self + + def slot_code(self, scope): + entry = scope.lookup_here(self.method_name) + if entry: + return entry.func_cname + else: + return "0" + + +class InternalMethodSlot(SlotDescriptor): + # Type slot descriptor for a method which is always + # synthesized by Pyrex. + # + # slot_name string Member name of the slot in the type object + + def __init__(self, slot_name): + SlotDescriptor.__init__(self, slot_name) + + def slot_code(self, scope): + return scope.mangle_internal(self.slot_name) + + +class SyntheticSlot(InternalMethodSlot): + # Type slot descriptor for a synthesized method which + # dispatches to one or more user-defined methods depending + # on its arguments. If none of the relevant methods are + # defined, the method will not be synthesized and an + # alternative default value will be placed in the type + # slot. 
+ + def __init__(self, slot_name, user_methods, default_value): + InternalMethodSlot.__init__(self, slot_name) + self.user_methods = user_methods + self.default_value = default_value + + def slot_code(self, scope): + if scope.defines_any(self.user_methods): + return InternalMethodSlot.slot_code(self, scope) + else: + return self.default_value + + +class TypeFlagsSlot(SlotDescriptor): + # Descriptor for the type flags slot. + + def slot_code(self, scope): + value = "Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_BASETYPE" + if scope.has_pyobject_attrs: + value += "|Py_TPFLAGS_HAVE_GC" + return value + + +class DocStringSlot(SlotDescriptor): + # Descriptor for the docstring slot. + + def slot_code(self, scope): + if scope.doc is not None: + return '"%s"' % scope.doc + else: + return "0" + + +class SuiteSlot(SlotDescriptor): + # Descriptor for a substructure of the type object. + # + # sub_slots [SlotDescriptor] + + def __init__(self, sub_slots, slot_type, slot_name): + SlotDescriptor.__init__(self, slot_name) + self.sub_slots = sub_slots + self.slot_type = slot_type + substructures.append(self) + + def substructure_cname(self, scope): + return "%s%s_%s" % (Naming.pyrex_prefix, self.slot_name, scope.class_name) + + def slot_code(self, scope): + return "&%s" % self.substructure_cname(scope) + + def generate_substructure(self, scope, code): + code.putln("") + code.putln( + "static %s %s = {" % ( + self.slot_type, + self.substructure_cname(scope))) + for slot in self.sub_slots: + slot.generate(scope, code) + code.putln("};") + +substructures = [] # List of all SuiteSlot instances + +class MethodTableSlot(SlotDescriptor): + # Slot descriptor for the method table. + + def slot_code(self, scope): + return scope.method_table_cname + + +class MemberTableSlot(SlotDescriptor): + # Slot descriptor for the table of Python-accessible attributes. 
+ + def slot_code(self, scope): + if scope.public_attr_entries: + return scope.member_table_cname + else: + return "0" + + +class GetSetSlot(SlotDescriptor): + # Slot descriptor for the table of attribute get & set methods. + + def slot_code(self, scope): + if scope.property_entries: + return scope.getset_table_cname + else: + return "0" + + +class BaseClassSlot(SlotDescriptor): + # Slot descriptor for the base class slot. + + def __init__(self, name): + SlotDescriptor.__init__(self, name, dynamic = 1) + + def generate_dynamic_init_code(self, scope, code): + base_type = scope.parent_type.base_type + if base_type: + code.putln("%s.%s = %s;" % ( + scope.parent_type.typeobj_cname, + self.slot_name, + base_type.typeptr_cname)) + + +# The following dictionary maps __xxx__ method names to slot descriptors. + +method_name_to_slot = {} + +## The following slots are (or could be) initialised with an +## extern function pointer. +# +#slots_initialised_from_extern = ( +# "tp_free", +#) + +#------------------------------------------------------------------------------------------ +# +# Utility functions for accessing slot table data structures +# +#------------------------------------------------------------------------------------------ + +def get_special_method_signature(name): + # Given a method name, if it is a special method, + # return its signature, else return None. + slot = method_name_to_slot.get(name) + if slot: + return slot.signature + else: + return None + +def get_property_accessor_signature(name): + # Return signature of accessor for an extension type + # property, else None. + return property_accessor_signatures.get(name) + +#------------------------------------------------------------------------------------------ +# +# Signatures for generic Python functions and methods. 
+# +#------------------------------------------------------------------------------------------ + +pyfunction_signature = Signature("-*", "O") +pymethod_signature = Signature("T*", "O") + +#------------------------------------------------------------------------------------------ +# +# Signatures for the various kinds of function that +# can appear in the type object and its substructures. +# +#------------------------------------------------------------------------------------------ + +unaryfunc = Signature("T", "O") # typedef PyObject * (*unaryfunc)(PyObject *); +binaryfunc = Signature("OO", "O") # typedef PyObject * (*binaryfunc)(PyObject *, PyObject *); +ibinaryfunc = Signature("TO", "O") # typedef PyObject * (*binaryfunc)(PyObject *, PyObject *); +ternaryfunc = Signature("OOO", "O") # typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *); +iternaryfunc = Signature("TOO", "O") # typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *); +callfunc = Signature("T*", "O") # typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *); +inquiry = Signature("T", "i") # typedef int (*inquiry)(PyObject *); + # typedef int (*coercion)(PyObject **, PyObject **); +intargfunc = Signature("Ti", "O") # typedef PyObject *(*intargfunc)(PyObject *, int); +intintargfunc = Signature("Tii", "O") # typedef PyObject *(*intintargfunc)(PyObject *, int, int); +intobjargproc = Signature("TiO", 'r') # typedef int(*intobjargproc)(PyObject *, int, PyObject *); +intintobjargproc = Signature("TiiO", 'r') # typedef int(*intintobjargproc)(PyObject *, int, int, PyObject *); +intintargproc = Signature("Tii", 'r') +objargfunc = Signature("TO", "O") +objobjargproc = Signature("TOO", 'r') # typedef int (*objobjargproc)(PyObject *, PyObject *, PyObject *); +getreadbufferproc = Signature("TiP", 'i') # typedef int (*getreadbufferproc)(PyObject *, int, void **); +getwritebufferproc = Signature("TiP", 'i') # typedef int (*getwritebufferproc)(PyObject *, int, void 
**); +getsegcountproc = Signature("TI", 'i') # typedef int (*getsegcountproc)(PyObject *, int *); +getcharbufferproc = Signature("TiS", 'i') # typedef int (*getcharbufferproc)(PyObject *, int, const char **); +objargproc = Signature("TO", 'r') # typedef int (*objobjproc)(PyObject *, PyObject *); + # typedef int (*visitproc)(PyObject *, void *); + # typedef int (*traverseproc)(PyObject *, visitproc, void *); + +destructor = Signature("T", "v") # typedef void (*destructor)(PyObject *); +# printfunc = Signature("TFi", 'r') # typedef int (*printfunc)(PyObject *, FILE *, int); + # typedef PyObject *(*getattrfunc)(PyObject *, char *); +getattrofunc = Signature("TO", "O") # typedef PyObject *(*getattrofunc)(PyObject *, PyObject *); + # typedef int (*setattrfunc)(PyObject *, char *, PyObject *); +setattrofunc = Signature("TOO", 'r') # typedef int (*setattrofunc)(PyObject *, PyObject *, PyObject *); +delattrofunc = Signature("TO", 'r') +cmpfunc = Signature("TO", "i") # typedef int (*cmpfunc)(PyObject *, PyObject *); +reprfunc = Signature("T", "O") # typedef PyObject *(*reprfunc)(PyObject *); +hashfunc = Signature("T", "l") # typedef long (*hashfunc)(PyObject *); + # typedef PyObject *(*richcmpfunc) (PyObject *, PyObject *, int); +richcmpfunc = Signature("OOi", "O") # typedef PyObject *(*richcmpfunc) (PyObject *, PyObject *, int); +getiterfunc = Signature("T", "O") # typedef PyObject *(*getiterfunc) (PyObject *); +iternextfunc = Signature("T", "O") # typedef PyObject *(*iternextfunc) (PyObject *); +descrgetfunc = Signature("TOO", "O") # typedef PyObject *(*descrgetfunc) (PyObject *, PyObject *, PyObject *); +descrsetfunc = Signature("TOO", 'r') # typedef int (*descrsetfunc) (PyObject *, PyObject *, PyObject *); +descrdelfunc = Signature("TO", 'r') +initproc = Signature("T*", 'r') # typedef int (*initproc)(PyObject *, PyObject *, PyObject *); + # typedef PyObject *(*newfunc)(struct _typeobject *, PyObject *, PyObject *); + # typedef PyObject *(*allocfunc)(struct _typeobject 
*, int); + +#------------------------------------------------------------------------------------------ +# +# Signatures for accessor methods of properties. +# +#------------------------------------------------------------------------------------------ + +property_accessor_signatures = { + '__get__': Signature("T", "O"), + '__set__': Signature("TO", 'r'), + '__del__': Signature("T", 'r') +} + +#------------------------------------------------------------------------------------------ +# +# Descriptor tables for the slots of the various type object +# substructures, in the order they appear in the structure. +# +#------------------------------------------------------------------------------------------ + +PyNumberMethods = ( + MethodSlot(binaryfunc, "nb_add", "__add__"), + MethodSlot(binaryfunc, "nb_subtract", "__sub__"), + MethodSlot(binaryfunc, "nb_multiply", "__mul__"), + MethodSlot(binaryfunc, "nb_divide", "__div__"), + MethodSlot(binaryfunc, "nb_remainder", "__mod__"), + MethodSlot(binaryfunc, "nb_divmod", "__divmod__"), + MethodSlot(ternaryfunc, "nb_power", "__pow__"), + MethodSlot(unaryfunc, "nb_negative", "__neg__"), + MethodSlot(unaryfunc, "nb_positive", "__pos__"), + MethodSlot(unaryfunc, "nb_absolute", "__abs__"), + MethodSlot(inquiry, "nb_nonzero", "__nonzero__"), + MethodSlot(unaryfunc, "nb_invert", "__invert__"), + MethodSlot(binaryfunc, "nb_lshift", "__lshift__"), + MethodSlot(binaryfunc, "nb_rshift", "__rshift__"), + MethodSlot(binaryfunc, "nb_and", "__and__"), + MethodSlot(binaryfunc, "nb_xor", "__xor__"), + MethodSlot(binaryfunc, "nb_or", "__or__"), + EmptySlot("nb_coerce"), + MethodSlot(unaryfunc, "nb_int", "__int__"), + MethodSlot(unaryfunc, "nb_long", "__long__"), + MethodSlot(unaryfunc, "nb_float", "__float__"), + MethodSlot(unaryfunc, "nb_oct", "__oct__"), + MethodSlot(unaryfunc, "nb_hex", "__hex__"), + + # Added in release 2.0 + MethodSlot(ibinaryfunc, "nb_inplace_add", "__iadd__"), + MethodSlot(ibinaryfunc, "nb_inplace_subtract", 
"__isub__"), + MethodSlot(ibinaryfunc, "nb_inplace_multiply", "__imul__"), + MethodSlot(ibinaryfunc, "nb_inplace_divide", "__idiv__"), + MethodSlot(ibinaryfunc, "nb_inplace_remainder", "__imod__"), + MethodSlot(ternaryfunc, "nb_inplace_power", "__ipow__"), # NOT iternaryfunc!!! + MethodSlot(ibinaryfunc, "nb_inplace_lshift", "__ilshift__"), + MethodSlot(ibinaryfunc, "nb_inplace_rshift", "__irshift__"), + MethodSlot(ibinaryfunc, "nb_inplace_and", "__iand__"), + MethodSlot(ibinaryfunc, "nb_inplace_xor", "__ixor__"), + MethodSlot(ibinaryfunc, "nb_inplace_or", "__ior__"), + + # Added in release 2.2 + # The following require the Py_TPFLAGS_HAVE_CLASS flag + MethodSlot(binaryfunc, "nb_floor_divide", "__floordiv__"), + MethodSlot(binaryfunc, "nb_true_divide", "__truediv__"), + MethodSlot(ibinaryfunc, "nb_inplace_floor_divide", "__ifloordiv__"), + MethodSlot(ibinaryfunc, "nb_inplace_true_divide", "__itruediv__"), +) + +PySequenceMethods = ( + MethodSlot(inquiry, "sq_length", "__len__"), # EmptySlot("sq_length"), # mp_length used instead + EmptySlot("sq_concat"), # nb_add used instead + EmptySlot("sq_repeat"), # nb_multiply used instead + SyntheticSlot("sq_item", ["__getitem__"], "0"), #EmptySlot("sq_item"), # mp_subscript used instead + MethodSlot(intintargfunc, "sq_slice", "__getslice__"), + EmptySlot("sq_ass_item"), # mp_ass_subscript used instead + SyntheticSlot("sq_ass_slice", ["__setslice__", "__delslice__"], "0"), + MethodSlot(cmpfunc, "sq_contains", "__contains__"), + EmptySlot("sq_inplace_concat"), # nb_inplace_add used instead + EmptySlot("sq_inplace_repeat"), # nb_inplace_multiply used instead +) + +PyMappingMethods = ( + MethodSlot(inquiry, "mp_length", "__len__"), + MethodSlot(objargfunc, "mp_subscript", "__getitem__"), + SyntheticSlot("mp_ass_subscript", ["__setitem__"], "0"), +) + +PyBufferProcs = ( + MethodSlot(getreadbufferproc, "bf_getreadbuffer", "__getreadbuffer__"), + MethodSlot(getwritebufferproc, "bf_getwritebuffer", "__getwritebuffer__"), + 
MethodSlot(getsegcountproc, "bf_getsegcount", "__getsegcount__"), + MethodSlot(getcharbufferproc, "bf_getcharbuffer", "__getcharbuffer__"), +) + +#------------------------------------------------------------------------------------------ +# +# The main slot table. This table contains descriptors for all the +# top-level type slots, beginning with tp_dealloc, in the order they +# appear in the type object. +# +#------------------------------------------------------------------------------------------ + +slot_table = ( + InternalMethodSlot("tp_dealloc"), + EmptySlot("tp_print"), #MethodSlot(printfunc, "tp_print", "__print__"), + EmptySlot("tp_getattr"), + EmptySlot("tp_setattr"), + MethodSlot(cmpfunc, "tp_compare", "__cmp__"), + MethodSlot(reprfunc, "tp_repr", "__repr__"), + + SuiteSlot(PyNumberMethods, "PyNumberMethods", "tp_as_number"), + SuiteSlot(PySequenceMethods, "PySequenceMethods", "tp_as_sequence"), + SuiteSlot(PyMappingMethods, "PyMappingMethods", "tp_as_mapping"), + + MethodSlot(hashfunc, "tp_hash", "__hash__"), + MethodSlot(callfunc, "tp_call", "__call__"), + MethodSlot(reprfunc, "tp_str", "__str__"), + + SyntheticSlot("tp_getattro", ["__getattr__"], "0"), #"PyObject_GenericGetAttr"), + SyntheticSlot("tp_setattro", ["__setattr__", "__delattr__"], "0"), #"PyObject_GenericSetAttr"), + + SuiteSlot(PyBufferProcs, "PyBufferProcs", "tp_as_buffer"), + + TypeFlagsSlot("tp_flags"), + DocStringSlot("tp_doc"), + + InternalMethodSlot("tp_traverse"), + InternalMethodSlot("tp_clear"), + + # Later -- synthesize a method to split into separate ops? 
+ MethodSlot(richcmpfunc, "tp_richcompare", "__richcmp__"), + + EmptySlot("tp_weaklistoffset"), + + MethodSlot(getiterfunc, "tp_iter", "__iter__"), + MethodSlot(iternextfunc, "tp_iternext", "__next__"), + + MethodTableSlot("tp_methods"), + MemberTableSlot("tp_members"), + GetSetSlot("tp_getset"), + + BaseClassSlot("tp_base"), #EmptySlot("tp_base"), + EmptySlot("tp_dict"), + + SyntheticSlot("tp_descr_get", ["__get__"], "0"), + SyntheticSlot("tp_descr_set", ["__set__", "__delete__"], "0"), + + EmptySlot("tp_dictoffset"), + + MethodSlot(initproc, "tp_init", "__init__"), + EmptySlot("tp_alloc"), #FixedSlot("tp_alloc", "PyType_GenericAlloc"), + InternalMethodSlot("tp_new"), + # Some versions of Python 2.2 inherit the wrong value for tp_free when the + # type has GC but the base type doesn't, so we explicitly set it ourselves + # in that case. + GCDependentSlot("tp_free", "0", "_PyObject_GC_Del", dynamic = 1), + + EmptySlot("tp_is_gc"), + EmptySlot("tp_bases"), + EmptySlot("tp_mro"), + EmptySlot("tp_cache"), + EmptySlot("tp_subclasses"), + EmptySlot("tp_weaklist"), +) + +#------------------------------------------------------------------------------------------ +# +# Descriptors for special methods which don't appear directly +# in the type object or its substructures. These methods are +# called from slot functions synthesized by Pyrex. 
+# +#------------------------------------------------------------------------------------------ + +MethodSlot(initproc, "", "__new__") +MethodSlot(destructor, "", "__dealloc__") +MethodSlot(objobjargproc, "", "__setitem__") +MethodSlot(objargproc, "", "__delitem__") +MethodSlot(intintobjargproc, "", "__setslice__") +MethodSlot(intintargproc, "", "__delslice__") +MethodSlot(getattrofunc, "", "__getattr__") +MethodSlot(setattrofunc, "", "__setattr__") +MethodSlot(delattrofunc, "", "__delattr__") +MethodSlot(descrgetfunc, "", "__get__") +MethodSlot(descrsetfunc, "", "__set__") +MethodSlot(descrdelfunc, "", "__delete__") Added: lxml/pyrex/Pyrex/Compiler/TypeSlots.pyc ============================================================================== Binary file. No diff available. Added: lxml/pyrex/Pyrex/Compiler/Version.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Compiler/Version.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1 @@ +version = '0.9.3.1' Added: lxml/pyrex/Pyrex/Compiler/Version.pyc ============================================================================== Binary file. No diff available. Added: lxml/pyrex/Pyrex/Compiler/__init__.py ============================================================================== Added: lxml/pyrex/Pyrex/Compiler/__init__.pyc ============================================================================== Binary file. No diff available. 
Added: lxml/pyrex/Pyrex/Debugging.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Debugging.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,20 @@ +############################################### +# +# Odds and ends for debugging +# +############################################### + +def print_call_chain(*args): + import sys + print " ".join(map(str, args)) + f = sys._getframe(2) + while f: + name = f.f_code.co_name + s = f.f_locals.get('self', None) + if s: + c = getattr(s, "__class__", None) + if c: + name = "%s.%s" % (c.__name__, name) + print "Called from:", name, f.f_lineno + f = f.f_back + print "-" * 70 Added: lxml/pyrex/Pyrex/Debugging.pyc ============================================================================== Binary file. No diff available. Added: lxml/pyrex/Pyrex/Distutils/__init__.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Distutils/__init__.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,22 @@ +# July 2002, Graham Fawcett + +# + +# this hack was inspired by the way Thomas Heller got py2exe + +# to appear as a distutil command + +# + +# we replace distutils.command.build_ext with our own version + +# and keep the old one under the module name _build_ext, + +# so that *our* build_ext can make use of it. + + + +from build_ext import build_ext + + + Added: lxml/pyrex/Pyrex/Distutils/build_ext.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Distutils/build_ext.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,79 @@ +# Subclasses disutils.command.build_ext, +# replacing it with a Pyrex version that compiles pyx->c +# before calling the original build_ext command. +# July 2002, Graham Fawcett +# Modified by Darrell Gallion +# to allow inclusion of .c files along with .pyx files. +# Pyrex is (c) Greg Ewing. 
+ +import distutils.command.build_ext +import Pyrex.Compiler.Main +from Pyrex.Compiler.Errors import PyrexError +from distutils.dep_util import newer +import os +import sys + +def replace_suffix(path, new_suffix): + return os.path.splitext(path)[0] + new_suffix + +class build_ext (distutils.command.build_ext.build_ext): + + description = "compile Pyrex scripts, then build C/C++ extensions (compile/link to build directory)" + + def finalize_options (self): + distutils.command.build_ext.build_ext.finalize_options(self) + + # The following hack should no longer be needed. + if 0: + # compiling with mingw32 gets an "initializer not a constant" error + # doesn't appear to happen with MSVC! + # so if we are compiling with mingw32, + # switch to C++ mode, to avoid the problem + if self.compiler == 'mingw32': + self.swig_cpp = 1 + + def swig_sources (self, sources, extension=None): + if not self.extensions: + return + + # collect the names of the source (.pyx) files + pyx_sources = [] + pyx_sources = [source for source in sources if source.endswith('.pyx')] + other_sources = [source for source in sources if not source.endswith('.pyx')] + + extension = self.swig_cpp and '.cpp' or '.c' + for pyx in pyx_sources: + # should I raise an exception if it doesn't exist? + if os.path.exists(pyx): + source = pyx + #target = source.replace('.pyx', extension) + target = replace_suffix(source, extension) + if newer(source, target) or self.force: + self.pyrex_compile(source) + + if self.swig_cpp: + # rename .c to .cpp (Pyrex always builds .c ...) 
+ if os.path.exists(target): + os.unlink(target) + #os.rename(source.replace('.pyx', '.c'), target) + os.rename(replace_suffix(source, '.c'), target) + # massage the cpp file + self.c_to_cpp(target) + + return [replace_suffix(src, extension) for src in pyx_sources] + other_sources + + def pyrex_compile(self, source): + result = Pyrex.Compiler.Main.compile(source) + if result.num_errors <> 0: + sys.exit(1) + + def c_to_cpp(self, filename): + """touch up the Pyrex generated c/cpp files to meet mingw32/distutils requirements.""" + f = open(filename, 'r') + lines = [line for line in f.readlines() if not line.startswith('staticforward PyTypeObject __pyx_type_')] + f.close() + f = open(filename, 'w') + lines.insert(1, 'extern "C" {\n') + lines.append('}\n') + f.write(''.join(lines)) + f.close() Added: lxml/pyrex/Pyrex/Mac/DarwinSystem.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Mac/DarwinSystem.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,60 @@ +# +# Pyrex - Darwin system interface +# + +verbose = 0 + +import os +from Pyrex.Utils import replace_suffix +from Pyrex.Compiler.Errors import PyrexError + +py_include_dirs = [ + "/Library/Frameworks/Python.framework/Headers" +] + +compiler = "gcc" +compiler_options = \ + "-g -c -fno-strict-aliasing -Wno-long-double -no-cpp-precomp " \ + "-mno-fused-madd -fno-common -dynamic" \ + .split() + +linker = "gcc" +linker_options = \ + "-Wl,-F.,-w -bundle -framework Python" \ + .split() + +class CCompilerError(PyrexError): + pass + +def c_compile(c_file, verbose_flag = 0): + # Compile the given C source file to produce + # an object file. Returns the pathname of the + # resulting file. 
+ c_file = os.path.join(os.getcwd(), c_file) + o_file = replace_suffix(c_file, ".o") + include_options = [] + for dir in py_include_dirs: + include_options.append("-I%s" % dir) + args = [compiler] + compiler_options + include_options + [c_file, "-o", o_file] + if verbose_flag or verbose: + print " ".join(args) + status = os.spawnvp(os.P_WAIT, compiler, args) + if status <> 0: + raise CCompilerError("C compiler returned status %s" % status) + return o_file + +def c_link(obj_file, verbose_flag = 0): + return c_link_list([obj_file], verbose_flag) + +def c_link_list(obj_files, verbose_flag = 0): + # Link the given object files into a dynamically + # loadable extension file. Returns the pathname + # of the resulting file. + out_file = replace_suffix(obj_files[0], ".so") + args = [linker] + linker_options + obj_files + ["-o", out_file] + if verbose_flag or verbose: + print " ".join(args) + status = os.spawnvp(os.P_WAIT, linker, args) + if status <> 0: + raise CCompilerError("Linker returned status %s" % status) + return out_file Added: lxml/pyrex/Pyrex/Mac/Finder_Std_Suite.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Mac/Finder_Std_Suite.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,768 @@ +"""Suite Standard Suite: Common terms for most applications +Level 1, version 1 + +Generated from Macintosh HD:System 8.0:Finder +AETE/AEUT resource version 0/144, language 0, script 0 +""" + +import aetools +import MacOS + +_code = 'core' + +class Finder_Std_Suite: + + _argmap_class_info = { + '_in' : 'wrcd', + } + + def class_info(self, _object=None, _attributes={}, **_arguments): + """class info: Get information about an object class + Required argument: the object class about which information is requested + Keyword argument _in: the human language and script system in which to return information + Keyword argument _attributes: AppleEvent attribute dictionary + Returns: a record containing the object's properties 
and elements + """ + _code = 'core' + _subcode = 'qobj' + + aetools.keysubst(_arguments, self._argmap_class_info) + _arguments['----'] = _object + + + _reply, _arguments, _attributes = self.send(_code, _subcode, + _arguments, _attributes) + if _arguments.has_key('errn'): + raise aetools.Error, aetools.decodeerror(_arguments) + # XXXX Optionally decode result + if _arguments.has_key('----'): + return _arguments['----'] + + _argmap_close = { + 'saving' : 'savo', + 'saving_in' : 'kfil', + } + + def close(self, _object, _attributes={}, **_arguments): + """close: Close an object + Required argument: the object to close + Keyword argument saving: specifies whether changes should be saved before closing + Keyword argument saving_in: the file in which to save the object + Keyword argument _attributes: AppleEvent attribute dictionary + """ + _code = 'core' + _subcode = 'clos' + + aetools.keysubst(_arguments, self._argmap_close) + _arguments['----'] = _object + + aetools.enumsubst(_arguments, 'savo', _Enum_savo) + + _reply, _arguments, _attributes = self.send(_code, _subcode, + _arguments, _attributes) + if _arguments.has_key('errn'): + raise aetools.Error, aetools.decodeerror(_arguments) + # XXXX Optionally decode result + if _arguments.has_key('----'): + return _arguments['----'] + + _argmap_count = { + 'each' : 'kocl', + } + + def count(self, _object, _attributes={}, **_arguments): + """count: Return the number of elements of a particular class within an object + Required argument: the object whose elements are to be counted + Keyword argument each: the class of the elements to be counted + Keyword argument _attributes: AppleEvent attribute dictionary + Returns: the number of elements + """ + _code = 'core' + _subcode = 'cnte' + + aetools.keysubst(_arguments, self._argmap_count) + _arguments['----'] = _object + + + _reply, _arguments, _attributes = self.send(_code, _subcode, + _arguments, _attributes) + if _arguments.has_key('errn'): + raise aetools.Error, 
aetools.decodeerror(_arguments) + # XXXX Optionally decode result + if _arguments.has_key('----'): + return _arguments['----'] + + _argmap_data_size = { + 'as' : 'rtyp', + } + + def data_size(self, _object, _attributes={}, **_arguments): + """data size: Return the size in bytes of an object + Required argument: the object whose data size is to be returned + Keyword argument as: the data type for which the size is calculated + Keyword argument _attributes: AppleEvent attribute dictionary + Returns: the size of the object in bytes + """ + _code = 'core' + _subcode = 'dsiz' + + aetools.keysubst(_arguments, self._argmap_data_size) + _arguments['----'] = _object + + + _reply, _arguments, _attributes = self.send(_code, _subcode, + _arguments, _attributes) + if _arguments.has_key('errn'): + raise aetools.Error, aetools.decodeerror(_arguments) + # XXXX Optionally decode result + if _arguments.has_key('----'): + return _arguments['----'] + + def delete(self, _object, _attributes={}, **_arguments): + """delete: Delete an element from an object + Required argument: the element to delete + Keyword argument _attributes: AppleEvent attribute dictionary + """ + _code = 'core' + _subcode = 'delo' + + if _arguments: raise TypeError, 'No optional args expected' + _arguments['----'] = _object + + + _reply, _arguments, _attributes = self.send(_code, _subcode, + _arguments, _attributes) + if _arguments.has_key('errn'): + raise aetools.Error, aetools.decodeerror(_arguments) + # XXXX Optionally decode result + if _arguments.has_key('----'): + return _arguments['----'] + + _argmap_duplicate = { + 'to' : 'insh', + 'replacing' : 'alrp', + 'routing_suppressed' : 'rout', + } + + def duplicate(self, _object, _attributes={}, **_arguments): + """duplicate: Duplicate object(s) + Required argument: the object(s) to duplicate + Keyword argument to: the new location for the object(s) + Keyword argument replacing: Specifies whether or not to replace items in the destination that have the same name as 
items being duplicated + Keyword argument routing_suppressed: Specifies whether or not to autoroute items (default is false). Only applies when copying to the system folder. + Keyword argument _attributes: AppleEvent attribute dictionary + Returns: to the duplicated object(s) + """ + _code = 'core' + _subcode = 'clon' + + aetools.keysubst(_arguments, self._argmap_duplicate) + _arguments['----'] = _object + + aetools.enumsubst(_arguments, 'alrp', _Enum_bool) + aetools.enumsubst(_arguments, 'rout', _Enum_bool) + + _reply, _arguments, _attributes = self.send(_code, _subcode, + _arguments, _attributes) + if _arguments.has_key('errn'): + raise aetools.Error, aetools.decodeerror(_arguments) + # XXXX Optionally decode result + if _arguments.has_key('----'): + return _arguments['----'] + + _argmap_event_info = { + '_in' : 'wrcd', + } + + def event_info(self, _object, _attributes={}, **_arguments): + """event info: Get information about the Apple events in a suite + Required argument: the event class of the Apple events for which to return information + Keyword argument _in: the human language and script system in which to return information + Keyword argument _attributes: AppleEvent attribute dictionary + Returns: a record containing the events and their parameters + """ + _code = 'core' + _subcode = 'gtei' + + aetools.keysubst(_arguments, self._argmap_event_info) + _arguments['----'] = _object + + + _reply, _arguments, _attributes = self.send(_code, _subcode, + _arguments, _attributes) + if _arguments.has_key('errn'): + raise aetools.Error, aetools.decodeerror(_arguments) + # XXXX Optionally decode result + if _arguments.has_key('----'): + return _arguments['----'] + + def exists(self, _object, _attributes={}, **_arguments): + """exists: Verify if an object exists + Required argument: the object in question + Keyword argument _attributes: AppleEvent attribute dictionary + Returns: true if it exists, false if not + """ + _code = 'core' + _subcode = 'doex' + + if 
_arguments: raise TypeError, 'No optional args expected' + _arguments['----'] = _object + + + _reply, _arguments, _attributes = self.send(_code, _subcode, + _arguments, _attributes) + if _arguments.has_key('errn'): + raise aetools.Error, aetools.decodeerror(_arguments) + # XXXX Optionally decode result + if _arguments.has_key('----'): + return _arguments['----'] + + _argmap_get = { + 'as' : 'rtyp', + } + + def get(self, _object, _attributes={}, **_arguments): + """get: Get the data for an object + Required argument: the object whose data is to be returned + Keyword argument as: the desired types for the data, in order of preference + Keyword argument _attributes: AppleEvent attribute dictionary + Returns: the data from the object + """ + _code = 'core' + _subcode = 'getd' + + aetools.keysubst(_arguments, self._argmap_get) + _arguments['----'] = _object + + + _reply, _arguments, _attributes = self.send(_code, _subcode, + _arguments, _attributes) + if _arguments.has_key('errn'): + raise aetools.Error, aetools.decodeerror(_arguments) + # XXXX Optionally decode result + if _arguments.has_key('----'): + return _arguments['----'] + + _argmap_make = { + 'new' : 'kocl', + 'at' : 'insh', + 'to' : 'to ', + 'with_data' : 'data', + 'with_properties' : 'prdt', + } + + def make(self, _no_object=None, _attributes={}, **_arguments): + """make: Make a new element + Keyword argument new: the class of the new element + Keyword argument at: the location at which to insert the element + Keyword argument to: when creating an alias file, the original item to create an alias to + Keyword argument with_data: the initial data for the element + Keyword argument with_properties: the initial values for the properties of the element + Keyword argument _attributes: AppleEvent attribute dictionary + Returns: to the new object(s) + """ + _code = 'core' + _subcode = 'crel' + + aetools.keysubst(_arguments, self._argmap_make) + if _no_object != None: raise TypeError, 'No direct arg expected' + + + 
_reply, _arguments, _attributes = self.send(_code, _subcode, + _arguments, _attributes) + if _arguments.has_key('errn'): + raise aetools.Error, aetools.decodeerror(_arguments) + # XXXX Optionally decode result + if _arguments.has_key('----'): + return _arguments['----'] + + _argmap_move = { + 'to' : 'insh', + 'replacing' : 'alrp', + 'positioned_at' : 'mvpl', + 'routing_suppressed' : 'rout', + } + + def move(self, _object, _attributes={}, **_arguments): + """move: Move object(s) to a new location + Required argument: the object(s) to move + Keyword argument to: the new location for the object(s) + Keyword argument replacing: Specifies whether or not to replace items in the destination that have the same name as items being moved + Keyword argument positioned_at: Gives a list (in local window coordinates) of positions for the destination items + Keyword argument routing_suppressed: Specifies whether or not to autoroute items (default is false). Only applies when moving to the system folder. 
+ Keyword argument _attributes: AppleEvent attribute dictionary + Returns: to the object(s) after they have been moved + """ + _code = 'core' + _subcode = 'move' + + aetools.keysubst(_arguments, self._argmap_move) + _arguments['----'] = _object + + aetools.enumsubst(_arguments, 'alrp', _Enum_bool) + aetools.enumsubst(_arguments, 'mvpl', _Enum_list) + aetools.enumsubst(_arguments, 'rout', _Enum_bool) + + _reply, _arguments, _attributes = self.send(_code, _subcode, + _arguments, _attributes) + if _arguments.has_key('errn'): + raise aetools.Error, aetools.decodeerror(_arguments) + # XXXX Optionally decode result + if _arguments.has_key('----'): + return _arguments['----'] + + _argmap_open = { + 'using' : 'usin', + 'with_properties' : 'prdt', + } + + def open(self, _object, _attributes={}, **_arguments): + """open: Open the specified object(s) + Required argument: list of objects to open + Keyword argument using: the application file to open the object with + Keyword argument with_properties: the initial values for the properties, to be sent along with the open event sent to the application that opens the direct object + Keyword argument _attributes: AppleEvent attribute dictionary + """ + _code = 'aevt' + _subcode = 'odoc' + + aetools.keysubst(_arguments, self._argmap_open) + _arguments['----'] = _object + + + _reply, _arguments, _attributes = self.send(_code, _subcode, + _arguments, _attributes) + if _arguments.has_key('errn'): + raise aetools.Error, aetools.decodeerror(_arguments) + # XXXX Optionally decode result + if _arguments.has_key('----'): + return _arguments['----'] + + def _print(self, _object, _attributes={}, **_arguments): + """print: Print the specified object(s) + Required argument: list of objects to print + Keyword argument _attributes: AppleEvent attribute dictionary + """ + _code = 'aevt' + _subcode = 'pdoc' + + if _arguments: raise TypeError, 'No optional args expected' + _arguments['----'] = _object + + + _reply, _arguments, _attributes = 
self.send(_code, _subcode, + _arguments, _attributes) + if _arguments.has_key('errn'): + raise aetools.Error, aetools.decodeerror(_arguments) + # XXXX Optionally decode result + if _arguments.has_key('----'): + return _arguments['----'] + + _argmap_quit = { + 'saving' : 'savo', + } + + def quit(self, _no_object=None, _attributes={}, **_arguments): + """quit: Quit the Finder (direct parameter ignored) + Keyword argument saving: specifies whether to save currently open documents (not supported by Finder) + Keyword argument _attributes: AppleEvent attribute dictionary + """ + _code = 'aevt' + _subcode = 'quit' + + aetools.keysubst(_arguments, self._argmap_quit) + if _no_object != None: raise TypeError, 'No direct arg expected' + + aetools.enumsubst(_arguments, 'savo', _Enum_savo) + + _reply, _arguments, _attributes = self.send(_code, _subcode, + _arguments, _attributes) + if _arguments.has_key('errn'): + raise aetools.Error, aetools.decodeerror(_arguments) + # XXXX Optionally decode result + if _arguments.has_key('----'): + return _arguments['----'] + + _argmap_save = { + '_in' : 'kfil', + 'as' : 'fltp', + } + + def save(self, _object, _attributes={}, **_arguments): + """save: Save an object (Not supported by Finder) + Required argument: the object to save + Keyword argument _in: the file in which to save the object (not supported by Finder) + Keyword argument as: the file type of the document in which to save the data (not supported by Finder) + Keyword argument _attributes: AppleEvent attribute dictionary + """ + _code = 'core' + _subcode = 'save' + + aetools.keysubst(_arguments, self._argmap_save) + _arguments['----'] = _object + + + _reply, _arguments, _attributes = self.send(_code, _subcode, + _arguments, _attributes) + if _arguments.has_key('errn'): + raise aetools.Error, aetools.decodeerror(_arguments) + # XXXX Optionally decode result + if _arguments.has_key('----'): + return _arguments['----'] + + _argmap_set = { + 'to' : 'data', + } + + def set(self, 
_object, _attributes={}, **_arguments): + """set: Set an object's data + Required argument: the object to change + Keyword argument to: the new value + Keyword argument _attributes: AppleEvent attribute dictionary + """ + _code = 'core' + _subcode = 'setd' + + aetools.keysubst(_arguments, self._argmap_set) + _arguments['----'] = _object + + + _reply, _arguments, _attributes = self.send(_code, _subcode, + _arguments, _attributes) + if _arguments.has_key('errn'): + raise aetools.Error, aetools.decodeerror(_arguments) + # XXXX Optionally decode result + if _arguments.has_key('----'): + return _arguments['----'] + + _argmap_suite_info = { + '_in' : 'wrcd', + } + + def suite_info(self, _object, _attributes={}, **_arguments): + """suite info: Get information about event suite(s) + Required argument: the suite for which to return information + Keyword argument _in: the human language and script system in which to return information + Keyword argument _attributes: AppleEvent attribute dictionary + Returns: a record containing the suites and their versions + """ + _code = 'core' + _subcode = 'gtsi' + + aetools.keysubst(_arguments, self._argmap_suite_info) + _arguments['----'] = _object + + + _reply, _arguments, _attributes = self.send(_code, _subcode, + _arguments, _attributes) + if _arguments.has_key('errn'): + raise aetools.Error, aetools.decodeerror(_arguments) + # XXXX Optionally decode result + if _arguments.has_key('----'): + return _arguments['----'] + + +class application(aetools.ComponentItem): + """application - An application program""" + want = 'capp' +class about_this_computer(aetools.NProperty): + """about this computer - the "About this Computer" dialog and the list of running processes displayed in it""" + which = 'abbx' + want = 'obj ' +class apple_menu_items_folder(aetools.NProperty): + """apple menu items folder - the special folder named "Apple Menu Items," the contents of which appear in the Apple menu""" + which = 'amnu' + want = 'obj ' +class 
clipboard(aetools.NProperty): + """clipboard - the Finder's clipboard window""" + which = 'pcli' + want = 'obj ' +class control_panels_folder(aetools.NProperty): + """control panels folder - the special folder named 'Control Panels'""" + which = 'ctrl' + want = 'obj ' +class desktop(aetools.NProperty): + """desktop - the desktop""" + which = 'desk' + want = 'obj ' +class extensions_folder(aetools.NProperty): + """extensions folder - the special folder named 'Extensions'""" + which = 'extn' + want = 'obj ' +class file_sharing(aetools.NProperty): + """file sharing - Is file sharing on?""" + which = 'fshr' + want = 'bool' +class Finder_preferences(aetools.NProperty): + """Finder preferences - Various preferences that apply to the Finder as a whole""" + which = 'pfrp' + want = 'obj ' +class fonts_folder(aetools.NProperty): + """fonts folder - the special folder named 'Fonts'""" + which = 'ffnt' + want = 'obj ' +class frontmost(aetools.NProperty): + """frontmost - Is the Finder the frontmost process?""" + which = 'pisf' + want = 'bool' +class insertion_location(aetools.NProperty): + """insertion location - the container in which a new folder would appear if "New Folder" was selected""" + which = 'pins' + want = 'obj ' +class largest_free_block(aetools.NProperty): + """largest free block - the largest free block of process memory available to launch an application""" + which = 'mfre' + want = 'long' +class preferences_folder(aetools.NProperty): + """preferences folder - the special folder named 'Preferences'""" + which = 'pref' + want = 'obj ' +class product_version(aetools.NProperty): + """product version - the version of the System software running on this computer""" + which = 'ver2' + want = 'itxt' +class selection(aetools.NProperty): + """selection - the selection visible to the user""" + which = 'sele' + want = 'obj ' +class sharing_starting_up(aetools.NProperty): + """sharing starting up - Is file sharing in the process of starting up?""" + which = 'fsup' + want = 
'bool' +class shutdown_items_folder(aetools.NProperty): + """shutdown items folder - the special folder named 'Shutdown Items'""" + which = 'shdf' + want = 'obj ' +class startup_items_folder(aetools.NProperty): + """startup items folder - the special folder named 'Startup Items'""" + which = 'strt' + want = 'obj ' +class system_folder(aetools.NProperty): + """system folder - the System folder""" + which = 'macs' + want = 'obj ' +class temporary_items_folder(aetools.NProperty): + """temporary items folder - the special folder named "Temporary Items" (invisible)""" + which = 'temp' + want = 'obj ' +class version(aetools.NProperty): + """version - the version of the Finder""" + which = 'vers' + want = 'itxt' +class view_preferences(aetools.NProperty): + """view preferences - backwards compatibility with Finder Scripting Extension. DEPRECATED -- not supported after Finder 8.0""" + which = 'pvwp' + want = 'obj ' +class visible(aetools.NProperty): + """visible - Is the Finder's layer visible?""" + which = 'pvis' + want = 'bool' +# element 'dsut' as ['indx', 'name'] +# element 'alia' as ['indx', 'name'] +# element 'appf' as ['indx', 'name', 'ID '] +# element 'clpf' as ['indx', 'name'] +# element 'lwnd' as ['indx', 'name'] +# element 'ctnr' as ['indx', 'name'] +# element 'cwnd' as ['indx', 'name'] +# element 'dwnd' as ['indx', 'name'] +# element 'ccdv' as ['indx', 'name'] +# element 'dafi' as ['indx', 'name'] +# element 'cdsk' as ['indx', 'name'] +# element 'cdis' as ['indx', 'name', 'ID '] +# element 'docf' as ['indx', 'name'] +# element 'file' as ['indx', 'name'] +# element 'cfol' as ['indx', 'name', 'ID '] +# element 'fntf' as ['indx', 'name'] +# element 'fsut' as ['indx', 'name'] +# element 'iwnd' as ['indx', 'name'] +# element 'cobj' as ['indx', 'name'] +# element 'sctr' as ['indx', 'name'] +# element 'swnd' as ['indx', 'name'] +# element 'sndf' as ['indx', 'name'] +# element 'qwnd' as ['indx', 'name'] +# element 'stcs' as ['indx', 'name'] +# element 'ctrs' as 
['indx', 'name'] +# element 'cwin' as ['indx', 'name'] + +class file(aetools.ComponentItem): + """file - A file""" + want = 'file' +class creator_type(aetools.NProperty): + """creator type - the OSType identifying the application that created the item""" + which = 'fcrt' + want = 'type' +class file_type_obsolete(aetools.NProperty): + """file type obsolete - the OSType identifying the type of data contained in the item (DEPRECATED - for use with scripts compiled before Finder 8.0. Will be removed in the next release)""" + which = 'fitp' + want = 'type' +class file_type(aetools.NProperty): + """file type - the OSType identifying the type of data contained in the item""" + which = 'asty' + want = 'type' +class locked_obsolete(aetools.NProperty): + """locked obsolete - Is the file locked? (DEPRECATED - for use with scripts compiled before Finder 8.0. Will be removed in the next release)""" + which = 'islk' + want = 'bool' +class locked(aetools.NProperty): + """locked - Is the file locked?""" + which = 'aslk' + want = 'bool' +# repeated property product_version the version of the product (visible at the top of the "Get Info" window) +class stationery(aetools.NProperty): + """stationery - Is the file a stationery pad?""" + which = 'pspd' + want = 'bool' +# repeated property version the version of the file (visible at the bottom of the "Get Info" window) + +files = file + +class window(aetools.ComponentItem): + """window - A window""" + want = 'cwin' +class collapsed(aetools.NProperty): + """collapsed - Is the window collapsed (only applies to non-pop-up windows)?""" + which = 'wshd' + want = 'bool' +class popup(aetools.NProperty): + """popup - Is the window is a pop-up window?""" + which = 'drwr' + want = 'bool' +class pulled_open(aetools.NProperty): + """pulled open - Is the window pulled open (only applies to pop-up windows)?""" + which = 'pull' + want = 'bool' +# repeated property visible Is the window visible (always true for Finder windows)? 
+class zoomed_full_size(aetools.NProperty): + """zoomed full size - Is the window zoomed to the full size of the screen? (can only be set, not read)""" + which = 'zumf' + want = 'bool' + +windows = window +# XXXX application element 'dsut' not found!! +# XXXX application element 'alia' not found!! +# XXXX application element 'appf' not found!! +# XXXX application element 'clpf' not found!! +# XXXX application element 'lwnd' not found!! +# XXXX application element 'ctnr' not found!! +# XXXX application element 'cwnd' not found!! +# XXXX application element 'dwnd' not found!! +# XXXX application element 'ccdv' not found!! +# XXXX application element 'dafi' not found!! +# XXXX application element 'cdsk' not found!! +# XXXX application element 'cdis' not found!! +# XXXX application element 'docf' not found!! +# XXXX application element 'cfol' not found!! +# XXXX application element 'fntf' not found!! +# XXXX application element 'fsut' not found!! +# XXXX application element 'iwnd' not found!! +# XXXX application element 'cobj' not found!! +# XXXX application element 'sctr' not found!! +# XXXX application element 'swnd' not found!! +# XXXX application element 'sndf' not found!! +# XXXX application element 'qwnd' not found!! +# XXXX application element 'stcs' not found!! +# XXXX application element 'ctrs' not found!! 
+application._propdict = { + 'about_this_computer' : about_this_computer, + 'apple_menu_items_folder' : apple_menu_items_folder, + 'clipboard' : clipboard, + 'control_panels_folder' : control_panels_folder, + 'desktop' : desktop, + 'extensions_folder' : extensions_folder, + 'file_sharing' : file_sharing, + 'Finder_preferences' : Finder_preferences, + 'fonts_folder' : fonts_folder, + 'frontmost' : frontmost, + 'insertion_location' : insertion_location, + 'largest_free_block' : largest_free_block, + 'preferences_folder' : preferences_folder, + 'product_version' : product_version, + 'selection' : selection, + 'sharing_starting_up' : sharing_starting_up, + 'shutdown_items_folder' : shutdown_items_folder, + 'startup_items_folder' : startup_items_folder, + 'system_folder' : system_folder, + 'temporary_items_folder' : temporary_items_folder, + 'version' : version, + 'view_preferences' : view_preferences, + 'visible' : visible, +} +application._elemdict = { + 'file' : file, + 'window' : window, +} +file._propdict = { + 'creator_type' : creator_type, + 'file_type_obsolete' : file_type_obsolete, + 'file_type' : file_type, + 'locked_obsolete' : locked_obsolete, + 'locked' : locked, + 'product_version' : product_version, + 'stationery' : stationery, + 'version' : version, +} +file._elemdict = { +} +window._propdict = { + 'collapsed' : collapsed, + 'popup' : popup, + 'pulled_open' : pulled_open, + 'visible' : visible, + 'zoomed_full_size' : zoomed_full_size, +} +window._elemdict = { +} +# XXXX enum list not found!! +# XXXX enum bool not found!! +# XXXX enum savo not found!! 
+ +# +# Indices of types declared in this module +# +_classdeclarations = { + 'cwin' : window, + 'file' : file, + 'capp' : application, +} + +_propdeclarations = { + 'amnu' : apple_menu_items_folder, + 'pvwp' : view_preferences, + 'extn' : extensions_folder, + 'pins' : insertion_location, + 'fshr' : file_sharing, + 'aslk' : locked, + 'drwr' : popup, + 'fcrt' : creator_type, + 'pcli' : clipboard, + 'asty' : file_type, + 'strt' : startup_items_folder, + 'islk' : locked_obsolete, + 'pvis' : visible, + 'pref' : preferences_folder, + 'pisf' : frontmost, + 'sele' : selection, + 'temp' : temporary_items_folder, + 'pull' : pulled_open, + 'abbx' : about_this_computer, + 'wshd' : collapsed, + 'pspd' : stationery, + 'fitp' : file_type_obsolete, + 'pfrp' : Finder_preferences, + 'desk' : desktop, + 'fsup' : sharing_starting_up, + 'mfre' : largest_free_block, + 'ctrl' : control_panels_folder, + 'zumf' : zoomed_full_size, + 'shdf' : shutdown_items_folder, + 'ffnt' : fonts_folder, + 'macs' : system_folder, + 'ver2' : product_version, + 'vers' : version, +} + +_compdeclarations = { +} + +_enumdeclarations = { +} Added: lxml/pyrex/Pyrex/Mac/MPW_Misc_Suite.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Mac/MPW_Misc_Suite.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,49 @@ +"""Suite Misc Suite: Suite that adds additional features to the Application. +Level 1, version 1 + +Generated from MPW:MPW Shell +AETE/AEUT resource version 1/0, language 0, script 0 +""" + +import aetools +import MacOS + +_code = 'misc' + +class MPW_Misc_Suite: + + def DoScript(self, _object, _attributes={}, **_arguments): + """DoScript: Execute an MPW command, any command that could be executed from the command line can be sent as a script. 
+ Required argument: The script to execute + Keyword argument _attributes: AppleEvent attribute dictionary + """ + _code = 'misc' + _subcode = 'dosc' + + if _arguments: raise TypeError, 'No optional args expected' + _arguments['----'] = _object + + + _reply, _arguments, _attributes = self.send(_code, _subcode, + _arguments, _attributes) + if _arguments.has_key('errn'): + raise aetools.Error, aetools.decodeerror(_arguments) + # XXXX Optionally decode result + if _arguments.has_key('----'): + return _arguments['----'] + + +# +# Indices of types declared in this module +# +_classdeclarations = { +} + +_propdeclarations = { +} + +_compdeclarations = { +} + +_enumdeclarations = { +} Added: lxml/pyrex/Pyrex/Mac/MacSystem.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Mac/MacSystem.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,135 @@ +# +# Pyrex -- Mac system interface +# + +import os, sys, string +import aetools +from aetools import TalkTo +from StdSuites.Standard_Suite import Standard_Suite_Events as Standard_Suite +from Pyrex.Utils import replace_suffix +from Pyrex.Compiler.Errors import PyrexError + +c_compiler = "MWCPPC" +c_optimizations = "off" +#c_linker = "PPCLink" +c_linker = "MWLinkPPC" +shared_lib_suffix = ".slb" + +#py_home = "Python2.2:Home:" +py_home = sys.exec_prefix + +py_include_dirs = ( + py_home + "Include:", + py_home + "Mac:Include:" +) + +pythoncore = py_home + "PythonCore" + +mwlibdir = "MPW:Interfaces&Libraries:Libraries:MWPPCLibraries:" + +libraries = ( + #mwlibdir + "'MSL C.PPC.Lib'", + #mwlibdir + "'MSL RuntimePPC.Lib'", + mwlibdir + "'MSL ShLibRuntime.Lib'", + mwlibdir + "InterfaceLib", + #mwlibdir + "MathLib", + ) + +class CCompilerError(PyrexError): + pass + +#---------------- ToolServer --------------------------- + +from TS_Misc_Suite import TS_Misc_Suite + +class ToolServer(Standard_Suite, TS_Misc_Suite, TalkTo): + pass + +def send_toolserver_command(cmd): + ts = 
ToolServer('MPSX', start = 1) + return ts.DoScript(cmd) + +def do_toolserver_command(command): + try: + result = send_toolserver_command(command) + except aetools.Error, e: + raise CCompilerError("Apple Event error: %s" % e) + errn, stat, stdout, stderr = result + if errn: + raise CCompilerError("ToolServer error: %s" % errn) + stdout = string.replace(stdout, "\r", "\n") + stderr = string.replace(stderr, "\r", "\n") + if stdout: + #print "<<< Begin ToolServer StdOut >>>" + sys.stderr.write(stdout) + #print "<<< End ToolServer StdOut >>>" + if stderr: + #print "<<< Begin ToolServer StdErr >>>" + sys.stderr.write(stderr) + #print "<<< End ToolServer StdErr >>>" + return stat + +#------------------------------------------------------- + +def c_compile(c_file): + # Compile the given C source file to produce + # an object file. Returns the pathname of the + # resulting file. + c_file = os.path.join(os.getcwd(), c_file) + #print "c_compile: c_file =", repr(c_file) ### + c_file_dir = os.path.dirname(c_file) + o_file = replace_suffix(c_file, ".o") + include_options = ["-i %s" % c_file_dir] + for dir in py_include_dirs: + include_options.append("-i %s" % dir) + command = "%s -opt %s -nomapcr -w off -r %s %s -o %s" % ( + c_compiler, + c_optimizations, + string.join(include_options), + c_file, + o_file, + #e_file + ) + #print "...command =", repr(command) ### + stat = do_toolserver_command(command) + if stat: + raise CCompilerError("C compiler returned status %s" % stat) + return o_file + +def c_link(obj_file): + return c_link_list([obj_file]) + +def c_link_list(obj_files): + # Link the given object files into a dynamically + # loadable extension file. Returns the pathname + # of the resulting file. 
+ out_file = replace_suffix(obj_files[0], shared_lib_suffix) + command = "%s -xm s -export all %s %s %s -o %s" % ( + c_linker, + string.join(obj_files), + pythoncore, + string.join(libraries), + out_file) + stat = do_toolserver_command(command) + if stat: + raise CCompilerError("Linker returned status %s" % stat) + return out_file + +def test_c_compile(link = 0): + objs = [] + for arg in sys.argv[1:]: + if arg.endswith(".c"): + try: + obj = c_compile(arg) + except PyrexError, e: + #print "Caught a PyrexError:" ### + #print repr(e) ### + print "%s.%s:" % (e.__class__.__module__, + e.__class__.__name__), e + sys.exit(1) + else: + obj = arg + objs.append(obj) + if link: + c_link_list(objs) + Added: lxml/pyrex/Pyrex/Mac/MacUtils.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Mac/MacUtils.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,34 @@ +# +# Pyrex -- Misc Mac-specific things +# + +import os, MacOS, macfs + +def open_new_file(path): + # On the Mac, try to preserve Finder position + # of previously existing file. + fsspec = macfs.FSSpec(path) + try: + old_finfo = fsspec.GetFInfo() + except MacOS.Error, e: + #print "MacUtils.open_new_file:", e ### + old_finfo = None + try: + os.unlink(path) + except OSError: + pass + file = open(path, "w") + new_finfo = fsspec.GetFInfo() + if old_finfo: + #print "MacUtils.open_new_file:", path ### + #print "...old file info =", old_finfo.Creator, old_finfo.Type, old_finfo.Location ### + #print "...new file info =", new_finfo.Creator, new_finfo.Type, new_finfo.Location ### + new_finfo.Location = old_finfo.Location + new_finfo.Flags = old_finfo.Flags + # Make darn sure the type and creator are right. There seems + # to be a bug in MacPython 2.2 that screws them up sometimes. 
+ new_finfo.Creator = "R*ch" + new_finfo.Type = "TEXT" + fsspec.SetFInfo(new_finfo) + return file + Added: lxml/pyrex/Pyrex/Mac/PS_Misc_Suite.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Mac/PS_Misc_Suite.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,44 @@ +"Apple Event suite for pyserver." + +import aetools +import MacOS + +_code = 'misc' + +class PS_Misc_Suite: + + def DoScript(self, _object, _attributes={}, **_arguments): + """DoScript: Execute a Python file, optionally with command line args. + Required argument: filename.py or [filename.py, arg, ...] + Keyword argument _attributes: AppleEvent attribute dictionary + """ + _code = 'misc' + _subcode = 'dosc' + + if _arguments: raise TypeError, 'No optional args expected' + _arguments['----'] = _object + + + _reply, _arguments, _attributes = self.send(_code, _subcode, + _arguments, _attributes) + if _arguments.has_key('errn'): + raise aetools.Error, aetools.decodeerror(_arguments) + # XXXX Optionally decode result + if _arguments.has_key('----'): + return _arguments['----'] + + +# +# Indices of types declared in this module +# +_classdeclarations = { +} + +_propdeclarations = { +} + +_compdeclarations = { +} + +_enumdeclarations = { +} Added: lxml/pyrex/Pyrex/Mac/PyServerMain.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Mac/PyServerMain.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,86 @@ +# +# Simple Apple-event driven Python interpreter +# + +import os, sys, traceback +from cStringIO import StringIO +from MiniAEFrame import AEServer, MiniApplication + +class PythonServer(AEServer, MiniApplication): + + def __init__(self): + MiniApplication.__init__(self) + AEServer.__init__(self) + self.installaehandler('aevt', 'oapp', ignore) + self.installaehandler('aevt', 'quit', quit) + self.installaehandler('misc', 'dosc', doscript) + + +def ignore(**kwds): + pass + +def quit(**kwds): + 
server._quit() + +def doscript(args, **kwds): + print "doscript:", repr(args) ### + stat = 0 + output = "" + errput = "" + #print "Normalising args" ### + if type(args) == type(""): + args = [args] + #print "Setting sys.argv" ### + sys.argv = args + #print "Finding script directory and module file" ### + dir = os.path.dirname(args[0]) + dir = os.path.join(start_dir, dir) + pyfile = os.path.basename(args[0]) + mod = os.path.splitext(pyfile)[0] + #print "dir:", repr(dir) ### + #print "mod:", repr(mod) ### + os.chdir(dir) + sys.path = start_path[:] + sys.path[0] = dir + #print "path:", sys.path ### + try: + sys.stdout = StringIO() + sys.stderr = StringIO() + try: + #sys.__stdout__.write("Path: %s\n" % sys.path) ### + #sys.__stdout__.write("Importing: %s\n" % mod) ### + try: + __import__(mod) + except KeyboardInterrupt: + raise + except SystemExit, exc: + #sys.__stdout__.write("Caught a SystemExit\n") ### + try: + stat = int(str(exc)) + except ValueError: + stat = 1 + #sys.__stdout__.write("stat = %s\n" % stat) ### + except: + traceback.print_exc() + stat = 1 + #sys.__stdout__.write("Done the import\n") ### + finally: + output = sys.stdout.getvalue() + #sys.__stdout__.write("Output:\n%s" % output) ### + errput = sys.stderr.getvalue() + finally: + sys.stdout = sys.__stdout__ + sys.stderr = sys.__stdout__ + pass + return [stat, output, errput] + +start_dir = os.getcwd() +start_path = sys.path[:] +server = PythonServer() +#print "Open for business" +try: + server.mainloop() +except: + traceback.print_exc() + #sys.exit(1) +#print "Closing shop" Added: lxml/pyrex/Pyrex/Mac/TS_Misc_Suite.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Mac/TS_Misc_Suite.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,64 @@ +"""Suite Misc Suite: Suite that adds additional features to the Application. 
+Level 1, version 1 + +Generated from Macintosh HD:Desktop Folder:ToolServer 3.4.1:ToolServer +AETE/AEUT resource version 1/0, language 0, script 0 +""" + +import aetools +import MacOS + +_code = 'misc' + +class TS_Misc_Suite: + + def DoScript(self, _object, _attributes={}, **_arguments): + """DoScript: Execute an MPW command, any command that could be executed from the command line can be sent as a script. + Required argument: The script to execute + Keyword argument _attributes: AppleEvent attribute dictionary + """ + _code = 'misc' + _subcode = 'dosc' + + if _arguments: raise TypeError, 'No optional args expected' + _arguments['----'] = _object + + + _reply, _arguments, _attributes = self.send(_code, _subcode, + _arguments, _attributes) + #if _arguments.has_key('errn'): + # raise aetools.Error, aetools.decodeerror(_arguments) + # XXXX Optionally decode result + #if _arguments.has_key('----'): + # return _arguments['----'] + errn = 0 + stat = 0 + stdout = "" + stderr = "" + if _arguments.has_key('errn'): + errn = _arguments['errn'] + if errn: + errn = aetools.decodeerror(_arguments) + if _arguments.has_key('stat'): + stat = _arguments['stat'] + if _arguments.has_key('----'): + stdout = _arguments['----'] + if _arguments.has_key('diag'): + stderr = _arguments['diag'] + return (errn, stat, stdout, stderr) + + +# +# Indices of types declared in this module +# +_classdeclarations = { +} + +_propdeclarations = { +} + +_compdeclarations = { +} + +_enumdeclarations = { +} Added: lxml/pyrex/Pyrex/Mac/__init__.py ============================================================================== Added: lxml/pyrex/Pyrex/Plex/Actions.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Plex/Actions.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,109 @@ +#======================================================================= +# +# Python Lexical Analyser +# +# Actions for use in token specifications +# 
+#======================================================================= + +class Action: + + def same_as(self, other): + return self is other + + +class Return(Action): + """ + Internal Plex action which causes |value| to + be returned as the value of the associated token + """ + + value = None + + def __init__(self, value): + self.value = value + + def perform(self, token_stream, text): + return self.value + + def same_as(self, other): + return isinstance(other, Return) and self.value == other.value + + def __repr__(self): + return "Return(%s)" % repr(self.value) + + +class Call(Action): + """ + Internal Plex action which causes a function to be called. + """ + + function = None + + def __init__(self, function): + self.function = function + + def perform(self, token_stream, text): + return self.function(token_stream, text) + + def __repr__(self): + return "Call(%s)" % self.function.__name__ + + def same_as(self, other): + return isinstance(other, Call) and self.function is other.function + + +class Begin(Action): + """ + Begin(state_name) is a Plex action which causes the Scanner to + enter the state |state_name|. See the docstring of Plex.Lexicon + for more information. + """ + + state_name = None + + def __init__(self, state_name): + self.state_name = state_name + + def perform(self, token_stream, text): + token_stream.begin(self.state_name) + + def __repr__(self): + return "Begin(%s)" % self.state_name + + def same_as(self, other): + return isinstance(other, Begin) and self.state_name == other.state_name + + +class Ignore(Action): + """ + IGNORE is a Plex action which causes its associated token + to be ignored. See the docstring of Plex.Lexicon for more + information. + """ + def perform(self, token_stream, text): + return None + + def __repr__(self): + return "IGNORE" + +IGNORE = Ignore() +IGNORE.__doc__ = Ignore.__doc__ + +class Text(Action): + """ + TEXT is a Plex action which causes the text of a token to + be returned as the value of the token. 
See the docstring of + Plex.Lexicon for more information. + """ + + def perform(self, token_stream, text): + return text + + def __repr__(self): + return "TEXT" + +TEXT = Text() +TEXT.__doc__ = Text.__doc__ + + Added: lxml/pyrex/Pyrex/Plex/Actions.pyc ============================================================================== Binary file. No diff available. Added: lxml/pyrex/Pyrex/Plex/DFA.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Plex/DFA.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,156 @@ +#======================================================================= +# +# Python Lexical Analyser +# +# Converting NFA to DFA +# +#======================================================================= + +import Machines +from Machines import LOWEST_PRIORITY +from Transitions import TransitionMap + +def nfa_to_dfa(old_machine, debug = None): + """ + Given a nondeterministic Machine, return a new equivalent + Machine which is deterministic. + """ + # We build a new machine whose states correspond to sets of states + # in the old machine. Initially we add a new state corresponding to + # the epsilon-closure of each initial old state. Then we give transitions + # to each new state which are the union of all transitions out of any + # of the corresponding old states. The new state reached on a given + # character is the one corresponding to the set of states reachable + # on that character from any of the old states. As new combinations of + # old states are created, new states are added as needed until closure + # is reached. + new_machine = Machines.FastMachine() + state_map = StateMap(new_machine) + # Seed the process using the initial states of the old machine. + # Make the corresponding new states into initial states of the new + # machine with the same names. 
+ for (key, old_state) in old_machine.initial_states.items(): + new_state = state_map.old_to_new(epsilon_closure(old_state)) + new_machine.make_initial_state(key, new_state) + # Tricky bit here: we add things to the end of this list while we're + # iterating over it. The iteration stops when closure is achieved. + for new_state in new_machine.states: + transitions = TransitionMap() + for old_state in state_map.new_to_old(new_state).keys(): + for event, old_target_states in old_state.transitions.items(): + if event and old_target_states: + transitions.add_set(event, set_epsilon_closure(old_target_states)) + for event, old_states in transitions.items(): + new_machine.add_transitions(new_state, event, state_map.old_to_new(old_states)) + if debug: + debug.write("\n===== State Mapping =====\n") + state_map.dump(debug) + return new_machine + +def set_epsilon_closure(state_set): + """ + Given a set of states, return the union of the epsilon + closures of its member states. + """ + result = {} + for state1 in state_set.keys(): + for state2 in epsilon_closure(state1).keys(): + result[state2] = 1 + return result + +def epsilon_closure(state): + """ + Return the set of states reachable from the given state + by epsilon moves. + """ + # Cache the result + result = state.epsilon_closure + if result is None: + result = {} + state.epsilon_closure = result + add_to_epsilon_closure(result, state) + return result + +def add_to_epsilon_closure(state_set, state): + """ + Recursively add to |state_set| states reachable from the given state + by epsilon moves. + """ + if not state_set.get(state, 0): + state_set[state] = 1 + state_set_2 = state.transitions.get_epsilon() + if state_set_2: + for state2 in state_set_2.keys(): + add_to_epsilon_closure(state_set, state2) + +class StateMap: + """ + Helper class used by nfa_to_dfa() to map back and forth between + sets of states from the old machine and states of the new machine. 
+ """ + new_machine = None # Machine + old_to_new_dict = None # {(old_state,...) : new_state} + new_to_old_dict = None # {id(new_state) : old_state_set} + + def __init__(self, new_machine): + self.new_machine = new_machine + self.old_to_new_dict = {} + self.new_to_old_dict= {} + + def old_to_new(self, old_state_set): + """ + Return the state of the new machine corresponding to the + set of old machine states represented by |state_set|. A new + state will be created if necessary. If any of the old states + are accepting states, the new state will be an accepting state + with the highest priority action from the old states. + """ + key = self.make_key(old_state_set) + new_state = self.old_to_new_dict.get(key, None) + if not new_state: + action = self.highest_priority_action(old_state_set) + new_state = self.new_machine.new_state(action) + self.old_to_new_dict[key] = new_state + self.new_to_old_dict[id(new_state)] = old_state_set + #for old_state in old_state_set.keys(): + #new_state.merge_actions(old_state) + return new_state + + def highest_priority_action(self, state_set): + best_action = None + best_priority = LOWEST_PRIORITY + for state in state_set.keys(): + priority = state.action_priority + if priority > best_priority: + best_action = state.action + best_priority = priority + return best_action + +# def old_to_new_set(self, old_state_set): +# """ +# Return the new state corresponding to a set of old states as +# a singleton set. +# """ +# return {self.old_to_new(old_state_set):1} + + def new_to_old(self, new_state): + """Given a new state, return a set of corresponding old states.""" + return self.new_to_old_dict[id(new_state)] + + def make_key(self, state_set): + """ + Convert a set of states into a uniquified + sorted tuple suitable for use as a dictionary key. 
+ """ + lst = state_set.keys() + lst.sort() + return tuple(lst) + + def dump(self, file): + from Transitions import state_set_str + for new_state in self.new_machine.states: + old_state_set = self.new_to_old_dict[id(new_state)] + file.write(" State %s <-- %s\n" % ( + new_state['number'], state_set_str(old_state_set))) + + Added: lxml/pyrex/Pyrex/Plex/DFA.pyc ============================================================================== Binary file. No diff available. Added: lxml/pyrex/Pyrex/Plex/Errors.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Plex/Errors.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,52 @@ +#======================================================================= +# +# Python Lexical Analyser +# +# Exception classes +# +#======================================================================= + +import exceptions + +class PlexError(exceptions.Exception): + message = "" + +class PlexTypeError(PlexError, TypeError): + pass + +class PlexValueError(PlexError, ValueError): + pass + +class InvalidRegex(PlexError): + pass + +class InvalidToken(PlexError): + + def __init__(self, token_number, message): + PlexError.__init__(self, "Token number %d: %s" % (token_number, message)) + +class InvalidScanner(PlexError): + pass + +class AmbiguousAction(PlexError): + message = "Two tokens with different actions can match the same string" + + def __init__(self): + pass + +class UnrecognizedInput(PlexError): + scanner = None + position = None + state_name = None + + def __init__(self, scanner, state_name): + self.scanner = scanner + self.position = scanner.position() + self.state_name = state_name + + def __str__(self): + return ("'%s', line %d, char %d: Token not recognised in state %s" + % (self.position + (repr(self.state_name),))) + + + Added: lxml/pyrex/Pyrex/Plex/Errors.pyc ============================================================================== Binary file. No diff available. 
Added: lxml/pyrex/Pyrex/Plex/Lexicons.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Plex/Lexicons.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,192 @@ +#======================================================================= +# +# Python Lexical Analyser +# +# Lexical Analyser Specification +# +#======================================================================= + +import types + +import Actions +import DFA +import Errors +import Machines +import Regexps + +# debug_flags for Lexicon constructor +DUMP_NFA = 1 +DUMP_DFA = 2 + +class State: + """ + This class is used as part of a Plex.Lexicon specification to + introduce a user-defined state. + + Constructor: + + State(name, token_specifications) + """ + + name = None + tokens = None + + def __init__(self, name, tokens): + self.name = name + self.tokens = tokens + +class Lexicon: + """ + Lexicon(specification) builds a lexical analyser from the given + |specification|. The specification consists of a list of + specification items. Each specification item may be either: + + 1) A token definition, which is a tuple: + + (pattern, action) + + The |pattern| is a regular expression built using the + constructors defined in the Plex module. + + The |action| is the action to be performed when this pattern + is recognised (see below). + + 2) A state definition: + + State(name, tokens) + + where |name| is a character string naming the state, + and |tokens| is a list of token definitions as + above. The meaning and usage of states is described + below. + + Actions + ------- + + The |action| in a token specification may be one of three things: + + 1) A function, which is called as follows: + + function(scanner, text) + + where |scanner| is the relevant Scanner instance, and |text| + is the matched text. If the function returns anything + other than None, that value is returned as the value of the + token.
If it returns None, scanning continues as if the IGNORE + action were specified (see below). + + 2) One of the following special actions: + + IGNORE means that the recognised characters will be treated as + white space and ignored. Scanning will continue until + the next non-ignored token is recognised before returning. + + TEXT causes the scanned text itself to be returned as the + value of the token. + + 3) Any other value, which is returned as the value of the token. + + States + ------ + + At any given time, the scanner is in one of a number of states. + Associated with each state is a set of possible tokens. When scanning, + only tokens associated with the current state are recognised. + + There is a default state, whose name is the empty string. Token + definitions which are not inside any State definition belong to + the default state. + + The initial state of the scanner is the default state. The state can + be changed in one of two ways: + + 1) Using Begin(state_name) as the action of a token. + + 2) Calling the begin(state_name) method of the Scanner. + + To change back to the default state, use '' as the state name. 
+ """ + + machine = None # Machine + tables = None # StateTableMachine + + def __init__(self, specifications, debug = None, debug_flags = 7, timings = None): + if type(specifications) <> types.ListType: + raise Errors.InvalidScanner("Scanner definition is not a list") + if timings: + from Timing import time + total_time = 0.0 + time1 = time() + nfa = Machines.Machine() + default_initial_state = nfa.new_initial_state('') + token_number = 1 + for spec in specifications: + if isinstance(spec, State): + user_initial_state = nfa.new_initial_state(spec.name) + for token in spec.tokens: + self.add_token_to_machine( + nfa, user_initial_state, token, token_number) + token_number = token_number + 1 + elif type(spec) == types.TupleType: + self.add_token_to_machine( + nfa, default_initial_state, spec, token_number) + token_number = token_number + 1 + else: + raise Errors.InvalidToken( + token_number, + "Expected a token definition (tuple) or State instance") + if timings: + time2 = time() + total_time = total_time + (time2 - time1) + time3 = time() + if debug and (debug_flags & 1): + debug.write("\n============= NFA ===========\n") + nfa.dump(debug) + dfa = DFA.nfa_to_dfa(nfa, debug = (debug_flags & 3) == 3 and debug) + if timings: + time4 = time() + total_time = total_time + (time4 - time3) + if debug and (debug_flags & 2): + debug.write("\n============= DFA ===========\n") + dfa.dump(debug) + if timings: + timings.write("Constructing NFA : %5.2f\n" % (time2 - time1)) + timings.write("Converting to DFA: %5.2f\n" % (time4 - time3)) + timings.write("TOTAL : %5.2f\n" % total_time) + self.machine = dfa + + def add_token_to_machine(self, machine, initial_state, token_spec, token_number): + try: + (re, action_spec) = self.parse_token_definition(token_spec) + # Disabled this -- matching empty strings can be useful + #if re.nullable: + # raise Errors.InvalidToken( + # token_number, "Pattern can match 0 input symbols") + if isinstance(action_spec, Actions.Action): + action = 
action_spec + elif callable(action_spec): + action = Actions.Call(action_spec) + else: + action = Actions.Return(action_spec) + final_state = machine.new_state() + re.build_machine(machine, initial_state, final_state, + match_bol = 1, nocase = 0) + final_state.set_action(action, priority = -token_number) + except Errors.PlexError, e: + raise e.__class__("Token number %d: %s" % (token_number, e)) + + def parse_token_definition(self, token_spec): + if type(token_spec) <> types.TupleType: + raise Errors.InvalidToken("Token definition is not a tuple") + if len(token_spec) <> 2: + raise Errors.InvalidToken("Wrong number of items in token definition") + pattern, action = token_spec + if not isinstance(pattern, Regexps.RE): + raise Errors.InvalidToken("Pattern is not an RE instance") + return (pattern, action) + + def get_initial_state(self, name): + return self.machine.get_initial_state(name) + + + Added: lxml/pyrex/Pyrex/Plex/Lexicons.pyc ============================================================================== Binary file. No diff available. 
Added: lxml/pyrex/Pyrex/Plex/Machines.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Plex/Machines.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,326 @@ +#======================================================================= +# +# Python Lexical Analyser +# +# Classes for building NFAs and DFAs +# +#======================================================================= + +import string +import sys +from sys import maxint +from types import TupleType + +from Transitions import TransitionMap + +LOWEST_PRIORITY = -sys.maxint + +class Machine: + """A collection of Nodes representing an NFA or DFA.""" + states = None # [Node] + next_state_number = 1 + initial_states = None # {(name, bol): Node} + + def __init__(self): + self.states = [] + self.initial_states = {} + + def __del__(self): + #print "Destroying", self ### + for state in self.states: + state.destroy() + + def new_state(self): + """Add a new state to the machine and return it.""" + s = Node() + n = self.next_state_number + self.next_state_number = n + 1 + s.number = n + self.states.append(s) + return s + + def new_initial_state(self, name): + state = self.new_state() + self.make_initial_state(name, state) + return state + + def make_initial_state(self, name, state): + self.initial_states[name] = state + + def get_initial_state(self, name): + return self.initial_states[name] + + def dump(self, file): + file.write("Plex.Machine:\n") + if self.initial_states is not None: + file.write(" Initial states:\n") + for (name, state) in self.initial_states.items(): + file.write(" '%s': %d\n" % (name, state.number)) + for s in self.states: + s.dump(file) + +class Node: + """A state of an NFA or DFA.""" + transitions = None # TransitionMap + action = None # Action + action_priority = None # integer + number = 0 # for debug output + epsilon_closure = None # used by nfa_to_dfa() + + def __init__(self): + # Preinitialise the list of empty transitions, because 
+ # the nfa-to-dfa algorithm needs it + #self.transitions = {'':[]} + self.transitions = TransitionMap() + self.action_priority = LOWEST_PRIORITY + + def destroy(self): + #print "Destroying", self ### + self.transitions = None + self.action = None + self.epsilon_closure = None + + def add_transition(self, event, new_state): + self.transitions.add(event, new_state) + + def link_to(self, state): + """Add an epsilon-move from this state to another state.""" + self.add_transition('', state) + + def set_action(self, action, priority): + """Make this an accepting state with the given action. If + there is already an action, choose the action with highest + priority.""" + if priority > self.action_priority: + self.action = action + self.action_priority = priority + + def get_action(self): + return self.action + + def get_action_priority(self): + return self.action_priority + +# def merge_actions(self, other_state): +# """Merge actions of other state into this state according +# to their priorities.""" +# action = other_state.get_action() +# priority = other_state.get_action_priority() +# self.set_action(action, priority) + + def is_accepting(self): + return self.action is not None + + def __str__(self): + return "State %d" % self.number + + def dump(self, file): + import string + # Header + file.write(" State %d:\n" % self.number) + # Transitions +# self.dump_transitions(file) + self.transitions.dump(file) + # Action + action = self.action + priority = self.action_priority + if action is not None: + file.write(" %s [priority %d]\n" % (action, priority)) + + +class FastMachine: + """ + FastMachine is a deterministic machine represented in a way that + allows fast scanning. 
+ """ + initial_states = None # {state_name:state} + states = None # [state] + # where state = {event:state, 'else':state, 'action':Action} + next_number = 1 # for debugging + + new_state_template = { + '':None, 'bol':None, 'eol':None, 'eof':None, 'else':None + } + + def __init__(self, old_machine = None): + self.initial_states = initial_states = {} + self.states = [] + if old_machine: + self.old_to_new = old_to_new = {} + for old_state in old_machine.states: + new_state = self.new_state() + old_to_new[old_state] = new_state + for name, old_state in old_machine.initial_states.items(): + initial_states[name] = old_to_new[old_state] + for old_state in old_machine.states: + new_state = old_to_new[old_state] + for event, old_state_set in old_state.transitions.items(): + if old_state_set: + new_state[event] = old_to_new[old_state_set.keys()[0]] + else: + new_state[event] = None + new_state['action'] = old_state.action + + def __del__(self): + for state in self.states: + state.clear() + + def new_state(self, action = None): + number = self.next_number + self.next_number = number + 1 + result = self.new_state_template.copy() + result['number'] = number + result['action'] = action + self.states.append(result) + return result + + def make_initial_state(self, name, state): + self.initial_states[name] = state + + def add_transitions(self, state, event, new_state): + if type(event) == TupleType: + code0, code1 = event + if code0 == -maxint: + state['else'] = new_state + elif code1 <> maxint: + while code0 < code1: + state[chr(code0)] = new_state + code0 = code0 + 1 + else: + state[event] = new_state + + def get_initial_state(self, name): + return self.initial_states[name] + + def dump(self, file): + file.write("Plex.FastMachine:\n") + file.write(" Initial states:\n") + for name, state in self.initial_states.items(): + file.write(" %s: %s\n" % (repr(name), state['number'])) + for state in self.states: + self.dump_state(state, file) + + def dump_state(self, state, file): + 
import string + # Header + file.write(" State %d:\n" % state['number']) + # Transitions + self.dump_transitions(state, file) + # Action + action = state['action'] + if action is not None: + file.write(" %s\n" % action) + + def dump_transitions(self, state, file): + chars_leading_to_state = {} + special_to_state = {} + for (c, s) in state.items(): + if len(c) == 1: + chars = chars_leading_to_state.get(id(s), None) + if chars is None: + chars = [] + chars_leading_to_state[id(s)] = chars + chars.append(c) + elif len(c) <= 4: + special_to_state[c] = s + ranges_to_state = {} + for state in self.states: + char_list = chars_leading_to_state.get(id(state), None) + if char_list: + ranges = self.chars_to_ranges(char_list) + ranges_to_state[ranges] = state + ranges_list = ranges_to_state.keys() + ranges_list.sort() + for ranges in ranges_list: + key = self.ranges_to_string(ranges) + state = ranges_to_state[ranges] + file.write(" %s --> State %d\n" % (key, state['number'])) + for key in ('bol', 'eol', 'eof', 'else'): + state = special_to_state.get(key, None) + if state: + file.write(" %s --> State %d\n" % (key, state['number'])) + + def chars_to_ranges(self, char_list): + char_list.sort() + i = 0 + n = len(char_list) + result = [] + while i < n: + c1 = ord(char_list[i]) + c2 = c1 + i = i + 1 + while i < n and ord(char_list[i]) == c2 + 1: + i = i + 1 + c2 = c2 + 1 + result.append((chr(c1), chr(c2))) + return tuple(result) + + def ranges_to_string(self, range_list): + return string.join(map(self.range_to_string, range_list), ",") + + def range_to_string(self, (c1, c2)): + if c1 == c2: + return repr(c1) + else: + return "%s..%s" % (repr(c1), repr(c2)) +## +## (Superseded by Machines.FastMachine) +## +## class StateTableMachine: +## """ +## StateTableMachine is an alternative representation of a Machine +## that can be run more efficiently. 
+## """ +## initial_states = None # {state_name:state_index} +## states = None # [([state] indexed by char code, Action)] + +## special_map = {'bol':256, 'eol':257, 'eof':258} + +## def __init__(self, m): +## """ +## Initialise StateTableMachine from Machine |m|. +## """ +## initial_states = self.initial_states = {} +## states = self.states = [None] +## old_to_new = {} +## i = 1 +## for old_state in m.states: +## new_state = ([0] * 259, old_state.get_action()) +## states.append(new_state) +## old_to_new[old_state] = i # new_state +## i = i + 1 +## for name, old_state in m.initial_states.items(): +## initial_states[name] = old_to_new[old_state] +## for old_state in m.states: +## new_state_index = old_to_new[old_state] +## new_table = states[new_state_index][0] +## transitions = old_state.transitions +## for c, old_targets in transitions.items(): +## if old_targets: +## old_target = old_targets[0] +## new_target_index = old_to_new[old_target] +## if len(c) == 1: +## a = ord(c) +## else: +## a = self.special_map[c] +## new_table[a] = states[new_target_index] + +## def dump(self, f): +## f.write("Plex.StateTableMachine:\n") +## f.write(" Initial states:\n") +## for name, index in self.initial_states.items(): +## f.write(" %s: State %d\n" % ( +## repr(name), id(self.states[index]))) +## for i in xrange(1, len(self.states)): +## table, action = self.states[i] +## f.write(" State %d:" % i) +## if action: +## f.write("%s" % action) +## f.write("\n") +## f.write(" %s\n" % map(id,table)) + + + + + + Added: lxml/pyrex/Pyrex/Plex/Machines.pyc ============================================================================== Binary file. No diff available. 
Added: lxml/pyrex/Pyrex/Plex/Regexps.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Plex/Regexps.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,557 @@ +#======================================================================= +# +# Python Lexical Analyser +# +# Regular Expressions +# +#======================================================================= + +import array +import string +import types +from sys import maxint + +import Errors + +# +# Constants +# + +BOL = 'bol' +EOL = 'eol' +EOF = 'eof' + +nl_code = ord('\n') + +# +# Helper functions +# + +def chars_to_ranges(s): + """ + Return a list of character codes consisting of pairs + [code1a, code1b, code2a, code2b,...] which cover all + the characters in |s|. + """ + char_list = list(s) + char_list.sort() + i = 0 + n = len(char_list) + result = [] + while i < n: + code1 = ord(char_list[i]) + code2 = code1 + 1 + i = i + 1 + while i < n and code2 >= ord(char_list[i]): + code2 = code2 + 1 + i = i + 1 + result.append(code1) + result.append(code2) + return result + +def uppercase_range(code1, code2): + """ + If the range of characters from code1 to code2-1 includes any + lower case letters, return the corresponding upper case range. + """ + code3 = max(code1, ord('a')) + code4 = min(code2, ord('z') + 1) + if code3 < code4: + d = ord('A') - ord('a') + return (code3 + d, code4 + d) + else: + return None + +def lowercase_range(code1, code2): + """ + If the range of characters from code1 to code2-1 includes any + upper case letters, return the corresponding lower case range. + """ + code3 = max(code1, ord('A')) + code4 = min(code2, ord('Z') + 1) + if code3 < code4: + d = ord('a') - ord('A') + return (code3 + d, code4 + d) + else: + return None + +def CodeRanges(code_list): + """ + Given a list of codes as returned by chars_to_ranges, return + an RE which will match a character in any of the ranges. 
+ """ + re_list = [] + for i in xrange(0, len(code_list), 2): + re_list.append(CodeRange(code_list[i], code_list[i + 1])) + return apply(Alt, tuple(re_list)) + +def CodeRange(code1, code2): + """ + CodeRange(code1, code2) is an RE which matches any character + with a code |c| in the range |code1| <= |c| < |code2|. + """ + if code1 <= nl_code < code2: + return Alt(RawCodeRange(code1, nl_code), + RawNewline, + RawCodeRange(nl_code + 1, code2)) + else: + return RawCodeRange(code1, code2) + +# +# Abstract classes +# + +class RE: + """RE is the base class for regular expression constructors. + The following operators are defined on REs: + + re1 + re2 is an RE which matches |re1| followed by |re2| + re1 | re2 is an RE which matches either |re1| or |re2| + """ + + nullable = 1 # True if this RE can match 0 input symbols + match_nl = 1 # True if this RE can match a string ending with '\n' + str = None # Set to a string to override the class's __str__ result + + def build_machine(self, machine, initial_state, final_state, + match_bol, nocase): + """ + This method should add states to |machine| to implement this + RE, starting at |initial_state| and ending at |final_state|. + If |match_bol| is true, the RE must be able to match at the + beginning of a line. If nocase is true, upper and lower case + letters should be treated as equivalent. + """ + raise exceptions.UnimplementedMethod("%s.build_machine not implemented" % + self.__class__.__name__) + + def build_opt(self, m, initial_state, c): + """ + Given a state |s| of machine |m|, return a new state + reachable from |s| on character |c| or epsilon. 
+ """ + s = m.new_state() + initial_state.link_to(s) + initial_state.add_transition(c, s) + return s + + def __add__(self, other): + return Seq(self, other) + + def __or__(self, other): + return Alt(self, other) + + def __str__(self): + if self.str: + return self.str + else: + return self.calc_str() + + def check_re(self, num, value): + if not isinstance(value, RE): + self.wrong_type(num, value, "Plex.RE instance") + + def check_string(self, num, value): + if type(value) <> type(''): + self.wrong_type(num, value, "string") + + def check_char(self, num, value): + self.check_string(num, value) + if len(value) <> 1: + raise Errors.PlexValueError("Invalid value for argument %d of Plex.%s." + "Expected a string of length 1, got: %s" % ( + num, self.__class__.__name__, repr(value))) + + def wrong_type(self, num, value, expected): + if type(value) == types.InstanceType: + got = "%s.%s instance" % ( + value.__class__.__module__, value.__class__.__name__) + else: + got = type(value).__name__ + raise Errors.PlexTypeError("Invalid type for argument %d of Plex.%s " + "(expected %s, got %s" % ( + num, self.__class__.__name__, expected, got)) + +# +# Primitive RE constructors +# ------------------------- +# +# These are the basic REs from which all others are built. +# + +## class Char(RE): +## """ +## Char(c) is an RE which matches the character |c|. 
+## """ + +## nullable = 0 + +## def __init__(self, char): +## self.char = char +## self.match_nl = char == '\n' + +## def build_machine(self, m, initial_state, final_state, match_bol, nocase): +## c = self.char +## if match_bol and c <> BOL: +## s1 = self.build_opt(m, initial_state, BOL) +## else: +## s1 = initial_state +## if c == '\n' or c == EOF: +## s1 = self.build_opt(m, s1, EOL) +## if len(c) == 1: +## code = ord(self.char) +## s1.add_transition((code, code+1), final_state) +## if nocase and is_letter_code(code): +## code2 = other_case_code(code) +## s1.add_transition((code2, code2+1), final_state) +## else: +## s1.add_transition(c, final_state) + +## def calc_str(self): +## return "Char(%s)" % repr(self.char) + +def Char(c): + """ + Char(c) is an RE which matches the character |c|. + """ + if len(c) == 1: + result = CodeRange(ord(c), ord(c) + 1) + else: + result = SpecialSymbol(c) + result.str = "Char(%s)" % repr(c) + return result + +class RawCodeRange(RE): + """ + RawCodeRange(code1, code2) is a low-level RE which matches any character + with a code |c| in the range |code1| <= |c| < |code2|, where the range + does not include newline. For internal use only. 
+ """ + nullable = 0 + match_nl = 0 + range = None # (code, code) + uppercase_range = None # (code, code) or None + lowercase_range = None # (code, code) or None + + def __init__(self, code1, code2): + self.range = (code1, code2) + self.uppercase_range = uppercase_range(code1, code2) + self.lowercase_range = lowercase_range(code1, code2) + + def build_machine(self, m, initial_state, final_state, match_bol, nocase): + if match_bol: + initial_state = self.build_opt(m, initial_state, BOL) + initial_state.add_transition(self.range, final_state) + if nocase: + if self.uppercase_range: + initial_state.add_transition(self.uppercase_range, final_state) + if self.lowercase_range: + initial_state.add_transition(self.lowercase_range, final_state) + + def calc_str(self): + return "CodeRange(%d,%d)" % (self.code1, self.code2) + +class _RawNewline(RE): + """ + RawNewline is a low-level RE which matches a newline character. + For internal use only. + """ + nullable = 0 + match_nl = 1 + + def build_machine(self, m, initial_state, final_state, match_bol, nocase): + if match_bol: + initial_state = self.build_opt(m, initial_state, BOL) + s = self.build_opt(m, initial_state, EOL) + s.add_transition((nl_code, nl_code + 1), final_state) + +RawNewline = _RawNewline() + + +class SpecialSymbol(RE): + """ + SpecialSymbol(sym) is an RE which matches the special input + symbol |sym|, which is one of BOL, EOL or EOF. + """ + nullable = 0 + match_nl = 0 + sym = None + + def __init__(self, sym): + self.sym = sym + + def build_machine(self, m, initial_state, final_state, match_bol, nocase): + # Sequences 'bol bol' and 'bol eof' are impossible, so only need + # to allow for bol if sym is eol + if match_bol and self.sym == EOL: + initial_state = self.build_opt(m, initial_state, BOL) + initial_state.add_transition(self.sym, final_state) + + +class Seq(RE): + """Seq(re1, re2, re3...) 
is an RE which matches |re1| followed by + |re2| followed by |re3|...""" + + def __init__(self, *re_list): + nullable = 1 + for i in xrange(len(re_list)): + re = re_list[i] + self.check_re(i, re) + nullable = nullable and re.nullable + self.re_list = re_list + self.nullable = nullable + i = len(re_list) + match_nl = 0 + while i: + i = i - 1 + re = re_list[i] + if re.match_nl: + match_nl = 1 + break + if not re.nullable: + break + self.match_nl = match_nl + + def build_machine(self, m, initial_state, final_state, match_bol, nocase): + re_list = self.re_list + if len(re_list) == 0: + initial_state.link_to(final_state) + else: + s1 = initial_state + n = len(re_list) + for i in xrange(n): + if i < n - 1: + s2 = m.new_state() + else: + s2 = final_state + re = re_list[i] + re.build_machine(m, s1, s2, match_bol, nocase) + s1 = s2 + match_bol = re.match_nl or (match_bol and re.nullable) + + def calc_str(self): + return "Seq(%s)" % string.join(map(str, self.re_list), ",") + + +class Alt(RE): + """Alt(re1, re2, re3...) 
is an RE which matches either |re1| or + |re2| or |re3|...""" + + def __init__(self, *re_list): + self.re_list = re_list + nullable = 0 + match_nl = 0 + nullable_res = [] + non_nullable_res = [] + i = 1 + for re in re_list: + self.check_re(i, re) + if re.nullable: + nullable_res.append(re) + nullable = 1 + else: + non_nullable_res.append(re) + if re.match_nl: + match_nl = 1 + i = i + 1 + self.nullable_res = nullable_res + self.non_nullable_res = non_nullable_res + self.nullable = nullable + self.match_nl = match_nl + + def build_machine(self, m, initial_state, final_state, match_bol, nocase): + for re in self.nullable_res: + re.build_machine(m, initial_state, final_state, match_bol, nocase) + if self.non_nullable_res: + if match_bol: + initial_state = self.build_opt(m, initial_state, BOL) + for re in self.non_nullable_res: + re.build_machine(m, initial_state, final_state, 0, nocase) + + def calc_str(self): + return "Alt(%s)" % string.join(map(str, self.re_list), ",") + + +class Rep1(RE): + """Rep1(re) is an RE which matches one or more repetitions of |re|.""" + + def __init__(self, re): + self.check_re(1, re) + self.re = re + self.nullable = re.nullable + self.match_nl = re.match_nl + + def build_machine(self, m, initial_state, final_state, match_bol, nocase): + s1 = m.new_state() + s2 = m.new_state() + initial_state.link_to(s1) + self.re.build_machine(m, s1, s2, match_bol or self.re.match_nl, nocase) + s2.link_to(s1) + s2.link_to(final_state) + + def calc_str(self): + return "Rep1(%s)" % self.re + + +class SwitchCase(RE): + """ + SwitchCase(re, nocase) is an RE which matches the same strings as RE, + but treating upper and lower case letters according to |nocase|. If + |nocase| is true, case is ignored, otherwise it is not. 
+ """ + re = None + nocase = None + + def __init__(self, re, nocase): + self.re = re + self.nocase = nocase + self.nullable = re.nullable + self.match_nl = re.match_nl + + def build_machine(self, m, initial_state, final_state, match_bol, nocase): + self.re.build_machine(m, initial_state, final_state, match_bol, + self.nocase) + + def calc_str(self): + if self.nocase: + name = "NoCase" + else: + name = "Case" + return "%s(%s)" % (name, self.re) + +# +# Composite RE constructors +# ------------------------- +# +# These REs are defined in terms of the primitive REs. +# + +Empty = Seq() +Empty.__doc__ = \ + """ + Empty is an RE which matches the empty string. + """ +Empty.str = "Empty" + +def Str1(s): + """ + Str1(s) is an RE which matches the literal string |s|. + """ + result = apply(Seq, tuple(map(Char, s))) + result.str = "Str(%s)" % repr(s) + return result + +def Str(*strs): + """ + Str(s) is an RE which matches the literal string |s|. + Str(s1, s2, s3, ...) is an RE which matches any of |s1| or |s2| or |s3|... + """ + if len(strs) == 1: + return Str1(strs[0]) + else: + result = apply(Alt, tuple(map(Str1, strs))) + result.str = "Str(%s)" % string.join(map(repr, strs), ",") + return result + +def Any(s): + """ + Any(s) is an RE which matches any character in the string |s|. + """ + #result = apply(Alt, tuple(map(Char, s))) + result = CodeRanges(chars_to_ranges(s)) + result.str = "Any(%s)" % repr(s) + return result + +def AnyBut(s): + """ + AnyBut(s) is an RE which matches any character (including + newline) which is not in the string |s|. + """ + ranges = chars_to_ranges(s) + ranges.insert(0, -maxint) + ranges.append(maxint) + result = CodeRanges(ranges) + result.str = "AnyBut(%s)" % repr(s) + return result + +AnyChar = AnyBut("") +AnyChar.__doc__ = \ + """ + AnyChar is an RE which matches any single character (including a newline). 
+ """ +AnyChar.str = "AnyChar" + +def Range(s1, s2 = None): + """ + Range(c1, c2) is an RE which matches any single character in the range + |c1| to |c2| inclusive. + Range(s) where |s| is a string of even length is an RE which matches + any single character in the ranges |s[0]| to |s[1]|, |s[2]| to |s[3]|,... + """ + if s2: + result = CodeRange(ord(s1), ord(s2) + 1) + result.str = "Range(%s,%s)" % (s1, s2) + else: + ranges = [] + for i in range(0, len(s1), 2): + ranges.append(CodeRange(ord(s1[i]), ord(s1[i+1]) + 1)) + result = apply(Alt, tuple(ranges)) + result.str = "Range(%s)" % repr(s1) + return result + +def Opt(re): + """ + Opt(re) is an RE which matches either |re| or the empty string. + """ + result = Alt(re, Empty) + result.str = "Opt(%s)" % re + return result + +def Rep(re): + """ + Rep(re) is an RE which matches zero or more repetitions of |re|. + """ + result = Opt(Rep1(re)) + result.str = "Rep(%s)" % re + return result + +def NoCase(re): + """ + NoCase(re) is an RE which matches the same strings as RE, but treating + upper and lower case letters as equivalent. + """ + return SwitchCase(re, nocase = 1) + +def Case(re): + """ + Case(re) is an RE which matches the same strings as RE, but treating + upper and lower case letters as distinct, i.e. it cancels the effect + of any enclosing NoCase(). + """ + return SwitchCase(re, nocase = 0) + +# +# RE Constants +# + +Bol = Char(BOL) +Bol.__doc__ = \ + """ + Bol is an RE which matches the beginning of a line. + """ +Bol.str = "Bol" + +Eol = Char(EOL) +Eol.__doc__ = \ + """ + Eol is an RE which matches the end of a line. + """ +Eol.str = "Eol" + +Eof = Char(EOF) +Eof.__doc__ = \ + """ + Eof is an RE which matches the end of the file. + """ +Eof.str = "Eof" + Added: lxml/pyrex/Pyrex/Plex/Regexps.pyc ============================================================================== Binary file. No diff available. 
Added: lxml/pyrex/Pyrex/Plex/Scanners.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/Plex/Scanners.py Fri Mar 10 13:58:37 2006 @@ -0,0 +1,377 @@ +#======================================================================= +# +# Python Lexical Analyser +# +# +# Scanning an input stream +# +#======================================================================= + +import Errors +from Regexps import BOL, EOL, EOF + +class Scanner: + """ + A Scanner is used to read tokens from a stream of characters + using the token set specified by a Plex.Lexicon.