From scoder at codespeak.net Sat Apr 8 23:16:38 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sat Apr 8 23:16:39 2006 Subject: [Lxml-checkins] r25600 - in lxml/branch/xsltext/src/lxml: . tests Message-ID: <20060408211638.0C4E91024B@code0.codespeak.net> Author: scoder Date: Sat Apr 8 23:16:35 2006 New Revision: 25600 Modified: lxml/branch/xsltext/src/lxml/etree.pyx lxml/branch/xsltext/src/lxml/nsclasses.pxi lxml/branch/xsltext/src/lxml/tests/test_xslt.py lxml/branch/xsltext/src/lxml/xslt.pxd lxml/branch/xsltext/src/lxml/xslt.pxi Log: API rewrite: XSLT elements are now classes that inherit from XSLTElementBase (i.e. they represent real elements), instances are callable to apply the element [currently not working] Modified: lxml/branch/xsltext/src/lxml/etree.pyx ============================================================================== --- lxml/branch/xsltext/src/lxml/etree.pyx (original) +++ lxml/branch/xsltext/src/lxml/etree.pyx Sat Apr 8 23:16:35 2006 @@ -768,7 +768,6 @@ return XPathElementEvaluator(self, namespaces).evaluate(_path, **_variables) cdef _Element _elementFactory(_Document doc, xmlNode* c_node): - cdef _Element result cdef char* c_ns_href result = getProxy(c_node, PROXY_ELEMENT) if result is not None: @@ -785,6 +784,11 @@ element_class = _Comment else: assert 0, "Unknown node type: %s" % c_node.type + return _newElement(doc, c_node, element_class) + +cdef _Element _newElement(_Document doc, xmlNode* c_node, + object element_class): + cdef _Element result result = element_class() result._tag = None result._doc = doc Modified: lxml/branch/xsltext/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/xsltext/src/lxml/nsclasses.pxi (original) +++ lxml/branch/xsltext/src/lxml/nsclasses.pxi Sat Apr 8 23:16:35 2006 @@ -28,7 +28,7 @@ return __CLASS_NAMESPACE_REGISTRIES[ns_utf] except KeyError: registry = __CLASS_NAMESPACE_REGISTRIES[ns_utf] = \ - 
_ClassNamespaceRegistry(ns_uri) + _ClassNamespaceRegistry(ns_uri, ElementBase) return registry def FunctionNamespace(ns_uri): @@ -54,18 +54,20 @@ def XSLTElementNamespace(ns_uri): """Retrieve the XSLT element namespace object associated with the given URI. Creates a new one if it does not yet exist. This namespace can only - be used to register functions for XSLT extension elements. + be used to register subtypes of XSLTElementBase for XSLT extension + elements. - The registered functions must have the signature + Instances of the registered element type must be callable with the + following signature: - result = function(_, subtree) + result = instance(subtree) - 'subtree' is the current result tree from inside the element. It may be a - string value, a node or a sequence of nodes. The element function can - freely operate on this. + 'subtree' is the context node of the current template. It may be a string + value or a node. The element function must not modify this value. - The function must return either a None, a node, a sequence of nodes or a - string value. + The function must return either None, a node, a sequence of nodes or a + string value. It may call its own 'apply_templates' method on any of its + children (only one at a time). """ if ns_uri is not None: ns_utf = _utf8(ns_uri) @@ -73,14 +75,16 @@ return __XSLT_ELEMENT_NAMESPACE_REGISTRIES[ns_utf] except KeyError: registry = __XSLT_ELEMENT_NAMESPACE_REGISTRIES[ns_utf] = \ - _FunctionNamespaceRegistry(ns_uri) + _ClassNamespaceRegistry(ns_uri, XSLTElementBase) return registry cdef class _NamespaceRegistry: "Dictionary-like namespace registry" cdef object _ns_uri + cdef object _entries def __init__(self, ns_uri): self._ns_uri = ns_uri + self._entries = {} def update(self, class_dict_iterable): """Forgivingly update the registry. 
If registered values do not match @@ -102,61 +106,56 @@ cdef object _get(self, object name): cdef python.PyObject* dict_result - dict_result = python.PyDict_GetItem(self._classes, name) + dict_result = python.PyDict_GetItem(self._entries, name) if dict_result is NULL: - dict_result = python.PyDict_GetItem(self._extensions, name) + raise KeyError, "Name not registered." + return dict_result + + cdef object _getForString(self, char* name): + cdef python.PyObject* dict_result + dict_result = python.PyDict_GetItemString(self._entries, name) if dict_result is NULL: raise KeyError, "Name not registered." return dict_result + def clear(self): + self._entries.clear() + cdef class _ClassNamespaceRegistry(_NamespaceRegistry): "Dictionary-like registry for namespace implementation classes" - cdef object _classes - def __init__(self, ns_uri): + cdef object _base_type + cdef object _type_error + def __init__(self, ns_uri, base_type): _NamespaceRegistry.__init__(self, ns_uri) - self._classes = {} + self._base_type = base_type + self._type_error = "Registered item must be subtypes of %s." % self._base_type.__name__ def __setitem__(self, name, item): if not python.PyType_Check(item) or \ - not issubclass(item, ElementBase): - raise NamespaceRegistryError, \ - "Registered item must be subtypes of ElementBase." + not issubclass(item, self._base_type): + raise NamespaceRegistryError, self._type_error if name is not None: name = _utf8(name) - self._classes[name] = item - - def clear(self): - self._classes.clear() + self._entries[name] = item def __repr__(self): return "Namespace(%r)" % self._ns_uri cdef class _FunctionNamespaceRegistry(_NamespaceRegistry): - cdef object _extensions def __init__(self, ns_uri): _NamespaceRegistry.__init__(self, ns_uri) - self._extensions = {} def __setitem__(self, name, item): if not callable(item): - raise NamespaceRegistryError, "Registered function must be callable." + raise NamespaceRegistryError, \ + "Registered functions must be callable." 
if name is not None: name = _utf8(name) - self._extensions[name] = item - - cdef object _get(self, object name): - cdef python.PyObject* dict_result - dict_result = python.PyDict_GetItem(self._extensions, name) - if dict_result is NULL: - raise KeyError, "Name not registered." - return dict_result + self._entries[name] = item def __repr__(self): return "FunctionNamespace(%r)" % self._ns_uri - def clear(self): - self._extensions.clear() - cdef class _XPathFunctionNamespaceRegistry(_FunctionNamespaceRegistry): cdef object _prefix cdef object _prefix_utf @@ -209,11 +208,11 @@ cdef object _find_all_extension_elements(): "Internal lookup function to find all extension elements for XSLT." - cdef _FunctionNamespaceRegistry registry + cdef _ClassNamespaceRegistry registry ns_extensions = {} for (ns_utf, registry) in __XSLT_ELEMENT_NAMESPACE_REGISTRIES.iteritems(): - if registry._extensions: - ns_extensions[ns_utf] = registry._extensions + if registry._entries: + ns_extensions[ns_utf] = registry._entries return ns_extensions cdef object _find_extension_elements(namespaces): @@ -228,26 +227,37 @@ __XSLT_ELEMENT_NAMESPACE_REGISTRIES, ns_uri) if dict_result is NULL: continue - extensions = (<_FunctionNamespaceRegistry>dict_result)._extensions + extensions = (<_ClassNamespaceRegistry>dict_result)._entries if extensions: python.PyDict_SetItem(extension_dict, ns_uri, extensions) return extension_dict +cdef object _find_extension_element_class(char* c_namespace_utf, + char* c_element_name_utf): + return _find_element_in_registries( + __XSLT_ELEMENT_NAMESPACE_REGISTRIES, + c_namespace_utf, c_element_name_utf) + cdef object _find_element_class(char* c_namespace_utf, char* c_element_name_utf): + return _find_element_in_registries( + __CLASS_NAMESPACE_REGISTRIES, + c_namespace_utf, c_element_name_utf) + +cdef object _find_element_in_registries(registries, + char* c_namespace_utf, + char* c_element_name_utf): cdef python.PyObject* dict_result cdef _ClassNamespaceRegistry registry if 
c_namespace_utf is not NULL: - dict_result = python.PyDict_GetItemString( - __CLASS_NAMESPACE_REGISTRIES, c_namespace_utf) + dict_result = python.PyDict_GetItemString(registries, c_namespace_utf) else: - dict_result = python.PyDict_GetItem( - __CLASS_NAMESPACE_REGISTRIES, None) + dict_result = python.PyDict_GetItem(registries, None) if dict_result is NULL: return _Element registry = <_ClassNamespaceRegistry>dict_result - classes = registry._classes + classes = registry._entries if c_element_name_utf is not NULL: dict_result = python.PyDict_GetItemString( Modified: lxml/branch/xsltext/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/branch/xsltext/src/lxml/tests/test_xslt.py (original) +++ lxml/branch/xsltext/src/lxml/tests/test_xslt.py Sat Apr 8 23:16:35 2006 @@ -352,10 +352,11 @@ ''') - def myext(_, tree): - s = etree.Element("TEST") - s.append(tree) - return s + class myext(etree.XSLTElementBase): + def __call__(self): + s = etree.Element("TEST") + s[:] = map(self.apply_templates, self) + return s namespace = etree.XSLTElementNamespace('testextns') namespace['myext'] = myext @@ -365,7 +366,7 @@ 'B') etree.XSLTElementNamespace('testextns').clear() - def test_xslt_extension_elements_nodeset(self): + def _test_xslt_extension_elements_nodeset(self): tree = self.parse('BC') style = self.parse('''\ ''') - def myext(_, tree): - print tree, len(tree) - s = etree.Element("TEST") - s.append(tree) - return s + class myext(etree.XSLTElementBase): + def __call__(self): + print tree, len(tree) + s = etree.Element("TEST") + s.append(tree.__copy__()) + return s namespace = etree.XSLTElementNamespace('testextns') namespace['myext'] = myext @@ -392,7 +394,7 @@ 'BC') etree.XSLTElementNamespace('testextns').clear() - def test_xslt_extension_elements_error(self): + def _test_xslt_extension_elements_error(self): tree = self.parse('B') style = self.parse('''\ rctxt.userData - # find name and namespace of called element 
- name = instr.name - if instr.ns is NULL or instr.ns.href is NULL: - uri = None - else: - uri = instr.ns.href - -## # build child tree -## c_node = instr.children -## while c_node is not NULL: -## if xslt.IS_XSLT_ELEM(c_node): -## pass -## c_node = c_node.next - - # lookup up the extension element in the context - f = extensions.find_extension_element(uri, name) - - out_doc = _documentFactory(ctxt.output) - tree_doc = _documentFactory(c_tree_node.doc) - - insert_element = _elementFactory(out_doc, ctxt.insert) - context_tree = _elementFactory(tree_doc, c_tree_node) - - instr_attribs = _attributeDict(instr) - instr_tag = _namespacedName(instr) - try: - result = f(instr_tag, instr_attribs, context_tree) + # create Python representations of insertion point and context node + out_doc = _documentFactory(ctxt.output) + insertion_point = _elementFactory(out_doc, ctxt.insert) + +## tree_doc = _documentFactory(c_tree_node.doc) +## context_tree = _extensionNodeFactory(tree_doc, c_tree_node) + + # deep-copy the XSLT element to create a Python proxy + c_doc = tree.xmlCopyDoc(instr.doc, 0) # non recursive doc copy + instr_doc = _documentFactory(c_doc) + c_node = tree.xmlDocCopyNode(instr, c_doc, 1) # recursive + tree.xmlDocSetRootElement(c_doc, c_node) + + # create Python representation + #assert c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE + if c_node.ns == NULL: + c_ns_href = NULL + else: + c_ns_href = c_node.ns.href + element_class = _find_extension_element_class(c_ns_href, c_node.name) + + #assert issubclass(element_class, XSLTElementBase) + instr_element = _newElement(instr_doc, c_node, element_class) + instr_element._ctxt = ctxt + instr_element._c_self = instr + instr_element._result_doc = out_doc + + # call the extension + assert callable(instr_element), "Extension objects must be callable." 
+ result = instr_element() + # handle the result (if any) if isinstance(result, _NodeBase): - insert_element.append(result) + insertion_point.append(result) elif python.PyString_Check(result) or PyUnicode_Check(result): - insert_element.text = result + insertion_point.text = result elif python.PySequence_Check(result): for node in result: - insert_element.append(node) + insertion_point.append(node) elif result is not None: raise TypeError, "Invalid return value from extension element." except Exception, e: @@ -890,30 +934,11 @@ # prevent garbage collection of document C structures out_doc._c_doc = NULL - tree_doc._c_doc = NULL - #insert_element._c_node = NULL +## tree_doc._c_doc = NULL - # FIXME: insert_element? current_tree? + # FIXME: insertion_point? current_tree? # if result_tree_node is in the result document it can be GCed, # otherwise it won't be - #insert_element._c_node = NULL + #insertion_point._c_node = NULL #current_tree._c_node = NULL - #del insert_element, current_tree, doc - -cdef object _attributeDict(xmlNode* c_element): - cdef xmlNode* c_node - result = {} - c_node = (c_element.properties) - while c_node is not NULL: - if c_node.type == tree.XML_ATTRIBUTE_NODE: - python.PyDict_SetItem( - result, _namespacedName(c_node), - _attributeValue(c_element, c_node) - ) - c_node = c_node.next - return result - -cdef xmlNode* _applyTemplate(xslt.xsltTransformContext* ctxt, xmlNode* c_node): - cdef xslt.xsltStylesheet* stylesheet - stylesheet = ctxt.style - + #del insertion_point, current_tree, doc From scoder at codespeak.net Mon Apr 10 09:31:14 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Apr 10 09:31:16 2006 Subject: [Lxml-checkins] r25645 - lxml/branch/htmlparser/doc Message-ID: <20060410073114.490A010182@code0.codespeak.net> Author: scoder Date: Mon Apr 10 09:31:12 2006 New Revision: 25645 Modified: lxml/branch/htmlparser/doc/api.txt Log: doctest for HTML parser in api.txt Modified: lxml/branch/htmlparser/doc/api.txt 
============================================================================== --- lxml/branch/htmlparser/doc/api.txt (original) +++ lxml/branch/htmlparser/doc/api.txt Mon Apr 10 09:31:12 2006 @@ -19,13 +19,14 @@ >>> from StringIO import StringIO -XMLParser ---------- +Parsers +------- -One of the differences is the parser. It is based on libxml2 and therefore -only supports options that are backed by the library. Parsers take a number -of keyword arguments. The following is an example for namespace cleanup -during parsing, first with the default parser, then with a parametrized one:: +One of the differences is the parser. There is support for both XML and +(broken) HTML. Both are based on libxml2 and therefore only support options +that are backed by the library. Parsers take a number of keyword arguments. +The following is an example for namespace cleanup during parsing, first with +the default parser, then with a parametrized one:: >>> xml = '' @@ -38,6 +39,23 @@ >>> print lxml.etree.tostring(et.getroot()) +HTML parsing is similarly simple:: + + >>> broken_html = "test<body><h1>page title</body></html>" + + >>> parser = lxml.etree.HTMLParser() + >>> et = lxml.etree.parse(StringIO(broken_html), parser) + + >>> print lxml.etree.tostring(et.getroot()) + <html><head><title>test

</title></head><body><h1>page title</h1></body></html>
+ +Lxml has an HTML function, similar to the XML shortcut known from +ElementTree:: + + >>> html = lxml.etree.HTML(broken_html) + >>> print lxml.etree.tostring(html) + <html><head><title>test</title></head><body><h1>page title</h1></body></html>
+ Error handling on exceptions ---------------------------- From scoder at codespeak.net Thu Apr 13 07:53:04 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 13 07:53:08 2006 Subject: [Lxml-checkins] r25761 - lxml/trunk Message-ID: <20060413055304.A552710082@code0.codespeak.net> Author: scoder Date: Thu Apr 13 07:53:01 2006 New Revision: 25761 Modified: lxml/trunk/version.txt Log: reflect current version 0.9.1 in version.txt Modified: lxml/trunk/version.txt ============================================================================== --- lxml/trunk/version.txt (original) +++ lxml/trunk/version.txt Thu Apr 13 07:53:01 2006 @@ -1 +1 @@ -0.9 +0.9.1 From scoder at codespeak.net Thu Apr 13 07:54:09 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 13 07:54:11 2006 Subject: [Lxml-checkins] r25762 - lxml/trunk/src/lxml/tests Message-ID: <20060413055409.7333710082@code0.codespeak.net> Author: scoder Date: Thu Apr 13 07:54:08 2006 New Revision: 25762 Modified: lxml/trunk/src/lxml/tests/test_xmlschema.py Log: new XMLSchema test case that crashes lxml if parsed schema document is not in the XML-Schema namespace Modified: lxml/trunk/src/lxml/tests/test_xmlschema.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xmlschema.py (original) +++ lxml/trunk/src/lxml/tests/test_xmlschema.py Thu Apr 13 07:54:08 2006 @@ -26,7 +26,7 @@ self.assert_(schema.validate(tree_valid)) self.assert_(not schema.validate(tree_invalid)) - def test_xmlschema_invalid_schema(self): + def test_xmlschema_invalid_schema1(self): schema = self.parse('''\ @@ -40,6 +40,11 @@ self.assertRaises(etree.XMLSchemaParseError, etree.XMLSchema, schema) + def test_xmlschema_invalid_schema2(self): + schema = self.parse('') + self.assertRaises(etree.XMLSchemaParseError, + etree.XMLSchema, schema) + ## def test_xmlschema_include(self): ## # this will only work if we access the file through path or ## # file 
object.. From scoder at codespeak.net Thu Apr 13 07:56:30 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 13 07:56:31 2006 Subject: [Lxml-checkins] r25763 - lxml/trunk/src/lxml Message-ID: <20060413055630.8915610082@code0.codespeak.net> Author: scoder Date: Thu Apr 13 07:56:29 2006 New Revision: 25763 Modified: lxml/trunk/src/lxml/xmlschema.pxi Log: make new XML-Schema test case by checking the root node namespace of the document passed to XMLSchema() ; also allow passing Elements instead of ElementTrees Modified: lxml/trunk/src/lxml/xmlschema.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlschema.pxi (original) +++ lxml/trunk/src/lxml/xmlschema.pxi Thu Apr 13 07:56:29 2006 @@ -19,37 +19,59 @@ cdef xmlschema.xmlSchema* _c_schema cdef _ErrorLog _error_log - def __init__(self, _ElementTree etree): + def __init__(self, etree): cdef _Document doc + cdef _NodeBase root_node + cdef xmlDoc* fake_c_doc + cdef xmlNode* c_node cdef xmlschema.xmlSchemaParserCtxt* parser_ctxt - doc = etree._doc - parser_ctxt = xmlschema.xmlSchemaNewDocParserCtxt(doc._c_doc) + + doc = _documentOrRaise(etree) + root_node = _rootNodeOf(etree) + + # work around for libxml2 bug if document is not XML schema at all + c_node = root_node._c_node + if c_node.ns is NULL or c_node.ns.href is NULL or \ + tree.strcmp(c_node.ns.href, 'http://www.w3.org/2001/XMLSchema') != 0: + raise XMLSchemaParseError, "Document is not XML Schema" + + fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) + parser_ctxt = xmlschema.xmlSchemaNewDocParserCtxt(fake_c_doc) if parser_ctxt is NULL: + _destroyFakeDoc(doc._c_doc, fake_c_doc) raise XMLSchemaParseError, "Document is not parsable as XML Schema" self._c_schema = xmlschema.xmlSchemaParse(parser_ctxt) + + xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt) + _destroyFakeDoc(doc._c_doc, fake_c_doc) + if self._c_schema is NULL: - xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt) raise 
XMLSchemaParseError, "Document is not valid XML Schema" - xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt) self._error_log = _ErrorLog() - + def __dealloc__(self): xmlschema.xmlSchemaFree(self._c_schema) - def validate(self, _ElementTree etree): + def validate(self, etree): """Validate doc using XML Schema. Returns true if document is valid, false if not. """ cdef xmlschema.xmlSchemaValidCtxt* valid_ctxt + cdef _Document doc + cdef _NodeBase root_node cdef xmlDoc* c_doc cdef int ret + + doc = _documentOrRaise(etree) + root_node = _rootNodeOf(etree) + self._error_log.connect() valid_ctxt = xmlschema.xmlSchemaNewValidCtxt(self._c_schema) - c_doc = _fakeRootDoc(etree._doc._c_doc, etree._context_node._c_node) + c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) ret = xmlschema.xmlSchemaValidateDoc(valid_ctxt, c_doc) - _destroyFakeDoc(etree._doc._c_doc, c_doc) + _destroyFakeDoc(doc._c_doc, c_doc) xmlschema.xmlSchemaFreeValidCtxt(valid_ctxt) self._error_log.disconnect() From scoder at codespeak.net Thu Apr 13 07:59:33 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 13 07:59:34 2006 Subject: [Lxml-checkins] r25764 - lxml/trunk/src/lxml Message-ID: <20060413055933.55E6810082@code0.codespeak.net> Author: scoder Date: Thu Apr 13 07:59:32 2006 New Revision: 25764 Modified: lxml/trunk/src/lxml/relaxng.pxi Log: allow passing Elements to RelaxNG() and .validate(), not only ElementTrees Modified: lxml/trunk/src/lxml/relaxng.pxi ============================================================================== --- lxml/trunk/src/lxml/relaxng.pxi (original) +++ lxml/trunk/src/lxml/relaxng.pxi Thu Apr 13 07:59:32 2006 @@ -20,11 +20,18 @@ cdef relaxng.xmlRelaxNG* _c_schema cdef _ErrorLog _error_log - def __init__(self, _ElementTree etree=None, file=None): + def __init__(self, etree=None, file=None): + cdef _Document doc + cdef _NodeBase root_node + cdef xmlDoc* fake_c_doc cdef relaxng.xmlRelaxNGParserCtxt* parser_ctxt + fake_c_doc = NULL if etree is not None: - 
parser_ctxt = relaxng.xmlRelaxNGNewDocParserCtxt(etree._doc._c_doc) + doc = _documentOrRaise(etree) + root_node = _rootNodeOf(etree) + fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) + parser_ctxt = relaxng.xmlRelaxNGNewDocParserCtxt(fake_c_doc) elif file is not None: filename = _getFilenameForFile(file) if filename is None: @@ -33,31 +40,44 @@ parser_ctxt = relaxng.xmlRelaxNGNewParserCtxt(filename) else: raise RelaxNGParseError, "No tree or file given" + if parser_ctxt is NULL: + if fake_c_doc is not NULL: + _destroyFakeDoc(doc._c_doc, fake_c_doc) raise RelaxNGParseError, "Document is not parsable as Relax NG" self._c_schema = relaxng.xmlRelaxNGParse(parser_ctxt) + if self._c_schema is NULL: + if fake_c_doc is not NULL: + _destroyFakeDoc(doc._c_doc, fake_c_doc) raise RelaxNGParseError, "Document is not valid Relax NG" relaxng.xmlRelaxNGFreeParserCtxt(parser_ctxt) - + if fake_c_doc is not NULL: + _destroyFakeDoc(doc._c_doc, fake_c_doc) self._error_log = _ErrorLog() def __dealloc__(self): relaxng.xmlRelaxNGFree(self._c_schema) - def validate(self, _ElementTree etree): + def validate(self, etree): """Validate doc using Relax NG. 
Returns true if document is valid, false if not.""" + cdef _Document doc + cdef _NodeBase root_node cdef xmlDoc* c_doc cdef relaxng.xmlRelaxNGValidCtxt* valid_ctxt cdef int ret + + doc = _documentOrRaise(etree) + root_node = _rootNodeOf(etree) + self._error_log.connect() valid_ctxt = relaxng.xmlRelaxNGNewValidCtxt(self._c_schema) - c_doc = _fakeRootDoc(etree._doc._c_doc, etree._context_node._c_node) + c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) ret = relaxng.xmlRelaxNGValidateDoc(valid_ctxt, c_doc) - _destroyFakeDoc(etree._doc._c_doc, c_doc) + _destroyFakeDoc(doc._c_doc, c_doc) relaxng.xmlRelaxNGFreeValidCtxt(valid_ctxt) self._error_log.disconnect() From scoder at codespeak.net Thu Apr 13 08:06:51 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 13 08:06:54 2006 Subject: [Lxml-checkins] r25765 - lxml/trunk/src/lxml Message-ID: <20060413060651.C3D3810078@code0.codespeak.net> Author: scoder Date: Thu Apr 13 08:06:50 2006 New Revision: 25765 Modified: lxml/trunk/src/lxml/relaxng.pxi Log: doc typo Modified: lxml/trunk/src/lxml/relaxng.pxi ============================================================================== --- lxml/trunk/src/lxml/relaxng.pxi (original) +++ lxml/trunk/src/lxml/relaxng.pxi Thu Apr 13 08:06:50 2006 @@ -14,7 +14,7 @@ # RelaxNG cdef class RelaxNG: - """Turn a document into an Relax NG validator. + """Turn a document into a Relax NG validator. Can also load from filesystem directly given file object or filename. 
""" cdef relaxng.xmlRelaxNG* _c_schema From scoder at codespeak.net Thu Apr 13 10:17:34 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 13 10:17:36 2006 Subject: [Lxml-checkins] r25768 - lxml/trunk/src/lxml Message-ID: <20060413081734.C27851007D@code0.codespeak.net> Author: scoder Date: Thu Apr 13 10:17:33 2006 New Revision: 25768 Modified: lxml/trunk/src/lxml/etree.pyx Log: restrict type test to _ElementTree in _rootNodeOf() Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Apr 13 10:17:33 2006 @@ -1292,8 +1292,8 @@ cdef _NodeBase _rootNodeOf(object input): # call this to get the root node of a # _Document, _ElementTree or _NodeBase object - if hasattr(input, 'getroot'): # ElementTree - return <_NodeBase>(input.getroot()) + if isinstance(input, _ElementTree): + return (<_ElementTree>input)._context_node elif isinstance(input, _NodeBase): return <_NodeBase>input elif isinstance(input, _Document): From scoder at codespeak.net Thu Apr 13 10:17:53 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 13 10:17:55 2006 Subject: [Lxml-checkins] r25769 - lxml/trunk/src/lxml Message-ID: <20060413081753.44D621007D@code0.codespeak.net> Author: scoder Date: Thu Apr 13 10:17:52 2006 New Revision: 25769 Modified: lxml/trunk/src/lxml/relaxng.pxi Log: work arounds for libxml2 RelaxNG bug to make the RNG test cases valgrind clean without crashing Modified: lxml/trunk/src/lxml/relaxng.pxi ============================================================================== --- lxml/trunk/src/lxml/relaxng.pxi (original) +++ lxml/trunk/src/lxml/relaxng.pxi Thu Apr 13 10:17:52 2006 @@ -23,6 +23,7 @@ def __init__(self, etree=None, file=None): cdef _Document doc cdef _NodeBase root_node + cdef xmlNode* c_node cdef xmlDoc* fake_c_doc cdef relaxng.xmlRelaxNGParserCtxt* parser_ctxt @@ -30,6 +31,12 @@ if 
etree is not None: doc = _documentOrRaise(etree) root_node = _rootNodeOf(etree) + c_node = root_node._c_node + # work around for libxml2 bug if document is not RNG at all + if c_node.ns is NULL or c_node.ns.href is NULL or \ + tree.strcmp(c_node.ns.href, + 'http://relaxng.org/ns/structure/1.0') != 0: + raise RelaxNGParseError, "Document is not Relax NG" fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) parser_ctxt = relaxng.xmlRelaxNGNewDocParserCtxt(fake_c_doc) elif file is not None: @@ -47,8 +54,11 @@ raise RelaxNGParseError, "Document is not parsable as Relax NG" self._c_schema = relaxng.xmlRelaxNGParse(parser_ctxt) + # XXX: freeing parser context will crash if document was not RNG!! + #relaxng.xmlRelaxNGFreeParserCtxt(parser_ctxt) if self._c_schema is NULL: if fake_c_doc is not NULL: + relaxng.xmlRelaxNGFreeParserCtxt(parser_ctxt) _destroyFakeDoc(doc._c_doc, fake_c_doc) raise RelaxNGParseError, "Document is not valid Relax NG" relaxng.xmlRelaxNGFreeParserCtxt(parser_ctxt) From scoder at codespeak.net Thu Apr 13 10:25:35 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 13 10:25:37 2006 Subject: [Lxml-checkins] r25770 - in lxml/branch/lxml-0.9.x/src/lxml: . 
tests Message-ID: <20060413082535.A97081007C@code0.codespeak.net> Author: scoder Date: Thu Apr 13 10:25:34 2006 New Revision: 25770 Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx lxml/branch/lxml-0.9.x/src/lxml/relaxng.pxi lxml/branch/lxml-0.9.x/src/lxml/tests/test_xmlschema.py lxml/branch/lxml-0.9.x/src/lxml/xmlschema.pxi Log: merged in fixes for crashes and memory leaks in RelaxNG and XML Schema Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Thu Apr 13 10:25:34 2006 @@ -1277,8 +1277,8 @@ cdef _NodeBase _rootNodeOf(object input): # call this to get the root node of a # _Document, _ElementTree or _NodeBase object - if hasattr(input, 'getroot'): # ElementTree - return <_NodeBase>(input.getroot()) + if isinstance(input, _ElementTree): + return (<_ElementTree>input)._context_node elif isinstance(input, _NodeBase): return <_NodeBase>input elif isinstance(input, _Document): Modified: lxml/branch/lxml-0.9.x/src/lxml/relaxng.pxi ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/relaxng.pxi (original) +++ lxml/branch/lxml-0.9.x/src/lxml/relaxng.pxi Thu Apr 13 10:25:34 2006 @@ -14,17 +14,31 @@ # RelaxNG cdef class RelaxNG: - """Turn a document into an Relax NG validator. + """Turn a document into a Relax NG validator. Can also load from filesystem directly given file object or filename. 
""" cdef relaxng.xmlRelaxNG* _c_schema cdef _ErrorLog _error_log - def __init__(self, _ElementTree etree=None, file=None): + def __init__(self, etree=None, file=None): + cdef _Document doc + cdef _NodeBase root_node + cdef xmlNode* c_node + cdef xmlDoc* fake_c_doc cdef relaxng.xmlRelaxNGParserCtxt* parser_ctxt + fake_c_doc = NULL if etree is not None: - parser_ctxt = relaxng.xmlRelaxNGNewDocParserCtxt(etree._doc._c_doc) + doc = _documentOrRaise(etree) + root_node = _rootNodeOf(etree) + c_node = root_node._c_node + # work around for libxml2 bug if document is not RNG at all + if c_node.ns is NULL or c_node.ns.href is NULL or \ + tree.strcmp(c_node.ns.href, + 'http://relaxng.org/ns/structure/1.0') != 0: + raise RelaxNGParseError, "Document is not Relax NG" + fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) + parser_ctxt = relaxng.xmlRelaxNGNewDocParserCtxt(fake_c_doc) elif file is not None: filename = _getFilenameForFile(file) if filename is None: @@ -33,31 +47,47 @@ parser_ctxt = relaxng.xmlRelaxNGNewParserCtxt(filename) else: raise RelaxNGParseError, "No tree or file given" + if parser_ctxt is NULL: + if fake_c_doc is not NULL: + _destroyFakeDoc(doc._c_doc, fake_c_doc) raise RelaxNGParseError, "Document is not parsable as Relax NG" self._c_schema = relaxng.xmlRelaxNGParse(parser_ctxt) + + # XXX: freeing parser context will crash if document was not RNG!! + #relaxng.xmlRelaxNGFreeParserCtxt(parser_ctxt) if self._c_schema is NULL: + if fake_c_doc is not NULL: + relaxng.xmlRelaxNGFreeParserCtxt(parser_ctxt) + _destroyFakeDoc(doc._c_doc, fake_c_doc) raise RelaxNGParseError, "Document is not valid Relax NG" relaxng.xmlRelaxNGFreeParserCtxt(parser_ctxt) - + if fake_c_doc is not NULL: + _destroyFakeDoc(doc._c_doc, fake_c_doc) self._error_log = _ErrorLog() def __dealloc__(self): relaxng.xmlRelaxNGFree(self._c_schema) - def validate(self, _ElementTree etree): + def validate(self, etree): """Validate doc using Relax NG. 
Returns true if document is valid, false if not.""" + cdef _Document doc + cdef _NodeBase root_node cdef xmlDoc* c_doc cdef relaxng.xmlRelaxNGValidCtxt* valid_ctxt cdef int ret + + doc = _documentOrRaise(etree) + root_node = _rootNodeOf(etree) + self._error_log.connect() valid_ctxt = relaxng.xmlRelaxNGNewValidCtxt(self._c_schema) - c_doc = _fakeRootDoc(etree._doc._c_doc, etree._context_node._c_node) + c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) ret = relaxng.xmlRelaxNGValidateDoc(valid_ctxt, c_doc) - _destroyFakeDoc(etree._doc._c_doc, c_doc) + _destroyFakeDoc(doc._c_doc, c_doc) relaxng.xmlRelaxNGFreeValidCtxt(valid_ctxt) self._error_log.disconnect() Modified: lxml/branch/lxml-0.9.x/src/lxml/tests/test_xmlschema.py ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/tests/test_xmlschema.py (original) +++ lxml/branch/lxml-0.9.x/src/lxml/tests/test_xmlschema.py Thu Apr 13 10:25:34 2006 @@ -26,7 +26,7 @@ self.assert_(schema.validate(tree_valid)) self.assert_(not schema.validate(tree_invalid)) - def test_xmlschema_invalid_schema(self): + def test_xmlschema_invalid_schema1(self): schema = self.parse('''\ @@ -40,6 +40,11 @@ self.assertRaises(etree.XMLSchemaParseError, etree.XMLSchema, schema) + def test_xmlschema_invalid_schema2(self): + schema = self.parse('') + self.assertRaises(etree.XMLSchemaParseError, + etree.XMLSchema, schema) + ## def test_xmlschema_include(self): ## # this will only work if we access the file through path or ## # file object.. 
Modified: lxml/branch/lxml-0.9.x/src/lxml/xmlschema.pxi ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/xmlschema.pxi (original) +++ lxml/branch/lxml-0.9.x/src/lxml/xmlschema.pxi Thu Apr 13 10:25:34 2006 @@ -19,37 +19,59 @@ cdef xmlschema.xmlSchema* _c_schema cdef _ErrorLog _error_log - def __init__(self, _ElementTree etree): + def __init__(self, etree): cdef _Document doc + cdef _NodeBase root_node + cdef xmlDoc* fake_c_doc + cdef xmlNode* c_node cdef xmlschema.xmlSchemaParserCtxt* parser_ctxt - doc = etree._doc - parser_ctxt = xmlschema.xmlSchemaNewDocParserCtxt(doc._c_doc) + + doc = _documentOrRaise(etree) + root_node = _rootNodeOf(etree) + + # work around for libxml2 bug if document is not XML schema at all + c_node = root_node._c_node + if c_node.ns is NULL or c_node.ns.href is NULL or \ + tree.strcmp(c_node.ns.href, 'http://www.w3.org/2001/XMLSchema') != 0: + raise XMLSchemaParseError, "Document is not XML Schema" + + fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) + parser_ctxt = xmlschema.xmlSchemaNewDocParserCtxt(fake_c_doc) if parser_ctxt is NULL: + _destroyFakeDoc(doc._c_doc, fake_c_doc) raise XMLSchemaParseError, "Document is not parsable as XML Schema" self._c_schema = xmlschema.xmlSchemaParse(parser_ctxt) + + xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt) + _destroyFakeDoc(doc._c_doc, fake_c_doc) + if self._c_schema is NULL: - xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt) raise XMLSchemaParseError, "Document is not valid XML Schema" - xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt) self._error_log = _ErrorLog() - + def __dealloc__(self): xmlschema.xmlSchemaFree(self._c_schema) - def validate(self, _ElementTree etree): + def validate(self, etree): """Validate doc using XML Schema. Returns true if document is valid, false if not. 
""" cdef xmlschema.xmlSchemaValidCtxt* valid_ctxt + cdef _Document doc + cdef _NodeBase root_node cdef xmlDoc* c_doc cdef int ret + + doc = _documentOrRaise(etree) + root_node = _rootNodeOf(etree) + self._error_log.connect() valid_ctxt = xmlschema.xmlSchemaNewValidCtxt(self._c_schema) - c_doc = _fakeRootDoc(etree._doc._c_doc, etree._context_node._c_node) + c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) ret = xmlschema.xmlSchemaValidateDoc(valid_ctxt, c_doc) - _destroyFakeDoc(etree._doc._c_doc, c_doc) + _destroyFakeDoc(doc._c_doc, c_doc) xmlschema.xmlSchemaFreeValidCtxt(valid_ctxt) self._error_log.disconnect() From scoder at codespeak.net Thu Apr 13 14:22:23 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 13 14:22:24 2006 Subject: [Lxml-checkins] r25781 - lxml/trunk/src/lxml Message-ID: <20060413122223.A6C551007C@code0.codespeak.net> Author: scoder Date: Thu Apr 13 14:22:22 2006 New Revision: 25781 Modified: lxml/trunk/src/lxml/etree.h lxml/trunk/src/lxml/python.pxd Log: cleaned up declarations in python.pxd and etree.h, added str() macro Modified: lxml/trunk/src/lxml/etree.h ============================================================================== --- lxml/trunk/src/lxml/etree.h (original) +++ lxml/trunk/src/lxml/etree.h Thu Apr 13 14:22:22 2006 @@ -1,9 +1,10 @@ #ifndef HAS_ETREE_H #define HAS_ETREE_H -#define isinstance(a,b) PyObject_IsInstance(a,b) -#define hasattr(a,b) PyObject_HasAttr(a,b) -#define callable(a) PyCallable_Check(a) +#define isinstance(o,c) PyObject_IsInstance(o,c) +#define hasattr(o,a) PyObject_HasAttr(o,a) +#define callable(o) PyCallable_Check(o) +#define str(o) PyObject_Str(o) #define _cstr(s) PyString_AS_STRING(s) #define _isElement(c_node) \ ((c_node)->type == XML_ELEMENT_NODE || \ Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Thu Apr 13 14:22:22 2006 @@ 
-33,12 +33,10 @@ cdef int PyBool_Check(object instance) cdef int PySequence_Check(object instance) cdef int PyType_Check(object instance) - cdef int PyCallable_Check(object instance) - cdef int PyObject_IsInstance(object instance, object classes) - cdef int PyObject_HasAttr(object obj, object attr) cdef extern from "etree.h": # redefines some functions as macros cdef int isinstance(object instance, object classes) cdef int hasattr(object obj, object attr) cdef int callable(object obj) + cdef object str(object obj) cdef char* _cstr(object s) From scoder at codespeak.net Thu Apr 13 14:23:32 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 13 14:23:33 2006 Subject: [Lxml-checkins] r25782 - in lxml/trunk/src/lxml: . tests Message-ID: <20060413122332.7D1E81007C@code0.codespeak.net> Author: scoder Date: Thu Apr 13 14:23:31 2006 New Revision: 25782 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_unicode.py Log: new QName class, allow QName objects to be passed into API functions like Element() etc. 
Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Apr 13 14:23:31 2006 @@ -1,6 +1,6 @@ cimport tree, python from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement -from python cimport isinstance, hasattr, callable, _cstr +from python cimport isinstance, hasattr, callable, str, _cstr cimport xpath cimport xslt cimport xmlerror @@ -1203,6 +1203,20 @@ python.PyDict_SetItem(dic, elem.get('id'), elem) return (root, dic) +cdef class QName: + cdef readonly object text + def __init__(self, text_or_uri, tag=None): + if tag is not None: + text_or_uri = "{%s}%s" % (text_or_uri, tag) + elif not python.PyString_Check(text_or_uri) and \ + not python.PyUnicode_Check(text_or_uri): + text_or_uri = str(text_or_uri) + self.text = text_or_uri + def __str__(self): + return self.text + def __hash__(self): + return self.text.__hash__() + def iselement(element): return isinstance(element, _Element) @@ -1559,6 +1573,8 @@ cdef char* c_tag cdef char* c_pos cdef int nslen + if isinstance(tag, QName): + tag = (tag).text tag = _utf8(tag) c_tag = _cstr(tag) if c_tag[0] == c'{': Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Thu Apr 13 14:23:31 2006 @@ -1787,6 +1787,14 @@ self.assertEquals(self._rootstring(t1), '') self.assertEquals(self._rootstring(t), '') + def test_qname(self): + etree = self.etree + qname = etree.QName('myns', 'a') + a1 = etree.Element(qname) + a2 = etree.SubElement(a1, qname) + self.assertEquals(a1.tag, "{myns}a") + self.assertEquals(a2.tag, "{myns}a") + def _writeElement(self, element, encoding='us-ascii'): """Write out element for comparison. 
""" Modified: lxml/trunk/src/lxml/tests/test_unicode.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_unicode.py (original) +++ lxml/trunk/src/lxml/tests/test_unicode.py Thu Apr 13 14:23:31 2006 @@ -22,6 +22,13 @@ el = etree.Element(tag) self.assertEquals(tag, el.tag) + def test_unicode_qname(self): + qname = etree.QName(uni, uni) + tag = u"{%s}%s" % (uni, uni) + self.assertEquals(qname.text, tag) + self.assertEquals(qname.__str__(), tag) + self.assertEquals(unicode(qname), tag) + def test_unicode_attr(self): el = etree.Element('foo', {'bar': uni}) self.assertEquals(uni, el.attrib['bar']) From scoder at codespeak.net Thu Apr 13 14:35:52 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 13 14:35:53 2006 Subject: [Lxml-checkins] r25783 - in lxml/branch/lxml-0.9.x/src/lxml: . tests Message-ID: <20060413123552.194321007C@code0.codespeak.net> Author: scoder Date: Thu Apr 13 14:35:50 2006 New Revision: 25783 Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.h lxml/branch/lxml-0.9.x/src/lxml/etree.pyx lxml/branch/lxml-0.9.x/src/lxml/python.pxd lxml/branch/lxml-0.9.x/src/lxml/tests/test_elementtree.py lxml/branch/lxml-0.9.x/src/lxml/tests/test_unicode.py Log: merged in QName class from trunk Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.h ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/etree.h (original) +++ lxml/branch/lxml-0.9.x/src/lxml/etree.h Thu Apr 13 14:35:50 2006 @@ -1,9 +1,10 @@ #ifndef HAS_ETREE_H #define HAS_ETREE_H -#define isinstance(a,b) PyObject_IsInstance(a,b) -#define hasattr(a,b) PyObject_HasAttr(a,b) -#define callable(a) PyCallable_Check(a) +#define isinstance(o,c) PyObject_IsInstance(o,c) +#define hasattr(o,a) PyObject_HasAttr(o,a) +#define callable(o) PyCallable_Check(o) +#define str(o) PyObject_Str(o) #define _isElement(c_node) \ ((c_node)->type == XML_ELEMENT_NODE || \ (c_node)->type == 
XML_COMMENT_NODE) Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Thu Apr 13 14:35:50 2006 @@ -1,6 +1,6 @@ cimport tree, python from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement -from python cimport isinstance, hasattr +from python cimport isinstance, hasattr, callable, str cimport xpath cimport xslt cimport xmlerror @@ -1188,6 +1188,20 @@ python.PyDict_SetItem(dic, elem.get('id'), elem) return (root, dic) +cdef class QName: + cdef readonly object text + def __init__(self, text_or_uri, tag=None): + if tag is not None: + text_or_uri = "{%s}%s" % (text_or_uri, tag) + elif not python.PyString_Check(text_or_uri) and \ + not python.PyUnicode_Check(text_or_uri): + text_or_uri = str(text_or_uri) + self.text = text_or_uri + def __str__(self): + return self.text + def __hash__(self): + return self.text.__hash__() + def iselement(element): return isinstance(element, _Element) @@ -1544,6 +1558,8 @@ cdef char* c_tag cdef char* c_pos cdef int nslen + if isinstance(tag, QName): + tag = (tag).text tag = _utf8(tag) c_tag = tag if c_tag[0] == c'{': Modified: lxml/branch/lxml-0.9.x/src/lxml/python.pxd ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/python.pxd (original) +++ lxml/branch/lxml-0.9.x/src/lxml/python.pxd Thu Apr 13 14:35:50 2006 @@ -33,11 +33,9 @@ cdef int PyBool_Check(object instance) cdef int PySequence_Check(object instance) cdef int PyType_Check(object instance) - cdef int PyCallable_Check(object instance) - cdef int PyObject_IsInstance(object instance, object classes) - cdef int PyObject_HasAttr(object obj, object attr) cdef extern from "etree.h": # redefines some functions as macros cdef int isinstance(object instance, object classes) cdef int hasattr(object obj, object attr) cdef int callable(object obj) 
+ cdef object str(object obj) Modified: lxml/branch/lxml-0.9.x/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/lxml-0.9.x/src/lxml/tests/test_elementtree.py Thu Apr 13 14:35:50 2006 @@ -1787,6 +1787,14 @@ self.assertEquals(self._rootstring(t1), '') self.assertEquals(self._rootstring(t), '') + def test_qname(self): + etree = self.etree + qname = etree.QName('myns', 'a') + a1 = etree.Element(qname) + a2 = etree.SubElement(a1, qname) + self.assertEquals(a1.tag, "{myns}a") + self.assertEquals(a2.tag, "{myns}a") + def _writeElement(self, element, encoding='us-ascii'): """Write out element for comparison. """ Modified: lxml/branch/lxml-0.9.x/src/lxml/tests/test_unicode.py ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/tests/test_unicode.py (original) +++ lxml/branch/lxml-0.9.x/src/lxml/tests/test_unicode.py Thu Apr 13 14:35:50 2006 @@ -22,6 +22,12 @@ el = etree.Element(tag) self.assertEquals(tag, el.tag) + def test_unicode_qname(self): + qname = etree.QName(uni, uni) + tag = u"{%s}%s" % (uni, uni) + self.assertEquals(qname.text, tag) + self.assertEquals(unicode(qname), tag) + def test_unicode_attr(self): el = etree.Element('foo', {'bar': uni}) self.assertEquals(uni, el.attrib['bar']) From scoder at codespeak.net Thu Apr 13 14:40:43 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 13 14:40:44 2006 Subject: [Lxml-checkins] r25784 - lxml/trunk/src/lxml/tests Message-ID: <20060413124043.513FF1007C@code0.codespeak.net> Author: scoder Date: Thu Apr 13 14:40:42 2006 New Revision: 25784 Modified: lxml/trunk/src/lxml/tests/test_unicode.py Log: clean up test case Modified: lxml/trunk/src/lxml/tests/test_unicode.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_unicode.py 
(original) +++ lxml/trunk/src/lxml/tests/test_unicode.py Thu Apr 13 14:40:42 2006 @@ -26,7 +26,6 @@ qname = etree.QName(uni, uni) tag = u"{%s}%s" % (uni, uni) self.assertEquals(qname.text, tag) - self.assertEquals(qname.__str__(), tag) self.assertEquals(unicode(qname), tag) def test_unicode_attr(self): From scoder at codespeak.net Thu Apr 13 14:48:40 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 13 14:48:42 2006 Subject: [Lxml-checkins] r25785 - lxml/trunk/src/lxml Message-ID: <20060413124840.CD8461007C@code0.codespeak.net> Author: scoder Date: Thu Apr 13 14:48:39 2006 New Revision: 25785 Modified: lxml/trunk/src/lxml/etree.h lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/nsclasses.pxi lxml/trunk/src/lxml/python.pxd Log: make issubclass a macro Modified: lxml/trunk/src/lxml/etree.h ============================================================================== --- lxml/trunk/src/lxml/etree.h (original) +++ lxml/trunk/src/lxml/etree.h Thu Apr 13 14:48:39 2006 @@ -2,6 +2,7 @@ #define HAS_ETREE_H #define isinstance(o,c) PyObject_IsInstance(o,c) +#define issubclass(c,csuper) PyObject_IsSubclass(c,csuper) #define hasattr(o,a) PyObject_HasAttr(o,a) #define callable(o) PyCallable_Check(o) #define str(o) PyObject_Str(o) Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Apr 13 14:48:39 2006 @@ -1,6 +1,6 @@ cimport tree, python from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement -from python cimport isinstance, hasattr, callable, str, _cstr +from python cimport isinstance, issubclass, hasattr, callable, str, _cstr cimport xpath cimport xslt cimport xmlerror Modified: lxml/trunk/src/lxml/nsclasses.pxi ============================================================================== --- lxml/trunk/src/lxml/nsclasses.pxi (original) +++ lxml/trunk/src/lxml/nsclasses.pxi Thu Apr 13 14:48:39 2006 
@@ -10,7 +10,7 @@ persistent state of elements must be stored in the underlying XML.""" pass -class XSLTElement(object): +cdef class XSLTElement: "NOT IMPLEMENTED YET!" pass Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Thu Apr 13 14:48:39 2006 @@ -36,6 +36,7 @@ cdef extern from "etree.h": # redefines some functions as macros cdef int isinstance(object instance, object classes) + cdef int issubclass(object instance, object classes) cdef int hasattr(object obj, object attr) cdef int callable(object obj) cdef object str(object obj) From scoder at codespeak.net Thu Apr 13 14:56:51 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 13 14:56:52 2006 Subject: [Lxml-checkins] r25787 - lxml/trunk/src/lxml Message-ID: <20060413125651.D9DFC1007C@code0.codespeak.net> Author: scoder Date: Thu Apr 13 14:56:51 2006 New Revision: 25787 Modified: lxml/trunk/src/lxml/python.pxd Log: clean up in python.pxd Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Thu Apr 13 14:56:51 2006 @@ -36,7 +36,7 @@ cdef extern from "etree.h": # redefines some functions as macros cdef int isinstance(object instance, object classes) - cdef int issubclass(object instance, object classes) + cdef int issubclass(object derived, object superclasses) cdef int hasattr(object obj, object attr) cdef int callable(object obj) cdef object str(object obj) From scoder at codespeak.net Thu Apr 13 14:57:49 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 13 14:57:50 2006 Subject: [Lxml-checkins] r25788 - lxml/branch/lxml-0.9.x/src/lxml Message-ID: <20060413125749.7327D1007C@code0.codespeak.net> Author: scoder Date: Thu Apr 13 14:57:48 2006 New Revision: 25788 Modified: 
lxml/branch/lxml-0.9.x/src/lxml/etree.h lxml/branch/lxml-0.9.x/src/lxml/etree.pyx lxml/branch/lxml-0.9.x/src/lxml/nsclasses.pxi lxml/branch/lxml-0.9.x/src/lxml/python.pxd Log: merged in issubclass macro from trunk Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.h ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/etree.h (original) +++ lxml/branch/lxml-0.9.x/src/lxml/etree.h Thu Apr 13 14:57:48 2006 @@ -2,6 +2,7 @@ #define HAS_ETREE_H #define isinstance(o,c) PyObject_IsInstance(o,c) +#define issubclass(c,csuper) PyObject_IsSubclass(c,csuper) #define hasattr(o,a) PyObject_HasAttr(o,a) #define callable(o) PyCallable_Check(o) #define str(o) PyObject_Str(o) Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Thu Apr 13 14:57:48 2006 @@ -1,6 +1,6 @@ cimport tree, python from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement -from python cimport isinstance, hasattr, callable, str +from python cimport isinstance, issubclass, hasattr, callable, str cimport xpath cimport xslt cimport xmlerror Modified: lxml/branch/lxml-0.9.x/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/nsclasses.pxi (original) +++ lxml/branch/lxml-0.9.x/src/lxml/nsclasses.pxi Thu Apr 13 14:57:48 2006 @@ -10,7 +10,7 @@ persistent state of elements must be stored in the underlying XML.""" pass -class XSLTElement(object): +cdef class XSLTElement: "NOT IMPLEMENTED YET!" 
pass Modified: lxml/branch/lxml-0.9.x/src/lxml/python.pxd ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/python.pxd (original) +++ lxml/branch/lxml-0.9.x/src/lxml/python.pxd Thu Apr 13 14:57:48 2006 @@ -36,6 +36,7 @@ cdef extern from "etree.h": # redefines some functions as macros cdef int isinstance(object instance, object classes) + cdef int issubclass(object derived, object superclasses) cdef int hasattr(object obj, object attr) cdef int callable(object obj) cdef object str(object obj) From scoder at codespeak.net Thu Apr 13 15:22:55 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 13 15:22:57 2006 Subject: [Lxml-checkins] r25790 - lxml/trunk Message-ID: <20060413132255.F02711007B@code0.codespeak.net> Author: scoder Date: Thu Apr 13 15:22:55 2006 New Revision: 25790 Modified: lxml/trunk/CHANGES.txt Log: updated CHANGES.txt Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Apr 13 15:22:55 2006 @@ -1,6 +1,23 @@ lxml changelog ============== +current +======= + +Features added +-------------- + +* ElementTree compatible QName class + +* RelaxNG and XMLSchema now accept any Element, not only ElementTrees + +Bugs fixed +---------- + +* Crash in XMLSchema and RelaxNG when passing non-schema documents + +* Memory leak in RelaxNG() when RelaxNGParseError is raised + 0.9.1 (2006-03-30) ================== From scoder at codespeak.net Thu Apr 13 15:24:28 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 13 15:24:30 2006 Subject: [Lxml-checkins] r25791 - lxml/branch/lxml-0.9.x Message-ID: <20060413132428.B03D11007B@code0.codespeak.net> Author: scoder Date: Thu Apr 13 15:24:27 2006 New Revision: 25791 Modified: lxml/branch/lxml-0.9.x/CHANGES.txt Log: updated CHANGES.txt Modified: lxml/branch/lxml-0.9.x/CHANGES.txt 
============================================================================== --- lxml/branch/lxml-0.9.x/CHANGES.txt (original) +++ lxml/branch/lxml-0.9.x/CHANGES.txt Thu Apr 13 15:24:27 2006 @@ -1,6 +1,23 @@ lxml changelog ============== +current +======= + +Features added +-------------- + +* ElementTree compatible QName class + +* RelaxNG and XMLSchema now accept any Element, not only ElementTrees + +Bugs fixed +---------- + +* Crash in XMLSchema and RelaxNG when passing non-schema documents + +* Memory leak in RelaxNG() when RelaxNGParseError is raised + 0.9.1 (2006-03-30) ================== From scoder at codespeak.net Sat Apr 15 16:48:19 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sat Apr 15 16:48:20 2006 Subject: [Lxml-checkins] r25858 - lxml/trunk Message-ID: <20060415144819.A8913100BC@code0.codespeak.net> Author: scoder Date: Sat Apr 15 16:48:16 2006 New Revision: 25858 Modified: lxml/trunk/bench.py Log: benchmarks for reversed() iteration over children Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Sat Apr 15 16:48:16 2006 @@ -240,6 +240,14 @@ ############################################################ class BenchMark(BenchMarkBase): + def bench_iter_children(self, root): + for child in root: + pass + + def bench_iter_children_reversed(self, root): + for child in reversed(root): + pass + def bench_append_from_document(self, root1, root2): # == "1,2 2,3 1,3 3,1 3,2 2,1" # trees 1 and 2, or 2 and 3, or ... 
for el in root2: From scoder at codespeak.net Sat Apr 15 17:12:37 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sat Apr 15 17:12:39 2006 Subject: [Lxml-checkins] r25859 - lxml/trunk/src/lxml/tests Message-ID: <20060415151237.7F3D4100BC@code0.codespeak.net> Author: scoder Date: Sat Apr 15 17:12:36 2006 New Revision: 25859 Modified: lxml/trunk/src/lxml/tests/test_elementtree.py Log: better names for element child iteration test cases, new test case for reversed(element) (used in Python >= 2.4 only) Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Sat Apr 15 17:12:36 2006 @@ -393,7 +393,7 @@ result.append(el.tag) self.assertEquals(['one', 'two', 'three'], result) - def test_iteration2(self): + def test_iteration_empty(self): XML = self.etree.XML root = XML('') @@ -402,7 +402,7 @@ result.append(el.tag) self.assertEquals([], result) - def test_iteration3(self): + def test_iteration_text_only(self): XML = self.etree.XML root = XML('Text') @@ -418,7 +418,22 @@ for elem in root: elem.tail = '' - def test_iteration4(self): + def test_iteration_reversed(self): + XML = self.etree.XML + + try: + reversed(()) + except NameError: + # before Python 2.4 + return + + root = XML('TwoHm') + result = [] + for el in reversed(root): + result.append(el.tag) + self.assertEquals(['three', 'two', 'one'], result) + + def test_iteration_subelement(self): XML = self.etree.XML root = XML('TwoHm') @@ -431,7 +446,7 @@ add = False self.assertEquals(['one', 'two', 'three', 'four'], result) - def test_iteration5(self): + def test_iteration_del_child(self): XML = self.etree.XML root = XML('TwoHm') @@ -441,7 +456,7 @@ del root[-1] self.assertEquals(['one', 'two'], result) - def test_iteration6(self): + def test_iteration_double(self): XML = self.etree.XML root = XML('') From scoder at 
codespeak.net Sat Apr 15 17:14:23 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sat Apr 15 17:14:25 2006 Subject: [Lxml-checkins] r25860 - lxml/trunk/src/lxml Message-ID: <20060415151423.CB615100BC@code0.codespeak.net> Author: scoder Date: Sat Apr 15 17:14:22 2006 New Revision: 25860 Modified: lxml/trunk/src/lxml/etree.pyx Log: new 'reversed' keyword for ElementChildIterator for efficient reversed iteration, used in new method Element.__reversed__ to remove quadratic behaviour Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sat Apr 15 17:14:22 2006 @@ -628,6 +628,9 @@ def __iter__(self): return ElementChildIterator(self) + def __reversed__(self): + return ElementChildIterator(self, reversed=True) + def index(self, _Element x, start=None, stop=None): cdef int k cdef int l @@ -1002,12 +1005,20 @@ registerProxy(result, PROXY_ATTRIB) return result +ctypedef xmlNode* (*_node_to_node_function)(xmlNode*) + cdef class ElementChildIterator: # we keep Python references here to control GC cdef _NodeBase _node - def __init__(self, _NodeBase node): # Python ref! + cdef _node_to_node_function _next_element + def __init__(self, _NodeBase node, reversed=False): # Python ref! 
cdef xmlNode* c_node - c_node = _findChildForwards(node._c_node, 0) + if reversed: + c_node = _findChildBackwards(node._c_node, 0) + self._next_element = _previousElement + else: + c_node = _findChildForwards(node._c_node, 0) + self._next_element = _nextElement if c_node is NULL: self._node = None else: @@ -1021,7 +1032,7 @@ current_node = self._node if current_node is None: raise StopIteration - c_node = _nextElement(current_node._c_node) + c_node = self._next_element(current_node._c_node) if c_node is NULL: self._node = None else: @@ -1501,6 +1512,16 @@ c_node = c_node.next return NULL +cdef xmlNode* _previousElement(xmlNode* c_node): + """Given a node, find the previous sibling that is an element. + """ + c_node = c_node.prev + while c_node is not NULL: + if _isElement(c_node): + return c_node + c_node = c_node.prev + return NULL + cdef void _removeNode(xmlNode* c_node): """Unlink and free a node and subnodes if possible. """ From scoder at codespeak.net Sat Apr 15 17:16:39 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sat Apr 15 17:16:40 2006 Subject: [Lxml-checkins] r25861 - lxml/trunk Message-ID: <20060415151639.5E484100BC@code0.codespeak.net> Author: scoder Date: Sat Apr 15 17:16:38 2006 New Revision: 25861 Modified: lxml/trunk/CHANGES.txt Log: updated CHANGES.txt Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sat Apr 15 17:16:38 2006 @@ -7,6 +7,8 @@ Features added -------------- +* Speedup for reversed() iteration over element children (Py2.4+ only) + * ElementTree compatible QName class * RelaxNG and XMLSchema now accept any Element, not only ElementTrees From scoder at codespeak.net Sat Apr 15 17:25:19 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sat Apr 15 17:25:20 2006 Subject: [Lxml-checkins] r25862 - in lxml/branch/lxml-0.9.x: .
src/lxml src/lxml/tests Message-ID: <20060415152519.4E288100BC@code0.codespeak.net> Author: scoder Date: Sat Apr 15 17:25:16 2006 New Revision: 25862 Modified: lxml/branch/lxml-0.9.x/CHANGES.txt lxml/branch/lxml-0.9.x/bench.py lxml/branch/lxml-0.9.x/src/lxml/etree.pyx lxml/branch/lxml-0.9.x/src/lxml/tests/test_elementtree.py Log: merged in updates for efficient reversed element child iteration from trunk Modified: lxml/branch/lxml-0.9.x/CHANGES.txt ============================================================================== --- lxml/branch/lxml-0.9.x/CHANGES.txt (original) +++ lxml/branch/lxml-0.9.x/CHANGES.txt Sat Apr 15 17:25:16 2006 @@ -7,6 +7,8 @@ Features added -------------- +* Speedup for reversed() iteration over element children (Py2.4+ only) + * ElementTree compatible QName class * RelaxNG and XMLSchema now accept any Element, not only ElementTrees Modified: lxml/branch/lxml-0.9.x/bench.py ============================================================================== --- lxml/branch/lxml-0.9.x/bench.py (original) +++ lxml/branch/lxml-0.9.x/bench.py Sat Apr 15 17:25:16 2006 @@ -240,6 +240,14 @@ ############################################################ class BenchMark(BenchMarkBase): + def bench_iter_children(self, root): + for child in root: + pass + + def bench_iter_children_reversed(self, root): + for child in reversed(root): + pass + def bench_append_from_document(self, root1, root2): # == "1,2 2,3 1,3 3,1 3,2 2,1" # trees 1 and 2, or 2 and 3, or ... 
for el in root2: Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Sat Apr 15 17:25:16 2006 @@ -627,6 +627,9 @@ def __iter__(self): return ElementChildIterator(self) + def __reversed__(self): + return ElementChildIterator(self, reversed=True) + def index(self, _Element x, start=None, stop=None): cdef int k cdef int l @@ -987,12 +990,20 @@ registerProxy(result, PROXY_ATTRIB) return result +ctypedef xmlNode* (*_node_to_node_function)(xmlNode*) + cdef class ElementChildIterator: # we keep Python references here to control GC cdef _NodeBase _node - def __init__(self, _NodeBase node): # Python ref! + cdef _node_to_node_function _next_element + def __init__(self, _NodeBase node, reversed=False): # Python ref! cdef xmlNode* c_node - c_node = _findChildForwards(node._c_node, 0) + if reversed: + c_node = _findChildBackwards(node._c_node, 0) + self._next_element = _previousElement + else: + c_node = _findChildForwards(node._c_node, 0) + self._next_element = _nextElement if c_node is NULL: self._node = None else: @@ -1006,7 +1017,7 @@ current_node = self._node if current_node is None: raise StopIteration - c_node = _nextElement(current_node._c_node) + c_node = self._next_element(current_node._c_node) if c_node is NULL: self._node = None else: @@ -1486,6 +1497,16 @@ c_node = c_node.next return NULL +cdef xmlNode* _previousElement(xmlNode* c_node): + """Given a node, find the previous sibling that is an element. + """ + c_node = c_node.prev + while c_node is not NULL: + if _isElement(c_node): + return c_node + c_node = c_node.prev + return NULL + cdef void _removeNode(xmlNode* c_node): """Unlink and free a node and subnodes if possible.
""" Modified: lxml/branch/lxml-0.9.x/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/lxml-0.9.x/src/lxml/tests/test_elementtree.py Sat Apr 15 17:25:16 2006 @@ -393,7 +393,7 @@ result.append(el.tag) self.assertEquals(['one', 'two', 'three'], result) - def test_iteration2(self): + def test_iteration_empty(self): XML = self.etree.XML root = XML('') @@ -402,7 +402,7 @@ result.append(el.tag) self.assertEquals([], result) - def test_iteration3(self): + def test_iteration_text_only(self): XML = self.etree.XML root = XML('Text') @@ -418,7 +418,22 @@ for elem in root: elem.tail = '' - def test_iteration4(self): + def test_iteration_reversed(self): + XML = self.etree.XML + + try: + reversed(()) + except NameError: + # before Python 2.4 + return + + root = XML('TwoHm') + result = [] + for el in reversed(root): + result.append(el.tag) + self.assertEquals(['three', 'two', 'one'], result) + + def test_iteration_subelement(self): XML = self.etree.XML root = XML('TwoHm') @@ -431,7 +446,7 @@ add = False self.assertEquals(['one', 'two', 'three', 'four'], result) - def test_iteration5(self): + def test_iteration_del_child(self): XML = self.etree.XML root = XML('TwoHm') @@ -441,7 +456,7 @@ del root[-1] self.assertEquals(['one', 'two'], result) - def test_iteration6(self): + def test_iteration_double(self): XML = self.etree.XML root = XML('') From scoder at codespeak.net Mon Apr 17 17:34:47 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Apr 17 17:34:53 2006 Subject: [Lxml-checkins] r25906 - lxml/branch/htmlparser/src/lxml Message-ID: <20060417153447.1ED33100B0@code0.codespeak.net> Author: scoder Date: Mon Apr 17 17:34:46 2006 New Revision: 25906 Modified: lxml/branch/htmlparser/src/lxml/parser.pxi Log: small refactoring to reduce code duplication between parsers Modified: 
lxml/branch/htmlparser/src/lxml/parser.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/parser.pxi (original) +++ lxml/branch/htmlparser/src/lxml/parser.pxi Mon Apr 17 17:34:46 2006 @@ -63,14 +63,17 @@ cdef class BaseParser: cdef _ErrorLog _error_log - def __init__(self): + cdef object _syntax_error_class + def __init__(self, syntax_error_class): self._error_log = _ErrorLog() + self._syntax_error_class = syntax_error_class property error_log: def __get__(self): return self._error_log.copy() - cdef xmlDoc* _handleResult(self, xmlParserCtxt* ctxt, xmlDoc* result): + cdef xmlDoc* _handleResult(self, xmlParserCtxt* ctxt, + xmlDoc* result) except NULL: if ctxt.wellFormed: __GLOBAL_PARSER_CONTEXT._initDocDict(result) elif result is not NULL: @@ -78,6 +81,8 @@ tree.xmlFreeDoc(result) result = NULL self._error_log.disconnect() + if result is NULL: + raise self._syntax_error_class return result @@ -113,7 +118,7 @@ no_network=False, ns_clean=False): cdef int parse_options self._file_parser_ctxt = NULL - BaseParser.__init__(self) + BaseParser.__init__(self, XMLSyntaxError) parse_options = _XML_DEFAULT_PARSE_OPTIONS if dtd_validation: @@ -158,10 +163,7 @@ __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) result = xmlparser.xmlCtxtReadDoc( pctxt, _cstr(text_utf), NULL, NULL, self._parse_options) - result = self._handleResult(pctxt, result) - if result is NULL: - raise XMLSyntaxError - return result + return self._handleResult(pctxt, result) cdef xmlDoc* _parseDocFromFile(self, char* filename) except NULL: cdef xmlDoc* result @@ -179,10 +181,7 @@ if pctxt.lastError.domain == xmlerror.XML_FROM_IO: self._error_log.disconnect() raise IOError, "Could not open file %s" % filename - result = self._handleResult(pctxt, result) - if result is NULL: - raise XMLSyntaxError - return result + return self._handleResult(pctxt, result) cdef XMLParser __DEFAULT_XML_PARSER @@ -231,7 +230,7 @@ cdef int parse_options 
self._memory_parser_ctxt = NULL self._file_parser_ctxt = NULL - BaseParser.__init__(self) + BaseParser.__init__(self, HTMLSyntaxError) parse_options = _HTML_DEFAULT_PARSE_OPTIONS if not recover: @@ -270,10 +269,7 @@ __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) result = htmlparser.htmlCtxtReadDoc( pctxt, c_text, NULL, NULL, self._parse_options) - result = self._handleResult(pctxt, result) - if result is NULL: - raise HTMLSyntaxError - return result + return self._handleResult(pctxt, result) cdef xmlDoc* _parseDocFromFile(self, char* filename) except NULL: cdef xmlDoc* result @@ -290,10 +286,7 @@ __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) result = htmlparser.htmlCtxtReadFile( pctxt, filename, NULL, self._parse_options) - result = self._handleResult(pctxt, result) - if result is NULL: - raise HTMLSyntaxError - return result + return self._handleResult(pctxt, result) cdef HTMLParser __DEFAULT_HTML_PARSER __DEFAULT_HTML_PARSER = HTMLParser() From scoder at codespeak.net Mon Apr 17 17:35:34 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Apr 17 17:35:36 2006 Subject: [Lxml-checkins] r25907 - lxml/branch/htmlparser/src/lxml/tests Message-ID: <20060417153534.05ADE100B0@code0.codespeak.net> Author: scoder Date: Mon Apr 17 17:35:33 2006 New Revision: 25907 Modified: lxml/branch/htmlparser/src/lxml/tests/test_elementtree.py Log: clean up Modified: lxml/branch/htmlparser/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/htmlparser/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/htmlparser/src/lxml/tests/test_elementtree.py Mon Apr 17 17:35:33 2006 @@ -9,11 +9,9 @@ """ import unittest, doctest - -from StringIO import StringIO import os, shutil, tempfile, copy -from common_imports import etree, ElementTree, HelperTestCase, fileInTestDir, canonicalize +from common_imports import StringIO, etree, ElementTree, HelperTestCase, fileInTestDir, canonicalize class 
ETreeTestCaseBase(unittest.TestCase): etree = None From scoder at codespeak.net Mon Apr 17 17:36:20 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Apr 17 17:36:22 2006 Subject: [Lxml-checkins] r25908 - lxml/trunk/src/lxml/tests Message-ID: <20060417153620.B2FCF100B0@code0.codespeak.net> Author: scoder Date: Mon Apr 17 17:36:19 2006 New Revision: 25908 Modified: lxml/trunk/src/lxml/tests/test_elementtree.py Log: merged in clean up from branch Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Mon Apr 17 17:36:19 2006 @@ -9,11 +9,9 @@ """ import unittest, doctest - -from StringIO import StringIO import os, shutil, tempfile, copy -from common_imports import etree, ElementTree, HelperTestCase, fileInTestDir, canonicalize +from common_imports import StringIO, etree, ElementTree, HelperTestCase, fileInTestDir, canonicalize class ETreeTestCaseBase(unittest.TestCase): etree = None From scoder at codespeak.net Mon Apr 17 17:52:33 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Apr 17 17:52:34 2006 Subject: [Lxml-checkins] r25909 - lxml/branch/htmlparser/src/lxml Message-ID: <20060417155233.0ECC3100B0@code0.codespeak.net> Author: scoder Date: Mon Apr 17 17:52:32 2006 New Revision: 25909 Modified: lxml/branch/htmlparser/src/lxml/parser.pxi Log: clean up parse option setup in HTMLParser.__init__ Modified: lxml/branch/htmlparser/src/lxml/parser.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/parser.pxi (original) +++ lxml/branch/htmlparser/src/lxml/parser.pxi Mon Apr 17 17:52:32 2006 @@ -211,9 +211,7 @@ cdef int _HTML_DEFAULT_PARSE_OPTIONS _HTML_DEFAULT_PARSE_OPTIONS = ( - htmlparser.HTML_PARSE_RECOVER | htmlparser.HTML_PARSE_NOWARNING | - htmlparser.HTML_PARSE_COMPACT | 
htmlparser.HTML_PARSE_NOERROR ) @@ -233,10 +231,10 @@ BaseParser.__init__(self, HTMLSyntaxError) parse_options = _HTML_DEFAULT_PARSE_OPTIONS - if not recover: - parse_options = parse_options & ~htmlparser.HTML_PARSE_RECOVER - if not compact_text: - parse_options = parse_options & ~htmlparser.HTML_PARSE_COMPACT + if recover: + parse_options = parse_options | htmlparser.HTML_PARSE_RECOVER + if compact_text: + parse_options = parse_options | htmlparser.HTML_PARSE_COMPACT if no_network: parse_options = parse_options | htmlparser.HTML_PARSE_NONET if remove_blank_text: From scoder at codespeak.net Mon Apr 17 17:55:12 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Apr 17 17:55:13 2006 Subject: [Lxml-checkins] r25910 - lxml/branch/htmlparser/src/lxml/tests Message-ID: <20060417155512.9C675100B0@code0.codespeak.net> Author: scoder Date: Mon Apr 17 17:55:11 2006 New Revision: 25910 Modified: lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py Log: new test case for HTMLSyntaxError, extended test case for default parser Modified: lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py (original) +++ lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py Mon Apr 17 17:55:11 2006 @@ -26,6 +26,13 @@ self.assertEqual(self.etree.tostring(element), self.html_str) + def test_module_parse_html_error(self): + parser = self.etree.HTMLParser(recover=False) + parse = self.etree.parse + f = StringIO("") + self.assertRaises(self.etree.HTMLSyntaxError, + parse, f, parser) + def test_module_HTML_broken(self): element = self.etree.HTML(self.broken_html_str) self.assertEqual(self.etree.tostring(element), @@ -54,6 +61,10 @@ self.assertEqual(self.etree.tostring(tree.getroot()), self.html_str) + self.etree.set_default_parser() + + self.assertRaises(self.etree.XMLSyntaxError, + self.etree.parse, StringIO(self.broken_html_str)) def
test_suite(): suite = unittest.TestSuite() From scoder at codespeak.net Mon Apr 17 18:14:59 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Apr 17 18:15:00 2006 Subject: [Lxml-checkins] r25911 - lxml/branch/htmlparser/src/lxml Message-ID: <20060417161459.75802100A4@code0.codespeak.net> Author: scoder Date: Mon Apr 17 18:14:58 2006 New Revision: 25911 Modified: lxml/branch/htmlparser/src/lxml/parser.pxi Log: doc note on avoiding default parser in threads Modified: lxml/branch/htmlparser/src/lxml/parser.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/parser.pxi (original) +++ lxml/branch/htmlparser/src/lxml/parser.pxi Mon Apr 17 18:14:58 2006 @@ -195,6 +195,10 @@ parser is supplied to the various parse functions of the lxml API. If this function is called without a parser (or if it is None), the default parser is reset to the original configuration. + + Note that the default parser is not thread-safe. Avoid the default parser + in multi-threaded environments. You can create a separate parser for each + thread explicitly or use a parser pool. """ global __DEFAULT_PARSER if parser is None: From scoder at codespeak.net Tue Apr 18 12:29:26 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Apr 18 12:29:27 2006 Subject: [Lxml-checkins] r25937 - lxml/trunk Message-ID: <20060418102926.DC82110088@code0.codespeak.net> Author: scoder Date: Tue Apr 18 12:29:25 2006 New Revision: 25937 Modified: lxml/trunk/INSTALL.txt Log: much shorter INSTALL.txt for Pyrex 0.9.4.1 Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Tue Apr 18 12:29:25 2006 @@ -14,9 +14,17 @@ * libxslt 1.1.12 (newer versions should work). It can be found here: http://xmlsoft.org/XSLT/downloads.html -See below for instructions how to get these for Windows. 
On MacOS-X 10.4, you -can use the installed system libraries and the binary egg distribution of -lxml. +For Windows, there is a `binary distribution`_ of libxml2 and libxslt. Note +that you need both libxml2 and libxslt, as well as iconv and zlib. You can +then install the `binary egg distribution`_ of lxml (see below). + +.. _`binary distribution`: http://www.zlatkovic.com/libxml.en.html +.. _`binary egg distribution`: http://cheeseshop.python.org/pypi/lxml + +On MacOS-X 10.4, you can use the installed system libraries and the binary egg +distribution of lxml. Note that the libxslt version on this system is older +than the required version above. While there were not any bug reports so far, +you may still encounter certain differences in behaviour in rare cases. If you want to build lxml from SVN, you also need Pyrex_. If you are using a released version of lxml, it should come with the generated C file in the @@ -24,8 +32,9 @@ .. _Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ -See also the notes on building with gcc 4.0 below if you are having -trouble with Pyrex. +Note that Pyrex up to version 0.9.4 has known problems when compiling lxml +with gcc 4.0 or Python 2.4. Do not use it. If you want to build lxml from +non-release sources, please install Pyrex version 0.9.4.1 or later. If you have read these instructions and still cannot manage to install lxml, you can check the archives of the `mailing list`_ to see if your problem is @@ -44,8 +53,8 @@ .. _easy_install: http://peak.telecommunity.com/DevCenter/EasyInstall This has been reported to work on Linux, MacOS-X 10.4 and Windows, as long as -libxml2 and libxslt are installed. To compile and install lxml without -easy_install, download the source tar-ball, unpack it and type:: +libxml2 and libxslt are properly installed. To compile and install lxml +without easy_install, download the source tar-ball, unpack it and type:: python setup.py install @@ -62,78 +71,6 @@ import lxml.etree and play with it. 
-Installation on Windows ------------------------ - -As always, installation on Windows is different. If you do not want to go -through the hassle of compiling everything by hand, you can use the binary -distribution of libxml2 and libxslt. It is available here: - -http://www.zlatkovic.com/libxml.en.html - -Note that you need both libxml2 and libxslt, as well as iconv and zlib. You -can then download a binary version of lxml 0.9 for Python 2.4 from the -following address: - -http://carcass.dhs.org/lxml-0.9.win32-py2.4.exe - -or the egg distribution from - -http://cheeseshop.python.org/pypi/lxml - -The egg can directly be installed using easy_install_. Both builds were kindly -contributed by Steve Howe. If they do not work for you, feel free to report to -the mailing list. - - -Building lxml with gcc 4.0 or Python 2.4 ----------------------------------------- - -Pyrex 0.9.3.1 generates C code that gcc 4.0 does not accept. Pending an -official release of a version of Pyrex that does work with gcc 4.0, the lxml -project currently provides an updated version of Pyrex in its Subversion -repository: - -http://codespeak.net/svn/lxml/pyrex/ - -To install it, you can just download one of the following files: - -http://codespeak.net/svn/lxml/pyrex/dist/Pyrex-0.9.3.1.tar.gz - -http://codespeak.net/svn/lxml/pyrex/dist/Pyrex-0.9.3.1-1.src.rpm - -It is based on Pyrex 0.9.3.1 and contains a number of patches that make lxml -compile and appear to work with gcc 4.0. If you use this version, you can -simply skip the rest of the section. In case you want to apply them yourself, -the first one is: - -http://codespeak.net/lxml/Pyrex-0.9.3-gcc4.patch - -Some Linux distributions such as Fedora Core 4 and Ubuntu Linux may -already have most of this applied. 
In that case, this smaller patch -may be applicable to make lxml compile properly: - -http://codespeak.net/lxml/Pyrex-0.9.3-gcc4-small.patch - -It may however actually be that at the time you read this, this extra patch -has been applied by the distributions as well. You may still encounter the -following problem when building the extension on Python 2.4:: - - TypeError: swig_sources() takes exactly 2 arguments (3 given) - -To fix this, look for the following line in Pyrex/Distutils/build_ext.py -(around line 35):: - - def swig_sources (self, sources): - -and change it to:: - - def swig_sources (self, sources, *otherargs): - -The above install files have these changes applied. It should do no harm if -you install them instead of the official Pyrex version. - - Running the tests and reporting errors -------------------------------------- @@ -146,7 +83,7 @@ above), as it searches the "src" directory. You can use the following one-step command to trigger an in-place build and test it:: - make test + make clean test To run the ElementTree and cElementTree compatibility tests, make sure you have lxml on your PYTHONPATH first, then run:: From scoder at codespeak.net Tue Apr 18 16:49:39 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Apr 18 16:49:40 2006 Subject: [Lxml-checkins] r25952 - in lxml/branch/htmlparser/src/lxml: . tests Message-ID: <20060418144939.0C3D610088@code0.codespeak.net> Author: scoder Date: Tue Apr 18 16:49:38 2006 New Revision: 25952 Modified: lxml/branch/htmlparser/src/lxml/parser.pxi lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py Log: raise IOError on IO errors in HTMLParser (file not found, etc.) 
Modified: lxml/branch/htmlparser/src/lxml/parser.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/parser.pxi (original) +++ lxml/branch/htmlparser/src/lxml/parser.pxi Tue Apr 18 16:49:38 2006 @@ -191,10 +191,10 @@ __DEFAULT_PARSER = __DEFAULT_XML_PARSER def set_default_parser(parser=None): - """Set a default XMLParser. This parser is used globally whenever no - parser is supplied to the various parse functions of the lxml API. If - this function is called without a parser (or if it is None), the default - parser is reset to the original configuration. + """Set a default parser. This parser is used globally whenever no parser + is supplied to the various parse functions of the lxml API. If this + function is called without a parser (or if it is None), the default parser + is reset to the original configuration. Note that the default parser is not thread-safe. Avoid the default parser in multi-threaded environments. You can create a separate parser for each @@ -283,6 +283,9 @@ pctxt = htmlparser.htmlCreateFileParserCtxt(filename, NULL) if pctxt is NULL: self._error_log.disconnect() + warnings = self._error_log.filter_from_warnings() + if warnings and warnings[-1].domain == xmlerror.XML_FROM_IO: + raise IOError, "Could not open file %s" % filename raise ParserError, "Failed to create parser context" self._file_parser_ctxt = pctxt __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) Modified: lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py (original) +++ lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py Tue Apr 18 16:49:38 2006 @@ -51,6 +51,13 @@ tree = self.etree.parse(f, parser) self.assertEqual(self.etree.tostring(tree.getroot()), self.html_str) + def test_html_file_error(self): + parser = self.etree.HTMLParser() + parse = self.etree.parse + 
self.assertRaises(IOError, + parse, "__some_hopefully_nonexisting_file__.html", + parser) + def test_default_parser(self): self.assertRaises(self.etree.XMLSyntaxError, self.etree.parse, StringIO(self.broken_html_str)) From scoder at codespeak.net Wed Apr 19 10:42:40 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Apr 19 10:42:46 2006 Subject: [Lxml-checkins] r25973 - lxml/trunk/src/lxml Message-ID: <20060419084240.D0A9C100B5@code0.codespeak.net> Author: scoder Date: Wed Apr 19 10:42:39 2006 New Revision: 25973 Modified: lxml/trunk/src/lxml/etree.pyx Log: reimplemented _Element.makeelement by copying code from Element(), avoids instantiating an empty throw-away document for the new element Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Apr 19 10:42:39 2006 @@ -755,8 +755,19 @@ else: return ElementTagFilter(iterator, tag) - def makeelement(self, tag, attrib): - return Element(tag, attrib) + def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): + "Creates a new element associated with the same document." 
+ # a little code duplication, but less overhead through doc reuse + cdef xmlNode* c_node + cdef xmlDoc* c_doc + cdef _Document doc + ns_utf, name_utf = _getNsTag(_tag) + doc = self._doc + c_doc = doc._c_doc + c_node = _createElement(c_doc, name_utf, attrib, _extra) + # add namespaces to node if necessary + doc._setNodeNamespaces(c_node, ns_utf, nsmap) + return _elementFactory(doc, c_node) def find(self, path): return _elementpath.find(self, path) From scoder at codespeak.net Wed Apr 19 11:50:32 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Apr 19 11:50:33 2006 Subject: [Lxml-checkins] r25974 - lxml/trunk Message-ID: <20060419095032.B998C100B5@code0.codespeak.net> Author: scoder Date: Wed Apr 19 11:50:31 2006 New Revision: 25974 Modified: lxml/trunk/CHANGES.txt lxml/trunk/bench.py Log: new benchmark for makeelement, shows that it's about 50% faster now Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Apr 19 11:50:31 2006 @@ -7,6 +7,9 @@ Features added -------------- +* Speedup for Element.makeelement(): the new element now reuses the original + libxml2 document instead of creating a new empty one + * Speedup for reversed() iteration over element children (Py2.4+ only) * ElementTree compatible QName class Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Wed Apr 19 11:50:31 2006 @@ -299,6 +299,12 @@ el = Element('{test}test') child.append(el) + def bench_makeelement(self, root): + Element = self.etree.Element + empty_attrib = {} + for child in root: + child.makeelement('{test}test', empty_attrib) + def bench_replace_children(self, root): Element = self.etree.Element for child in root: From scoder at codespeak.net Wed Apr 19 11:57:18 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed 
Apr 19 11:57:19 2006 Subject: [Lxml-checkins] r25975 - in lxml/branch/lxml-0.9.x: . src/lxml src/lxml/tests Message-ID: <20060419095718.4E8B8100B5@code0.codespeak.net> Author: scoder Date: Wed Apr 19 11:57:15 2006 New Revision: 25975 Modified: lxml/branch/lxml-0.9.x/CHANGES.txt lxml/branch/lxml-0.9.x/bench.py lxml/branch/lxml-0.9.x/src/lxml/etree.pyx lxml/branch/lxml-0.9.x/src/lxml/tests/test_elementtree.py Log: merged in makeelement() updates from trunk Modified: lxml/branch/lxml-0.9.x/CHANGES.txt ============================================================================== --- lxml/branch/lxml-0.9.x/CHANGES.txt (original) +++ lxml/branch/lxml-0.9.x/CHANGES.txt Wed Apr 19 11:57:15 2006 @@ -7,6 +7,9 @@ Features added -------------- +* Speedup for Element.makeelement(): the new element now reuses the original + libxml2 document instead of creating a new empty one + * Speedup for reversed() iteration over element children (Py2.4+ only) * ElementTree compatible QName class Modified: lxml/branch/lxml-0.9.x/bench.py ============================================================================== --- lxml/branch/lxml-0.9.x/bench.py (original) +++ lxml/branch/lxml-0.9.x/bench.py Wed Apr 19 11:57:15 2006 @@ -299,6 +299,12 @@ el = Element('{test}test') child.append(el) + def bench_makeelement(self, root): + Element = self.etree.Element + empty_attrib = {} + for child in root: + child.makeelement('{test}test', empty_attrib) + def bench_replace_children(self, root): Element = self.etree.Element for child in root: Modified: lxml/branch/lxml-0.9.x/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-0.9.x/src/lxml/etree.pyx Wed Apr 19 11:57:15 2006 @@ -752,8 +752,19 @@ else: return ElementTagFilter(iterator, tag) - def makeelement(self, tag, attrib): - return Element(tag, attrib) + def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): + "Creates a 
new element associated with the same document." + # a little code duplication, but less overhead through doc reuse + cdef xmlNode* c_node + cdef xmlDoc* c_doc + cdef _Document doc + ns_utf, name_utf = _getNsTag(_tag) + doc = self._doc + c_doc = doc._c_doc + c_node = _createElement(c_doc, name_utf, attrib, _extra) + # add namespaces to node if necessary + doc._setNodeNamespaces(c_node, ns_utf, nsmap) + return _elementFactory(doc, c_node) def find(self, path): return _elementpath.find(self, path) Modified: lxml/branch/lxml-0.9.x/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/lxml-0.9.x/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/lxml-0.9.x/src/lxml/tests/test_elementtree.py Wed Apr 19 11:57:15 2006 @@ -9,11 +9,9 @@ """ import unittest, doctest - -from StringIO import StringIO import os, shutil, tempfile, copy -from common_imports import etree, ElementTree, HelperTestCase, fileInTestDir, canonicalize +from common_imports import StringIO, etree, ElementTree, HelperTestCase, fileInTestDir, canonicalize class ETreeTestCaseBase(unittest.TestCase): etree = None From scoder at codespeak.net Wed Apr 19 13:47:24 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Apr 19 13:47:27 2006 Subject: [Lxml-checkins] r25981 - lxml/trunk/src/lxml Message-ID: <20060419114724.8CA151009A@code0.codespeak.net> Author: scoder Date: Wed Apr 19 13:47:21 2006 New Revision: 25981 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/xslt.pxi Log: let Pyrex handle None arguments in API functions: raises TypeError as before, less code, more readable Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Apr 19 13:47:21 2006 @@ -411,13 +411,13 @@ c_node = _deleteSlice(c_node, start, stop) # if the insertion point is at the end, 
append there if c_node is NULL: + append = self.append for node in value: - self.append(node) + append(node) return # if the next element is in the list, insert before it - for node in value: - _raiseIfNone(node) - mynode = node + for mynode in value: + _raiseIfNone(mynode) foreign = self._doc is not mynode._doc # store possible text tail c_next = mynode._c_node.next @@ -444,11 +444,10 @@ def set(self, key, value): self.attrib[key] = value - def append(self, _Element element): + def append(self, _Element element not None): cdef xmlNode* c_next cdef xmlNode* c_node cdef int foreign - _raiseIfNone(element) foreign = self._doc is not element._doc c_node = element._c_node # store possible text node @@ -487,11 +486,10 @@ _removeNode(c_node) c_node = c_node_next - def insert(self, index, _Element element): + def insert(self, index, _Element element not None): cdef xmlNode* c_node cdef xmlNode* c_next cdef int foreign - _raiseIfNone(element) c_node = _findChild(self._c_node, index) if c_node is NULL: self.append(element) @@ -502,9 +500,8 @@ _moveTail(c_next, element._c_node) changeDocumentBelow(element, self._doc, foreign) - def remove(self, _Element element): + def remove(self, _Element element not None): cdef xmlNode* c_node - _raiseIfNone(element) c_node = element._c_node if c_node.parent is not self._c_node: raise ValueError, "Element is not a child of this node." @@ -631,14 +628,13 @@ def __reversed__(self): return ElementChildIterator(self, reversed=True) - def index(self, _Element x, start=None, stop=None): + def index(self, _Element x not None, start=None, stop=None): cdef int k cdef int l cdef int c_stop cdef int c_start cdef xmlNode* c_child cdef xmlNode* c_start_node - _raiseIfNone(x) c_child = x._c_node if c_child.parent is not self._c_node: raise ValueError, "Element is not a child of this node." 
@@ -1058,9 +1054,8 @@ # keep next node to return and a stack of position state in the tree cdef object _stack cdef _NodeBase _next_node - def __init__(self, _NodeBase node): + def __init__(self, _NodeBase node not None): cdef xmlNode* c_node - _raiseIfNone(node) self._next_node = node self._stack = [] self._findAndPushNextNode(node) @@ -1176,10 +1171,10 @@ tree.xmlAddChild(doc._c_doc, c_node) return _commentFactory(doc, c_node) -def SubElement(_Element _parent, _tag, attrib=None, nsmap=None, **_extra): +def SubElement(_Element _parent not None, _tag, + attrib=None, nsmap=None, **_extra): cdef xmlNode* c_node cdef _Document doc - _raiseIfNone(_parent) ns_utf, name_utf = _getNsTag(_tag) doc = _parent._doc c_node = _createElement(doc._c_doc, name_utf, attrib, _extra) Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Wed Apr 19 13:47:21 2006 @@ -493,8 +493,10 @@ if isinstance(etree, _Document): doc = <_Document>etree # for internal use only! 
- else: + elif isinstance(etree, _ElementTree): doc = (<_ElementTree>etree)._doc + else: + raise TypeError, "XPathDocumentEvaluator can only work on ElementTree objects" xpathCtxt = xpath.xmlXPathNewContext(doc._c_doc) if xpathCtxt is NULL: @@ -555,7 +557,7 @@ """ cdef _Element _element - def __init__(self, _Element element, namespaces=None, extensions=None): + def __init__(self, _Element element not None, namespaces=None, extensions=None): XPathDocumentEvaluator.__init__( self, element._doc, namespaces, extensions) self._element = element From scoder at codespeak.net Wed Apr 19 14:50:37 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Apr 19 14:50:39 2006 Subject: [Lxml-checkins] r25988 - lxml/trunk/src/lxml Message-ID: <20060419125037.ABDCD100B9@code0.codespeak.net> Author: scoder Date: Wed Apr 19 14:50:36 2006 New Revision: 25988 Modified: lxml/trunk/src/lxml/etree.pyx Log: use xmlDocCopy for Element.__deepcopy__ instead of xmlDocCopyNode to avoid creating a generic empty document and to copy namespaces etc. 
- slower, but safer Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Apr 19 14:50:36 2006 @@ -435,11 +435,14 @@ def __copy__(self): cdef xmlNode* c_node cdef xmlDoc* c_doc - c_doc = theParser.newDoc() + cdef xmlDoc* fake_c_doc + cdef _Document doc + doc = self._doc + fake_c_doc = _fakeRootDoc(doc._c_doc, self._c_node) + c_doc = tree.xmlCopyDoc(fake_c_doc, 1) # recursive copy + _destroyFakeDoc(doc._c_doc, fake_c_doc) doc = _documentFactory(c_doc) - c_node = tree.xmlDocCopyNode(self._c_node, c_doc, 1) - tree.xmlDocSetRootElement(c_doc, c_node) - return _elementFactory(doc, c_node) + return doc.getroot() def set(self, key, value): self.attrib[key] = value From scoder at codespeak.net Wed Apr 19 17:03:44 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Apr 19 17:03:45 2006 Subject: [Lxml-checkins] r25991 - lxml/trunk/src/lxml/tests Message-ID: <20060419150344.CAA9A100B5@code0.codespeak.net> Author: scoder Date: Wed Apr 19 17:03:43 2006 New Revision: 25991 Modified: lxml/trunk/src/lxml/tests/test_elementtree.py Log: fix the XMLID test case by adding DTD ID information Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Wed Apr 19 17:03:43 2006 @@ -336,6 +336,13 @@ XMLID = self.etree.XMLID XML = self.etree.XML xml_text = ''' + + + + + + ]>

...

...

From scoder at codespeak.net Wed Apr 19 17:24:05 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Apr 19 17:24:08 2006 Subject: [Lxml-checkins] r25992 - lxml/trunk/src/lxml Message-ID: <20060419152405.ADD26100B5@code0.codespeak.net> Author: scoder Date: Wed Apr 19 17:24:02 2006 New Revision: 25992 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tree.pxd Log: rewrite of XMLID to follow the xml:id spec as implemented by libxml2 => only ElementTree compatible if DTD is provided Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Apr 19 17:24:02 2006 @@ -1212,15 +1212,30 @@ return etree def XML(text): + "Parse the XML text and return the root node." return _parseMemoryDocument(text, None).getroot() fromstring = XML def XMLID(text): + """Parse the text and return a tuple (root node, ID dictionary). The root + node is the same as returned by the XML() function. The dictionary + contains string-element pairs. The dictionary keys are the values of ID + attributes as specified by the XML DTD. The elements referenced by the ID + are stored as dictionary values. 
+ """ + cdef _NodeBase root root = XML(text) + assert root is not None dic = {} - for elem in root.xpath('//*[string(@id)]'): - python.PyDict_SetItem(dic, elem.get('id'), elem) + if root._doc._c_doc.ids is not NULL: + context = (dic, root._doc) + tree.xmlHashScan(root._doc._c_doc.ids, + _collectIdHashItems, context) + elif 0: + # the ElementTree compatible implementation + for elem in root.xpath('//*[string(@id)]'): + python.PyDict_SetItem(dic, elem.get('id'), elem) return (root, dic) cdef class QName: @@ -1394,6 +1409,15 @@ c_attrib_node.ns.href) return funicode(value) +cdef void _collectIdHashItems(void* payload, void* context, char* name): + # collect elements from ID attribute hash table (used by XMLID) + cdef tree.xmlID* c_id + c_id = payload + if c_id is NULL or c_id.attr is NULL: + return + dic, doc = context + element = _elementFactory(doc, c_id.attr.parent) + python.PyDict_SetItemString(dic, name, element) cdef _dumpToFile(f, xmlDoc* c_doc, xmlNode* c_node): cdef python.PyObject* o Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Wed Apr 19 17:24:02 2006 @@ -8,7 +8,12 @@ cdef extern from "libxml/encoding.h": ctypedef struct xmlCharEncodingHandler cdef xmlCharEncodingHandler* xmlFindCharEncodingHandler(char* name) - + +cdef extern from "libxml/hash.h": + ctypedef struct xmlHashTable + ctypedef void xmlHashScanner(void* payload, void* data, char* name) + void xmlHashScan(xmlHashTable* table, xmlHashScanner f, void* data) + cdef extern from "libxml/tree.h": # for some reason need to define this in this section; @@ -69,6 +74,7 @@ xmlNode* prev xmlDoc* doc xmlDict* dict + xmlHashTable* ids char* URL ctypedef struct xmlAttr: @@ -93,6 +99,11 @@ xmlNode* prev xmlDoc* doc + ctypedef struct xmlID: + char* value + xmlAttr* attr + xmlDoc* doc + ctypedef struct xmlBuffer ctypedef struct xmlOutputBuffer: From scoder at 
codespeak.net Wed Apr 19 21:29:51 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Apr 19 21:29:52 2006 Subject: [Lxml-checkins] r26003 - lxml/trunk/src/lxml Message-ID: <20060419192951.53B0910090@code0.codespeak.net> Author: scoder Date: Wed Apr 19 21:29:50 2006 New Revision: 26003 Modified: lxml/trunk/src/lxml/etree.h lxml/trunk/src/lxml/python.pxd Log: make iter() a C macro Modified: lxml/trunk/src/lxml/etree.h ============================================================================== --- lxml/trunk/src/lxml/etree.h (original) +++ lxml/trunk/src/lxml/etree.h Wed Apr 19 21:29:50 2006 @@ -6,6 +6,7 @@ #define hasattr(o,a) PyObject_HasAttr(o,a) #define callable(o) PyCallable_Check(o) #define str(o) PyObject_Str(o) +#define iter(o) PyObject_GetIter(o) #define _cstr(s) PyString_AS_STRING(s) #define _isElement(c_node) \ ((c_node)->type == XML_ELEMENT_NODE || \ Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Wed Apr 19 21:29:50 2006 @@ -40,4 +40,5 @@ cdef int hasattr(object obj, object attr) cdef int callable(object obj) cdef object str(object obj) + cdef object iter(object obj) cdef char* _cstr(object s) From scoder at codespeak.net Wed Apr 19 22:25:37 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Apr 19 22:25:38 2006 Subject: [Lxml-checkins] r26006 - in lxml/trunk: . 
src/lxml src/lxml/tests Message-ID: <20060419202537.2B7A9100BA@code0.codespeak.net> Author: scoder Date: Wed Apr 19 22:25:34 2006 New Revision: 26006 Added: lxml/trunk/src/lxml/xmlid.pxi Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_etree.py lxml/trunk/src/lxml/tree.pxd lxml/trunk/src/lxml/xmlerror.pxi Log: new include file xmlid.pxi, reverted XMLID() to old ElementTree compatible implementation, new XMLDTDID function that returns a tuple (root node, ID dict) with a custom dictionary representation backed by a libxml2 hashtable Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Apr 19 22:25:34 2006 @@ -7,6 +7,9 @@ Features added -------------- +* XMLDTDID function parses XML into tuple (root node, ID dict) based on xml:id + implementation of libxml2 (as opposed to ET compatible XMLID) + * Speedup for Element.makeelement(): the new element now reuses the original libxml2 document instead of creating a new empty one @@ -68,7 +71,7 @@ * XPath class for compiled XPath expressions -* XMLID module level function +* XMLID module level function (ElementTree compatible) * XMLParser API for customized libxml2 parser configuration Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Apr 19 22:25:34 2006 @@ -1,6 +1,6 @@ cimport tree, python from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement -from python cimport isinstance, issubclass, hasattr, callable, str, _cstr +from python cimport isinstance, issubclass, hasattr, callable, iter, str, _cstr cimport xpath cimport xslt cimport xmlerror @@ -1217,27 +1217,6 @@ fromstring = XML -def XMLID(text): - """Parse the text and 
return a tuple (root node, ID dictionary). The root - node is the same as returned by the XML() function. The dictionary - contains string-element pairs. The dictionary keys are the values of ID - attributes as specified by the XML DTD. The elements referenced by the ID - are stored as dictionary values. - """ - cdef _NodeBase root - root = XML(text) - assert root is not None - dic = {} - if root._doc._c_doc.ids is not NULL: - context = (dic, root._doc) - tree.xmlHashScan(root._doc._c_doc.ids, - _collectIdHashItems, context) - elif 0: - # the ElementTree compatible implementation - for elem in root.xpath('//*[string(@id)]'): - python.PyDict_SetItem(dic, elem.get('id'), elem) - return (root, dic) - cdef class QName: cdef readonly object text def __init__(self, text_or_uri, tag=None): @@ -1300,6 +1279,7 @@ # include submodules include "xmlerror.pxi" # error and log handling +include "xmlid.pxi" # XMLID and IDDict include "nsclasses.pxi" # Namespace implementation and registry include "xslt.pxi" # XPath and XSLT include "relaxng.pxi" # RelaxNG @@ -1409,16 +1389,6 @@ c_attrib_node.ns.href) return funicode(value) -cdef void _collectIdHashItems(void* payload, void* context, char* name): - # collect elements from ID attribute hash table (used by XMLID) - cdef tree.xmlID* c_id - c_id = payload - if c_id is NULL or c_id.attr is NULL: - return - dic, doc = context - element = _elementFactory(doc, c_id.attr.parent) - python.PyDict_SetItemString(dic, name, element) - cdef _dumpToFile(f, xmlDoc* c_doc, xmlNode* c_node): cdef python.PyObject* o cdef tree.xmlOutputBuffer* c_buffer Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Wed Apr 19 22:25:34 2006 @@ -27,7 +27,8 @@ cdef int PyDict_DelItem(object d, object key) cdef int PyDict_Clear(object d) cdef object PyList_AsTuple(object o) - cdef object PyObject_GetIter(object o) + cdef 
object PySequence_List(object o) + cdef object PySequence_Tuple(object o) cdef int PyNumber_Check(object instance) cdef int PyBool_Check(object instance) Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Wed Apr 19 22:25:34 2006 @@ -336,13 +336,6 @@ XMLID = self.etree.XMLID XML = self.etree.XML xml_text = ''' - - - - - - ]>

...

...

Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Wed Apr 19 22:25:34 2006 @@ -140,6 +140,70 @@ b, d.getparent()) + def test_XMLDTDID(self): + XMLDTDID = etree.XMLDTDID + XML = etree.XML + xml_text = ''' + + + + + + ]> + +

...

+

...

+

Regular paragraph.

+

...

+
+ ''' + + root, dic = XMLDTDID(xml_text) + root2 = XML(xml_text) + self.assertEquals(self._writeElement(root), + self._writeElement(root2)) + expected = { + "chapter1" : root[0], + "warn1" : root[3] + } + + self.assertEquals(dic, expected) + self.assertEquals(sorted(dic.items()), + sorted(expected.items())) + self.assertEquals(sorted(dic.iteritems()), + sorted(expected.iteritems())) + self.assertEquals(sorted(dic.keys()), + sorted(expected.keys())) + self.assert_("chapter1" in dic) + self.assert_("warn1" in dic) + + def test_XMLDTDID_empty(self): + XMLDTDID = etree.XMLDTDID + XML = etree.XML + xml_text = ''' + +

...

+

...

+

Regular paragraph.

+

...

+
+ ''' + + root, dic = XMLDTDID(xml_text) + root2 = XML(xml_text) + self.assertEquals(self._writeElement(root), + self._writeElement(root2)) + expected = {} + + self.assertEquals(dic, expected) + self.assertEquals(sorted(dic.items()), + sorted(expected.items())) + self.assertEquals(sorted(dic.iteritems()), + sorted(expected.iteritems())) + self.assertEquals(sorted(dic.keys()), + sorted(expected.keys())) + def test_namespaces(self): etree = self.etree @@ -225,7 +289,7 @@ data = f.getvalue() return canonicalize(data) - + class ETreeXIncludeTestCase(HelperTestCase): def test_xinclude(self): tree = etree.parse(fileInTestDir('test_xinclude.xml')) Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Wed Apr 19 22:25:34 2006 @@ -13,6 +13,7 @@ ctypedef struct xmlHashTable ctypedef void xmlHashScanner(void* payload, void* data, char* name) void xmlHashScan(xmlHashTable* table, xmlHashScanner f, void* data) + void* xmlHashLookup(xmlHashTable* table, char* name) cdef extern from "libxml/tree.h": Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Wed Apr 19 22:25:34 2006 @@ -67,7 +67,7 @@ return _BaseErrorLog(self._entries) def __iter__(self): - return python.PyObject_GetIter(self._entries) + return iter(self._entries) def __repr__(self): return '\n'.join(map(repr, self._entries)) @@ -142,7 +142,7 @@ return _BaseErrorLog(self._entries[:]) def __iter__(self): - return python.PyObject_GetIter(self._entries[:]) + return iter(self._entries[:]) cdef void connect(self): del self._entries[:] Added: lxml/trunk/src/lxml/xmlid.pxi ============================================================================== --- (empty file) +++ lxml/trunk/src/lxml/xmlid.pxi Wed Apr 19 22:25:34 2006 @@ -0,0 
+1,140 @@ +from UserDict import DictMixin + +def XMLID(text): + """Parse the text and return a tuple (root node, ID dictionary). The root + node is the same as returned by the XML() function. The dictionary + contains string-element pairs. The dictionary keys are the values of 'id' + attributes. The elements referenced by the ID are stored as dictionary + values. + """ + root = XML(text) + # ElementTree compatible implementation: look for 'id' attributes + dic = {} + for elem in root.xpath('//*[string(@id)]'): + python.PyDict_SetItem(dic, elem.get('id'), elem) + return (root, dic) + +def XMLDTDID(text): + """Parse the text and return a tuple (root node, ID dictionary). The root + node is the same as returned by the XML() function. The dictionary + contains string-element pairs. The dictionary keys are the values of ID + attributes as defined by the DTD. The elements referenced by the ID are + stored as dictionary values. + """ + cdef _NodeBase root + root = XML(text) + # xml:id spec compatible implementation: use DTD ID attributes from libxml2 + if root._doc._c_doc.ids is NULL: + return (root, {}) + else: + return (root, _IDDict(root)) + +class _IDDict(DictMixin): + """A dictionary class that mapps ID attributes to elements. + + The dictionary must be instantiated with the root element of a parsed XML + document, otherwise the behaviour is undefined. Elements and XML trees + that were created or modified through the API are not supported. + """ + def __init__(self, etree): + cdef _Document doc + doc = _documentOrRaise(etree) + if doc._c_doc.ids is NULL: + raise ValueError, "No ID dictionary available." 
+ self.__doc = doc + self.__keys = None + self.__items = None + + def copy(self): + return IDDict(self._doc) + + def __getitem__(self, id_name): + cdef tree.xmlHashTable* c_ids + cdef tree.xmlID* c_id + cdef xmlAttr* c_attr + cdef _Document doc + doc = self.__doc + c_ids = doc._c_doc.ids + id_utf = _utf8(id_name) + c_id = tree.xmlHashLookup(c_ids, _cstr(id_utf)) + if c_id is NULL: + raise KeyError, "Key not found." + c_attr = c_id.attr + if c_attr is NULL or c_attr.parent is NULL: + raise KeyError, "ID attribute not found." + return _elementFactory(doc, c_attr.parent) + + def __contains__(self, id_name): + cdef tree.xmlID* c_id + cdef _Document doc + doc = self.__doc + id_utf = _utf8(id_name) + c_id = tree.xmlHashLookup(doc._c_doc.ids, _cstr(id_utf)) + return c_id is not NULL + + def keys(self): + keys = self.__keys + if keys is not None: + return python.PySequence_List(keys) + keys = self.__build_keys() + self.__keys = python.PySequence_Tuple(keys) + return keys + + def __build_keys(self): + cdef _Document doc + keys = [] + doc = self.__doc + tree.xmlHashScan(doc._c_doc.ids, + _collectIdHashKeys, keys) + return keys + + def items(self): + items = self.__items + if items is not None: + return python.PySequence_List(items) + items = self.__build_items() + self.__items = python.PySequence_Tuple(items) + return items + + def iteritems(self): + items = self.__items + if items is None: + items = self.items() + return iter(items) + + def __build_items(self): + cdef _Document doc + items = [] + doc = self.__doc + context = (items, doc) + tree.xmlHashScan(doc._c_doc.ids, + _collectIdHashItemList, context) + return items + + +cdef void _collectIdHashItemDict(void* payload, void* context, char* name): + # collect elements from ID attribute hash table + cdef tree.xmlID* c_id + c_id = payload + if c_id is NULL or c_id.attr is NULL or c_id.attr.parent is NULL: + return + dic, doc = context + element = _elementFactory(doc, c_id.attr.parent) + python.PyDict_SetItem(dic, 
funicode(name), element) + +cdef void _collectIdHashItemList(void* payload, void* context, char* name): + # collect elements from ID attribute hash table + cdef tree.xmlID* c_id + c_id = payload + if c_id is NULL or c_id.attr is NULL or c_id.attr.parent is NULL: + return + lst, doc = context + element = _elementFactory(doc, c_id.attr.parent) + python.PyList_Append(lst, (funicode(name), element)) + +cdef void _collectIdHashKeys(void* payload, void* collect_list, char* name): + cdef tree.xmlID* c_id + c_id = payload + if c_id is NULL or c_id.attr is NULL or c_id.attr.parent is NULL: + return + python.PyList_Append(collect_list, funicode(name)) From scoder at codespeak.net Thu Apr 20 08:08:35 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 08:08:38 2006 Subject: [Lxml-checkins] r26013 - lxml/branch/htmlparser/src/lxml Message-ID: <20060420060835.31230100C6@code0.codespeak.net> Author: scoder Date: Thu Apr 20 08:08:31 2006 New Revision: 26013 Modified: lxml/branch/htmlparser/src/lxml/htmlparser.pxd lxml/branch/htmlparser/src/lxml/parser.pxi lxml/branch/htmlparser/src/lxml/xmlparser.pxd Log: replaced HTML_PARSE_RECOVER by XML_PARSE_RECOVER to make it compile with libxml2 2.6.16, removed compact_text (HTML_PARSE_COMPACT) option completely for the same reason Modified: lxml/branch/htmlparser/src/lxml/htmlparser.pxd ============================================================================== --- lxml/branch/htmlparser/src/lxml/htmlparser.pxd (original) +++ lxml/branch/htmlparser/src/lxml/htmlparser.pxd Thu Apr 20 08:08:31 2006 @@ -4,13 +4,14 @@ cdef extern from "libxml/HTMLparser.h": ctypedef enum htmlParserOption: - HTML_PARSE_RECOVER # Relaxed parsing HTML_PARSE_NOERROR # suppress error reports HTML_PARSE_NOWARNING # suppress warning reports HTML_PARSE_PEDANTIC # pedantic error reporting HTML_PARSE_NOBLANKS # remove blank nodes HTML_PARSE_NONET # Forbid network access - HTML_PARSE_COMPACT # compact small text nodes +# libxml2 2.6.21+ only: +# 
HTML_PARSE_RECOVER # Relaxed parsing +# HTML_PARSE_COMPACT # compact small text nodes xmlParserCtxt* htmlCreateMemoryParserCtxt(char* buffer, int size) xmlParserCtxt* htmlCreateFileParserCtxt(char* filename, char* encoding) Modified: lxml/branch/htmlparser/src/lxml/parser.pxi ============================================================================== --- lxml/branch/htmlparser/src/lxml/parser.pxi (original) +++ lxml/branch/htmlparser/src/lxml/parser.pxi Thu Apr 20 08:08:31 2006 @@ -220,15 +220,16 @@ ) cdef class HTMLParser(BaseParser): - """The HTML parser. This parser allows reading broken HTML into XML. + """The HTML parser. This parser allows reading HTML into a normal XML + tree. On libxml2 2.6.21 and later, it can read broken (non well-formed) + HTML as well. Note that you must not share parsers between threads. """ cdef int _parse_options cdef xmlParserCtxt* _memory_parser_ctxt cdef xmlParserCtxt* _file_parser_ctxt - def __init__(self, recover=True, no_network=False, - compact_text=True, remove_blank_text=False): + def __init__(self, recover=True, no_network=False, remove_blank_text=False): cdef int parse_options self._memory_parser_ctxt = NULL self._file_parser_ctxt = NULL @@ -236,9 +237,9 @@ parse_options = _HTML_DEFAULT_PARSE_OPTIONS if recover: - parse_options = parse_options | htmlparser.HTML_PARSE_RECOVER - if compact_text: - parse_options = parse_options | htmlparser.HTML_PARSE_COMPACT + # XXX: make it compile on libxml2 < 2.6.21 + #parse_options = parse_options | htmlparser.HTML_PARSE_RECOVER + parse_options = parse_options | xmlparser.XML_PARSE_RECOVER if no_network: parse_options = parse_options | htmlparser.HTML_PARSE_NONET if remove_blank_text: Modified: lxml/branch/htmlparser/src/lxml/xmlparser.pxd ============================================================================== --- lxml/branch/htmlparser/src/lxml/xmlparser.pxd (original) +++ lxml/branch/htmlparser/src/lxml/xmlparser.pxd Thu Apr 20 08:08:31 2006 @@ -30,6 +30,8 @@ 
XML_PARSE_NSCLEAN = 8192 # remove redundant namespaces declarations XML_PARSE_NOCDATA = 16384 # merge CDATA as text nodes XML_PARSE_NOXINCNODE = 32768 # do not generate XINCLUDE START/END nodes +# libxml2 2.6.21+ only: +# XML_PARSE_COMPACT = 65536 # compact small text nodes cdef void xmlInitParser() cdef xmlParserCtxt* xmlNewParserCtxt() From scoder at codespeak.net Thu Apr 20 08:39:57 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 08:39:59 2006 Subject: [Lxml-checkins] r26015 - lxml/trunk/src/lxml/tests Message-ID: <20060420063957.DD86910092@code0.codespeak.net> Author: scoder Date: Thu Apr 20 08:39:54 2006 New Revision: 26015 Modified: lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_etree.py Log: extended test cases for handling xml:id attributes in XMLID and XMLDTDID Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Thu Apr 20 08:39:54 2006 @@ -340,6 +340,7 @@

...

...

Regular paragraph.

+

XML:ID paragraph.

...

''' @@ -351,7 +352,7 @@ expected = { "chapter1" : root[0], "note1" : root[1], - "warn1" : root[3] + "warn1" : root[4] } self.assertEquals(dic, expected) Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Thu Apr 20 08:39:54 2006 @@ -155,6 +155,7 @@

...

...

Regular paragraph.

+

XML:ID paragraph.

...

''' @@ -165,7 +166,8 @@ self._writeElement(root2)) expected = { "chapter1" : root[0], - "warn1" : root[3] + "xmlid" : root[3], + "warn1" : root[4] } self.assertEquals(dic, expected) @@ -177,6 +179,7 @@ sorted(expected.keys())) self.assert_("chapter1" in dic) self.assert_("warn1" in dic) + self.assert_("xmlid" in dic) def test_XMLDTDID_empty(self): XMLDTDID = etree.XMLDTDID From scoder at codespeak.net Thu Apr 20 08:44:42 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 08:44:43 2006 Subject: [Lxml-checkins] r26016 - lxml/branch/htmlparser/src/lxml/tests Message-ID: <20060420064442.57F6810092@code0.codespeak.net> Author: scoder Date: Thu Apr 20 08:44:41 2006 New Revision: 26016 Modified: lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py Log: clean up in HTML parser tests Modified: lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py (original) +++ lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py Thu Apr 20 08:44:41 2006 @@ -16,7 +16,6 @@ html_str = "test

page title

" broken_html_str = "test<body><h1>page title</body></html>" - blank_text_html_str = "<html><head><title>

" def tearDown(self): self.etree.set_default_parser() @@ -58,7 +57,7 @@ parse, "__some_hopefully_nonexisting_file__.html", parser) - def test_default_parser(self): + def test_default_parser_HTML_broken(self): self.assertRaises(self.etree.XMLSyntaxError, self.etree.parse, StringIO(self.broken_html_str)) From scoder at codespeak.net Thu Apr 20 11:35:32 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 11:35:34 2006 Subject: [Lxml-checkins] r26022 - in lxml/trunk/src/lxml: . tests Message-ID: <20060420093532.D215F1008B@code0.codespeak.net> Author: scoder Date: Thu Apr 20 11:35:31 2006 New Revision: 26022 Modified: lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/tests/test_etree.py lxml/trunk/src/lxml/xmlid.pxi Log: removed IDDict dependency on UserDict, extended test cases Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Thu Apr 20 11:35:31 2006 @@ -29,6 +29,7 @@ cdef object PyList_AsTuple(object o) cdef object PySequence_List(object o) cdef object PySequence_Tuple(object o) + cdef object PyTuple_GET_ITEM(object o, int pos) cdef int PyNumber_Check(object instance) cdef int PyBool_Check(object instance) Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Thu Apr 20 11:35:31 2006 @@ -171,12 +171,20 @@ } self.assertEquals(dic, expected) + self.assertEquals(len(dic), + len(expected)) self.assertEquals(sorted(dic.items()), sorted(expected.items())) self.assertEquals(sorted(dic.iteritems()), sorted(expected.iteritems())) self.assertEquals(sorted(dic.keys()), sorted(expected.keys())) + self.assertEquals(sorted(dic.iterkeys()), + sorted(expected.iterkeys())) + self.assertEquals(sorted(dic.values()), + 
sorted(expected.values())) + self.assertEquals(sorted(dic.itervalues()), + sorted(expected.itervalues())) self.assert_("chapter1" in dic) self.assert_("warn1" in dic) self.assert_("xmlid" in dic) @@ -200,12 +208,12 @@ expected = {} self.assertEquals(dic, expected) - self.assertEquals(sorted(dic.items()), - sorted(expected.items())) + self.assertEquals(dic.items(), + expected.items()) self.assertEquals(sorted(dic.iteritems()), sorted(expected.iteritems())) - self.assertEquals(sorted(dic.keys()), - sorted(expected.keys())) + self.assertEquals(dic.keys(), + expected.keys()) def test_namespaces(self): etree = self.etree Modified: lxml/trunk/src/lxml/xmlid.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlid.pxi (original) +++ lxml/trunk/src/lxml/xmlid.pxi Thu Apr 20 11:35:31 2006 @@ -1,5 +1,3 @@ -from UserDict import DictMixin - def XMLID(text): """Parse the text and return a tuple (root node, ID dictionary). The root node is the same as returned by the XML() function. The dictionary @@ -29,21 +27,24 @@ else: return (root, _IDDict(root)) -class _IDDict(DictMixin): - """A dictionary class that mapps ID attributes to elements. +cdef class _IDDict: + """A dictionary-like proxy class that mapps ID attributes to elements. The dictionary must be instantiated with the root element of a parsed XML document, otherwise the behaviour is undefined. Elements and XML trees that were created or modified through the API are not supported. """ + cdef _Document _doc + cdef object _keys + cdef object _items def __init__(self, etree): cdef _Document doc doc = _documentOrRaise(etree) if doc._c_doc.ids is NULL: raise ValueError, "No ID dictionary available." 
- self.__doc = doc - self.__keys = None - self.__items = None + self._doc = doc + self._keys = None + self._items = None def copy(self): return IDDict(self._doc) @@ -52,9 +53,7 @@ cdef tree.xmlHashTable* c_ids cdef tree.xmlID* c_id cdef xmlAttr* c_attr - cdef _Document doc - doc = self.__doc - c_ids = doc._c_doc.ids + c_ids = self._doc._c_doc.ids id_utf = _utf8(id_name) c_id = tree.xmlHashLookup(c_ids, _cstr(id_utf)) if c_id is NULL: @@ -62,55 +61,104 @@ c_attr = c_id.attr if c_attr is NULL or c_attr.parent is NULL: raise KeyError, "ID attribute not found." - return _elementFactory(doc, c_attr.parent) + return _elementFactory(self._doc, c_attr.parent) + + def get(self, id_name): + return self[id_name] def __contains__(self, id_name): cdef tree.xmlID* c_id - cdef _Document doc - doc = self.__doc id_utf = _utf8(id_name) - c_id = tree.xmlHashLookup(doc._c_doc.ids, _cstr(id_utf)) + c_id = tree.xmlHashLookup( + self._doc._c_doc.ids, _cstr(id_utf)) return c_id is not NULL + def has_key(self, id_name): + return self.__contains__(id_name) + + def __cmp__(self, other): + if other is None: + return 1 + else: + return cmp(dict(self), other) + + def __richcmp__(self, other, int op): + cdef int c_cmp + if other is None: + return op == 0 or op == 1 or op == 3 + c_cmp = cmp(dict(self), other) + if c_cmp == 0: # equal + return op == 1 or op == 2 or op == 5 + elif c_cmp < 0: + return op == 0 or op == 1 or op == 3 + else: + return op == 4 or op == 5 or op == 3 + + def __repr__(self): + return repr(dict(self)) + def keys(self): - keys = self.__keys + keys = self._keys if keys is not None: return python.PySequence_List(keys) - keys = self.__build_keys() - self.__keys = python.PySequence_Tuple(keys) + keys = self._build_keys() + self._keys = python.PySequence_Tuple(keys) return keys - def __build_keys(self): - cdef _Document doc + def __iter__(self): + keys = self._keys + if keys is None: + keys = self.keys() + return iter(keys) + + def iterkeys(self): + return self.__iter__() + + def 
__len__(self): + keys = self._keys + if keys is None: + keys = self.keys() + return len(keys) + + cdef object _build_keys(self): keys = [] - doc = self.__doc - tree.xmlHashScan(doc._c_doc.ids, + tree.xmlHashScan(self._doc._c_doc.ids, _collectIdHashKeys, keys) return keys def items(self): - items = self.__items + items = self._items if items is not None: return python.PySequence_List(items) - items = self.__build_items() - self.__items = python.PySequence_Tuple(items) + items = self._build_items() + self._items = python.PySequence_Tuple(items) return items def iteritems(self): - items = self.__items + items = self._items if items is None: items = self.items() return iter(items) - def __build_items(self): - cdef _Document doc + cdef object _build_items(self): items = [] - doc = self.__doc - context = (items, doc) - tree.xmlHashScan(doc._c_doc.ids, + context = (items, self._doc) + tree.xmlHashScan(self._doc._c_doc.ids, _collectIdHashItemList, context) return items - + + def values(self): + items = self._items + if items is None: + items = self.items() + values = [] + for item in items: + value = python.PyTuple_GET_ITEM(item, 1) + python.PyList_Append(values, value) + return values + + def itervalues(self): + return iter(self.values()) cdef void _collectIdHashItemDict(void* payload, void* context, char* name): # collect elements from ID attribute hash table From scoder at codespeak.net Thu Apr 20 11:37:09 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 11:37:09 2006 Subject: [Lxml-checkins] r26023 - lxml/trunk/src/lxml Message-ID: <20060420093709.5366C1008B@code0.codespeak.net> Author: scoder Date: Thu Apr 20 11:37:07 2006 New Revision: 26023 Added: lxml/trunk/src/lxml/htmlparser.pxd - copied unchanged from r26022, lxml/branch/htmlparser/src/lxml/htmlparser.pxd Log: merged in new file from htmlparser branch From scoder at codespeak.net Thu Apr 20 11:47:42 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 11:47:44 2006 
Subject: [Lxml-checkins] r26024 - lxml/trunk/src/lxml/tests Message-ID: <20060420094742.B56CB1008B@code0.codespeak.net> Author: scoder Date: Thu Apr 20 11:47:41 2006 New Revision: 26024 Added: lxml/trunk/src/lxml/tests/test_htmlparser.py - copied unchanged from r26023, lxml/branch/htmlparser/src/lxml/tests/test_htmlparser.py Log: copied test file from htmlparser branch From scoder at codespeak.net Thu Apr 20 12:12:05 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 12:12:06 2006 Subject: [Lxml-checkins] r26027 - lxml/trunk/src/lxml Message-ID: <20060420101205.94F1E1008D@code0.codespeak.net> Author: scoder Date: Thu Apr 20 12:12:03 2006 New Revision: 26027 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/xmlparser.pxd Log: merge of htmlparser branch Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Apr 20 12:12:03 2006 @@ -70,7 +70,7 @@ # the document #print "freeing document:", self._c_doc #displayNode(self._c_doc, 0) - #print self._c_doc.dict is theParser._c_dict + #print self._c_doc, self._c_doc.dict is __GLOBAL_PARSER_CONTEXT._c_dict tree.xmlFreeDoc(self._c_doc) cdef getroot(self): @@ -141,12 +141,8 @@ cdef _Document _parseDocument(source, parser): cdef xmlDoc* c_doc - # XXX simplistic (c)StringIO support - if hasattr(source, 'getvalue'): - return _parseMemoryDocument(source.getvalue(), parser) - filename = _getFilenameForFile(source) - # Support for unamed file-like object (eg urlgrabber.urlopen) + # Support for unamed file-like object (StringIO, urlgrabber.urlopen, ...) 
if not filename and hasattr(source, 'read'): return _parseMemoryDocument(source.read(), parser) @@ -154,14 +150,14 @@ if filename is None: filename = source # open filename - c_doc = theParser.parseDocFromFile(filename, parser) + c_doc = _parseDocFromFile(filename, parser) return _documentFactory(c_doc) cdef _Document _parseMemoryDocument(text, parser): cdef xmlDoc* c_doc if python.PyUnicode_Check(text): text = _stripDeclaration(_utf8(text)) - c_doc = theParser.parseDoc(text, parser) + c_doc = _parseDoc(text, parser) return _documentFactory(c_doc) cdef _Document _documentFactory(xmlDoc* c_doc): @@ -1154,7 +1150,7 @@ cdef xmlDoc* c_doc cdef _Document doc ns_utf, name_utf = _getNsTag(_tag) - c_doc = theParser.newDoc() + c_doc = _newDoc() c_node = _createElement(c_doc, name_utf, attrib, _extra) tree.xmlDocSetRootElement(c_doc, c_node) doc = _documentFactory(c_doc) @@ -1169,7 +1165,7 @@ text = ' ' else: text = ' %s ' % _utf8(text) - doc = _documentFactory( theParser.newDoc() ) + doc = _documentFactory( _newDoc() ) c_node = _createComment(doc._c_doc, text) tree.xmlAddChild(doc._c_doc, c_node) return _commentFactory(doc, c_node) @@ -1198,7 +1194,7 @@ elif file is not None: doc = _parseDocument(file, parser) else: - doc = _documentFactory( theParser.newDoc() ) + doc = _documentFactory( _newDoc() ) etree = _elementTreeFactory(doc, element) @@ -1211,9 +1207,15 @@ return etree +def HTML(text): + cdef _Document doc + doc = _parseMemoryDocument(text, __DEFAULT_HTML_PARSER) + return doc.getroot() + def XML(text): - "Parse the XML text and return the root node." - return _parseMemoryDocument(text, None).getroot() + cdef _Document doc + doc = _parseMemoryDocument(text, __DEFAULT_XML_PARSER) + return doc.getroot() fromstring = XML @@ -1288,11 +1290,6 @@ include "proxy.pxi" # Proxy handling (element backpointers/memory/etc.) 
-# Instantiate globally shared XML parser to enable dictionary sharing -cdef Parser theParser -theParser = Parser() - - # Private helper functions cdef void _raiseIfNone(el): if el is None: Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Thu Apr 20 12:12:03 2006 @@ -1,24 +1,104 @@ # XML parser that provides dictionary sharing cimport xmlparser +cimport htmlparser from xmlparser cimport xmlParserCtxt, xmlDict class XMLSyntaxError(LxmlSyntaxError): pass -cdef int _DEFAULT_PARSE_OPTIONS -_DEFAULT_PARSE_OPTIONS = ( +class HTMLSyntaxError(LxmlSyntaxError): + pass + +class ParserError(LxmlError): + pass + +cdef class _ParserContext: + """Global parser context to share the string dictionary. + """ + cdef xmlDict* _c_dict + cdef int _initialized + + def __init__(self): + self._c_dict = NULL + self._initialized = 0 + + def __dealloc__(self): + if self._c_dict is not NULL: + xmlparser.xmlDictFree(self._c_dict) + + cdef void _initParser(self): + if not self._initialized: + xmlparser.xmlInitParser() + self._initialized = 1 + + cdef void _initParserDict(self, xmlParserCtxt* pctxt): + "Assure we always use the same string dictionary." 
+ if self._c_dict is NULL or self._c_dict is pctxt.dict: + return + if pctxt.dict is not NULL: + xmlparser.xmlDictFree(pctxt.dict) + pctxt.dict = self._c_dict + xmlparser.xmlDictReference(pctxt.dict) + + cdef void _initDocDict(self, xmlDoc* result): + "Store dict of last object parsed if no shared dict yet" + if result is NULL: + return + if self._c_dict is NULL: + #print "storing shared dict" + if result.dict is NULL: + result.dict = xmlparser.xmlDictCreate() + self._c_dict = result.dict + xmlparser.xmlDictReference(result.dict) + elif result.dict != self._c_dict: + if result.dict is not NULL: + xmlparser.xmlDictFree(result.dict) + result.dict = self._c_dict + xmlparser.xmlDictReference(self._c_dict) + +cdef _ParserContext __GLOBAL_PARSER_CONTEXT +__GLOBAL_PARSER_CONTEXT = _ParserContext() + + +cdef class BaseParser: + cdef _ErrorLog _error_log + cdef object _syntax_error_class + def __init__(self, syntax_error_class): + self._error_log = _ErrorLog() + self._syntax_error_class = syntax_error_class + + property error_log: + def __get__(self): + return self._error_log.copy() + + cdef xmlDoc* _handleResult(self, xmlParserCtxt* ctxt, + xmlDoc* result) except NULL: + if ctxt.wellFormed: + __GLOBAL_PARSER_CONTEXT._initDocDict(result) + elif result is not NULL: + # free broken document + tree.xmlFreeDoc(result) + result = NULL + self._error_log.disconnect() + if result is NULL: + raise self._syntax_error_class + return result + + +############################################################ +## XML parser +############################################################ + +cdef int _XML_DEFAULT_PARSE_OPTIONS +_XML_DEFAULT_PARSE_OPTIONS = ( xmlparser.XML_PARSE_NOENT | xmlparser.XML_PARSE_NOCDATA | xmlparser.XML_PARSE_NOWARNING | xmlparser.XML_PARSE_NOERROR ) -cdef int _ORIG_DEFAULT_PARSE_OPTIONS -_ORIG_DEFAULT_PARSE_OPTIONS = _DEFAULT_PARSE_OPTIONS - - -cdef class XMLParser: +cdef class XMLParser(BaseParser): """The XML parser. 
Parsers can be supplied as additional argument to various parse functions of the lxml API. A default parser is always available and can be replaced by a call to the global function @@ -28,13 +108,19 @@ The keyword arguments in the constructor are mainly based on the libxml2 parser configuration. A DTD will only be loaded if validation or attribute default values are requested. + + Note that you must not share parsers between threads. """ cdef int _parse_options + cdef xmlParserCtxt* _file_parser_ctxt + cdef xmlParserCtxt* _memory_parser_ctxt def __init__(self, attribute_defaults=False, dtd_validation=False, no_network=False, ns_clean=False): cdef int parse_options - parse_options = _ORIG_DEFAULT_PARSE_OPTIONS + self._file_parser_ctxt = NULL + BaseParser.__init__(self, XMLSyntaxError) + parse_options = _XML_DEFAULT_PARSE_OPTIONS if dtd_validation: parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD | \ xmlparser.XML_PARSE_DTDVALID @@ -52,156 +138,197 @@ def __get__(self): return __build_error_log_tuple(self) -## def copy(self, attribute_defaults=None, dtd_validation=None, -## no_network=None, ns_clean=None): -## cdef int parse_options -## parse_options = self._parse_options -## if attribute_defaults is None: -## attribute_defaults = parse_options & xmlparser.XML_PARSE_DTDATTR -## if dtd_validation is None: -## dtd_validation = parse_options & xmlparser.XML_PARSE_DTDVALID -## if no_network is None: -## no_network = parse_options & xmlparser.XML_PARSE_NONET -## if ns_clean is None: -## ns_clean = parse_options & xmlparser.XML_PARSE_NSCLEAN - -## return self.__class__(attribute_defaults=attribute_defaults, -## dtd_validation=dtd_validation, -## no_network=no_network, ns_clean=ns_clean) - - -def set_default_parser(parser=None): - """Set a default XMLParser. This parser is used globally whenever no - parser is supplied to the various parse functions of the lxml API. 
If - this function is called without a parser (or if it is None), the default - parser is reset to the original configuration. - """ - if parser is not None: - _DEFAULT_PARSE_OPTIONS = (parser)._parse_options - else: - _DEFAULT_PARSE_OPTIONS = _ORIG_DEFAULT_PARSE_OPTIONS - + def __dealloc__(self): + if self._file_parser_ctxt != NULL: + xmlparser.xmlFreeParserCtxt(self._file_parser_ctxt) + if self._memory_parser_ctxt != NULL: + xmlparser.xmlFreeParserCtxt(self._memory_parser_ctxt) -cdef class Parser: + cdef xmlParserCtxt* _createContext(self) except NULL: + cdef xmlParserCtxt* pctxt + pctxt = xmlparser.xmlNewParserCtxt() + if pctxt is NULL: + self._error_log.disconnect() + raise ParserError, "Failed to create parser context" + return pctxt - cdef xmlDict* _c_dict - cdef int _parser_initialized - - def __init__(self): - self._c_dict = NULL - self._parser_initialized = 0 - - def __dealloc__(self): - #print "cleanup parser" - if self._c_dict is not NULL: - #print "freeing dictionary (cleanup parser)" - xmlparser.xmlDictFree(self._c_dict) - - cdef xmlDoc* parseDoc(self, text, parser) except NULL: + cdef xmlDoc* _parseDoc(self, text_utf) except NULL: """Parse document, share dictionary if possible. 
""" cdef xmlDoc* result cdef xmlParserCtxt* pctxt cdef int parse_error - - if parser is not None: - parse_options = (parser)._parse_options - else: - parse_options = _DEFAULT_PARSE_OPTIONS - - self._initParse() - pctxt = xmlparser.xmlCreateDocParserCtxt(_cstr(text)) + self._error_log.connect() + pctxt = self._memory_parser_ctxt if pctxt is NULL: - raise XMLSyntaxError + pctxt = self._createContext() + self._memory_parser_ctxt = pctxt - self._prepareParse(pctxt) - xmlparser.xmlCtxtUseOptions( - pctxt, - parse_options) - parse_error = xmlparser.xmlParseDocument(pctxt) - # in case of errors, clean up context plus any document - if parse_error != 0 or not pctxt.wellFormed: - if pctxt.myDoc is not NULL: - tree.xmlFreeDoc(pctxt.myDoc) - pctxt.myDoc = NULL - xmlparser.xmlFreeParserCtxt(pctxt) - raise XMLSyntaxError - result = pctxt.myDoc - self._finalizeParse(result) - xmlparser.xmlFreeParserCtxt(pctxt) - return result + __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) + result = xmlparser.xmlCtxtReadDoc( + pctxt, _cstr(text_utf), NULL, NULL, self._parse_options) + return self._handleResult(pctxt, result) - cdef xmlDoc* parseDocFromFile(self, char* filename, parser) except NULL: - cdef int parse_options + cdef xmlDoc* _parseDocFromFile(self, char* filename) except NULL: cdef xmlDoc* result cdef xmlParserCtxt* pctxt + self._error_log.connect() + pctxt = self._file_parser_ctxt + if pctxt is NULL: + pctxt = self._createContext() + self._file_parser_ctxt = pctxt - if parser is not None: - parse_options = (parser)._parse_options - else: - parse_options = _DEFAULT_PARSE_OPTIONS - - self._initParse() - pctxt = xmlparser.xmlNewParserCtxt() - self._prepareParse(pctxt) - # XXX set options twice? 
needed to shut up libxml2 - xmlparser.xmlCtxtUseOptions(pctxt, parse_options) - result = xmlparser.xmlCtxtReadFile(pctxt, filename, - NULL, parse_options) + __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) + result = xmlparser.xmlCtxtReadFile( + pctxt, filename, NULL, self._parse_options) if result is NULL: if pctxt.lastError.domain == xmlerror.XML_FROM_IO: + self._error_log.disconnect() raise IOError, "Could not open file %s" % filename - # in case of errors, clean up context plus any document - # XXX other errors? - if not pctxt.wellFormed: - if pctxt.myDoc is not NULL: - tree.xmlFreeDoc(pctxt.myDoc) - pctxt.myDoc = NULL - xmlparser.xmlFreeParserCtxt(pctxt) - raise XMLSyntaxError - self._finalizeParse(result) - xmlparser.xmlFreeParserCtxt(pctxt) - return result - - cdef void _initParse(self): - if not self._parser_initialized: - xmlparser.xmlInitParser() - self._parser_initialized = 1 - - cdef void _prepareParse(self, xmlParserCtxt* pctxt): - if self._c_dict is not NULL and pctxt.dict is not NULL: - #print "sharing dictionary (parseDoc)" - xmlparser.xmlDictFree(pctxt.dict) - pctxt.dict = self._c_dict - xmlparser.xmlDictReference(pctxt.dict) + return self._handleResult(pctxt, result) - cdef void _finalizeParse(self, xmlDoc* result): - # store dict of last object parsed if no shared dict yet - if self._c_dict is NULL: - #print "storing shared dict" - self._c_dict = result.dict - xmlparser.xmlDictReference(self._c_dict) - - cdef xmlDoc* newDoc(self): + +cdef XMLParser __DEFAULT_XML_PARSER +__DEFAULT_XML_PARSER = XMLParser() + +cdef BaseParser __DEFAULT_PARSER +__DEFAULT_PARSER = __DEFAULT_XML_PARSER + +def set_default_parser(parser=None): + """Set a default parser. This parser is used globally whenever no parser + is supplied to the various parse functions of the lxml API. If this + function is called without a parser (or if it is None), the default parser + is reset to the original configuration. + + Note that the default parser is not thread-safe. 
Avoid the default parser + in multi-threaded environments. You can create a separate parser for each + thread explicitly or use a parser pool. + """ + global __DEFAULT_PARSER + if parser is None: + __DEFAULT_PARSER = __DEFAULT_XML_PARSER + elif isinstance(parser, (HTMLParser, XMLParser)): + __DEFAULT_PARSER = parser + else: + raise TypeError, "Invalid parser" + + +############################################################ +## HTML parser +############################################################ + +cdef int _HTML_DEFAULT_PARSE_OPTIONS +_HTML_DEFAULT_PARSE_OPTIONS = ( + htmlparser.HTML_PARSE_NOWARNING | + htmlparser.HTML_PARSE_NOERROR + ) + +cdef class HTMLParser(BaseParser): + """The HTML parser. This parser allows reading HTML into a normal XML + tree. By default, it can read broken (non well-formed) HTML, depending on + the capabilities of libxml2. Use the 'recover' option to switch this off. + + Note that you must not share parsers between threads. + """ + cdef int _parse_options + cdef xmlParserCtxt* _memory_parser_ctxt + cdef xmlParserCtxt* _file_parser_ctxt + def __init__(self, recover=True, no_network=False, remove_blank_text=False): + cdef int parse_options + self._memory_parser_ctxt = NULL + self._file_parser_ctxt = NULL + BaseParser.__init__(self, HTMLSyntaxError) + + parse_options = _HTML_DEFAULT_PARSE_OPTIONS + if recover: + # XXX: make it compile on libxml2 < 2.6.21 + #parse_options = parse_options | htmlparser.HTML_PARSE_RECOVER + parse_options = parse_options | xmlparser.XML_PARSE_RECOVER + if no_network: + parse_options = parse_options | htmlparser.HTML_PARSE_NONET + if remove_blank_text: + parse_options = parse_options | htmlparser.HTML_PARSE_NOBLANKS + + self._parse_options = parse_options + + def __dealloc__(self): + if self._file_parser_ctxt != NULL: + htmlparser.htmlFreeParserCtxt(self._file_parser_ctxt) + if self._memory_parser_ctxt != NULL: + htmlparser.htmlFreeParserCtxt(self._memory_parser_ctxt) + + cdef xmlDoc* _parseDoc(self, 
text_utf) except NULL: + """Parse HTML document, share dictionary if possible. + """ cdef xmlDoc* result - cdef xmlDict* d + cdef xmlParserCtxt* pctxt + cdef char* c_text + cdef int c_len + self._error_log.connect() + c_text = _cstr(text_utf) + pctxt = self._memory_parser_ctxt + if pctxt is NULL: + pctxt = htmlparser.htmlCreateMemoryParserCtxt('dummy', 5) + if pctxt is NULL: + self._error_log.disconnect() + raise ParserError, "Failed to create parser context" + self._memory_parser_ctxt = pctxt + __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) + result = htmlparser.htmlCtxtReadDoc( + pctxt, c_text, NULL, NULL, self._parse_options) + return self._handleResult(pctxt, result) - result = tree.xmlNewDoc("1.0") + cdef xmlDoc* _parseDocFromFile(self, char* filename) except NULL: + cdef xmlDoc* result + cdef xmlParserCtxt* pctxt + cdef int parser_error + self._error_log.connect() + pctxt = self._file_parser_ctxt + if pctxt is NULL: + pctxt = htmlparser.htmlCreateFileParserCtxt(filename, NULL) + if pctxt is NULL: + self._error_log.disconnect() + warnings = self._error_log.filter_from_warnings() + if warnings and warnings[-1].domain == xmlerror.XML_FROM_IO: + raise IOError, "Could not open file %s" % filename + raise ParserError, "Failed to create parser context" + self._file_parser_ctxt = pctxt + __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) + result = htmlparser.htmlCtxtReadFile( + pctxt, filename, NULL, self._parse_options) + return self._handleResult(pctxt, result) + +cdef HTMLParser __DEFAULT_HTML_PARSER +__DEFAULT_HTML_PARSER = HTMLParser() + +############################################################ +## helper functions for document creation +############################################################ + +cdef xmlDoc* _parseDoc(text_utf, parser) except NULL: + if parser is None: + parser = __DEFAULT_PARSER + __GLOBAL_PARSER_CONTEXT._initParser() + if isinstance(parser, XMLParser): + return (parser)._parseDoc(text_utf) + elif isinstance(parser, HTMLParser): + return 
(parser)._parseDoc(text_utf) + else: + raise TypeError, "invalid parser" - if self._c_dict is NULL: - # we need to get dict from the new document if it's there, - # otherwise make one - if result.dict is not NULL: - d = result.dict - else: - d = xmlparser.xmlDictCreate() - result.dict = d - self._c_dict = d - xmlparser.xmlDictReference(self._c_dict) - else: - # we need to reuse the central dict and get rid of the new one - if result.dict is not NULL: - xmlparser.xmlDictFree(result.dict) - result.dict = self._c_dict - xmlparser.xmlDictReference(result.dict) - return result +cdef xmlDoc* _parseDocFromFile(filename, parser) except NULL: + if parser is None: + parser = __DEFAULT_PARSER + __GLOBAL_PARSER_CONTEXT._initParser() + if isinstance(parser, XMLParser): + return (parser)._parseDocFromFile(_cstr(filename)) + elif isinstance(parser, HTMLParser): + return (parser)._parseDocFromFile(_cstr(filename)) + else: + raise TypeError, "invalid parser" + +cdef xmlDoc* _newDoc(): + cdef xmlDoc* result + result = tree.xmlNewDoc("1.0") + __GLOBAL_PARSER_CONTEXT._initDocDict(result) + return result Modified: lxml/trunk/src/lxml/xmlparser.pxd ============================================================================== --- lxml/trunk/src/lxml/xmlparser.pxd (original) +++ lxml/trunk/src/lxml/xmlparser.pxd Thu Apr 20 12:12:03 2006 @@ -30,15 +30,15 @@ XML_PARSE_NSCLEAN = 8192 # remove redundant namespaces declarations XML_PARSE_NOCDATA = 16384 # merge CDATA as text nodes XML_PARSE_NOXINCNODE = 32768 # do not generate XINCLUDE START/END nodes +# libxml2 2.6.21+ only: +# XML_PARSE_COMPACT = 65536 # compact small text nodes cdef void xmlInitParser() - cdef xmlParserCtxt* xmlCreateDocParserCtxt(char* cur) cdef xmlParserCtxt* xmlNewParserCtxt() cdef void xmlFreeParserCtxt(xmlParserCtxt* ctxt) - - cdef int xmlCtxtUseOptions(xmlParserCtxt* ctxt, int options) - cdef int xmlParseDocument(xmlParserCtxt* ctxt) - cdef xmlDoc* xmlParseDoc(char* cur) + + cdef xmlDoc* xmlCtxtReadDoc(xmlParserCtxt* 
ctxt, + char* cur, char* URL, char* encoding, + int options) cdef xmlDoc* xmlCtxtReadFile(xmlParserCtxt* ctxt, char* filename, char* encoding, int options) - From scoder at codespeak.net Thu Apr 20 12:21:40 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 12:21:41 2006 Subject: [Lxml-checkins] r26029 - in lxml/trunk/src/lxml: . tests Message-ID: <20060420102140.6A99D1008D@code0.codespeak.net> Author: scoder Date: Thu Apr 20 12:21:39 2006 New Revision: 26029 Modified: lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/tests/test_htmlparser.py Log: removed HTMLSyntaxError, replaced by XMLSyntaxError to simplify catching exception e.g. from the default parser Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Thu Apr 20 12:21:39 2006 @@ -7,9 +7,6 @@ class XMLSyntaxError(LxmlSyntaxError): pass -class HTMLSyntaxError(LxmlSyntaxError): - pass - class ParserError(LxmlError): pass @@ -63,10 +60,8 @@ cdef class BaseParser: cdef _ErrorLog _error_log - cdef object _syntax_error_class - def __init__(self, syntax_error_class): + def __init__(self): self._error_log = _ErrorLog() - self._syntax_error_class = syntax_error_class property error_log: def __get__(self): @@ -82,7 +77,7 @@ result = NULL self._error_log.disconnect() if result is NULL: - raise self._syntax_error_class + raise XMLSyntaxError return result @@ -118,7 +113,7 @@ no_network=False, ns_clean=False): cdef int parse_options self._file_parser_ctxt = NULL - BaseParser.__init__(self, XMLSyntaxError) + BaseParser.__init__(self) parse_options = _XML_DEFAULT_PARSE_OPTIONS if dtd_validation: @@ -237,7 +232,7 @@ cdef int parse_options self._memory_parser_ctxt = NULL self._file_parser_ctxt = NULL - BaseParser.__init__(self, HTMLSyntaxError) + BaseParser.__init__(self) parse_options = _HTML_DEFAULT_PARSE_OPTIONS if recover: Modified: 
lxml/trunk/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_htmlparser.py (original) +++ lxml/trunk/src/lxml/tests/test_htmlparser.py Thu Apr 20 12:21:39 2006 @@ -29,7 +29,7 @@ parser = self.etree.HTMLParser(recover=False) parse = self.etree.parse f = StringIO("") - self.assertRaises(self.etree.HTMLSyntaxError, + self.assertRaises(self.etree.XMLSyntaxError, parse, f, parser) def test_module_HTML_broken(self): From scoder at codespeak.net Thu Apr 20 12:28:54 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 12:28:55 2006 Subject: [Lxml-checkins] r26030 - lxml/trunk/src/lxml Message-ID: <20060420102854.EA37C10088@code0.codespeak.net> Author: scoder Date: Thu Apr 20 12:28:53 2006 New Revision: 26030 Modified: lxml/trunk/src/lxml/parser.pxi Log: clean up left over after merge Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Thu Apr 20 12:28:53 2006 @@ -129,10 +129,6 @@ self._parse_options = parse_options - property error_log: - def __get__(self): - return __build_error_log_tuple(self) - def __dealloc__(self): if self._file_parser_ctxt != NULL: xmlparser.xmlFreeParserCtxt(self._file_parser_ctxt) From scoder at codespeak.net Thu Apr 20 12:34:25 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 12:34:26 2006 Subject: [Lxml-checkins] r26031 - lxml/trunk Message-ID: <20060420103425.0257A1008D@code0.codespeak.net> Author: scoder Date: Thu Apr 20 12:34:25 2006 New Revision: 26031 Modified: lxml/trunk/CHANGES.txt Log: updated CHANGES.txt Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Apr 20 12:34:25 2006 @@ -7,6 +7,8 @@ Features added -------------- 
+* HTMLParser for parsing (broken) HTML + * XMLDTDID function parses XML into tuple (root node, ID dict) based on xml:id implementation of libxml2 (as opposed to ET compatible XMLID) From scoder at codespeak.net Thu Apr 20 12:47:35 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 12:47:35 2006 Subject: [Lxml-checkins] r26032 - lxml/trunk Message-ID: <20060420104735.444B910090@code0.codespeak.net> Author: scoder Date: Thu Apr 20 12:47:34 2006 New Revision: 26032 Modified: lxml/trunk/INSTALL.txt Log: state that newer-than-required versions of libxml2/xslt are 'recommended' Modified: lxml/trunk/INSTALL.txt ============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Thu Apr 20 12:47:34 2006 @@ -8,10 +8,10 @@ You need libxml2 and libxslt, in particular: -* libxml 2.6.16 (newer versions should work). It can be found here: +* libxml 2.6.16 (newer versions are recommended). It can be found here: http://xmlsoft.org/downloads.html -* libxslt 1.1.12 (newer versions should work). It can be found here: +* libxslt 1.1.12 (newer versions are recommended). It can be found here: http://xmlsoft.org/XSLT/downloads.html For Windows, there is a `binary distribution`_ of libxml2 and libxslt. 
Note From scoder at codespeak.net Thu Apr 20 12:57:16 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 12:57:18 2006 Subject: [Lxml-checkins] r26033 - lxml/trunk/src/lxml Message-ID: <20060420105716.F08FB10090@code0.codespeak.net> Author: scoder Date: Thu Apr 20 12:57:16 2006 New Revision: 26033 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/xmlid.pxi Log: doc updates, make clear that trees that have ID dictionaries must not be modified Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Apr 20 12:57:16 2006 @@ -1272,7 +1272,8 @@ return result def parse(source, parser=None): - """Return an ElementTree object loaded with source elements + """Return an ElementTree object loaded with source elements. If no parser + is provided as second argument, the default parser is used. """ cdef _Document doc doc = _parseDocument(source, parser) Modified: lxml/trunk/src/lxml/xmlid.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlid.pxi (original) +++ lxml/trunk/src/lxml/xmlid.pxi Thu Apr 20 12:57:16 2006 @@ -18,6 +18,9 @@ contains string-element pairs. The dictionary keys are the values of ID attributes as defined by the DTD. The elements referenced by the ID are stored as dictionary values. + + Note that you must not modify the XML tree if you use the ID dictionary. + The results are undefined. """ cdef _NodeBase root root = XML(text) From scoder at codespeak.net Thu Apr 20 13:05:45 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 13:05:47 2006 Subject: [Lxml-checkins] r26034 - in lxml/trunk/src/lxml: . 
tests Message-ID: <20060420110545.AD65310088@code0.codespeak.net> Author: scoder Date: Thu Apr 20 13:05:44 2006 New Revision: 26034 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tests/test_etree.py Log: new module level function parseid() as parse() equivalent to XMLDTDID()/XML(), cleanup in related test cases Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Apr 20 13:05:44 2006 @@ -1279,6 +1279,18 @@ doc = _parseDocument(source, parser) return ElementTree(doc.getroot()) +def parseid(source, parser=None): + """Parses the source into a tuple containing an ElementTree object and an + ID dictionary. If no parser is provided as second argument, the default + parser is used. + + Note that you must not modify the XML tree if you use the ID dictionary. + The results are undefined. + """ + cdef _Document doc + doc = _parseDocument(source, parser) + return (ElementTree(doc.getroot()), _IDDict(doc)) + # include submodules include "xmlerror.pxi" # error and log handling Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Thu Apr 20 13:05:44 2006 @@ -140,9 +140,44 @@ b, d.getparent()) + def test_parseid(self): + parseid = self.etree.parseid + XML = self.etree.XML + xml_text = ''' + + + + + + ]> + +

...

+

...

+

Regular paragraph.

+

XML:ID paragraph.

+

...

+
+ ''' + + tree, dic = parseid(StringIO(xml_text)) + root = tree.getroot() + root2 = XML(xml_text) + self.assertEquals(self._writeElement(root), + self._writeElement(root2)) + expected = { + "chapter1" : root[0], + "xmlid" : root[3], + "warn1" : root[4] + } + self.assert_("chapter1" in dic) + self.assert_("warn1" in dic) + self.assert_("xmlid" in dic) + self._checkIDDict(dic, expected) + def test_XMLDTDID(self): - XMLDTDID = etree.XMLDTDID - XML = etree.XML + XMLDTDID = self.etree.XMLDTDID + XML = self.etree.XML xml_text = ''' @@ -169,29 +204,14 @@ "xmlid" : root[3], "warn1" : root[4] } - - self.assertEquals(dic, expected) - self.assertEquals(len(dic), - len(expected)) - self.assertEquals(sorted(dic.items()), - sorted(expected.items())) - self.assertEquals(sorted(dic.iteritems()), - sorted(expected.iteritems())) - self.assertEquals(sorted(dic.keys()), - sorted(expected.keys())) - self.assertEquals(sorted(dic.iterkeys()), - sorted(expected.iterkeys())) - self.assertEquals(sorted(dic.values()), - sorted(expected.values())) - self.assertEquals(sorted(dic.itervalues()), - sorted(expected.itervalues())) self.assert_("chapter1" in dic) self.assert_("warn1" in dic) self.assert_("xmlid" in dic) + self._checkIDDict(dic, expected) def test_XMLDTDID_empty(self): - XMLDTDID = etree.XMLDTDID - XML = etree.XML + XMLDTDID = self.etree.XMLDTDID + XML = self.etree.XML xml_text = '''

...

@@ -206,14 +226,24 @@ self.assertEquals(self._writeElement(root), self._writeElement(root2)) expected = {} + self._checkIDDict(dic, expected) + def _checkIDDict(self, dic, expected): self.assertEquals(dic, expected) - self.assertEquals(dic.items(), - expected.items()) + self.assertEquals(len(dic), + len(expected)) + self.assertEquals(sorted(dic.items()), + sorted(expected.items())) self.assertEquals(sorted(dic.iteritems()), sorted(expected.iteritems())) - self.assertEquals(dic.keys(), - expected.keys()) + self.assertEquals(sorted(dic.keys()), + sorted(expected.keys())) + self.assertEquals(sorted(dic.iterkeys()), + sorted(expected.iterkeys())) + self.assertEquals(sorted(dic.values()), + sorted(expected.values())) + self.assertEquals(sorted(dic.itervalues()), + sorted(expected.itervalues())) def test_namespaces(self): etree = self.etree From scoder at codespeak.net Thu Apr 20 13:08:56 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 13:08:58 2006 Subject: [Lxml-checkins] r26035 - lxml/trunk/src/lxml Message-ID: <20060420110856.EFBA510088@code0.codespeak.net> Author: scoder Date: Thu Apr 20 13:08:56 2006 New Revision: 26035 Modified: lxml/trunk/src/lxml/etree.pyx Log: removed unused helper function _raiseIfNone Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Apr 20 13:08:56 2006 @@ -413,7 +413,8 @@ return # if the next element is in the list, insert before it for mynode in value: - _raiseIfNone(mynode) + if mynode is None: + raise TypeError, "Node must not be None." foreign = self._doc is not mynode._doc # store possible text tail c_next = mynode._c_node.next @@ -1238,7 +1239,7 @@ def dump(_NodeBase elem): assert elem is not None, "Must supply element." 
- # better, but not ET compatible : _raiseIfNone(elem) + # better, but not ET compatible : "_NodeBase elem not None" _dumpToFile(sys.stdout, elem._doc._c_doc, elem._c_node) def tostring(_NodeBase element, encoding='us-ascii'): @@ -1248,7 +1249,7 @@ cdef char* enc assert element is not None - # better, but not ET compatible : _raiseIfNone(element) + # better, but not ET compatible : "_NodeBase element not None" #if encoding is None: # encoding = 'UTF-8' @@ -1304,10 +1305,6 @@ # Private helper functions -cdef void _raiseIfNone(el): - if el is None: - raise TypeError, "Argument must not be None." - cdef _Document _documentOrRaise(object input): cdef _Document doc doc = _documentOf(input) From scoder at codespeak.net Thu Apr 20 13:19:25 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 13:19:26 2006 Subject: [Lxml-checkins] r26036 - lxml/trunk/src/lxml Message-ID: <20060420111925.84BC710088@code0.codespeak.net> Author: scoder Date: Thu Apr 20 13:19:24 2006 New Revision: 26036 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/xmlid.pxi Log: moved parseid() function into xmlid.pxi Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Apr 20 13:19:24 2006 @@ -1280,18 +1280,6 @@ doc = _parseDocument(source, parser) return ElementTree(doc.getroot()) -def parseid(source, parser=None): - """Parses the source into a tuple containing an ElementTree object and an - ID dictionary. If no parser is provided as second argument, the default - parser is used. - - Note that you must not modify the XML tree if you use the ID dictionary. - The results are undefined. 
- """ - cdef _Document doc - doc = _parseDocument(source, parser) - return (ElementTree(doc.getroot()), _IDDict(doc)) - # include submodules include "xmlerror.pxi" # error and log handling Modified: lxml/trunk/src/lxml/xmlid.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlid.pxi (original) +++ lxml/trunk/src/lxml/xmlid.pxi Thu Apr 20 13:19:24 2006 @@ -30,6 +30,18 @@ else: return (root, _IDDict(root)) +def parseid(source, parser=None): + """Parses the source into a tuple containing an ElementTree object and an + ID dictionary. If no parser is provided as second argument, the default + parser is used. + + Note that you must not modify the XML tree if you use the ID dictionary. + The results are undefined. + """ + cdef _Document doc + doc = _parseDocument(source, parser) + return (ElementTree(doc.getroot()), _IDDict(doc)) + cdef class _IDDict: """A dictionary-like proxy class that mapps ID attributes to elements. From scoder at codespeak.net Thu Apr 20 14:39:21 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 14:39:22 2006 Subject: [Lxml-checkins] r26039 - lxml/branch/resolver-new Message-ID: <20060420123921.0D1F61008D@code0.codespeak.net> Author: scoder Date: Thu Apr 20 14:39:19 2006 New Revision: 26039 Added: lxml/branch/resolver-new/ - copied from r26038, lxml/trunk/ Log: new branch for resolvers (2nd try) From scoder at codespeak.net Thu Apr 20 18:24:27 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 18:24:29 2006 Subject: [Lxml-checkins] r26055 - in lxml/branch/resolver-new/src/lxml: . 
tests Message-ID: <20060420162427.7703D10088@code0.codespeak.net> Author: scoder Date: Thu Apr 20 18:24:25 2006 New Revision: 26055 Modified: lxml/branch/resolver-new/src/lxml/etree.pyx lxml/branch/resolver-new/src/lxml/parser.pxi lxml/branch/resolver-new/src/lxml/tests/test_etree.py lxml/branch/resolver-new/src/lxml/xmlparser.pxd lxml/branch/resolver-new/src/lxml/xslt.pxi Log: initial working implementation of parser entity resolvers Modified: lxml/branch/resolver-new/src/lxml/etree.pyx ============================================================================== --- lxml/branch/resolver-new/src/lxml/etree.pyx (original) +++ lxml/branch/resolver-new/src/lxml/etree.pyx Thu Apr 20 18:24:25 2006 @@ -1281,6 +1281,19 @@ return ElementTree(doc.getroot()) +# class for temporary storage of Python references +cdef class _TempStore: + cdef object _storage + def __init__(self): + self._storage = {} + + cdef void add(self, obj): + python.PyDict_SetItem(self._storage, id(obj), obj) + + cdef void clear(self): + python.PyDict_Clear(self._storage) + + # include submodules include "xmlerror.pxi" # error and log handling include "xmlid.pxi" # XMLID and IDDict Modified: lxml/branch/resolver-new/src/lxml/parser.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/parser.pxi (original) +++ lxml/branch/resolver-new/src/lxml/parser.pxi Thu Apr 20 18:24:25 2006 @@ -58,15 +58,156 @@ __GLOBAL_PARSER_CONTEXT = _ParserContext() +############################################################ +## Custom resolver API +############################################################ + +cdef class _ResolverRegistry # forward declaration + +cdef class _ParserInput: + cdef xmlparser.xmlParserInput* _input + cdef object _pyref # to keep Python references + +cdef class _ResolverContext: + cdef xmlparser.xmlParserCtxt* _ctxt + cdef _ResolverRegistry _resolvers + cdef _TempStore _storage + +cdef class Resolver: + def resolve(self, 
system_url, public_id, _ResolverContext context not None): + cdef _ParserInput parser_input + cdef char* c_url + cdef char* c_id + if __DEFAULT_ENTITY_LOADER is NULL: + return None + if system_url is None: + c_url = NULL + else: + url_utf = _utf8(system_url) + c_url = _cstr(url_utf) + if public_id is None: + c_id = NULL + else: + id_utf = _utf8(public_id) + c_id = _cstr(id_utf) + parser_input = _ParserInput() + parser_input._input = __DEFAULT_ENTITY_LOADER( + c_url, c_id, context._ctxt) + return parser_input + + def resolve_string(self, string, _ResolverContext context not None): + cdef _ParserInput parser_input + string_utf = _utf8(string) + parser_input = _ParserInput() + parser_input._input = xmlparser.xmlNewStringInputStream( + context._ctxt, _cstr(string_utf)) + parser_input._pyref = string_utf + return parser_input + + def resolve_filename(self, filename, _ResolverContext context not None): + cdef _ParserInput parser_input + filename_utf = _utf8(filename) + parser_input = _ParserInput() + parser_input._input = xmlparser.xmlNewInputFromFile( + context._ctxt, _cstr(filename_utf)) + return parser_input + +cdef class _ResolverRegistry: + cdef object _resolvers + cdef Resolver _default_resolver + def __init__(self, Resolver default_resolver=None): + try: + self._resolvers = set() + except NameError: + from sets import Set + self._resolvers = Set() + if default_resolver is None: + self._default_resolver = Resolver() + else: + self._default_resolver = default_resolver + + def add(self, Resolver resolver not None): + """Register a resolver. + + For each requested entity, the 'resolve' method of the resolver will + be called and the result will be passed to the parser. If this method + returns None, the request will be delegated to other resolvers or the + default resolver. The resolvers will be tested in an arbitrary order + until the first match is found. 
+ """ + self._resolvers.add(resolver) + + def remove(self, resolver): + self._resolvers.discard(resolver) + + def resolve(self, system_url, public_id, _ResolverContext context not None): + for resolver in self._resolvers: + result = resolver.resolve(system_url, public_id, context) + if result is not None: + return result + return self._default_resolver.resolve(system_url, public_id, context) + +cdef xmlparser.xmlParserInput* _local_resolver(char* c_url, char* c_pubid, + xmlParserCtxt* c_context): + cdef _ResolverContext context + cdef _ParserInput parser_input + if c_context._private is NULL: + return __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context) + + if c_url is NULL: + url = None + else: + url = funicode(c_url) + if c_pubid is NULL: + pubid = None + else: + pubid = funicode(c_pubid) + + context = <_ResolverContext>c_context._private + try: + parser_input = context._resolvers.resolve(url, pubid, context) + except Exception, e: + print e + if parser_input is None: + return NULL + context._storage.add(parser_input) + return parser_input._input + +cdef xmlparser.xmlExternalEntityLoader __DEFAULT_ENTITY_LOADER +__DEFAULT_ENTITY_LOADER = xmlparser.xmlGetExternalEntityLoader() + +xmlparser.xmlSetExternalEntityLoader(_local_resolver) + +############################################################ +## Parsers +############################################################ + cdef class BaseParser: cdef _ErrorLog _error_log + cdef readonly object resolvers + cdef _ResolverContext _context def __init__(self): self._error_log = _ErrorLog() + self.resolvers = _ResolverRegistry() + self._context = None property error_log: def __get__(self): return self._error_log.copy() + cdef _initContext(self, xmlParserCtxt* c_ctxt): + cdef _ResolverContext context + __GLOBAL_PARSER_CONTEXT._initParserDict(c_ctxt) + context = _ResolverContext() + context._ctxt = c_ctxt + context._resolvers = self.resolvers + context._storage = _TempStore() + self._context = context + c_ctxt._private = 
context + + cdef _clearContext(self): + self._context = None + cdef xmlDoc* _handleResult(self, xmlParserCtxt* ctxt, xmlDoc* result) except NULL: if ctxt.wellFormed: @@ -80,7 +221,6 @@ raise XMLSyntaxError return result - ############################################################ ## XML parser ############################################################ @@ -154,10 +294,10 @@ if pctxt is NULL: pctxt = self._createContext() self._memory_parser_ctxt = pctxt - - __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) + self._initContext(pctxt) result = xmlparser.xmlCtxtReadDoc( pctxt, _cstr(text_utf), NULL, NULL, self._parse_options) + self._clearContext() return self._handleResult(pctxt, result) cdef xmlDoc* _parseDocFromFile(self, char* filename) except NULL: @@ -168,10 +308,10 @@ if pctxt is NULL: pctxt = self._createContext() self._file_parser_ctxt = pctxt - - __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) + self._initContext(pctxt) result = xmlparser.xmlCtxtReadFile( pctxt, filename, NULL, self._parse_options) + self._clearContext() if result is NULL: if pctxt.lastError.domain == xmlerror.XML_FROM_IO: self._error_log.disconnect() @@ -203,6 +343,8 @@ else: raise TypeError, "Invalid parser" +def get_default_parser(): + return __DEFAULT_PARSER ############################################################ ## HTML parser @@ -264,9 +406,10 @@ self._error_log.disconnect() raise ParserError, "Failed to create parser context" self._memory_parser_ctxt = pctxt - __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) + self._initContext(pctxt) result = htmlparser.htmlCtxtReadDoc( pctxt, c_text, NULL, NULL, self._parse_options) + self._clearContext() return self._handleResult(pctxt, result) cdef xmlDoc* _parseDocFromFile(self, char* filename) except NULL: @@ -284,9 +427,10 @@ raise IOError, "Could not open file %s" % filename raise ParserError, "Failed to create parser context" self._file_parser_ctxt = pctxt - __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) + self._initContext(pctxt) result = 
htmlparser.htmlCtxtReadFile( pctxt, filename, NULL, self._parse_options) + self._clearContext() return self._handleResult(pctxt, result) cdef HTMLParser __DEFAULT_HTML_PARSER Modified: lxml/branch/resolver-new/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/resolver-new/src/lxml/tests/test_etree.py (original) +++ lxml/branch/resolver-new/src/lxml/tests/test_etree.py Thu Apr 20 18:24:25 2006 @@ -49,7 +49,26 @@ f = open(fileInTestDir('test_broken.xml'), 'r') self.assertRaises(SyntaxError, parse, f) f.close() - + + def test_resolve_string_dtd(self): + parse = self.etree.parse + parser = self.etree.XMLParser(dtd_validation=True) + assertEqual = self.assertEqual + test_url = u"__nosuch.dtd" + + class MyResolver(self.etree.Resolver): + def resolve(self, url, id, context): + assertEqual(url, test_url) + return self.resolve_string( + u'' % url, context) + + parser.resolvers.add(MyResolver()) + + xml = u'&myentity;' % test_url + tree = parse(StringIO(xml), parser) + root = tree.getroot() + self.assertEquals(root.text, test_url) + # TypeError in etree, AssertionError in ElementTree; def test_setitem_assert(self): Element = self.etree.Element Modified: lxml/branch/resolver-new/src/lxml/xmlparser.pxd ============================================================================== --- lxml/branch/resolver-new/src/lxml/xmlparser.pxd (original) +++ lxml/branch/resolver-new/src/lxml/xmlparser.pxd Thu Apr 20 18:24:25 2006 @@ -1,6 +1,9 @@ from tree cimport xmlDoc, xmlDict from xmlerror cimport xmlError +cdef extern from "libxml/tree.h": + ctypedef struct xmlParserInput + cdef extern from "libxml/parser.h": cdef xmlDict* xmlDictCreate() @@ -10,6 +13,7 @@ ctypedef struct xmlParserCtxt: xmlDoc* myDoc xmlDict* dict + void* _private int wellFormed xmlError lastError @@ -42,3 +46,17 @@ int options) cdef xmlDoc* xmlCtxtReadFile(xmlParserCtxt* ctxt, char* filename, char* encoding, int options) + + # entity loaders + + 
ctypedef xmlParserInput* (*xmlExternalEntityLoader)(char * URL, + char * ID, + xmlParserCtxt* context) + cdef xmlExternalEntityLoader xmlGetExternalEntityLoader() + cdef void xmlSetExternalEntityLoader(xmlExternalEntityLoader f) + +cdef extern from "libxml/parserInternals.h": + cdef xmlParserInput* xmlNewStringInputStream(xmlParserCtxt* ctxt, + char* buffer) + cdef xmlParserInput* xmlNewInputFromFile(xmlParserCtxt* ctxt, + char* filename) Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Thu Apr 20 18:24:25 2006 @@ -40,8 +40,8 @@ cdef object _extension_functions cdef object _utf_refs # for exception handling and temporary reference keeping: - cdef object _temp_elements - cdef object _temp_docs + cdef _TempStore _temp_elements + cdef _TempStore _temp_docs cdef object _exc_info def __init__(self, namespaces, extensions): @@ -68,8 +68,8 @@ self._registered_namespaces = [] self._registered_extensions = [] self._extension_functions = {} - self._temp_elements = {} - self._temp_docs = {} + self._temp_elements = _TempStore() + self._temp_docs = _TempStore() cdef object _to_utf(self, s): "Convert to UTF-8 and keep a reference to the encoded string" @@ -174,8 +174,8 @@ cdef _release_temp_refs(self): "Free temporarily referenced objects from this context." - python.PyDict_Clear(self._temp_elements) - python.PyDict_Clear(self._temp_docs) + self._temp_elements.clear() + self._temp_docs.clear() cdef _hold(self, obj): """A way to temporarily hold references to nodes in the evaluator. 
@@ -193,9 +193,9 @@ if isinstance(o, _NodeBase): element = <_NodeBase>o #print "Holding element:", element._c_node - python.PyDict_SetItem(self._temp_elements, id(element), element) + self._temp_elements.add(element) #print "Holding document:", element._doc._c_doc - python.PyDict_SetItem(self._temp_docs, id(element._doc), element._doc) + self._temp_docs.add(element._doc) ################################################################################ From scoder at codespeak.net Thu Apr 20 18:51:19 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 18:51:20 2006 Subject: [Lxml-checkins] r26060 - in lxml/branch/resolver-new/src/lxml: . tests Message-ID: <20060420165119.A945710073@code0.codespeak.net> Author: scoder Date: Thu Apr 20 18:51:18 2006 New Revision: 26060 Modified: lxml/branch/resolver-new/src/lxml/parser.pxi lxml/branch/resolver-new/src/lxml/tests/test_etree.py Log: make exception raising work from within resolvers Modified: lxml/branch/resolver-new/src/lxml/parser.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/parser.pxi (original) +++ lxml/branch/resolver-new/src/lxml/parser.pxi Thu Apr 20 18:51:18 2006 @@ -72,6 +72,16 @@ cdef xmlparser.xmlParserCtxt* _ctxt cdef _ResolverRegistry _resolvers cdef _TempStore _storage + cdef object _exc_info + + cdef _store_exception(self, e): + self._exc_info = sys.exc_info() + + cdef _check_exception(self): + _exc_info = self._exc_info + if _exc_info is not None: + type, value, traceback = _exc_info + raise type, value, traceback cdef class Resolver: def resolve(self, system_url, public_id, _ResolverContext context not None): @@ -154,20 +164,22 @@ if c_context._private is NULL: return __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context) - if c_url is NULL: - url = None - else: - url = funicode(c_url) - if c_pubid is NULL: - pubid = None - else: - pubid = funicode(c_pubid) - - context = <_ResolverContext>c_context._private try: 
+ if c_url is NULL: + url = None + else: + url = funicode(c_url) + if c_pubid is NULL: + pubid = None + else: + pubid = funicode(c_pubid) + + context = <_ResolverContext>c_context._private + parser_input = context._resolvers.resolve(url, pubid, context) except Exception, e: - print e + context._store_exception(e) + return NULL if parser_input is None: return NULL context._storage.add(parser_input) @@ -206,6 +218,8 @@ c_ctxt._private = context cdef _clearContext(self): + if self._context is not None: + self._context._check_exception() self._context = None cdef xmlDoc* _handleResult(self, xmlParserCtxt* ctxt, Modified: lxml/branch/resolver-new/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/resolver-new/src/lxml/tests/test_etree.py (original) +++ lxml/branch/resolver-new/src/lxml/tests/test_etree.py Thu Apr 20 18:51:18 2006 @@ -69,6 +69,23 @@ root = tree.getroot() self.assertEquals(root.text, test_url) + def test_resolve_error(self): + parse = self.etree.parse + parser = self.etree.XMLParser(dtd_validation=True) + test_url = u"__nosuch.dtd" + + class _LocalException(Exception): + pass + + class MyResolver(self.etree.Resolver): + def resolve(self, url, id, context): + raise _LocalException + + parser.resolvers.add(MyResolver()) + + xml = u'&myentity;' + self.assertRaises(_LocalException, parse, StringIO(xml), parser) + # TypeError in etree, AssertionError in ElementTree; def test_setitem_assert(self): Element = self.etree.Element From scoder at codespeak.net Thu Apr 20 21:13:37 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 21:13:38 2006 Subject: [Lxml-checkins] r26063 - lxml/branch/resolver-new/src/lxml Message-ID: <20060420191337.452A910080@code0.codespeak.net> Author: scoder Date: Thu Apr 20 21:13:36 2006 New Revision: 26063 Modified: lxml/branch/resolver-new/src/lxml/parser.pxi lxml/branch/resolver-new/src/lxml/xmlparser.pxd Log: some clean up, free input stream 
in case of exceptions Modified: lxml/branch/resolver-new/src/lxml/parser.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/parser.pxi (original) +++ lxml/branch/resolver-new/src/lxml/parser.pxi Thu Apr 20 21:13:36 2006 @@ -179,6 +179,8 @@ parser_input = context._resolvers.resolve(url, pubid, context) except Exception, e: context._store_exception(e) + if parser_input is not None and parser_input._input is not NULL: + xmlparser.xmlFreeInputStream(parser_input._input) return NULL if parser_input is None: return NULL @@ -217,7 +219,7 @@ self._context = context c_ctxt._private = context - cdef _clearContext(self): + cdef _cleanupContext(self): if self._context is not None: self._context._check_exception() self._context = None @@ -311,7 +313,7 @@ self._initContext(pctxt) result = xmlparser.xmlCtxtReadDoc( pctxt, _cstr(text_utf), NULL, NULL, self._parse_options) - self._clearContext() + self._cleanupContext() return self._handleResult(pctxt, result) cdef xmlDoc* _parseDocFromFile(self, char* filename) except NULL: @@ -325,7 +327,7 @@ self._initContext(pctxt) result = xmlparser.xmlCtxtReadFile( pctxt, filename, NULL, self._parse_options) - self._clearContext() + self._cleanupContext() if result is NULL: if pctxt.lastError.domain == xmlerror.XML_FROM_IO: self._error_log.disconnect() @@ -423,7 +425,7 @@ self._initContext(pctxt) result = htmlparser.htmlCtxtReadDoc( pctxt, c_text, NULL, NULL, self._parse_options) - self._clearContext() + self._cleanupContext() return self._handleResult(pctxt, result) cdef xmlDoc* _parseDocFromFile(self, char* filename) except NULL: @@ -444,7 +446,7 @@ self._initContext(pctxt) result = htmlparser.htmlCtxtReadFile( pctxt, filename, NULL, self._parse_options) - self._clearContext() + self._cleanupContext() return self._handleResult(pctxt, result) cdef HTMLParser __DEFAULT_HTML_PARSER Modified: lxml/branch/resolver-new/src/lxml/xmlparser.pxd 
============================================================================== --- lxml/branch/resolver-new/src/lxml/xmlparser.pxd (original) +++ lxml/branch/resolver-new/src/lxml/xmlparser.pxd Thu Apr 20 21:13:36 2006 @@ -34,8 +34,8 @@ XML_PARSE_NSCLEAN = 8192 # remove redundant namespaces declarations XML_PARSE_NOCDATA = 16384 # merge CDATA as text nodes XML_PARSE_NOXINCNODE = 32768 # do not generate XINCLUDE START/END nodes -# libxml2 2.6.21+ only: -# XML_PARSE_COMPACT = 65536 # compact small text nodes + # libxml2 2.6.21+ only: + #XML_PARSE_COMPACT = 65536 # compact small text nodes cdef void xmlInitParser() cdef xmlParserCtxt* xmlNewParserCtxt() @@ -47,7 +47,7 @@ cdef xmlDoc* xmlCtxtReadFile(xmlParserCtxt* ctxt, char* filename, char* encoding, int options) - # entity loaders +# entity loaders: ctypedef xmlParserInput* (*xmlExternalEntityLoader)(char * URL, char * ID, @@ -60,3 +60,4 @@ char* buffer) cdef xmlParserInput* xmlNewInputFromFile(xmlParserCtxt* ctxt, char* filename) + cdef void xmlFreeInputStream(xmlParserInput* input) From scoder at codespeak.net Thu Apr 20 21:24:46 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 20 21:24:47 2006 Subject: [Lxml-checkins] r26064 - lxml/branch/resolver-new/src/lxml Message-ID: <20060420192446.6B08E10080@code0.codespeak.net> Author: scoder Date: Thu Apr 20 21:24:45 2006 New Revision: 26064 Modified: lxml/branch/resolver-new/src/lxml/parser.pxi Log: removed some race conditions related to exception handling Modified: lxml/branch/resolver-new/src/lxml/parser.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/parser.pxi (original) +++ lxml/branch/resolver-new/src/lxml/parser.pxi Thu Apr 20 21:24:45 2006 @@ -219,7 +219,7 @@ self._context = context c_ctxt._private = context - cdef _cleanupContext(self): + cdef _cleanupContextAndRaisePending(self): if self._context is not None: self._context._check_exception() self._context = None @@ 
-232,7 +232,6 @@ # free broken document tree.xmlFreeDoc(result) result = NULL - self._error_log.disconnect() if result is NULL: raise XMLSyntaxError return result @@ -313,7 +312,8 @@ self._initContext(pctxt) result = xmlparser.xmlCtxtReadDoc( pctxt, _cstr(text_utf), NULL, NULL, self._parse_options) - self._cleanupContext() + self._error_log.disconnect() + self._cleanupContextAndRaisePending() return self._handleResult(pctxt, result) cdef xmlDoc* _parseDocFromFile(self, char* filename) except NULL: @@ -327,10 +327,10 @@ self._initContext(pctxt) result = xmlparser.xmlCtxtReadFile( pctxt, filename, NULL, self._parse_options) - self._cleanupContext() + self._error_log.disconnect() + self._cleanupContextAndRaisePending() if result is NULL: if pctxt.lastError.domain == xmlerror.XML_FROM_IO: - self._error_log.disconnect() raise IOError, "Could not open file %s" % filename return self._handleResult(pctxt, result) @@ -425,7 +425,8 @@ self._initContext(pctxt) result = htmlparser.htmlCtxtReadDoc( pctxt, c_text, NULL, NULL, self._parse_options) - self._cleanupContext() + self._error_log.disconnect() + self._cleanupContextAndRaisePending() return self._handleResult(pctxt, result) cdef xmlDoc* _parseDocFromFile(self, char* filename) except NULL: @@ -446,7 +447,8 @@ self._initContext(pctxt) result = htmlparser.htmlCtxtReadFile( pctxt, filename, NULL, self._parse_options) - self._cleanupContext() + self._error_log.disconnect() + self._cleanupContextAndRaisePending() return self._handleResult(pctxt, result) cdef HTMLParser __DEFAULT_HTML_PARSER From scoder at codespeak.net Fri Apr 21 11:07:44 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 21 11:07:46 2006 Subject: [Lxml-checkins] r26078 - lxml/trunk Message-ID: <20060421090744.E5ED71007A@code0.codespeak.net> Author: scoder Date: Fri Apr 21 11:07:44 2006 New Revision: 26078 Modified: lxml/trunk/INSTALL.txt Log: slight clarification Modified: lxml/trunk/INSTALL.txt 
============================================================================== --- lxml/trunk/INSTALL.txt (original) +++ lxml/trunk/INSTALL.txt Fri Apr 21 11:07:44 2006 @@ -32,9 +32,10 @@ .. _Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ -Note that Pyrex up to version 0.9.4 has known problems when compiling lxml -with gcc 4.0 or Python 2.4. Do not use it. If you want to build lxml from -non-release sources, please install Pyrex version 0.9.4.1 or later. +Note that Pyrex up to and including version 0.9.4 has known problems when +compiling lxml with gcc 4.0 or Python 2.4. Do not use it. If you want to +build lxml from non-release sources, please install Pyrex version 0.9.4.1 or +later. If you have read these instructions and still cannot manage to install lxml, you can check the archives of the `mailing list`_ to see if your problem is From scoder at codespeak.net Fri Apr 21 11:12:47 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 21 11:12:49 2006 Subject: [Lxml-checkins] r26079 - lxml/branch/lxml-0.9.x Message-ID: <20060421091247.8C54F1007B@code0.codespeak.net> Author: scoder Date: Fri Apr 21 11:12:46 2006 New Revision: 26079 Modified: lxml/branch/lxml-0.9.x/INSTALL.txt Log: merged updated INSTALL.txt from trunk Modified: lxml/branch/lxml-0.9.x/INSTALL.txt ============================================================================== --- lxml/branch/lxml-0.9.x/INSTALL.txt (original) +++ lxml/branch/lxml-0.9.x/INSTALL.txt Fri Apr 21 11:12:46 2006 @@ -8,15 +8,23 @@ You need libxml2 and libxslt, in particular: -* libxml 2.6.16 (newer versions should work). It can be found here: +* libxml 2.6.16 (newer versions are recommended). It can be found here: http://xmlsoft.org/downloads.html -* libxslt 1.1.12 (newer versions should work). It can be found here: +* libxslt 1.1.12 (newer versions are recommended). It can be found here: http://xmlsoft.org/XSLT/downloads.html -See below for instructions how to get these for Windows. 
On MacOS-X 10.4, you -can use the installed system libraries and the binary egg distribution of -lxml. +For Windows, there is a `binary distribution`_ of libxml2 and libxslt. Note +that you need both libxml2 and libxslt, as well as iconv and zlib. You can +then install the `binary egg distribution`_ of lxml (see below). + +.. _`binary distribution`: http://www.zlatkovic.com/libxml.en.html +.. _`binary egg distribution`: http://cheeseshop.python.org/pypi/lxml + +On MacOS-X 10.4, you can use the installed system libraries and the binary egg +distribution of lxml. Note that the libxslt version on this system is older +than the required version above. While there were not any bug reports so far, +you may still encounter certain differences in behaviour in rare cases. If you want to build lxml from SVN, you also need Pyrex_. If you are using a released version of lxml, it should come with the generated C file in the @@ -24,8 +32,10 @@ .. _Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ -See also the notes on building with gcc 4.0 below if you are having -trouble with Pyrex. +Note that Pyrex up to and including version 0.9.4 has known problems when +compiling lxml with gcc 4.0 or Python 2.4. Do not use it. If you want to +build lxml from non-release sources, please install Pyrex version 0.9.4.1 or +later. If you have read these instructions and still cannot manage to install lxml, you can check the archives of the `mailing list`_ to see if your problem is @@ -44,8 +54,8 @@ .. _easy_install: http://peak.telecommunity.com/DevCenter/EasyInstall This has been reported to work on Linux, MacOS-X 10.4 and Windows, as long as -libxml2 and libxslt are installed. To compile and install lxml without -easy_install, download the source tar-ball, unpack it and type:: +libxml2 and libxslt are properly installed. 
To compile and install lxml +without easy_install, download the source tar-ball, unpack it and type:: python setup.py install @@ -62,78 +72,6 @@ import lxml.etree and play with it. -Installation on Windows ------------------------ - -As always, installation on Windows is different. If you do not want to go -through the hassle of compiling everything by hand, you can use the binary -distribution of libxml2 and libxslt. It is available here: - -http://www.zlatkovic.com/libxml.en.html - -Note that you need both libxml2 and libxslt, as well as iconv and zlib. You -can then download a binary version of lxml 0.9 for Python 2.4 from the -following address: - -http://carcass.dhs.org/lxml-0.9.win32-py2.4.exe - -or the egg distribution from - -http://cheeseshop.python.org/pypi/lxml - -The egg can directly be installed using easy_install_. Both builds were kindly -contributed by Steve Howe. If they do not work for you, feel free to report to -the mailing list. - - -Building lxml with gcc 4.0 or Python 2.4 ----------------------------------------- - -Pyrex 0.9.3.1 generates C code that gcc 4.0 does not accept. Pending an -official release of a version of Pyrex that does work with gcc 4.0, the lxml -project currently provides an updated version of Pyrex in its Subversion -repository: - -http://codespeak.net/svn/lxml/pyrex/ - -To install it, you can just download one of the following files: - -http://codespeak.net/svn/lxml/pyrex/dist/Pyrex-0.9.3.1.tar.gz - -http://codespeak.net/svn/lxml/pyrex/dist/Pyrex-0.9.3.1-1.src.rpm - -It is based on Pyrex 0.9.3.1 and contains a number of patches that make lxml -compile and appear to work with gcc 4.0. If you use this version, you can -simply skip the rest of the section. In case you want to apply them yourself, -the first one is: - -http://codespeak.net/lxml/Pyrex-0.9.3-gcc4.patch - -Some Linux distributions such as Fedora Core 4 and Ubuntu Linux may -already have most of this applied. 
In that case, this smaller patch -may be applicable to make lxml compile properly: - -http://codespeak.net/lxml/Pyrex-0.9.3-gcc4-small.patch - -It may however actually be that at the time you read this, this extra patch -has been applied by the distributions as well. You may still encounter the -following problem when building the extension on Python 2.4:: - - TypeError: swig_sources() takes exactly 2 arguments (3 given) - -To fix this, look for the following line in Pyrex/Distutils/build_ext.py -(around line 35):: - - def swig_sources (self, sources): - -and change it to:: - - def swig_sources (self, sources, *otherargs): - -The above install files have these changes applied. It should do no harm if -you install them instead of the official Pyrex version. - - Running the tests and reporting errors -------------------------------------- @@ -146,7 +84,7 @@ above), as it searches the "src" directory. You can use the following one-step command to trigger an in-place build and test it:: - make test + make clean test To run the ElementTree and cElementTree compatibility tests, make sure you have lxml on your PYTHONPATH first, then run:: From scoder at codespeak.net Fri Apr 21 13:08:23 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 21 13:08:25 2006 Subject: [Lxml-checkins] r26083 - lxml/branch/resolver-new/src/lxml Message-ID: <20060421110823.05E581007B@code0.codespeak.net> Author: scoder Date: Fri Apr 21 13:08:22 2006 New Revision: 26083 Modified: lxml/branch/resolver-new/src/lxml/tree.pxd lxml/branch/resolver-new/src/lxml/xslt.pxd lxml/branch/resolver-new/src/lxml/xslt.pxi Log: initial implementation for replacing XSLT document loader, some debug output Modified: lxml/branch/resolver-new/src/lxml/tree.pxd ============================================================================== --- lxml/branch/resolver-new/src/lxml/tree.pxd (original) +++ lxml/branch/resolver-new/src/lxml/tree.pxd Fri Apr 21 13:08:22 2006 @@ -4,6 +4,7 @@ ctypedef struct 
FILE cdef int strlen(char* s) cdef int strcmp(char* s1, char* s2) + cdef int strncmp(char* s1, char* s2, int n) cdef extern from "libxml/encoding.h": ctypedef struct xmlCharEncodingHandler Modified: lxml/branch/resolver-new/src/lxml/xslt.pxd ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxd (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxd Fri Apr 21 13:08:22 2006 @@ -1,18 +1,16 @@ -from tree cimport xmlDoc +from tree cimport xmlDoc, xmlDict from xpath cimport xmlXPathContext, xmlXPathFunction cdef extern from "libxslt/xsltInternals.h": ctypedef struct xsltStylesheet: - pass + xmlDoc* doc ctypedef struct xsltTransformContext: xmlXPathContext* xpathCtxt + void* _private cdef xsltStylesheet* xsltParseStylesheetDoc(xmlDoc* doc) cdef void xsltFreeStylesheet(xsltStylesheet* sheet) - -#cdef extern from "libxslt/xslt.h": -# pass cdef extern from "libxslt/extensions.h": cdef int xsltRegisterExtFunction(xsltTransformContext* ctxt, @@ -20,6 +18,19 @@ char * URI, xmlXPathFunction function) +cdef extern from "libxslt/documents.h": + ctypedef enum xsltLoadType: + XSLT_LOAD_START + XSLT_LOAD_STYLESHEET + XSLT_LOAD_DOCUMENT + + ctypedef xmlDoc* (*xsltDocLoaderFunc)(char* URI, xmlDict* dict, + int options, + void* ctxt, + xsltLoadType type) + cdef xsltDocLoaderFunc xsltDocDefaultLoader + cdef void xsltSetLoaderFunc (xsltDocLoaderFunc f) + cdef extern from "libxslt/transform.h": cdef xmlDoc* xsltApplyStylesheet(xsltStylesheet* style, xmlDoc* doc, char** params) Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Fri Apr 21 13:08:22 2006 @@ -270,9 +270,10 @@ # of libxml2) if doc._c_doc.URL is not NULL: c_doc.URL = tree.xmlStrdup(doc._c_doc.URL) - + c_style = xslt.xsltParseStylesheetDoc(c_doc) if c_style is NULL: + 
tree.xmlFreeDoc(c_doc) raise XSLTParseError, "Cannot parse style sheet" self._c_style = c_style @@ -283,6 +284,9 @@ # this cleans up copy of doc as well xslt.xsltFreeStylesheet(self._c_style) + cdef xmlDoc* _copyXslDoc(self): + return tree.xmlCopyDoc(self._c_style.doc, 1) + def __call__(self, _input, **_kw): cdef _Document input_doc cdef _NodeBase root_node @@ -303,6 +307,7 @@ if transform_ctxt is NULL: _destroyFakeDoc(input_doc._c_doc, c_doc) raise XSLTApplyError, "Error preparing stylesheet run" + transform_ctxt._private = self if _kw: # allocate space for parameters @@ -375,6 +380,26 @@ ################################################################################ +# XSLT document resolvers + +cdef xslt.xsltDocLoaderFunc __xsltDocDefaultLoader +__xsltDocDefaultLoader = xslt.xsltDocDefaultLoader + +cdef xmlDoc* _doc_loader(char* uri, tree.xmlDict* dict, int options, void* ctxt, + xslt.xsltLoadType type): + if tree.strncmp(uri, "__STRING__XSLT__", 16) != 0: + # use default loader for stylesheet + print uri, type, options + if type == xslt.XSLT_LOAD_DOCUMENT: + print uri + else: + print type, uri + return __xsltDocDefaultLoader(uri, dict, options, ctxt, type) + +xslt.xsltSetLoaderFunc(_doc_loader) + + +################################################################################ # XPath cdef class XPathContext(BaseContext): From scoder at codespeak.net Fri Apr 21 13:10:21 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 21 13:10:23 2006 Subject: [Lxml-checkins] r26084 - in lxml/branch/resolver-new/src/lxml: . 
tests Message-ID: <20060421111021.BC35B1007D@code0.codespeak.net> Author: scoder Date: Fri Apr 21 13:10:20 2006 New Revision: 26084 Modified: lxml/branch/resolver-new/src/lxml/tests/test_xslt.py lxml/branch/resolver-new/src/lxml/xslt.pxi Log: fix document('') for XSLTs read from strings, includes test case Modified: lxml/branch/resolver-new/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/branch/resolver-new/src/lxml/tests/test_xslt.py (original) +++ lxml/branch/resolver-new/src/lxml/tests/test_xslt.py Fri Apr 21 13:10:20 2006 @@ -335,6 +335,25 @@ self.assertEquals(self._rootstring(result), 'X') + def test_xslt_document_XML(self): + # make sure document('') works from loaded files + xslt = etree.XSLT(etree.XML("""\ + + + + + +""")) + result = xslt(etree.XML('')) + root = result.getroot() + self.assertEquals(root.tag, + 'test') + self.assertEquals(root[0].tag, + 'test') + self.assertEquals(root[0][0].tag, + '{http://www.w3.org/1999/XSL/Transform}copy-of') + def test_xslt_document_parse(self): # make sure document('') works from loaded files xslt = etree.XSLT(etree.parse(fileInTestDir("test-document.xslt"))) Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Fri Apr 21 13:10:20 2006 @@ -249,6 +249,7 @@ """ cdef XSLTContext _context cdef xslt.xsltStylesheet* _c_style + cdef object _doc_url_utf def __init__(self, xslt_input, extensions=None): # make a copy of the document as stylesheet needs to assume it @@ -268,8 +269,14 @@ # XXX work around bug in xmlCopyDoc (fix is upcoming in new release # of libxml2) + if c_doc.URL is not NULL and c_doc.URL != doc._c_doc.URL: + tree.xmlFree(c_doc.URL) if doc._c_doc.URL is not NULL: + self._doc_url_utf = doc._c_doc.URL c_doc.URL = tree.xmlStrdup(doc._c_doc.URL) + else: + 
self._doc_url_utf = "__STRING__XSLT__%s" % id(self) + c_doc.URL = tree.xmlStrdup(_cstr(self._doc_url_utf)) c_style = xslt.xsltParseStylesheetDoc(c_doc) if c_style is NULL: From scoder at codespeak.net Fri Apr 21 13:17:53 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 21 13:17:55 2006 Subject: [Lxml-checkins] r26086 - in lxml/trunk/src/lxml: . tests Message-ID: <20060421111753.DD79410082@code0.codespeak.net> Author: scoder Date: Fri Apr 21 13:17:52 2006 New Revision: 26086 Modified: lxml/trunk/src/lxml/tests/test_xslt.py lxml/trunk/src/lxml/xslt.pxi Log: merged document('') fixes from resolver-new branch Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Fri Apr 21 13:17:52 2006 @@ -335,6 +335,25 @@ self.assertEquals(self._rootstring(result), 'X') + def test_xslt_document_XML(self): + # make sure document('') works from loaded files + xslt = etree.XSLT(etree.XML("""\ + + + + + +""")) + result = xslt(etree.XML('')) + root = result.getroot() + self.assertEquals(root.tag, + 'test') + self.assertEquals(root[0].tag, + 'test') + self.assertEquals(root[0][0].tag, + '{http://www.w3.org/1999/XSL/Transform}copy-of') + def test_xslt_document_parse(self): # make sure document('') works from loaded files xslt = etree.XSLT(etree.parse(fileInTestDir("test-document.xslt"))) Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Fri Apr 21 13:17:52 2006 @@ -249,6 +249,7 @@ """ cdef XSLTContext _context cdef xslt.xsltStylesheet* _c_style + cdef object _doc_url_utf def __init__(self, xslt_input, extensions=None): # make a copy of the document as stylesheet needs to assume it @@ -268,9 +269,15 @@ # XXX work around bug in xmlCopyDoc (fix is upcoming in 
new release # of libxml2) + if c_doc.URL is not NULL and c_doc.URL != doc._c_doc.URL: + tree.xmlFree(c_doc.URL) if doc._c_doc.URL is not NULL: + self._doc_url_utf = doc._c_doc.URL c_doc.URL = tree.xmlStrdup(doc._c_doc.URL) - + else: + self._doc_url_utf = "__STRING__XSLT__%s" % id(self) + c_doc.URL = tree.xmlStrdup(_cstr(self._doc_url_utf)) + c_style = xslt.xsltParseStylesheetDoc(c_doc) if c_style is NULL: raise XSLTParseError, "Cannot parse style sheet" From scoder at codespeak.net Fri Apr 21 13:20:30 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 21 13:20:31 2006 Subject: [Lxml-checkins] r26088 - lxml/trunk Message-ID: <20060421112030.578CE1007F@code0.codespeak.net> Author: scoder Date: Fri Apr 21 13:20:29 2006 New Revision: 26088 Modified: lxml/trunk/CHANGES.txt Log: updated CHANGES.txt to mark document('') bug fixed Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Apr 21 13:20:29 2006 @@ -24,6 +24,8 @@ Bugs fixed ---------- +* document('') now works in XSLT documents parsed from strings + * Crash in XMLSchema and RelaxNG when passing non-schema documents * Memory leak in RelaxNG() when RelaxNGParseError is raised From scoder at codespeak.net Fri Apr 21 16:27:15 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 21 16:27:17 2006 Subject: [Lxml-checkins] r26103 - lxml/branch/resolver-new/src/lxml Message-ID: <20060421142715.58BF810086@code0.codespeak.net> Author: scoder Date: Fri Apr 21 16:27:13 2006 New Revision: 26103 Added: lxml/branch/resolver-new/src/lxml/docloader.pxi Modified: lxml/branch/resolver-new/src/lxml/etree.pyx lxml/branch/resolver-new/src/lxml/parser.pxi lxml/branch/resolver-new/src/lxml/tree.pxd lxml/branch/resolver-new/src/lxml/xslt.pxd lxml/branch/resolver-new/src/lxml/xslt.pxi Log: moved document loader API into docloader.pxi, doc loader support in XSLT, merged with 
parser doc loader API, store parser reference in _Documents to be reused in related documents (XSLT etc.) Added: lxml/branch/resolver-new/src/lxml/docloader.pxi ============================================================================== --- (empty file) +++ lxml/branch/resolver-new/src/lxml/docloader.pxi Fri Apr 21 16:27:13 2006 @@ -0,0 +1,80 @@ +# Custom resolver API + +ctypedef enum _InputDocumentDataType: + PARSER_DATA_STRING + PARSER_DATA_FILENAME + +cdef class _InputDocument: + cdef _InputDocumentDataType _type + cdef object _data_utf + +cdef class Resolver: + "This is the base class of all resolvers." + def resolve(self, system_url, public_id, context): + return None + + def resolve_string(self, string, context): + cdef _InputDocument doc_ref + doc_ref = _InputDocument() + doc_ref._type = PARSER_DATA_STRING + doc_ref._data_utf = _utf8(string) + return doc_ref + + def resolve_filename(self, filename, context): + cdef _InputDocument doc_ref + doc_ref = _ParserInput() + doc_ref._type = PARSER_DATA_FILENAME + doc_ref._data_utf = _utf8(filename) + return doc_ref + +cdef class _ResolverRegistry: + cdef object _resolvers + cdef Resolver _default_resolver + def __init__(self, Resolver default_resolver=None): + try: + self._resolvers = set() + except NameError: + from sets import Set + self._resolvers = Set() + self._default_resolver = default_resolver + + def add(self, Resolver resolver not None): + """Register a resolver. + + For each requested entity, the 'resolve' method of the resolver will + be called and the result will be passed to the parser. If this method + returns None, the request will be delegated to other resolvers or the + default resolver. The resolvers will be tested in an arbitrary order + until the first match is found. 
+ """ + self._resolvers.add(resolver) + + def remove(self, resolver): + self._resolvers.discard(resolver) + + def resolve(self, system_url, public_id, context): + for resolver in self._resolvers: + result = resolver.resolve(system_url, public_id, context) + if result is not None: + return result + if self._default_resolver is None: + return None + return self._default_resolver.resolve(system_url, public_id, context) + +cdef class _ResolverContext: + cdef _ResolverRegistry _resolvers + cdef _TempStore _storage + cdef object _exc_info + def __init__(self, _ResolverRegistry resolvers not None): + self._resolvers = resolvers + self._storage = _TempStore() + self._exc_info = None + + cdef _store_exception(self, e): + self._exc_info = sys.exc_info() + + cdef _check_exception(self): + _exc_info = self._exc_info + if _exc_info is not None: + type, value, traceback = _exc_info + raise type, value, traceback Modified: lxml/branch/resolver-new/src/lxml/etree.pyx ============================================================================== --- lxml/branch/resolver-new/src/lxml/etree.pyx (original) +++ lxml/branch/resolver-new/src/lxml/etree.pyx Fri Apr 21 16:27:13 2006 @@ -63,6 +63,7 @@ """ cdef int _ns_counter cdef xmlDoc* _c_doc + cdef object _parser def __dealloc__(self): # if there are no more references to the document, it is safe @@ -150,21 +151,22 @@ if filename is None: filename = source # open filename - c_doc = _parseDocFromFile(filename, parser) - return _documentFactory(c_doc) + c_doc = _parseDocFromFile(_utf8(filename), parser) + return _documentFactory(c_doc, parser) cdef _Document _parseMemoryDocument(text, parser): cdef xmlDoc* c_doc if python.PyUnicode_Check(text): text = _stripDeclaration(_utf8(text)) c_doc = _parseDoc(text, parser) - return _documentFactory(c_doc) + return _documentFactory(c_doc, parser) -cdef _Document _documentFactory(xmlDoc* c_doc): +cdef _Document _documentFactory(xmlDoc* c_doc, parser): cdef _Document result result = _Document() 
result._c_doc = c_doc result._ns_counter = 0 + result._parser = parser return result # to help with debugging @@ -438,7 +440,7 @@ fake_c_doc = _fakeRootDoc(doc._c_doc, self._c_node) c_doc = tree.xmlCopyDoc(fake_c_doc, 1) # recursive copy _destroyFakeDoc(doc._c_doc, fake_c_doc) - doc = _documentFactory(c_doc) + doc = _documentFactory(c_doc, doc._parser) return doc.getroot() def set(self, key, value): @@ -1154,7 +1156,7 @@ c_doc = _newDoc() c_node = _createElement(c_doc, name_utf, attrib, _extra) tree.xmlDocSetRootElement(c_doc, c_node) - doc = _documentFactory(c_doc) + doc = _documentFactory(c_doc, None) # add namespaces to node if necessary doc._setNodeNamespaces(c_node, ns_utf, nsmap) return _elementFactory(doc, c_node) @@ -1162,13 +1164,15 @@ def Comment(text=None): cdef _Document doc cdef xmlNode* c_node + cdef xmlDoc* c_doc if text is None: text = ' ' else: text = ' %s ' % _utf8(text) - doc = _documentFactory( _newDoc() ) - c_node = _createComment(doc._c_doc, text) - tree.xmlAddChild(doc._c_doc, c_node) + c_doc = _newDoc() + doc = _documentFactory(c_doc, None) + c_node = _createComment(c_doc, text) + tree.xmlAddChild(c_doc, c_node) return _commentFactory(doc, c_node) def SubElement(_Element _parent not None, _tag, @@ -1187,6 +1191,7 @@ cdef xmlNode* c_next cdef xmlNode* c_node cdef xmlNode* c_node_copy + cdef xmlDoc* c_doc cdef _ElementTree etree cdef _Document doc @@ -1195,7 +1200,8 @@ elif file is not None: doc = _parseDocument(file, parser) else: - doc = _documentFactory( _newDoc() ) + c_doc = _newDoc() + doc = _documentFactory(c_doc, parser) etree = _elementTreeFactory(doc, element) @@ -1298,6 +1304,7 @@ include "xmlerror.pxi" # error and log handling include "xmlid.pxi" # XMLID and IDDict include "nsclasses.pxi" # Namespace implementation and registry +include "docloader.pxi" # Support for custom document loaders include "xslt.pxi" # XPath and XSLT include "relaxng.pxi" # RelaxNG include "xmlschema.pxi" # XMLSchema Modified: 
lxml/branch/resolver-new/src/lxml/parser.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/parser.pxi (original) +++ lxml/branch/resolver-new/src/lxml/parser.pxi Fri Apr 21 16:27:13 2006 @@ -59,109 +59,17 @@ ############################################################ -## Custom resolver API +## support for custom document loaders ############################################################ -cdef class _ResolverRegistry # forward declaration - -cdef class _ParserInput: - cdef xmlparser.xmlParserInput* _input - cdef object _pyref # to keep Python references - -cdef class _ResolverContext: - cdef xmlparser.xmlParserCtxt* _ctxt - cdef _ResolverRegistry _resolvers - cdef _TempStore _storage - cdef object _exc_info - - cdef _store_exception(self, e): - self._exc_info = sys.exc_info() - - cdef _check_exception(self): - _exc_info = self._exc_info - if _exc_info is not None: - type, value, traceback = _exc_info - raise type, value, traceback - -cdef class Resolver: - def resolve(self, system_url, public_id, _ResolverContext context not None): - cdef _ParserInput parser_input - cdef char* c_url - cdef char* c_id - if __DEFAULT_ENTITY_LOADER is NULL: - return None - if system_url is None: - c_url = NULL - else: - url_utf = _utf8(system_url) - c_url = _cstr(url_utf) - if public_id is None: - c_id = NULL - else: - id_utf = _utf8(public_id) - c_id = _cstr(id_utf) - parser_input = _ParserInput() - parser_input._input = __DEFAULT_ENTITY_LOADER( - c_url, c_id, context._ctxt) - return parser_input - - def resolve_string(self, string, _ResolverContext context not None): - cdef _ParserInput parser_input - string_utf = _utf8(string) - parser_input = _ParserInput() - parser_input._input = xmlparser.xmlNewStringInputStream( - context._ctxt, _cstr(string_utf)) - parser_input._pyref = string_utf - return parser_input - - def resolve_filename(self, filename, _ResolverContext context not None): - cdef _ParserInput 
parser_input - filename_utf = _utf8(filename) - parser_input = _ParserInput() - parser_input._input = xmlparser.xmlNewInputFromFile( - context._ctxt, _cstr(filename_utf)) - return parser_input - -cdef class _ResolverRegistry: - cdef object _resolvers - cdef Resolver _default_resolver - def __init__(self, Resolver default_resolver=None): - try: - self._resolvers = set() - except NameError: - from sets import Set - self._resolvers = Set() - if default_resolver is None: - self._default_resolver = Resolver() - else: - self._default_resolver = default_resolver - - def add(self, Resolver resolver not None): - """Register a resolver. - - For each requested entity, the 'resolve' method of the resolver will - be called and the result will be passed to the parser. If this method - returns None, the request will be delegated to other resolvers or the - default resolver. The resolvers will be tested in an arbitrary order - until the first match is found. - """ - self._resolvers.add(resolver) - - def remove(self, resolver): - self._resolvers.discard(resolver) - - def resolve(self, system_url, public_id, _ResolverContext context not None): - for resolver in self._resolvers: - result = resolver.resolve(system_url, public_id, context) - if result is not None: - return result - return self._default_resolver.resolve(system_url, public_id, context) - cdef xmlparser.xmlParserInput* _local_resolver(char* c_url, char* c_pubid, xmlParserCtxt* c_context): cdef _ResolverContext context - cdef _ParserInput parser_input + cdef _InputDocument doc_ref + cdef xmlparser.xmlParserInput* c_input if c_context._private is NULL: + if __DEFAULT_ENTITY_LOADER is NULL: + return NULL return __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context) try: @@ -175,17 +83,27 @@ pubid = funicode(c_pubid) context = <_ResolverContext>c_context._private - - parser_input = context._resolvers.resolve(url, pubid, context) + doc_ref = context._resolvers.resolve(url, pubid, context) except Exception, e: 
context._store_exception(e) - if parser_input is not None and parser_input._input is not NULL: - xmlparser.xmlFreeInputStream(parser_input._input) return NULL - if parser_input is None: - return NULL - context._storage.add(parser_input) - return parser_input._input + + if doc_ref is None: + if __DEFAULT_ENTITY_LOADER is NULL: + return NULL + return __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context) + + c_input = NULL + if doc_ref._type == PARSER_DATA_STRING: + c_input = xmlparser.xmlNewStringInputStream( + c_context, _cstr(doc_ref._data_utf)) + elif doc_ref._type == PARSER_DATA_FILENAME: + c_input = xmlparser.xmlNewInputFromFile( + c_context, _cstr(doc_ref._data_utf)) + + if c_input != NULL: + context._storage.add(doc_ref._data_utf) + return c_input cdef xmlparser.xmlExternalEntityLoader __DEFAULT_ENTITY_LOADER __DEFAULT_ENTITY_LOADER = xmlparser.xmlGetExternalEntityLoader() @@ -201,28 +119,21 @@ cdef readonly object resolvers cdef _ResolverContext _context def __init__(self): + cdef _ResolverContext context self._error_log = _ErrorLog() self.resolvers = _ResolverRegistry() - self._context = None + self._context = _ResolverContext(self.resolvers) property error_log: def __get__(self): return self._error_log.copy() cdef _initContext(self, xmlParserCtxt* c_ctxt): - cdef _ResolverContext context __GLOBAL_PARSER_CONTEXT._initParserDict(c_ctxt) - context = _ResolverContext() - context._ctxt = c_ctxt - context._resolvers = self.resolvers - context._storage = _TempStore() - self._context = context - c_ctxt._private = context + c_ctxt._private = self._context cdef _cleanupContextAndRaisePending(self): - if self._context is not None: - self._context._check_exception() - self._context = None + self._context._check_exception() cdef xmlDoc* _handleResult(self, xmlParserCtxt* ctxt, xmlDoc* result) except NULL: Modified: lxml/branch/resolver-new/src/lxml/tree.pxd ============================================================================== --- 
lxml/branch/resolver-new/src/lxml/tree.pxd (original) +++ lxml/branch/resolver-new/src/lxml/tree.pxd Fri Apr 21 16:27:13 2006 @@ -78,6 +78,7 @@ xmlDict* dict xmlHashTable* ids char* URL + void* _private ctypedef struct xmlAttr: void* _private Modified: lxml/branch/resolver-new/src/lxml/xslt.pxd ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxd (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxd Fri Apr 21 16:27:13 2006 @@ -2,13 +2,16 @@ from xpath cimport xmlXPathContext, xmlXPathFunction cdef extern from "libxslt/xsltInternals.h": + ctypedef struct xsltDocument: + xmlDoc* doc + ctypedef struct xsltStylesheet: xmlDoc* doc ctypedef struct xsltTransformContext: xmlXPathContext* xpathCtxt - void* _private - + xsltDocument* document + cdef xsltStylesheet* xsltParseStylesheetDoc(xmlDoc* doc) cdef void xsltFreeStylesheet(xsltStylesheet* sheet) Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Fri Apr 21 16:27:13 2006 @@ -250,6 +250,7 @@ cdef XSLTContext _context cdef xslt.xsltStylesheet* _c_style cdef object _doc_url_utf + cdef object _parser def __init__(self, xslt_input, extensions=None): # make a copy of the document as stylesheet needs to assume it @@ -275,14 +276,16 @@ self._doc_url_utf = doc._c_doc.URL c_doc.URL = tree.xmlStrdup(doc._c_doc.URL) else: - self._doc_url_utf = "__STRING__XSLT__%s" % id(self) + self._doc_url_utf = "XSLT:__STRING__XSLT__%s" % id(self) c_doc.URL = tree.xmlStrdup(_cstr(self._doc_url_utf)) + c_doc._private = doc c_style = xslt.xsltParseStylesheetDoc(c_doc) if c_style is NULL: tree.xmlFreeDoc(c_doc) raise XSLTParseError, "Cannot parse style sheet" self._c_style = c_style + self._parser = doc._parser self._context = XSLTContext(None, extensions) # XXX is it worthwile to use 
xsltPrecomputeStylesheet here? @@ -309,12 +312,12 @@ root_node = _rootNodeOf(_input) c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node) + c_doc._private = input_doc transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc) if transform_ctxt is NULL: _destroyFakeDoc(input_doc._c_doc, c_doc) raise XSLTApplyError, "Error preparing stylesheet run" - transform_ctxt._private = self if _kw: # allocate space for parameters @@ -352,14 +355,14 @@ if c_result is NULL: raise XSLTApplyError, "Error applying stylesheet" - result_doc = _documentFactory(c_result) + result_doc = _documentFactory(c_result, input_doc._parser) return _xsltResultTreeFactory(result_doc, self) def apply(self, _input, **_kw): return self(_input, **_kw) def tostring(self, _ElementTree result_tree): - """Save result doc to string using stylesheet as guidance. + """Save result doc to string based on stylesheet output method. """ return str(result_tree) @@ -387,21 +390,69 @@ ################################################################################ -# XSLT document resolvers +# XSLT document loaders cdef xslt.xsltDocLoaderFunc __xsltDocDefaultLoader __xsltDocDefaultLoader = xslt.xsltDocDefaultLoader -cdef xmlDoc* _doc_loader(char* uri, tree.xmlDict* dict, int options, void* ctxt, - xslt.xsltLoadType type): - if tree.strncmp(uri, "__STRING__XSLT__", 16) != 0: - # use default loader for stylesheet - print uri, type, options - if type == xslt.XSLT_LOAD_DOCUMENT: - print uri - else: - print type, uri - return __xsltDocDefaultLoader(uri, dict, options, ctxt, type) +cdef xmlDoc* _doc_loader(char* c_uri, tree.xmlDict* c_dict, int c_options, + void* c_ctxt, xslt.xsltLoadType c_type): + cdef xslt.xsltTransformContext* transform_ctxt + cdef xslt.xsltStylesheet* c_style + cdef _ResolverRegistry resolvers + cdef _InputDocument doc_ref + cdef xmlDoc* c_doc + cdef _Document reference_doc + # find reference _Document to retrieve resolvers + c_doc = NULL + reference_doc = None + if c_type == 
xslt.XSLT_LOAD_DOCUMENT: + transform_ctxt = c_ctxt + if transform_ctxt.document is not NULL and \ + transform_ctxt.document.doc is not NULL and \ + transform_ctxt.document.doc._private is not NULL: + reference_doc = <_Document>transform_ctxt.document.doc._private + elif c_type == xslt.XSLT_LOAD_STYLESHEET: + c_style = c_ctxt + if c_style.doc is not NULL and c_style.doc._private is not NULL: + reference_doc = <_Document>c_style.doc._private + + # try default loader first to take advantage of libxslt's doc reuse + if tree.strncmp(c_uri, "py:", 3) != 0: + c_doc = __xsltDocDefaultLoader(c_uri, c_dict, c_options, c_ctxt, c_type) + if c_doc is not NULL: + if c_doc._private is NULL and reference_doc is not None: + c_doc._private = reference_doc + return c_doc + + # stylesheets always use default loader + if c_doc is NULL and tree.strncmp(c_uri, "XSLT:__STRING__XSLT__", 16) != 0: + # otherwise delegate to custom resolvers + if reference_doc is not None: + resolvers = reference_doc._parser.resolvers + # FIXME: we should pass a _ResolverContext here! + try: + uri = funicode(c_uri) + doc_ref = resolvers.resolve(uri, None, None) + except Exception, e: + # FIXME: where to put the exception? 
+ print e + raise + + if doc_ref is not None: + c_doc = NULL + if doc_ref._type == PARSER_DATA_STRING: + c_doc = _parseDoc(doc_ref._data_utf, + reference_doc._parser) + elif doc_ref._type == PARSER_DATA_FILENAME: + c_doc = _parseDocFromFile(doc_ref._data_utf, + reference_doc._parser) + + if c_doc is NULL: + c_doc = __xsltDocDefaultLoader(c_uri, c_dict, c_options, c_ctxt, c_type) + if c_doc is not NULL and c_doc._private is NULL and reference_doc is not None: + c_doc._private = reference_doc + return c_doc xslt.xsltSetLoaderFunc(_doc_loader) From scoder at codespeak.net Fri Apr 21 16:37:27 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 21 16:37:28 2006 Subject: [Lxml-checkins] r26105 - lxml/branch/resolver-new/src/lxml Message-ID: <20060421143727.5489A10086@code0.codespeak.net> Author: scoder Date: Fri Apr 21 16:37:26 2006 New Revision: 26105 Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi Log: clean up Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Fri Apr 21 16:37:26 2006 @@ -392,9 +392,6 @@ ################################################################################ # XSLT document loaders -cdef xslt.xsltDocLoaderFunc __xsltDocDefaultLoader -__xsltDocDefaultLoader = xslt.xsltDocDefaultLoader - cdef xmlDoc* _doc_loader(char* c_uri, tree.xmlDict* c_dict, int c_options, void* c_ctxt, xslt.xsltLoadType c_type): cdef xslt.xsltTransformContext* transform_ctxt @@ -419,7 +416,8 @@ # try default loader first to take advantage of libxslt's doc reuse if tree.strncmp(c_uri, "py:", 3) != 0: - c_doc = __xsltDocDefaultLoader(c_uri, c_dict, c_options, c_ctxt, c_type) + c_doc = XSLT_DOC_DEFAULT_LOADER( + c_uri, c_dict, c_options, c_ctxt, c_type) if c_doc is not NULL: if c_doc._private is NULL and reference_doc is not None: c_doc._private = reference_doc @@ 
-449,11 +447,15 @@ reference_doc._parser) if c_doc is NULL: - c_doc = __xsltDocDefaultLoader(c_uri, c_dict, c_options, c_ctxt, c_type) + c_doc = XSLT_DOC_DEFAULT_LOADER( + c_uri, c_dict, c_options, c_ctxt, c_type) if c_doc is not NULL and c_doc._private is NULL and reference_doc is not None: c_doc._private = reference_doc return c_doc +cdef xslt.xsltDocLoaderFunc XSLT_DOC_DEFAULT_LOADER +XSLT_DOC_DEFAULT_LOADER = xslt.xsltDocDefaultLoader + xslt.xsltSetLoaderFunc(_doc_loader) From scoder at codespeak.net Fri Apr 21 16:38:30 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 21 16:38:32 2006 Subject: [Lxml-checkins] r26106 - lxml/branch/resolver-new/src/lxml/tests Message-ID: <20060421143830.05CDA10086@code0.codespeak.net> Author: scoder Date: Fri Apr 21 16:38:30 2006 New Revision: 26106 Modified: lxml/branch/resolver-new/src/lxml/tests/test_xslt.py Log: extended test case test_xslt_document_XML to check for text in the stylesheet Modified: lxml/branch/resolver-new/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/branch/resolver-new/src/lxml/tests/test_xslt.py (original) +++ lxml/branch/resolver-new/src/lxml/tests/test_xslt.py Fri Apr 21 16:38:30 2006 @@ -336,12 +336,12 @@ 'X') def test_xslt_document_XML(self): - # make sure document('') works from loaded files + # make sure document('') works from parsed strings xslt = etree.XSLT(etree.XML("""\ - + TEXT """)) @@ -351,6 +351,8 @@ 'test') self.assertEquals(root[0].tag, 'test') + self.assertEquals(root[0].text, + 'TEXT') self.assertEquals(root[0][0].tag, '{http://www.w3.org/1999/XSL/Transform}copy-of') From scoder at codespeak.net Fri Apr 21 22:24:55 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 21 22:24:55 2006 Subject: [Lxml-checkins] r26116 - lxml/branch/resolver-new/src/lxml Message-ID: <20060421202455.3A65D10086@code0.codespeak.net> Author: scoder Date: Fri Apr 21 22:24:54 2006 New Revision: 26116 
Modified: lxml/branch/resolver-new/src/lxml/docloader.pxi Log: allow resolving to 'EMPTY' Modified: lxml/branch/resolver-new/src/lxml/docloader.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/docloader.pxi (original) +++ lxml/branch/resolver-new/src/lxml/docloader.pxi Fri Apr 21 22:24:54 2006 @@ -1,6 +1,7 @@ # Custom resolver API ctypedef enum _InputDocumentDataType: + PARSER_DATA_EMPTY PARSER_DATA_STRING PARSER_DATA_FILENAME @@ -13,6 +14,12 @@ def resolve(self, system_url, public_id, context): return None + def resolve_empty(self, context): + cdef _InputDocument doc_ref + doc_ref = _InputDocument() + doc_ref._type = PARSER_DATA_EMPTY + return doc_ref + def resolve_string(self, string, context): cdef _InputDocument doc_ref doc_ref = _InputDocument() From scoder at codespeak.net Fri Apr 21 22:30:15 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 21 22:30:17 2006 Subject: [Lxml-checkins] r26117 - lxml/branch/resolver-new/src/lxml Message-ID: <20060421203015.E12A010086@code0.codespeak.net> Author: scoder Date: Fri Apr 21 22:30:14 2006 New Revision: 26117 Modified: lxml/branch/resolver-new/src/lxml/etree.pyx lxml/branch/resolver-new/src/lxml/xslt.pxi Log: use _XSLTResolverContext for XSLT document loaders, store exceptions to raise them after the transform Modified: lxml/branch/resolver-new/src/lxml/etree.pyx ============================================================================== --- lxml/branch/resolver-new/src/lxml/etree.pyx (original) +++ lxml/branch/resolver-new/src/lxml/etree.pyx Fri Apr 21 22:30:14 2006 @@ -55,6 +55,8 @@ class C14NError(LxmlError): pass +cdef class BaseParser # forward declaration + cdef class _Document: """Internal base class to reference a libxml document. 
@@ -63,7 +65,7 @@ """ cdef int _ns_counter cdef xmlDoc* _c_doc - cdef object _parser + cdef BaseParser _parser def __dealloc__(self): # if there are no more references to the document, it is safe @@ -166,6 +168,8 @@ result = _Document() result._c_doc = c_doc result._ns_counter = 0 + if parser is None: + parser = __DEFAULT_PARSER result._parser = parser return result Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Fri Apr 21 22:30:14 2006 @@ -199,6 +199,81 @@ ################################################################################ +# XSLT document loaders + +cdef class _XSLTResolverContext(_ResolverContext): + cdef BaseParser _parser + def __init__(self, BaseParser parser not None): + _ResolverContext.__init__(self, parser.resolvers) + self._parser = parser + +cdef xmlDoc* _doc_loader(char* c_uri, tree.xmlDict* c_dict, int c_options, + void* c_ctxt, xslt.xsltLoadType c_type): + cdef xslt.xsltTransformContext* transform_ctxt + cdef _ResolverRegistry resolvers + cdef _InputDocument doc_ref + cdef xmlDoc* c_doc + cdef _XSLTResolverContext resolver_context + # find reference _Document to retrieve resolvers + c_doc = NULL + resolver_context = None + if c_type == xslt.XSLT_LOAD_DOCUMENT: + transform_ctxt = c_ctxt + if transform_ctxt.document is not NULL: + c_doc = transform_ctxt.document.doc + elif c_type == xslt.XSLT_LOAD_STYLESHEET: + c_doc = (c_ctxt).doc + + if c_doc is NULL or c_doc._private is NULL: + # can't call Python without context, fall back to default loader + c_doc = XSLT_DOC_DEFAULT_LOADER( + c_uri, c_dict, c_options, c_ctxt, c_type) + if c_doc is not NULL: + if c_doc._private is NULL and reference_doc is not None: + c_doc._private = reference_doc + return c_doc + + resolver_context = <_XSLTResolverContext>c_doc._private + + # call the Python document loaders - 
except for stylesheets + c_doc = NULL + if tree.strncmp(c_uri, "XSLT:__STRING__XSLT__", 16) != 0: + resolvers = resolver_context._resolvers + try: + uri = funicode(c_uri) + # FIXME: we should pass a _ResolverContext here! + doc_ref = resolvers.resolve(uri, None, resolver_context) + except Exception, e: + resolver_context._store_exception(e) + return NULL + + if doc_ref is not None: + c_doc = NULL + if doc_ref._type == PARSER_DATA_STRING: + c_doc = _parseDoc(doc_ref._data_utf, + resolver_context._parser) + elif doc_ref._type == PARSER_DATA_FILENAME: + c_doc = _parseDocFromFile(doc_ref._data_utf, + resolver_context._parser) + elif doc_ref._type == PARSER_DATA_EMPTY: + return NULL + if c_doc is not NULL and c_doc.URL is NULL: + c_doc.URL = tree.xmlStrdup(c_uri) + + if c_doc is NULL: + c_doc = XSLT_DOC_DEFAULT_LOADER( + c_uri, c_dict, c_options, c_ctxt, c_type) + if c_doc is not NULL and c_doc._private is NULL and resolver_context is not None: + c_doc._private = resolver_context + return c_doc + +cdef xslt.xsltDocLoaderFunc XSLT_DOC_DEFAULT_LOADER +XSLT_DOC_DEFAULT_LOADER = xslt.xsltDocDefaultLoader + +xslt.xsltSetLoaderFunc(_doc_loader) + + +################################################################################ # XSLT cdef class XSLTContext(BaseContext): @@ -250,8 +325,8 @@ cdef XSLTContext _context cdef xslt.xsltStylesheet* _c_style cdef object _doc_url_utf - cdef object _parser - + cdef _XSLTResolverContext _xslt_resolver_context + def __init__(self, xslt_input, extensions=None): # make a copy of the document as stylesheet needs to assume it # doesn't change @@ -279,13 +354,13 @@ self._doc_url_utf = "XSLT:__STRING__XSLT__%s" % id(self) c_doc.URL = tree.xmlStrdup(_cstr(self._doc_url_utf)) - c_doc._private = doc + self._xslt_resolver_context = _XSLTResolverContext(doc._parser) + c_doc._private = self._xslt_resolver_context c_style = xslt.xsltParseStylesheetDoc(c_doc) if c_style is NULL: tree.xmlFreeDoc(c_doc) raise XSLTParseError, "Cannot parse style sheet" 
self._c_style = c_style - self._parser = doc._parser self._context = XSLTContext(None, extensions) # XXX is it worthwile to use xsltPrecomputeStylesheet here? @@ -301,6 +376,7 @@ cdef _Document input_doc cdef _NodeBase root_node cdef _Document result_doc + cdef _XSLTResolverContext resolver_context cdef xslt.xsltTransformContext* transform_ctxt cdef xmlDoc* c_result cdef xmlDoc* c_doc @@ -312,7 +388,9 @@ root_node = _rootNodeOf(_input) c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node) - c_doc._private = input_doc + + resolver_context = _XSLTResolverContext(input_doc._parser) + c_doc._private = resolver_context transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc) if transform_ctxt is NULL: @@ -350,8 +428,10 @@ cstd.free(params) self._context.free_context() + c_doc._private = input_doc # restore just in case... _destroyFakeDoc(input_doc._c_doc, c_doc) + resolver_context._check_exception() if c_result is NULL: raise XSLTApplyError, "Error applying stylesheet" @@ -390,76 +470,6 @@ ################################################################################ -# XSLT document loaders - -cdef xmlDoc* _doc_loader(char* c_uri, tree.xmlDict* c_dict, int c_options, - void* c_ctxt, xslt.xsltLoadType c_type): - cdef xslt.xsltTransformContext* transform_ctxt - cdef xslt.xsltStylesheet* c_style - cdef _ResolverRegistry resolvers - cdef _InputDocument doc_ref - cdef xmlDoc* c_doc - cdef _Document reference_doc - # find reference _Document to retrieve resolvers - c_doc = NULL - reference_doc = None - if c_type == xslt.XSLT_LOAD_DOCUMENT: - transform_ctxt = c_ctxt - if transform_ctxt.document is not NULL and \ - transform_ctxt.document.doc is not NULL and \ - transform_ctxt.document.doc._private is not NULL: - reference_doc = <_Document>transform_ctxt.document.doc._private - elif c_type == xslt.XSLT_LOAD_STYLESHEET: - c_style = c_ctxt - if c_style.doc is not NULL and c_style.doc._private is not NULL: - reference_doc = <_Document>c_style.doc._private - - # 
try default loader first to take advantage of libxslt's doc reuse - if tree.strncmp(c_uri, "py:", 3) != 0: - c_doc = XSLT_DOC_DEFAULT_LOADER( - c_uri, c_dict, c_options, c_ctxt, c_type) - if c_doc is not NULL: - if c_doc._private is NULL and reference_doc is not None: - c_doc._private = reference_doc - return c_doc - - # stylesheets always use default loader - if c_doc is NULL and tree.strncmp(c_uri, "XSLT:__STRING__XSLT__", 16) != 0: - # otherwise delegate to custom resolvers - if reference_doc is not None: - resolvers = reference_doc._parser.resolvers - # FIXME: we should pass a _ResolverContext here! - try: - uri = funicode(c_uri) - doc_ref = resolvers.resolve(uri, None, None) - except Exception, e: - # FIXME: where to put the exception? - print e - raise - - if doc_ref is not None: - c_doc = NULL - if doc_ref._type == PARSER_DATA_STRING: - c_doc = _parseDoc(doc_ref._data_utf, - reference_doc._parser) - elif doc_ref._type == PARSER_DATA_FILENAME: - c_doc = _parseDocFromFile(doc_ref._data_utf, - reference_doc._parser) - - if c_doc is NULL: - c_doc = XSLT_DOC_DEFAULT_LOADER( - c_uri, c_dict, c_options, c_ctxt, c_type) - if c_doc is not NULL and c_doc._private is NULL and reference_doc is not None: - c_doc._private = reference_doc - return c_doc - -cdef xslt.xsltDocLoaderFunc XSLT_DOC_DEFAULT_LOADER -XSLT_DOC_DEFAULT_LOADER = xslt.xsltDocDefaultLoader - -xslt.xsltSetLoaderFunc(_doc_loader) - - -################################################################################ # XPath cdef class XPathContext(BaseContext): From scoder at codespeak.net Sat Apr 22 10:12:47 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sat Apr 22 10:12:50 2006 Subject: [Lxml-checkins] r26120 - lxml/branch/resolver-new/doc Message-ID: <20060422081247.C0C531008D@code0.codespeak.net> Author: scoder Date: Sat Apr 22 10:12:46 2006 New Revision: 26120 Added: lxml/branch/resolver-new/doc/resolvers.txt Log: first shot on the doctest in doc/resolvers.txt Added: 
lxml/branch/resolver-new/doc/resolvers.txt ============================================================================== --- (empty file) +++ lxml/branch/resolver-new/doc/resolvers.txt Sat Apr 22 10:12:46 2006 @@ -0,0 +1,117 @@ +Document loading and URL resolving +================================== + +Lxml has support for custom document loaders in both the parsers and XSL +transformations. These so-called resolvers are subclasses of the +etree.Resolver class as in the following example:: + + >>> from lxml import etree + + >>> class DTDResolver(etree.Resolver): + ... def resolve(self, url, id, context): + ... print "Resolving URL '%s'" % url + ... return self.resolve_string( + ... '' % url, context) + +This defines a resolver that always returns a dynamically generated DTD +fragment defining an entity. The 'url' argument passes the system URL of the +requested document, the 'id' argument is the public ID. Note that any of +these may be None. The context object is not normally used by client code. + +Resolving is based on three methods of the Resolver object that build internal +representations of the result document. The method 'resolve_string' takes a +document as parsable string, 'resolve_filename' takes a filename and +'resolve_empty' resolves into an empty document. The 'resolve' method may +choose to return None, in which case the next registered resolver (or the +default resolver) is consulted. + +Resolvers are registered local to a parser:: + + >>> parser = etree.XMLParser(dtd_validation=True) + >>> parser.resolvers.add( DTDResolver() ) + +When we use this parser to parse a document that requires resolving a URL, it +will call our custom resolver:: + + >>> xml = u'&myentity;' + >>> from StringIO import StringIO + >>> tree = etree.parse(StringIO(xml), parser) + Resolving URL 'MissingDTD.dtd' + >>> root = tree.getroot() + >>> print root.text + resolved text: MissingDTD.dtd + +The entity was correctly resolved by the generated DTD fragment. 
+ + +Document loaders in context +--------------------------- + +XSLT document loading uses the same type of resolvers. It distinguished +between the context used when parsing the XSLT document (i.e. when resolving +xsl:import and xsl:include elements) and the context at + +Let's defines a resolver that only responds to a specific prefix:: + + >>> class PrefixResolver(etree.Resolver): + ... def __init__(self, prefix): + ... self.prefix = prefix + ... self.xml = "%s-TEST" % prefix + ... def resolve(self, url, pubid, context): + ... print "Resolving url %s as prefix %s ..." % (url, self.prefix), + ... if url.startswith(self.prefix): + ... print "done" + ... return self.resolve_string(self.xml, context) + ... print "failed" + +We use the following stylesheet as an example:: + + >>> xml_text = """\ + ... + ... + ... + ... + ... + ... + ... + ... + ... """ + +If we now register different resolvers with two different parsers, we can +parse our document twice in different resolver contexts:: + + >>> uri_parser = etree.XMLParser() + >>> uri_parser.resolvers.add( PrefixResolver("uri") ) + + >>> uro_parser = etree.XMLParser() + >>> uro_parser.resolvers.add( PrefixResolver("uro") ) + + >>> uri_doc = etree.parse(StringIO(xml_text), uri_parser) + >>> uro_doc = etree.parse(StringIO(xml_text), uro_parser) + +These contexts are important for the further behaviour of the documents. They +memorise their original parser so that the correct set of resolvers is used in +subsequent lookups:: + + >>> transform = etree.XSLT(uro_doc) + >>> result = transform(uri_doc) + Resolving url uri:test as prefix uri ... done + >>> print str(result), + + uri-TEST + +We can see that the "uri" resolver was called to generate a document that was +then inserted into the result document by the XSLT transformation. Note that +the "uri" resolver is attached to the transformed document, which defines the +context for the transformation process. If we reverse the contexts, the other +resolver will get called. 
Obviously, the "uro" resolver does not know how to +resolve "uri" URLs, so the default resolver is called in this case:: + + >>> transform = etree.XSLT(uri_doc) + >>> result = transform(uro_doc) + Resolving url uri:test as prefix uro ... failed + >>> print str(result), + + + From scoder at codespeak.net Sat Apr 22 13:21:04 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sat Apr 22 13:21:05 2006 Subject: [Lxml-checkins] r26134 - lxml/branch/resolver-new/doc Message-ID: <20060422112104.868CA1007E@code0.codespeak.net> Author: scoder Date: Sat Apr 22 13:21:03 2006 New Revision: 26134 Modified: lxml/branch/resolver-new/doc/resolvers.txt Log: restructured doctest example on document contexts in doc/resolvers.txt Modified: lxml/branch/resolver-new/doc/resolvers.txt ============================================================================== --- lxml/branch/resolver-new/doc/resolvers.txt (original) +++ lxml/branch/resolver-new/doc/resolvers.txt Sat Apr 22 13:21:03 2006 @@ -11,7 +11,7 @@ ... def resolve(self, url, id, context): ... print "Resolving URL '%s'" % url ... return self.resolve_string( - ... '' % url, context) + ... '' % url, context) This defines a resolver that always returns a dynamically generated DTD fragment defining an entity. The 'url' argument passes the system URL of the @@ -27,11 +27,12 @@ Resolvers are registered local to a parser:: - >>> parser = etree.XMLParser(dtd_validation=True) + >>> parser = etree.XMLParser(load_dtd=True) >>> parser.resolvers.add( DTDResolver() ) -When we use this parser to parse a document that requires resolving a URL, it -will call our custom resolver:: +Note that we instantiate a parser that loads the DTD. This is not done by the +default parser, which does no validation. 
When we use this parser to parse a +document that requires resolving a URL, it will call our custom resolver:: >>> xml = u'&myentity;' >>> from StringIO import StringIO @@ -39,19 +40,26 @@ Resolving URL 'MissingDTD.dtd' >>> root = tree.getroot() >>> print root.text - resolved text: MissingDTD.dtd + [resolved text: MissingDTD.dtd] -The entity was correctly resolved by the generated DTD fragment. +The entity in the document was correctly resolved by the generated DTD +fragment. Document loaders in context --------------------------- -XSLT document loading uses the same type of resolvers. It distinguished -between the context used when parsing the XSLT document (i.e. when resolving -xsl:import and xsl:include elements) and the context at +XML documents memorise their initial parser (and its resolvers) during their +life-time. This means that a lookup process related to a document will use +the resolvers of the document's parser. + +This behaviour is most visible in XSLT, where two documents are used: the XSL +document and the transformed XML document. XSLT thus distinguishes between +the context when parsing the XSLT document (i.e. when resolving xsl:import and +xsl:include elements) and the context at transformation time (calls to the +'document' function). -Let's defines a resolver that only responds to a specific prefix:: +We start with a resolver that only responds to a specific prefix:: >>> class PrefixResolver(etree.Resolver): ... def __init__(self, prefix): @@ -67,9 +75,9 @@ We use the following stylesheet as an example:: >>> xml_text = """\ - ... ... + ... ... ... ... 
@@ -83,18 +91,21 @@ >>> uri_parser = etree.XMLParser() >>> uri_parser.resolvers.add( PrefixResolver("uri") ) + >>> uri_doc = etree.parse(StringIO(xml_text), uri_parser) >>> uro_parser = etree.XMLParser() >>> uro_parser.resolvers.add( PrefixResolver("uro") ) - - >>> uri_doc = etree.parse(StringIO(xml_text), uri_parser) >>> uro_doc = etree.parse(StringIO(xml_text), uro_parser) These contexts are important for the further behaviour of the documents. They memorise their original parser so that the correct set of resolvers is used in subsequent lookups:: - >>> transform = etree.XSLT(uro_doc) + >>> try: + ... transform = etree.XSLT(uro_doc) + ... except Exception, e: + ... print e, e.error_log + Resolving url uro:test as prefix uro ... done >>> result = transform(uri_doc) Resolving url uri:test as prefix uri ... done >>> print str(result), @@ -114,4 +125,3 @@ >>> print str(result), - From scoder at codespeak.net Sat Apr 22 13:26:57 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sat Apr 22 13:26:59 2006 Subject: [Lxml-checkins] r26135 - lxml/branch/resolver-new/src/lxml Message-ID: <20060422112657.D56FB1007E@code0.codespeak.net> Author: scoder Date: Sat Apr 22 13:26:56 2006 New Revision: 26135 Modified: lxml/branch/resolver-new/src/lxml/etree.pyx lxml/branch/resolver-new/src/lxml/parser.pxi lxml/branch/resolver-new/src/lxml/xslt.pxi Log: clean up in parsers, new internal parser functions used by XSLT to obey the libxslt-provided parse options and reuse resolver context Modified: lxml/branch/resolver-new/src/lxml/etree.pyx ============================================================================== --- lxml/branch/resolver-new/src/lxml/etree.pyx (original) +++ lxml/branch/resolver-new/src/lxml/etree.pyx Sat Apr 22 13:26:56 2006 @@ -1309,10 +1309,10 @@ include "xmlid.pxi" # XMLID and IDDict include "nsclasses.pxi" # Namespace implementation and registry include "docloader.pxi" # Support for custom document loaders +include "parser.pxi" # XML Parser 
include "xslt.pxi" # XPath and XSLT include "relaxng.pxi" # RelaxNG include "xmlschema.pxi" # XMLSchema -include "parser.pxi" # XML Parser include "proxy.pxi" # Proxy handling (element backpointers/memory/etc.) Modified: lxml/branch/resolver-new/src/lxml/parser.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/parser.pxi (original) +++ lxml/branch/resolver-new/src/lxml/parser.pxi Sat Apr 22 13:26:56 2006 @@ -132,20 +132,27 @@ __GLOBAL_PARSER_CONTEXT._initParserDict(c_ctxt) c_ctxt._private = self._context - cdef _cleanupContextAndRaisePending(self): - self._context._check_exception() - - cdef xmlDoc* _handleResult(self, xmlParserCtxt* ctxt, - xmlDoc* result) except NULL: - if ctxt.wellFormed: - __GLOBAL_PARSER_CONTEXT._initDocDict(result) - elif result is not NULL: - # free broken document - tree.xmlFreeDoc(result) - result = NULL - if result is NULL: +cdef xmlDoc* _handleParseResult(xmlParserCtxt* ctxt, xmlDoc* result, + char* c_filename) except NULL: + cdef _ResolverContext context + if ctxt.wellFormed: + __GLOBAL_PARSER_CONTEXT._initDocDict(result) + elif result is not NULL: + # free broken document + tree.xmlFreeDoc(result) + result = NULL + + if ctxt._private is not NULL: + context = <_ResolverContext>ctxt._private + context._check_exception() + + if result is NULL: + if c_filename is not NULL and \ + ctxt.lastError.domain == xmlerror.XML_FROM_IO: + raise IOError, "Could not open file %s" % c_filename + else: raise XMLSyntaxError - return result + return result ############################################################ ## XML parser @@ -176,12 +183,14 @@ cdef xmlParserCtxt* _file_parser_ctxt cdef xmlParserCtxt* _memory_parser_ctxt def __init__(self, attribute_defaults=False, dtd_validation=False, - no_network=False, ns_clean=False): + load_dtd=False, no_network=False, ns_clean=False): cdef int parse_options self._file_parser_ctxt = NULL BaseParser.__init__(self) parse_options = 
_XML_DEFAULT_PARSE_OPTIONS + if load_dtd: + parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD if dtd_validation: parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD | \ xmlparser.XML_PARSE_DTDVALID @@ -209,7 +218,7 @@ raise ParserError, "Failed to create parser context" return pctxt - cdef xmlDoc* _parseDoc(self, text_utf) except NULL: + cdef xmlDoc* _parseDoc(self, char* c_text) except NULL: """Parse document, share dictionary if possible. """ cdef xmlDoc* result @@ -222,12 +231,11 @@ self._memory_parser_ctxt = pctxt self._initContext(pctxt) result = xmlparser.xmlCtxtReadDoc( - pctxt, _cstr(text_utf), NULL, NULL, self._parse_options) + pctxt, c_text, NULL, NULL, self._parse_options) self._error_log.disconnect() - self._cleanupContextAndRaisePending() - return self._handleResult(pctxt, result) + return _handleParseResult(pctxt, result, NULL) - cdef xmlDoc* _parseDocFromFile(self, char* filename) except NULL: + cdef xmlDoc* _parseDocFromFile(self, char* c_filename) except NULL: cdef xmlDoc* result cdef xmlParserCtxt* pctxt self._error_log.connect() @@ -237,13 +245,37 @@ self._file_parser_ctxt = pctxt self._initContext(pctxt) result = xmlparser.xmlCtxtReadFile( - pctxt, filename, NULL, self._parse_options) + pctxt, c_filename, NULL, self._parse_options) self._error_log.disconnect() - self._cleanupContextAndRaisePending() - if result is NULL: - if pctxt.lastError.domain == xmlerror.XML_FROM_IO: - raise IOError, "Could not open file %s" % filename - return self._handleResult(pctxt, result) + return _handleParseResult(pctxt, result, c_filename) + +cdef xmlDoc* _internalParseDoc(char* c_text, int options, + _ResolverContext context) except NULL: + # internal parser function for XSLT + cdef xmlParserCtxt* pctxt + cdef xmlDoc* c_doc + pctxt = xmlparser.xmlNewParserCtxt() + if pctxt is NULL: + return NULL + __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) + pctxt._private = context + c_doc = xmlparser.xmlCtxtReadDoc( + pctxt, c_text, NULL, NULL, options) + 
return _handleParseResult(pctxt, c_doc, NULL) + +cdef xmlDoc* _internalParseDocFromFile(char* c_filename, int options, + _ResolverContext context) except NULL: + # internal parser function for XSLT + cdef xmlParserCtxt* pctxt + cdef xmlDoc* c_doc + pctxt = xmlparser.xmlNewParserCtxt() + if pctxt is NULL: + return NULL + __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) + pctxt._private = context + c_doc = xmlparser.xmlCtxtReadFile( + pctxt, c_filename, NULL, options) + return _handleParseResult(pctxt, c_doc, c_filename) cdef XMLParser __DEFAULT_XML_PARSER @@ -317,15 +349,13 @@ if self._memory_parser_ctxt != NULL: htmlparser.htmlFreeParserCtxt(self._memory_parser_ctxt) - cdef xmlDoc* _parseDoc(self, text_utf) except NULL: + cdef xmlDoc* _parseDoc(self, char* c_text) except NULL: """Parse HTML document, share dictionary if possible. """ cdef xmlDoc* result cdef xmlParserCtxt* pctxt - cdef char* c_text cdef int c_len self._error_log.connect() - c_text = _cstr(text_utf) pctxt = self._memory_parser_ctxt if pctxt is NULL: pctxt = htmlparser.htmlCreateMemoryParserCtxt('dummy', 5) @@ -337,30 +367,28 @@ result = htmlparser.htmlCtxtReadDoc( pctxt, c_text, NULL, NULL, self._parse_options) self._error_log.disconnect() - self._cleanupContextAndRaisePending() - return self._handleResult(pctxt, result) + return _handleParseResult(pctxt, result, NULL) - cdef xmlDoc* _parseDocFromFile(self, char* filename) except NULL: + cdef xmlDoc* _parseDocFromFile(self, char* c_filename) except NULL: cdef xmlDoc* result cdef xmlParserCtxt* pctxt cdef int parser_error self._error_log.connect() pctxt = self._file_parser_ctxt if pctxt is NULL: - pctxt = htmlparser.htmlCreateFileParserCtxt(filename, NULL) + pctxt = htmlparser.htmlCreateFileParserCtxt(c_filename, NULL) if pctxt is NULL: self._error_log.disconnect() warnings = self._error_log.filter_from_warnings() if warnings and warnings[-1].domain == xmlerror.XML_FROM_IO: - raise IOError, "Could not open file %s" % filename + raise IOError, "Could 
not open file %s" % c_filename raise ParserError, "Failed to create parser context" self._file_parser_ctxt = pctxt self._initContext(pctxt) result = htmlparser.htmlCtxtReadFile( - pctxt, filename, NULL, self._parse_options) + pctxt, c_filename, NULL, self._parse_options) self._error_log.disconnect() - self._cleanupContextAndRaisePending() - return self._handleResult(pctxt, result) + return _handleParseResult(pctxt, result, c_filename) cdef HTMLParser __DEFAULT_HTML_PARSER __DEFAULT_HTML_PARSER = HTMLParser() @@ -374,9 +402,9 @@ parser = __DEFAULT_PARSER __GLOBAL_PARSER_CONTEXT._initParser() if isinstance(parser, XMLParser): - return (parser)._parseDoc(text_utf) + return (parser)._parseDoc(_cstr(text_utf)) elif isinstance(parser, HTMLParser): - return (parser)._parseDoc(text_utf) + return (parser)._parseDoc(_cstr(text_utf)) else: raise TypeError, "invalid parser" Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Sat Apr 22 13:26:56 2006 @@ -207,16 +207,16 @@ _ResolverContext.__init__(self, parser.resolvers) self._parser = parser -cdef xmlDoc* _doc_loader(char* c_uri, tree.xmlDict* c_dict, int c_options, +cdef xmlDoc* _doc_loader(char* c_uri, tree.xmlDict* c_dict, int parse_options, void* c_ctxt, xslt.xsltLoadType c_type): cdef xslt.xsltTransformContext* transform_ctxt + cdef xmlDoc* c_doc cdef _ResolverRegistry resolvers cdef _InputDocument doc_ref - cdef xmlDoc* c_doc cdef _XSLTResolverContext resolver_context + cdef XMLParser parser # find reference _Document to retrieve resolvers c_doc = NULL - resolver_context = None if c_type == xslt.XSLT_LOAD_DOCUMENT: transform_ctxt = c_ctxt if transform_ctxt.document is not NULL: @@ -226,12 +226,8 @@ if c_doc is NULL or c_doc._private is NULL: # can't call Python without context, fall back to default loader - c_doc = XSLT_DOC_DEFAULT_LOADER( 
- c_uri, c_dict, c_options, c_ctxt, c_type) - if c_doc is not NULL: - if c_doc._private is NULL and reference_doc is not None: - c_doc._private = reference_doc - return c_doc + return XSLT_DOC_DEFAULT_LOADER( + c_uri, c_dict, parse_options, c_ctxt, c_type) resolver_context = <_XSLTResolverContext>c_doc._private @@ -248,22 +244,22 @@ return NULL if doc_ref is not None: + if doc_ref._type == PARSER_DATA_EMPTY: + return NULL c_doc = NULL if doc_ref._type == PARSER_DATA_STRING: - c_doc = _parseDoc(doc_ref._data_utf, - resolver_context._parser) + c_doc = _internalParseDoc( + doc_ref._data_utf, parse_options, resolver_context) elif doc_ref._type == PARSER_DATA_FILENAME: - c_doc = _parseDocFromFile(doc_ref._data_utf, - resolver_context._parser) - elif doc_ref._type == PARSER_DATA_EMPTY: - return NULL + c_doc = _internalParseDocFromFile( + doc_ref._data_utf, parse_options, resolver_context) if c_doc is not NULL and c_doc.URL is NULL: c_doc.URL = tree.xmlStrdup(c_uri) if c_doc is NULL: c_doc = XSLT_DOC_DEFAULT_LOADER( - c_uri, c_dict, c_options, c_ctxt, c_type) - if c_doc is not NULL and c_doc._private is NULL and resolver_context is not None: + c_uri, c_dict, parse_options, c_ctxt, c_type) + if c_doc is not NULL and c_doc._private is NULL: c_doc._private = resolver_context return c_doc From scoder at codespeak.net Sat Apr 22 13:35:31 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sat Apr 22 13:35:32 2006 Subject: [Lxml-checkins] r26137 - lxml/branch/resolver-new/src/lxml Message-ID: <20060422113531.A879510080@code0.codespeak.net> Author: scoder Date: Sat Apr 22 13:35:30 2006 New Revision: 26137 Modified: lxml/branch/resolver-new/src/lxml/parser.pxi Log: better error reporting after parsing: reuse libxml2 generated message Modified: lxml/branch/resolver-new/src/lxml/parser.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/parser.pxi (original) +++ 
lxml/branch/resolver-new/src/lxml/parser.pxi Sat Apr 22 13:35:30 2006 @@ -149,7 +149,14 @@ if result is NULL: if c_filename is not NULL and \ ctxt.lastError.domain == xmlerror.XML_FROM_IO: - raise IOError, "Could not open file %s" % c_filename + if ctxt.lastError.message is not NULL: + message = "Error reading file %s: %s" % ( + funicode(c_filename), funicode(ctxt.lastError.message)) + else: + message = "Error reading file %s" % funicode(c_filename) + raise IOError, message + elif ctxt.lastError.message is not NULL: + raise XMLSyntaxError, funicode(ctxt.lastError.message) else: raise XMLSyntaxError return result From scoder at codespeak.net Sat Apr 22 21:12:35 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sat Apr 22 21:12:37 2006 Subject: [Lxml-checkins] r26146 - lxml/branch/resolver-new/src/lxml Message-ID: <20060422191235.CD7E110079@code0.codespeak.net> Author: scoder Date: Sat Apr 22 21:12:34 2006 New Revision: 26146 Modified: lxml/branch/resolver-new/src/lxml/docloader.pxi lxml/branch/resolver-new/src/lxml/etree.pyx lxml/branch/resolver-new/src/lxml/parser.pxi lxml/branch/resolver-new/src/lxml/xslt.pxi Log: clean up, fixed exception forwarding, removed some race conditions in memory handling (XSLT.__call__), fix document('') again: store second copy of stylesheet document and resolve its URL to a new copy on request Modified: lxml/branch/resolver-new/src/lxml/docloader.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/docloader.pxi (original) +++ lxml/branch/resolver-new/src/lxml/docloader.pxi Sat Apr 22 21:12:34 2006 @@ -68,6 +68,9 @@ return None return self._default_resolver.resolve(system_url, public_id, context) + def __repr__(self): + return repr(self._resolvers) + cdef class _ResolverContext: cdef _ResolverRegistry _resolvers cdef _TempStore _storage @@ -77,11 +80,17 @@ self._storage = _TempStore() self._exc_info = None - cdef _store_exception(self, e): + cdef void 
_store_raised(self): self._exc_info = sys.exc_info() - cdef _check_exception(self): + cdef void _store_exception(self, exception): + self._exc_info = (exception, None, None) + + cdef _raise_if_stored(self): _exc_info = self._exc_info if _exc_info is not None: type, value, traceback = _exc_info - raise type, value, traceback + if traceback is None and value is None: + raise type + else: + raise type, value, traceback Modified: lxml/branch/resolver-new/src/lxml/etree.pyx ============================================================================== --- lxml/branch/resolver-new/src/lxml/etree.pyx (original) +++ lxml/branch/resolver-new/src/lxml/etree.pyx Sat Apr 22 21:12:34 2006 @@ -1,6 +1,7 @@ cimport tree, python from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement -from python cimport isinstance, issubclass, hasattr, callable, iter, str, _cstr +from python cimport isinstance, issubclass, hasattr, callable +from python cimport iter, str, _cstr cimport xpath cimport xslt cimport xmlerror Modified: lxml/branch/resolver-new/src/lxml/parser.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/parser.pxi (original) +++ lxml/branch/resolver-new/src/lxml/parser.pxi Sat Apr 22 21:12:34 2006 @@ -84,8 +84,8 @@ context = <_ResolverContext>c_context._private doc_ref = context._resolvers.resolve(url, pubid, context) - except Exception, e: - context._store_exception(e) + except Exception: + context._store_raised() return NULL if doc_ref is None: @@ -144,7 +144,7 @@ if ctxt._private is not NULL: context = <_ResolverContext>ctxt._private - context._check_exception() + context._raise_if_stored() if result is NULL: if c_filename is not NULL and \ Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Sat Apr 22 21:12:34 2006 @@ 
-202,6 +202,7 @@ # XSLT document loaders cdef class _XSLTResolverContext(_ResolverContext): + cdef xmlDoc* _c_style_doc cdef BaseParser _parser def __init__(self, BaseParser parser not None): _ResolverContext.__init__(self, parser.resolvers) @@ -224,41 +225,50 @@ elif c_type == xslt.XSLT_LOAD_STYLESHEET: c_doc = (c_ctxt).doc - if c_doc is NULL or c_doc._private is NULL: + if c_doc is NULL or c_doc._private is NULL or \ + not isinstance(c_doc._private, _XSLTResolverContext): # can't call Python without context, fall back to default loader return XSLT_DOC_DEFAULT_LOADER( c_uri, c_dict, parse_options, c_ctxt, c_type) resolver_context = <_XSLTResolverContext>c_doc._private - # call the Python document loaders - except for stylesheets + # quick check if we are looking for the stylesheet + if tree.strncmp(c_uri, "XSLT:__STRING__XSLT__", 21) == 0: + c_doc = resolver_context._c_style_doc + if c_doc is not NULL and c_doc.URL is not NULL: + if tree.strcmp(c_uri, c_doc.URL) == 0: + return tree.xmlCopyDoc(c_doc, 1) + + # call the Python document loaders c_doc = NULL - if tree.strncmp(c_uri, "XSLT:__STRING__XSLT__", 16) != 0: - resolvers = resolver_context._resolvers - try: - uri = funicode(c_uri) - # FIXME: we should pass a _ResolverContext here! 
- doc_ref = resolvers.resolve(uri, None, resolver_context) - except Exception, e: - resolver_context._store_exception(e) - return NULL + resolvers = resolver_context._resolvers + try: + uri = funicode(c_uri) + doc_ref = resolvers.resolve(uri, None, resolver_context) + except Exception: + resolver_context._store_raised() + return NULL - if doc_ref is not None: - if doc_ref._type == PARSER_DATA_EMPTY: - return NULL - c_doc = NULL - if doc_ref._type == PARSER_DATA_STRING: - c_doc = _internalParseDoc( - doc_ref._data_utf, parse_options, resolver_context) - elif doc_ref._type == PARSER_DATA_FILENAME: - c_doc = _internalParseDocFromFile( - doc_ref._data_utf, parse_options, resolver_context) - if c_doc is not NULL and c_doc.URL is NULL: - c_doc.URL = tree.xmlStrdup(c_uri) + if doc_ref is not None: + if doc_ref._type == PARSER_DATA_EMPTY: + return NULL + if doc_ref._type == PARSER_DATA_STRING: + c_doc = _internalParseDoc( + doc_ref._data_utf, parse_options, resolver_context) + elif doc_ref._type == PARSER_DATA_FILENAME: + c_doc = _internalParseDocFromFile( + doc_ref._data_utf, parse_options, resolver_context) + if c_doc is not NULL and c_doc.URL is NULL: + c_doc.URL = tree.xmlStrdup(c_uri) if c_doc is NULL: c_doc = XSLT_DOC_DEFAULT_LOADER( c_uri, c_dict, parse_options, c_ctxt, c_type) + if c_doc is NULL: + exception = XSLTError("Cannot resolve URI %s" % funicode(c_uri)) + resolver_context._store_exception(exception) + return NULL if c_doc is not NULL and c_doc._private is NULL: c_doc._private = resolver_context return c_doc @@ -341,7 +351,7 @@ # XXX work around bug in xmlCopyDoc (fix is upcoming in new release # of libxml2) - if c_doc.URL is not NULL and c_doc.URL != doc._c_doc.URL: + if c_doc.URL is not NULL: tree.xmlFree(c_doc.URL) if doc._c_doc.URL is not NULL: self._doc_url_utf = doc._c_doc.URL @@ -351,7 +361,9 @@ c_doc.URL = tree.xmlStrdup(_cstr(self._doc_url_utf)) self._xslt_resolver_context = _XSLTResolverContext(doc._parser) + 
self._xslt_resolver_context._c_style_doc = tree.xmlCopyDoc(c_doc, 1) c_doc._private = self._xslt_resolver_context + c_style = xslt.xsltParseStylesheetDoc(c_doc) if c_style is NULL: tree.xmlFreeDoc(c_doc) @@ -365,9 +377,6 @@ # this cleans up copy of doc as well xslt.xsltFreeStylesheet(self._c_style) - cdef xmlDoc* _copyXslDoc(self): - return tree.xmlCopyDoc(self._c_style.doc, 1) - def __call__(self, _input, **_kw): cdef _Document input_doc cdef _NodeBase root_node @@ -377,22 +386,26 @@ cdef xmlDoc* c_result cdef xmlDoc* c_doc cdef char** params + cdef void* ptemp cdef int i cdef int j input_doc = _documentOrRaise(_input) root_node = _rootNodeOf(_input) - c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node) - resolver_context = _XSLTResolverContext(input_doc._parser) - c_doc._private = resolver_context + resolver_context._c_style_doc = self._xslt_resolver_context._c_style_doc + + c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node) transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc) if transform_ctxt is NULL: _destroyFakeDoc(input_doc._c_doc, c_doc) raise XSLTApplyError, "Error preparing stylesheet run" + ptemp = c_doc._private + c_doc._private = resolver_context + if _kw: # allocate space for parameters # * 2 as we want an entry for both key and value, @@ -424,10 +437,10 @@ cstd.free(params) self._context.free_context() - c_doc._private = input_doc # restore just in case... 
+ c_doc._private = ptemp _destroyFakeDoc(input_doc._c_doc, c_doc) - resolver_context._check_exception() + resolver_context._raise_if_stored() if c_result is NULL: raise XSLTApplyError, "Error applying stylesheet" From scoder at codespeak.net Sat Apr 22 21:13:38 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sat Apr 22 21:13:40 2006 Subject: [Lxml-checkins] r26147 - in lxml/branch/resolver-new: doc src/lxml/tests Message-ID: <20060422191338.4CA1D10079@code0.codespeak.net> Author: scoder Date: Sat Apr 22 21:13:36 2006 New Revision: 26147 Modified: lxml/branch/resolver-new/doc/resolvers.txt lxml/branch/resolver-new/src/lxml/tests/test_etree.py Log: rewrite of doctest section on parser/resolver contexts, add resolver.txt doctest to test_etree.py Modified: lxml/branch/resolver-new/doc/resolvers.txt ============================================================================== --- lxml/branch/resolver-new/doc/resolvers.txt (original) +++ lxml/branch/resolver-new/doc/resolvers.txt Sat Apr 22 21:13:36 2006 @@ -64,7 +64,11 @@ >>> class PrefixResolver(etree.Resolver): ... def __init__(self, prefix): ... self.prefix = prefix - ... self.xml = "%s-TEST" % prefix + ... self.xml = '''\ + ... + ... %s-TEST + ... + ... ''' % prefix ... def resolve(self, url, pubid, context): ... print "Resolving url %s as prefix %s ..." % (url, self.prefix), ... if url.startswith(self.prefix): @@ -77,10 +81,10 @@ >>> xml_text = """\ ... - ... + ... ... ... - ... + ... ... ... ... 
@@ -89,39 +93,43 @@ If we now register different resolvers with two different parsers, we can parse our document twice in different resolver contexts:: - >>> uri_parser = etree.XMLParser() - >>> uri_parser.resolvers.add( PrefixResolver("uri") ) - >>> uri_doc = etree.parse(StringIO(xml_text), uri_parser) - - >>> uro_parser = etree.XMLParser() - >>> uro_parser.resolvers.add( PrefixResolver("uro") ) - >>> uro_doc = etree.parse(StringIO(xml_text), uro_parser) + >>> hoi_parser = etree.XMLParser() + >>> hoi_parser.resolvers.add( PrefixResolver("hoi") ) + >>> hoi_doc = etree.parse(StringIO(xml_text), hoi_parser) + + >>> honk_parser = etree.XMLParser() + >>> honk_parser.resolvers.add( PrefixResolver("honk") ) + >>> honk_doc = etree.parse(StringIO(xml_text), honk_parser) These contexts are important for the further behaviour of the documents. They memorise their original parser so that the correct set of resolvers is used in -subsequent lookups:: +subsequent lookups. To compile the stylesheet, XSLT must resolve the +honk:test URI in the xsl:include element. The "hoi" resolver cannot do that:: - >>> try: - ... transform = etree.XSLT(uro_doc) - ... except Exception, e: - ... print e, e.error_log - Resolving url uro:test as prefix uro ... done - >>> result = transform(uri_doc) - Resolving url uri:test as prefix uri ... done - >>> print str(result), - - uri-TEST + >>> transform = etree.XSLT(hoi_doc) + Traceback (most recent call last): + [...] + XSLTParseError: Cannot parse style sheet + +However, if we use the "honk" resolver associated with the second document, +everything works fine:: -We can see that the "uri" resolver was called to generate a document that was + >>> transform = etree.XSLT(honk_doc) + Resolving url honk:test as prefix honk ... done + +We can see that the "hoi" resolver was called to generate a document that was then inserted into the result document by the XSLT transformation. 
Note that -the "uri" resolver is attached to the transformed document, which defines the -context for the transformation process. If we reverse the contexts, the other -resolver will get called. Obviously, the "uro" resolver does not know how to -resolve "uri" URLs, so the default resolver is called in this case:: - - >>> transform = etree.XSLT(uri_doc) - >>> result = transform(uro_doc) - Resolving url uri:test as prefix uro ... failed +the "hoi" resolver is attached to the transformed document, which defines the +context for the transformation process. + + >>> result = transform(hoi_doc) + Resolving url hoi:test as prefix hoi ... done >>> print str(result), - + hoi-TEST + +If we reverse the contexts, the other +resolver will get called. Obviously, the "honk" resolver does not know how to +resolve "hoi" URLs, so the default resolver is called in this case. If the +default resolver cannot find the requested document either, you will get an +exception:: Modified: lxml/branch/resolver-new/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/resolver-new/src/lxml/tests/test_etree.py (original) +++ lxml/branch/resolver-new/src/lxml/tests/test_etree.py Sat Apr 22 21:13:36 2006 @@ -393,6 +393,8 @@ suite.addTests([unittest.makeSuite(ETreeC14NTestCase)]) suite.addTests( [doctest.DocFileSuite('../../../doc/api.txt')]) + suite.addTests( + [doctest.DocFileSuite('../../../doc/resolvers.txt')]) return suite if __name__ == '__main__': From scoder at codespeak.net Sat Apr 22 21:26:22 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sat Apr 22 21:26:24 2006 Subject: [Lxml-checkins] r26148 - lxml/branch/resolver-new/src/lxml Message-ID: <20060422192622.77CE51006E@code0.codespeak.net> Author: scoder Date: Sat Apr 22 21:26:21 2006 New Revision: 26148 Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi Log: give the default resolver the first shot on stylesheets Modified: 
lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Sat Apr 22 21:26:21 2006 @@ -233,12 +233,17 @@ resolver_context = <_XSLTResolverContext>c_doc._private - # quick check if we are looking for the stylesheet + # quick check if we are looking for a stylesheet if tree.strncmp(c_uri, "XSLT:__STRING__XSLT__", 21) == 0: - c_doc = resolver_context._c_style_doc - if c_doc is not NULL and c_doc.URL is not NULL: - if tree.strcmp(c_uri, c_doc.URL) == 0: - return tree.xmlCopyDoc(c_doc, 1) + # already stored by libxslt? + c_doc = XSLT_DOC_DEFAULT_LOADER( + c_uri, c_dict, parse_options, c_ctxt, c_type) + if c_doc is NULL: + # otherwise, check if we are looking for the current stylesheet + c_doc = resolver_context._c_style_doc + if c_doc is not NULL and c_doc.URL is not NULL: + if tree.strcmp(c_uri, c_doc.URL) == 0: + return tree.xmlCopyDoc(c_doc, 1) # call the Python document loaders c_doc = NULL From scoder at codespeak.net Sat Apr 22 21:43:13 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sat Apr 22 21:43:14 2006 Subject: [Lxml-checkins] r26149 - lxml/branch/resolver-new/doc Message-ID: <20060422194313.1DD721007A@code0.codespeak.net> Author: scoder Date: Sat Apr 22 21:43:12 2006 New Revision: 26149 Modified: lxml/branch/resolver-new/doc/resolvers.txt Log: doctest cleanup Modified: lxml/branch/resolver-new/doc/resolvers.txt ============================================================================== --- lxml/branch/resolver-new/doc/resolvers.txt (original) +++ lxml/branch/resolver-new/doc/resolvers.txt Sat Apr 22 21:43:12 2006 @@ -64,8 +64,9 @@ >>> class PrefixResolver(etree.Resolver): ... def __init__(self, prefix): ... self.prefix = prefix - ... self.xml = '''\ - ... + ... self.result_xml = '''\ + ... ... %s-TEST ... ... ''' % prefix @@ -73,7 +74,7 @@ ... 
print "Resolving url %s as prefix %s ..." % (url, self.prefix), ... if url.startswith(self.prefix): ... print "done" - ... return self.resolve_string(self.xml, context) + ... return self.resolve_string(self.result_xml, context) ... print "failed" We use the following stylesheet as an example:: From scoder at codespeak.net Sat Apr 22 21:48:40 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sat Apr 22 21:48:41 2006 Subject: [Lxml-checkins] r26150 - lxml/branch/resolver-new/src/lxml Message-ID: <20060422194840.86D4C1007A@code0.codespeak.net> Author: scoder Date: Sat Apr 22 21:48:39 2006 New Revision: 26150 Modified: lxml/branch/resolver-new/src/lxml/parser.pxi Log: fix a potential race condition in parser.pxi:_local_resolver Modified: lxml/branch/resolver-new/src/lxml/parser.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/parser.pxi (original) +++ lxml/branch/resolver-new/src/lxml/parser.pxi Sat Apr 22 21:48:39 2006 @@ -67,7 +67,8 @@ cdef _ResolverContext context cdef _InputDocument doc_ref cdef xmlparser.xmlParserInput* c_input - if c_context._private is NULL: + if c_context._private is NULL or \ + not isinstance(c_context._private, _ResolverContext): if __DEFAULT_ENTITY_LOADER is NULL: return NULL return __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context) From scoder at codespeak.net Sat Apr 22 21:48:45 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sat Apr 22 21:48:47 2006 Subject: [Lxml-checkins] r26151 - lxml/branch/resolver-new/src/lxml Message-ID: <20060422194845.E35791007A@code0.codespeak.net> Author: scoder Date: Sat Apr 22 21:48:44 2006 New Revision: 26151 Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi Log: clean up Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi 
Sat Apr 22 21:48:44 2006 @@ -260,10 +260,10 @@ return NULL if doc_ref._type == PARSER_DATA_STRING: c_doc = _internalParseDoc( - doc_ref._data_utf, parse_options, resolver_context) + _cstr(doc_ref._data_utf), parse_options, resolver_context) elif doc_ref._type == PARSER_DATA_FILENAME: c_doc = _internalParseDocFromFile( - doc_ref._data_utf, parse_options, resolver_context) + _cstr(doc_ref._data_utf), parse_options, resolver_context) if c_doc is not NULL and c_doc.URL is NULL: c_doc.URL = tree.xmlStrdup(c_uri) From scoder at codespeak.net Sun Apr 23 15:11:59 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Apr 23 15:12:00 2006 Subject: [Lxml-checkins] r26182 - lxml/branch/resolver-new Message-ID: <20060423131159.4C16210084@code0.codespeak.net> Author: scoder Date: Sun Apr 23 15:11:58 2006 New Revision: 26182 Modified: lxml/branch/resolver-new/bench.py Log: benchmark for XSLT and document(''), showed that deep copying is faster than parsing when building copies of small stylesheets Modified: lxml/branch/resolver-new/bench.py ============================================================================== --- lxml/branch/resolver-new/bench.py (original) +++ lxml/branch/resolver-new/bench.py Sun Apr 23 15:11:58 2006 @@ -407,6 +407,25 @@ xpath = self.etree.XPathElementEvaluator(child) xpath.evaluate("./*[0]") + @onlylib('lxe') + def bench_xslt_document(self, root): + transform = self.etree.XSLT(self.etree.XML("""\ + + TEST + + + + + + + + +""")) + transform(root) + + ############################################################ # Main program ############################################################ From scoder at codespeak.net Sun Apr 23 15:26:50 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Apr 23 15:26:51 2006 Subject: [Lxml-checkins] r26186 - lxml/branch/resolver-new/src/lxml Message-ID: <20060423132650.5B29210098@code0.codespeak.net> Author: scoder Date: Sun Apr 23 15:26:49 2006 New Revision: 26186 Modified: 
lxml/branch/resolver-new/src/lxml/xslt.pxi Log: always special case the current stylesheet in XSLT document lookup Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Sun Apr 23 15:26:49 2006 @@ -203,10 +203,15 @@ cdef class _XSLTResolverContext(_ResolverContext): cdef xmlDoc* _c_style_doc + cdef object _style_url_utf + cdef object _style_doc_utf cdef BaseParser _parser def __init__(self, BaseParser parser not None): _ResolverContext.__init__(self, parser.resolvers) self._parser = parser + self._c_style_doc = NULL + self._style_url_utf = None + self._style_doc_utf = None cdef xmlDoc* _doc_loader(char* c_uri, tree.xmlDict* c_dict, int parse_options, void* c_ctxt, xslt.xsltLoadType c_type): @@ -233,17 +238,11 @@ resolver_context = <_XSLTResolverContext>c_doc._private - # quick check if we are looking for a stylesheet - if tree.strncmp(c_uri, "XSLT:__STRING__XSLT__", 21) == 0: - # already stored by libxslt? 
- c_doc = XSLT_DOC_DEFAULT_LOADER( - c_uri, c_dict, parse_options, c_ctxt, c_type) - if c_doc is NULL: - # otherwise, check if we are looking for the current stylesheet - c_doc = resolver_context._c_style_doc - if c_doc is not NULL and c_doc.URL is not NULL: - if tree.strcmp(c_uri, c_doc.URL) == 0: - return tree.xmlCopyDoc(c_doc, 1) + # quick check if we are looking for the current stylesheet + c_doc = resolver_context._c_style_doc + if c_doc is not NULL and c_doc.URL is not NULL: + if tree.strcmp(c_uri, c_doc.URL) == 0: + return tree.xmlCopyDoc(c_doc, 1) # call the Python document loaders c_doc = NULL @@ -335,12 +334,9 @@ """ cdef XSLTContext _context cdef xslt.xsltStylesheet* _c_style - cdef object _doc_url_utf cdef _XSLTResolverContext _xslt_resolver_context def __init__(self, xslt_input, extensions=None): - # make a copy of the document as stylesheet needs to assume it - # doesn't change cdef xslt.xsltStylesheet* c_style cdef xmlDoc* c_doc cdef xmlDoc* fake_c_doc @@ -350,22 +346,22 @@ doc = _documentOrRaise(xslt_input) root_node = _rootNodeOf(xslt_input) + # make a copy of the document as stylesheet parsing modifies it fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) c_doc = tree.xmlCopyDoc(fake_c_doc, 1) _destroyFakeDoc(doc._c_doc, fake_c_doc) - # XXX work around bug in xmlCopyDoc (fix is upcoming in new release - # of libxml2) + # make sure we always have a stylesheet URL if c_doc.URL is not NULL: tree.xmlFree(c_doc.URL) if doc._c_doc.URL is not NULL: - self._doc_url_utf = doc._c_doc.URL c_doc.URL = tree.xmlStrdup(doc._c_doc.URL) else: - self._doc_url_utf = "XSLT:__STRING__XSLT__%s" % id(self) - c_doc.URL = tree.xmlStrdup(_cstr(self._doc_url_utf)) + doc_url_utf = "XSLT:__STRING__XSLT__%s" % id(self) + c_doc.URL = tree.xmlStrdup(_cstr(doc_url_utf)) self._xslt_resolver_context = _XSLTResolverContext(doc._parser) + # keep a copy in case we need to access the stylesheet via 'document()' self._xslt_resolver_context._c_style_doc = tree.xmlCopyDoc(c_doc, 1) 
c_doc._private = self._xslt_resolver_context From scoder at codespeak.net Sun Apr 23 16:59:12 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Apr 23 16:59:13 2006 Subject: [Lxml-checkins] r26190 - lxml/branch/resolver-new/src/lxml Message-ID: <20060423145912.184291009D@code0.codespeak.net> Author: scoder Date: Sun Apr 23 16:59:10 2006 New Revision: 26190 Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi Log: free document copy on XSLT.__dealloc__ Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Sun Apr 23 16:59:10 2006 @@ -375,6 +375,9 @@ # XXX is it worthwile to use xsltPrecomputeStylesheet here? def __dealloc__(self): + if self._xslt_resolver_context is not None and \ + self._xslt_resolver_context._c_style_doc is not NULL: + tree.xmlFreeDoc(self._xslt_resolver_context._c_style_doc) # this cleans up copy of doc as well xslt.xsltFreeStylesheet(self._c_style) From scoder at codespeak.net Sun Apr 23 17:15:51 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Apr 23 17:15:52 2006 Subject: [Lxml-checkins] r26191 - lxml/branch/resolver-new/src/lxml Message-ID: <20060423151551.BBC751009F@code0.codespeak.net> Author: scoder Date: Sun Apr 23 17:15:50 2006 New Revision: 26191 Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi Log: some C-ification Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Sun Apr 23 17:15:50 2006 @@ -825,6 +825,7 @@ cdef xmlNode* c_node cdef char* s cdef _NodeBase element + cdef int i result = [] if xpathObj.nodesetval is NULL: return result @@ -850,6 +851,7 @@ cdef _Document doc cdef xpath.xmlXPathObject* obj cdef BaseContext extensions + 
cdef int i rctxt = ctxt.context @@ -869,7 +871,7 @@ args = [] doc = extensions._doc for i from 0 <= i < nargs: - args.append(_unwrapXPathObject(xpath.valuePop(ctxt), doc)) + python.PyList_Append(args, _unwrapXPathObject(xpath.valuePop(ctxt), doc)) args.reverse() try: From scoder at codespeak.net Sun Apr 23 18:44:30 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Apr 23 18:44:31 2006 Subject: [Lxml-checkins] r26196 - lxml/branch/resolver-new/src/lxml Message-ID: <20060423164430.7BC7B1009D@code0.codespeak.net> Author: scoder Date: Sun Apr 23 18:44:29 2006 New Revision: 26196 Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi Log: prefix internal classnames by '_' Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Sun Apr 23 18:44:29 2006 @@ -30,7 +30,7 @@ ################################################################################ # support for extension functions in XPath/XSLT -cdef class BaseContext: +cdef class _BaseContext: cdef xpath.xmlXPathContext* _xpathCtxt cdef _Document _doc cdef object _extensions @@ -221,7 +221,7 @@ cdef _InputDocument doc_ref cdef _XSLTResolverContext resolver_context cdef XMLParser parser - # find reference _Document to retrieve resolvers + # find current xmlDoc to retrieve resolvers c_doc = NULL if c_type == xslt.XSLT_LOAD_DOCUMENT: transform_ctxt = c_ctxt @@ -286,11 +286,11 @@ ################################################################################ # XSLT -cdef class XSLTContext(BaseContext): +cdef class _XSLTContext(_BaseContext): cdef xslt.xsltTransformContext* _xsltCtxt def __init__(self, namespaces, extensions): self._xsltCtxt = NULL - BaseContext.__init__(self, namespaces, extensions) + _BaseContext.__init__(self, namespaces, extensions) cdef register_context(self, xslt.xsltTransformContext* xsltCtxt, _Document 
doc): self._xsltCtxt = xsltCtxt @@ -332,7 +332,7 @@ cdef class XSLT: """Turn a document into an XSLT object. """ - cdef XSLTContext _context + cdef _XSLTContext _context cdef xslt.xsltStylesheet* _c_style cdef _XSLTResolverContext _xslt_resolver_context @@ -371,7 +371,7 @@ raise XSLTParseError, "Cannot parse style sheet" self._c_style = c_style - self._context = XSLTContext(None, extensions) + self._context = _XSLTContext(None, extensions) # XXX is it worthwile to use xsltPrecomputeStylesheet here? def __dealloc__(self): @@ -481,15 +481,14 @@ result._xslt = xslt return result - ################################################################################ # XPath -cdef class XPathContext(BaseContext): +cdef class _XPathContext(_BaseContext): cdef object _variables cdef object _registered_variables def __init__(self, namespaces, extensions, variables): - BaseContext.__init__(self, namespaces, extensions) + _BaseContext.__init__(self, namespaces, extensions) self._variables = variables self._registered_variables = [] @@ -567,10 +566,10 @@ cdef class XPathEvaluatorBase: - cdef XPathContext _context + cdef _XPathContext _context def __init__(self, namespaces, extensions, variables=None): - self._context = XPathContext(namespaces, extensions, variables) + self._context = _XPathContext(namespaces, extensions, variables) cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, _Document doc): _exc_info = self._context._exc_info @@ -706,7 +705,7 @@ cdef xpath.xmlXPathObject* xpathObj cdef _Document document cdef _NodeBase element - cdef XPathContext context + cdef _XPathContext context document = _documentOrRaise(_etree_or_element) element = _rootNodeOf(_etree_or_element) @@ -850,7 +849,7 @@ cdef xpath.xmlXPathContext* rctxt cdef _Document doc cdef xpath.xmlXPathObject* obj - cdef BaseContext extensions + cdef _BaseContext extensions cdef int i rctxt = ctxt.context @@ -863,7 +862,7 @@ uri = None # get our evaluator - extensions = (rctxt.userData) + extensions = 
<_BaseContext>(rctxt.userData) # lookup up the extension function in the context f = extensions.find_extension(uri, name) From scoder at codespeak.net Sun Apr 23 18:53:41 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Apr 23 18:53:42 2006 Subject: [Lxml-checkins] r26197 - in lxml/branch/resolver-new: . src/lxml src/lxml/tests Message-ID: <20060423165341.5B4551009D@code0.codespeak.net> Author: scoder Date: Sun Apr 23 18:53:39 2006 New Revision: 26197 Modified: lxml/branch/resolver-new/setup.py lxml/branch/resolver-new/src/lxml/tests/test_xslt.py lxml/branch/resolver-new/src/lxml/xslt.pxd lxml/branch/resolver-new/src/lxml/xslt.pxi Log: compile with EXSLT support Modified: lxml/branch/resolver-new/setup.py ============================================================================== --- lxml/branch/resolver-new/setup.py (original) +++ lxml/branch/resolver-new/setup.py Sun Apr 23 18:53:39 2006 @@ -11,6 +11,7 @@ try: from setuptools import setup from setuptools.extension import Extension + # prevent setuptools from making local etree.so copies: setup_args['zip_safe'] = False except ImportError: from distutils.core import setup @@ -47,6 +48,16 @@ changelog.close() +# compile also against libexslt! 
+xslt_libs = flags('xslt-config --libs') +xslt_libs.append('-lexslt') +for i, libname in (): # enumerate(xslt_libs): + if 'exslt' in libname: + break + if 'xslt' in libname: + xslt_libs.insert(i, libname.replace('xslt', 'exslt')) + break + setup( name = "lxml", version = version, @@ -85,7 +96,7 @@ "lxml.etree", sources = sources, extra_compile_args = ['-w'] + flags('xslt-config --cflags'), - extra_link_args = flags('xslt-config --libs') + extra_link_args = xslt_libs )], **setup_args ) Modified: lxml/branch/resolver-new/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/branch/resolver-new/src/lxml/tests/test_xslt.py (original) +++ lxml/branch/resolver-new/src/lxml/tests/test_xslt.py Sun Apr 23 18:53:39 2006 @@ -29,6 +29,36 @@ B ''', st.tostring(res)) + + def test_exslt(self): + tree = self.parse('BC') + style = self.parse('''\ + + + + + + + + + + + + +''') + + st = etree.XSLT(style) + res = st.apply(tree) + self.assertEquals('''\ + +-B--C- +''', + st.tostring(res)) + def test_xslt_input(self): tree = self.parse('BC') style = self.parse('''\ Modified: lxml/branch/resolver-new/src/lxml/xslt.pxd ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxd (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxd Sun Apr 23 18:53:39 2006 @@ -56,4 +56,8 @@ cdef void xsltSetTransformErrorFunc(xsltTransformContext*, void* ctxt, void (*handler)(void* ctxt, char* msg, ...)) - + + cdef void xsltRegisterAllExtras() + +cdef extern from "libexslt/exslt.h": + cdef void exsltRegisterAll() Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Sun Apr 23 18:53:39 2006 @@ -481,6 +481,10 @@ result._xslt = xslt return result +# enable EXSLT and Saxon support for XSLT 
+xslt.xsltRegisterAllExtras() +xslt.exsltRegisterAll() + ################################################################################ # XPath From scoder at codespeak.net Sun Apr 23 20:24:50 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Apr 23 20:24:51 2006 Subject: [Lxml-checkins] r26200 - lxml/branch/resolver-new/src/lxml Message-ID: <20060423182450.5EC0F100B7@code0.codespeak.net> Author: scoder Date: Sun Apr 23 20:24:49 2006 New Revision: 26200 Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi Log: make self call in XSLT.apply() more explicit Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Sun Apr 23 20:24:49 2006 @@ -452,7 +452,7 @@ return _xsltResultTreeFactory(result_doc, self) def apply(self, _input, **_kw): - return self(_input, **_kw) + return self.__call__(_input, **_kw) def tostring(self, _ElementTree result_tree): """Save result doc to string based on stylesheet output method. 
From scoder at codespeak.net Sun Apr 23 21:31:19 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Apr 23 21:31:20 2006 Subject: [Lxml-checkins] r26202 - lxml/branch/resolver-new/src/lxml Message-ID: <20060423193119.389571008A@code0.codespeak.net> Author: scoder Date: Sun Apr 23 21:31:18 2006 New Revision: 26202 Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi Log: doc Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Sun Apr 23 21:31:18 2006 @@ -353,6 +353,7 @@ # make sure we always have a stylesheet URL if c_doc.URL is not NULL: + # handle a bug in older libxml2 versions tree.xmlFree(c_doc.URL) if doc._c_doc.URL is not NULL: c_doc.URL = tree.xmlStrdup(doc._c_doc.URL) From scoder at codespeak.net Sun Apr 23 21:58:46 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Apr 23 21:58:47 2006 Subject: [Lxml-checkins] r26203 - lxml/trunk/src/lxml Message-ID: <20060423195846.36BF110087@code0.codespeak.net> Author: scoder Date: Sun Apr 23 21:58:45 2006 New Revision: 26203 Modified: lxml/trunk/src/lxml/etree.pyx Log: trivial implementations for Attrib.iter*() Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Apr 23 21:58:45 2006 @@ -931,9 +931,6 @@ except KeyError: return default - def __iter__(self): - return iter(self.keys()) - def keys(self): result = [] cdef xmlNode* c_node @@ -944,6 +941,12 @@ c_node = c_node.next return result + def __iter__(self): + return iter(self.keys()) + + def iterkeys(self): + return iter(self.keys()) + def values(self): cdef xmlNode* c_node result = [] @@ -954,7 +957,10 @@ result, _attributeValue(self._c_node, c_node)) c_node = c_node.next return result - + + def 
itervalues(self): + return iter(self.values()) + def items(self): result = [] cdef xmlNode* c_node @@ -968,6 +974,9 @@ c_node = c_node.next return result + def iteritems(self): + return iter(self.items()) + def has_key(self, key): cdef xmlNs* c_ns cdef char* result From scoder at codespeak.net Mon Apr 24 14:51:01 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Apr 24 14:51:03 2006 Subject: [Lxml-checkins] r26263 - lxml/trunk/src/lxml Message-ID: <20060424125101.C82501007C@code0.codespeak.net> Author: scoder Date: Mon Apr 24 14:51:00 2006 New Revision: 26263 Modified: lxml/trunk/src/lxml/xslt.pxi Log: merged in name change of internal classes from resolver-new branch Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Mon Apr 24 14:51:00 2006 @@ -30,7 +30,7 @@ ################################################################################ # support for extension functions in XPath/XSLT -cdef class BaseContext: +cdef class _BaseContext: cdef xpath.xmlXPathContext* _xpathCtxt cdef _Document _doc cdef object _extensions @@ -201,11 +201,11 @@ ################################################################################ # XSLT -cdef class XSLTContext(BaseContext): +cdef class _XSLTContext(_BaseContext): cdef xslt.xsltTransformContext* _xsltCtxt def __init__(self, namespaces, extensions): self._xsltCtxt = NULL - BaseContext.__init__(self, namespaces, extensions) + _BaseContext.__init__(self, namespaces, extensions) cdef register_context(self, xslt.xsltTransformContext* xsltCtxt, _Document doc): self._xsltCtxt = xsltCtxt @@ -247,7 +247,7 @@ cdef class XSLT: """Turn a document into an XSLT object. 
""" - cdef XSLTContext _context + cdef _XSLTContext _context cdef xslt.xsltStylesheet* _c_style cdef object _doc_url_utf @@ -283,7 +283,7 @@ raise XSLTParseError, "Cannot parse style sheet" self._c_style = c_style - self._context = XSLTContext(None, extensions) + self._context = _XSLTContext(None, extensions) # XXX is it worthwile to use xsltPrecomputeStylesheet here? def __dealloc__(self): @@ -380,15 +380,14 @@ result._xslt = xslt return result - ################################################################################ # XPath -cdef class XPathContext(BaseContext): +cdef class _XPathContext(_BaseContext): cdef object _variables cdef object _registered_variables def __init__(self, namespaces, extensions, variables): - BaseContext.__init__(self, namespaces, extensions) + _BaseContext.__init__(self, namespaces, extensions) self._variables = variables self._registered_variables = [] @@ -466,10 +465,10 @@ cdef class XPathEvaluatorBase: - cdef XPathContext _context + cdef _XPathContext _context def __init__(self, namespaces, extensions, variables=None): - self._context = XPathContext(namespaces, extensions, variables) + self._context = _XPathContext(namespaces, extensions, variables) cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, _Document doc): _exc_info = self._context._exc_info @@ -605,7 +604,7 @@ cdef xpath.xmlXPathObject* xpathObj cdef _Document document cdef _NodeBase element - cdef XPathContext context + cdef _XPathContext context document = _documentOrRaise(_etree_or_element) element = _rootNodeOf(_etree_or_element) @@ -748,7 +747,7 @@ cdef xpath.xmlXPathContext* rctxt cdef _Document doc cdef xpath.xmlXPathObject* obj - cdef BaseContext extensions + cdef _BaseContext extensions rctxt = ctxt.context @@ -760,7 +759,7 @@ uri = None # get our evaluator - extensions = (rctxt.userData) + extensions = <_BaseContext>(rctxt.userData) # lookup up the extension function in the context f = extensions.find_extension(uri, name) From scoder at 
codespeak.net Mon Apr 24 16:42:58 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Apr 24 16:42:59 2006 Subject: [Lxml-checkins] r26268 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20060424144258.64B6C1008D@code0.codespeak.net> Author: scoder Date: Mon Apr 24 16:42:56 2006 New Revision: 26268 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/tests/test_xslt.py lxml/trunk/src/lxml/xslt.pxi Log: Exslt:regexp implementation for XSLT based on Python 're' module Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Apr 24 16:42:56 2006 @@ -7,6 +7,8 @@ Features added -------------- +* Exslt:regexp implementation for XSLT based on the Python 're' module + * HTMLParser for parsing (broken) HTML * XMLDTDID function parses XML into tuple (root node, ID dict) based on xml:id Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Mon Apr 24 16:42:56 2006 @@ -374,6 +374,84 @@ self.assertEquals(root[0].tag, '{http://www.w3.org/1999/XSL/Transform}stylesheet') + def test_exslt_regexp_test(self): + xslt = etree.XSLT(etree.XML("""\ + + + + + +""")) + result = xslt(etree.XML('123098987')) + root = result.getroot() + self.assertEquals(root.tag, + 'test') + self.assertEquals(len(root), 1) + self.assertEquals(root[0].tag, + 'b') + self.assertEquals(root[0].text, + '987') + + def test_exslt_regexp_replace(self): + xslt = etree.XSLT(etree.XML("""\ + + + + + - + + + + +""")) + result = xslt(etree.XML('abdCdEeDed')) + root = result.getroot() + self.assertEquals(root.tag, + 'test') + self.assertEquals(len(root), 0) + self.assertEquals(root.text, 'abXXdEeDed-abXXXXeXXd') + + def test_exslt_regexp_match(self): + xslt = etree.XSLT(etree.XML("""\ + + + + + + + + + +""")) 
+ result = xslt(etree.XML('abdCdEeDed')) + root = result.getroot() + self.assertEquals(root.tag, 'test') + self.assertEquals(len(root), 3) + + self.assertEquals(len(root[0]), 1) + self.assertEquals(root[0][0].tag, 'match') + self.assertEquals(root[0][0].text, 'dC') + + self.assertEquals(len(root[1]), 2) + self.assertEquals(root[1][0].tag, 'match') + self.assertEquals(root[1][0].text, 'dC') + self.assertEquals(root[1][1].tag, 'match') + self.assertEquals(root[1][1].text, 'dE') + + self.assertEquals(len(root[2]), 3) + self.assertEquals(root[2][0].tag, 'match') + self.assertEquals(root[2][0].text, 'dC') + self.assertEquals(root[2][1].tag, 'match') + self.assertEquals(root[2][1].text, 'dE') + self.assertEquals(root[2][2].tag, 'match') + self.assertEquals(root[2][2].text, 'De') + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeXSLTTestCase)]) Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Mon Apr 24 16:42:56 2006 @@ -197,6 +197,72 @@ #print "Holding document:", element._doc._c_doc python.PyDict_SetItem(self._temp_docs, id(element._doc), element._doc) +################################################################################ +# EXSLT regexp implementation + +cdef object RE_COMPILE +RE_COMPILE = re.compile + +cdef class _ExsltRegExp: + cdef object _compile_map + def __init__(self): + self._compile_map = {} + + cdef _compile(self, rexp, ignore_case): + cdef python.PyObject* c_result + key = (rexp, ignore_case) + c_result = python.PyDict_GetItem(self._compile_map, key) + if c_result is not NULL: + return c_result + if ignore_case: + py_flags = re.IGNORECASE + else: + py_flags = 0 + rexp_compiled = RE_COMPILE(rexp, py_flags) + python.PyDict_SetItem(self._compile_map, key, rexp_compiled) + return rexp_compiled + + def test(self, ctxt, s, rexp, flags=''): + rexpc = self._compile(rexp, 
'i' in flags) + if rexpc.search(s) is None: + return False + else: + return True + + def match(self, ctxt, s, rexp, flags=''): + rexpc = self._compile(rexp, 'i' in flags) + if 'g' in flags: + results = rexpc.findall(s) + if not results: + return () + result_list = [] + root = Element('matches') + for s_match in results: + elem = SubElement(root, 'match') + elem.text = s_match + python.PyList_Append(result_list, elem) + return result_list + else: + result = rexpc.search(s) + if result is None: + return () + root = Element('match') + root.text = result.group() + return (root,) + + def replace(self, ctxt, s, rexp, flags, replacement): + rexpc = self._compile(rexp, 'i' in flags) + if 'g' in flags: + count = 0 + else: + count = 1 + return rexpc.sub(replacement, s, count) + + cdef void _register_exslt_regexp(self, _BaseContext context): + ns = "http://exslt.org/regular-expressions" + context._registerExtensionFunction(ns, "test", self.test) + context._registerExtensionFunction(ns, "match", self.match) + context._registerExtensionFunction(ns, "replace", self.replace) ################################################################################ # XSLT @@ -249,6 +315,7 @@ """ cdef _XSLTContext _context cdef xslt.xsltStylesheet* _c_style + cdef _ExsltRegExp _regexp cdef object _doc_url_utf def __init__(self, xslt_input, extensions=None): @@ -284,6 +351,7 @@ self._c_style = c_style self._context = _XSLTContext(None, extensions) + self._regexp = _ExsltRegExp() # XXX is it worthwile to use xsltPrecomputeStylesheet here? 
def __dealloc__(self): @@ -333,6 +401,7 @@ self._context._release_temp_refs() self._context.register_context(transform_ctxt, input_doc) + self._regexp._register_exslt_regexp(self._context) c_result = xslt.xsltApplyStylesheetUser(self._c_style, c_doc, params, NULL, NULL, transform_ctxt) From scoder at codespeak.net Mon Apr 24 17:08:46 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Mon Apr 24 17:08:47 2006 Subject: [Lxml-checkins] r26271 - in lxml/trunk/src/lxml: . tests Message-ID: <20060424150846.5F27F10093@code0.codespeak.net> Author: scoder Date: Mon Apr 24 17:08:45 2006 New Revision: 26271 Modified: lxml/trunk/src/lxml/tests/test_xslt.py lxml/trunk/src/lxml/xslt.pxi Log: clean up, check type of input arguments in exslt:regexp extension functions Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Mon Apr 24 17:08:45 2006 @@ -422,8 +422,8 @@ xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> - - + + Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Mon Apr 24 17:08:45 2006 @@ -208,8 +208,15 @@ def __init__(self): self._compile_map = {} + cdef _make_string(self, value): + if python.PyString_Check(value) or python.PyUnicode_Check(value): + return value + else: + raise TypeError, "Invalid argument type %s" % type(value) + cdef _compile(self, rexp, ignore_case): cdef python.PyObject* c_result + rexp = self._make_string(rexp) key = (rexp, ignore_case) c_result = python.PyDict_GetItem(self._compile_map, key) if c_result is not NULL: @@ -223,6 +230,8 @@ return rexp_compiled def test(self, ctxt, s, rexp, flags=''): + flags = self._make_string(flags) + s = self._make_string(s) rexpc = self._compile(rexp, 'i' in flags) if rexpc.search(s) is 
None: return False @@ -230,6 +239,8 @@ return True def match(self, ctxt, s, rexp, flags=''): + flags = self._make_string(flags) + s = self._make_string(s) rexpc = self._compile(rexp, 'i' in flags) if 'g' in flags: results = rexpc.findall(s) @@ -251,6 +262,9 @@ return (root,) def replace(self, ctxt, s, rexp, flags, replacement): + replacement = self._make_string(replacement) + flags = self._make_string(flags) + s = self._make_string(s) rexpc = self._compile(rexp, 'i' in flags) if 'g' in flags: count = 0 From scoder at codespeak.net Tue Apr 25 08:05:58 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Apr 25 08:06:00 2006 Subject: [Lxml-checkins] r26288 - in lxml/trunk: . src/lxml Message-ID: <20060425060558.30E5F1007E@code0.codespeak.net> Author: scoder Date: Tue Apr 25 08:05:56 2006 New Revision: 26288 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/xslt.pxi Log: some cleanup in XPath/XSLT impl, reuse XPath contexts in XPath(): substantial speedup Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Apr 25 08:05:56 2006 @@ -7,6 +7,8 @@ Features added -------------- +* Substantial speedup in XPath.evaluate() + * Exslt:regexp implementation for XSLT based on the Python 're' module * HTMLParser for parsing (broken) HTML @@ -73,6 +75,8 @@ * Support for registering extension functions through new FunctionNamespace class (see doc/extensions.txt) +* ETXPath class for XPath expressions in ElementTree notation ('//{ns}tag') + * Support for variables in XPath expressions (also in XPath class) * XPath class for compiled XPath expressions Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Tue Apr 25 08:05:56 2006 @@ -110,8 +110,8 @@ self._free_context() cdef _free_context(self): - 
self._registered_namespaces = [] - self._registered_extensions = [] + del self._registered_namespaces[:] + del self._registered_extensions[:] python.PyDict_Clear(self._utf_refs) self._doc = None if self._xpathCtxt is not NULL: @@ -122,9 +122,8 @@ def addNamespace(self, prefix, uri): if self._namespaces is None: - self._namespaces = {prefix : uri} - else: - self._namespaces[prefix] = uri + self._namespaces = {} + python.PyDict_SetItem(self._namespaces, prefix, uri) def registerNamespaces(self, namespaces): for prefix, uri in namespaces.items(): @@ -134,7 +133,7 @@ prefix_utf = self._to_utf(prefix) ns_uri_utf = self._to_utf(ns_uri) xpath.xmlXPathRegisterNs(self._xpathCtxt, prefix_utf, ns_uri_utf) - self._registered_namespaces.append(prefix_utf) + python.PyList_Append(self._registered_namespaces, prefix_utf) cdef _unregisterNamespaces(self): cdef xpath.xmlXPathContext* xpathCtxt @@ -147,7 +146,8 @@ def registerExtensionFunctions(self, extensions): for ns_uri, extension in extensions.items(): for name, function in extension.items(): - self.registerExtensionFunction(ns_uri, name, function) + self._registerExtensionFunction( + self._to_utf(ns_uri), self._to_utf(name), function) def registerExtensionFunction(self, ns_uri, name, function): self._registerExtensionFunction( @@ -293,14 +293,6 @@ self._register_context(doc, 0) xsltCtxt.xpathCtxt.userData = self - cdef unregister_context(self): - cdef xslt.xsltTransformContext* xsltCtxt - xsltCtxt = self._xsltCtxt - if xsltCtxt is NULL: - return - self._unregister_context() - self._xsltCtxt = NULL - cdef free_context(self): cdef xslt.xsltTransformContext* xsltCtxt xsltCtxt = self._xsltCtxt @@ -471,7 +463,7 @@ cdef object _registered_variables def __init__(self, namespaces, extensions, variables): _BaseContext.__init__(self, namespaces, extensions) - self._variables = variables + self._variables = variables self._registered_variables = [] cdef register_context(self, xpath.xmlXPathContext* xpathCtxt, _Document doc): @@ -488,46 
+480,37 @@ xpathCtxt = self._xpathCtxt if xpathCtxt is NULL: return - xpathCtxt.userData = NULL - self._unregister_context() self._unregisterVariables() self._registered_variables = [] - self._xpathCtxt = NULL + self._unregister_context() - cdef free_context(self): + cdef void _unregisterVariables(self): cdef xpath.xmlXPathContext* xpathCtxt + cdef xpath.xmlXPathObject* xpathVarValue + cdef char* c_name xpathCtxt = self._xpathCtxt - if xpathCtxt is NULL: - return - self._free_context() - self._registered_variables = [] - xpath.xmlXPathFreeContext(xpathCtxt) + for name_utf in self._registered_variables: + c_name = _cstr(name_utf) + xpathVarValue = xpath.xmlXPathVariableLookup(xpathCtxt, c_name) + if xpathVarValue is not NULL: + xpath.xmlXPathRegisterVariable(xpathCtxt, c_name, NULL) + xpath.xmlXPathFreeObject(xpathVarValue) def registerVariables(self, variable_dict): for name, value in variable_dict.items(): - self.registerVariable(name, value) - - cdef void _unregisterVariables(self): - for name in self._registered_variables: - self._unregisterVariable(name) + name_utf = self._to_utf(name) + self._registerVariable(name_utf, value) + python.PyList_Append(self._registered_variables, name_utf) def registerVariable(self, name, value): - self._registerVariable(self._to_utf(name), value) - self._registered_variables.append(name) + name_utf = self._to_utf(name) + self._registerVariable(name_utf, value) + python.PyList_Append(self._registered_variables, name_utf) cdef void _registerVariable(self, name_utf, value): xpath.xmlXPathRegisterVariable( self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value)) - cdef void _unregisterVariable(self, name_utf): - cdef xpath.xmlXPathContext* xpathCtxt - cdef xpath.xmlXPathObject* xpathVarValue - xpathCtxt = self._xpathCtxt - xpathVarValue = xpath.xmlXPathVariableLookup(xpathCtxt, _cstr(name_utf)) - if xpathVarValue is not NULL: - xpath.xmlXPathRegisterVariable(xpathCtxt, _cstr(name_utf), NULL) - 
xpath.xmlXPathFreeObject(xpathVarValue) - def _contextRegisterExtensionFunction(self, ns_uri_utf, name_utf): if ns_uri_utf is not None: xpath.xmlXPathRegisterFuncNS( @@ -598,7 +581,8 @@ XPathEvaluatorBase.__init__(self, namespaces, extensions) def __dealloc__(self): - xpath.xmlXPathFreeContext(self._c_ctxt) + if self._c_ctxt is not NULL: + xpath.xmlXPathFreeContext(self._c_ctxt) def registerNamespace(self, prefix, uri): """Register a namespace with the XPath context. @@ -670,6 +654,7 @@ return {ns_uri : functions} cdef class XPath(XPathEvaluatorBase): + cdef xpath.xmlXPathContext* _xpathCtxt cdef xpath.xmlXPathCompExpr* _xpath cdef object _prefix_map cdef readonly object path @@ -681,6 +666,7 @@ self._xpath = xpath.xmlXPathCompile(_cstr(path)) if self._xpath is NULL: raise XPathSyntaxError, "Error in xpath expression." + self._xpathCtxt = xpath.xmlXPathNewContext(NULL) def __call__(self, _etree_or_element, **_variables): cdef xpath.xmlXPathContext* xpathCtxt @@ -692,7 +678,8 @@ document = _documentOrRaise(_etree_or_element) element = _rootNodeOf(_etree_or_element) - xpathCtxt = xpath.xmlXPathNewContext(document._c_doc) + xpathCtxt = self._xpathCtxt + xpathCtxt.doc = document._c_doc xpathCtxt.node = element._c_node context = self._context @@ -701,17 +688,15 @@ context.registerVariables(_variables) xpathObj = xpath.xmlXPathCompiledEval(self._xpath, xpathCtxt) - context.unregister_context() - - xpath.xmlXPathFreeContext(xpathCtxt) - return self._handle_result(xpathObj, document) def evaluate(self, _tree, **_variables): return self(_tree, **_variables) def __dealloc__(self): + if self._xpathCtxt is not NULL: + xpath.xmlXPathFreeContext(self._xpathCtxt) if self._xpath is not NULL: xpath.xmlXPathFreeCompExpr(self._xpath) From scoder at codespeak.net Tue Apr 25 10:17:51 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Apr 25 10:17:53 2006 Subject: [Lxml-checkins] r26306 - lxml/branch/py_ssize_t Message-ID: <20060425081751.255C01007D@code0.codespeak.net> 
Author: scoder Date: Tue Apr 25 10:17:49 2006 New Revision: 26306 Added: lxml/branch/py_ssize_t/ - copied from r26305, lxml/trunk/ Log: new branch for Py_ssize_t migration From scoder at codespeak.net Tue Apr 25 13:08:13 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Apr 25 13:08:14 2006 Subject: [Lxml-checkins] r26323 - lxml/trunk Message-ID: <20060425110813.45CB110088@code0.codespeak.net> Author: scoder Date: Tue Apr 25 13:08:12 2006 New Revision: 26323 Modified: lxml/trunk/Makefile Log: allow overriding python executable in Makefile by passing PYTHON variable Modified: lxml/trunk/Makefile ============================================================================== --- lxml/trunk/Makefile (original) +++ lxml/trunk/Makefile Tue Apr 25 13:08:12 2006 @@ -1,4 +1,4 @@ -PYTHON=python +PYTHON?=python TESTFLAGS=-p -v TESTOPTS= SETUPFLAGS= From scoder at codespeak.net Tue Apr 25 14:06:04 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Apr 25 14:06:05 2006 Subject: [Lxml-checkins] r26329 - lxml/branch/py_ssize_t/src/lxml Message-ID: <20060425120604.78EBA1007E@code0.codespeak.net> Author: scoder Date: Tue Apr 25 14:06:02 2006 New Revision: 26329 Modified: lxml/branch/py_ssize_t/src/lxml/etree.pyx lxml/branch/py_ssize_t/src/lxml/parser.pxi lxml/branch/py_ssize_t/src/lxml/python.pxd lxml/branch/py_ssize_t/src/lxml/xslt.pxi Log: Py_ssize_t fixes for Python 2.5, needs patched Pyrex Modified: lxml/branch/py_ssize_t/src/lxml/etree.pyx ============================================================================== --- lxml/branch/py_ssize_t/src/lxml/etree.pyx (original) +++ lxml/branch/py_ssize_t/src/lxml/etree.pyx Tue Apr 25 14:06:02 2006 @@ -368,7 +368,7 @@ # MANIPULATORS - def __setitem__(self, index, _NodeBase element): + def __setitem__(self, Py_ssize_t index, _NodeBase element): cdef xmlNode* c_node cdef xmlNode* c_next cdef int foreign @@ -382,7 +382,7 @@ _moveTail(c_next, element._c_node) changeDocumentBelow(element, self._doc, 
foreign) - def __delitem__(self, index): + def __delitem__(self, Py_ssize_t index): cdef xmlNode* c_node c_node = _findChild(self._c_node, index) if c_node is NULL: @@ -390,12 +390,12 @@ _removeText(c_node.next) _removeNode(c_node) - def __delslice__(self, start, stop): + def __delslice__(self, Py_ssize_t start, Py_ssize_t stop): cdef xmlNode* c_node c_node = _findChild(self._c_node, start) _deleteSlice(c_node, start, stop) - def __setslice__(self, start, stop, value): + def __setslice__(self, Py_ssize_t start, Py_ssize_t stop, value): cdef xmlNode* c_node cdef xmlNode* c_next cdef _Element mynode @@ -576,17 +576,17 @@ def __repr__(self): return "" % (self.tag, id(self)) - def __getitem__(self, index): + def __getitem__(self, Py_ssize_t index): cdef xmlNode* c_node c_node = _findChild(self._c_node, index) if c_node is NULL: raise IndexError, "list index out of range" return _elementFactory(self._doc, c_node) - def __getslice__(self, start, stop): + def __getslice__(self, Py_ssize_t start, Py_ssize_t stop): cdef xmlNode* c_node cdef _Document doc - cdef int c, c_stop + cdef Py_ssize_t c # this does not work for negative start, stop, however, # python seems to convert these to positive start, stop before # calling, so this all works perfectly (at the cost of a len() call) @@ -594,10 +594,9 @@ if c_node is NULL: return [] c = start - c_stop = stop result = [] doc = self._doc - while c_node is not NULL and c < c_stop: + while c_node is not NULL and c < stop: if _isElement(c_node): ret = python.PyList_Append(result, _elementFactory(doc, c_node)) if ret: @@ -607,7 +606,7 @@ return result def __len__(self): - cdef int c + cdef Py_ssize_t c cdef xmlNode* c_node c = 0 c_node = self._c_node.children @@ -629,10 +628,8 @@ return ElementChildIterator(self, reversed=True) def index(self, _Element x not None, start=None, stop=None): - cdef int k - cdef int l - cdef int c_stop - cdef int c_start + cdef Py_ssize_t k, l + cdef Py_ssize_t c_start, c_stop cdef xmlNode* c_child cdef 
xmlNode* c_start_node c_child = x._c_node @@ -693,7 +690,7 @@ return k else: return k - if c_start or c_stop: + if c_start != 0 or c_stop != 0: raise ValueError, "list.index(x): x not in slice" else: raise ValueError, "list.index(x): x not in list" @@ -915,7 +912,7 @@ return result def __len__(self): - cdef int c + cdef Py_ssize_t c cdef xmlNode* c_node c = 0 c_node = (self._c_node.properties) @@ -1431,7 +1428,7 @@ If there was no text to collect, return None """ - cdef int scount + cdef Py_ssize_t scount cdef char* text cdef xmlNode* c_node_cur # check for multiple text nodes @@ -1470,17 +1467,17 @@ tree.xmlFreeNode(c_node) c_node = c_next -cdef xmlNode* _findChild(xmlNode* c_node, int index): +cdef xmlNode* _findChild(xmlNode* c_node, Py_ssize_t index): if index < 0: return _findChildBackwards(c_node, -index - 1) else: return _findChildForwards(c_node, index) -cdef xmlNode* _findChildForwards(xmlNode* c_node, int index): +cdef xmlNode* _findChildForwards(xmlNode* c_node, Py_ssize_t index): """Return child element of c_node with index, or return NULL if not found. """ cdef xmlNode* c_child - cdef int c + cdef Py_ssize_t c c_child = c_node.children c = 0 while c_child is not NULL: @@ -1492,12 +1489,12 @@ else: return NULL -cdef xmlNode* _findChildBackwards(xmlNode* c_node, int index): +cdef xmlNode* _findChildBackwards(xmlNode* c_node, Py_ssize_t index): """Return child element of c_node with index, or return NULL if not found. Search from the end. """ cdef xmlNode* c_child - cdef int c + cdef Py_ssize_t c c_child = c_node.last c = 0 while c_child is not NULL: @@ -1551,11 +1548,11 @@ ## return (c_node.type == tree.XML_ELEMENT_NODE or ## c_node.type == tree.XML_COMMENT_NODE) -cdef xmlNode* _deleteSlice(xmlNode* c_node, int start, int stop): +cdef xmlNode* _deleteSlice(xmlNode* c_node, Py_ssize_t start, Py_ssize_t stop): """Delete slice, starting with c_node, start counting at start, end at stop. 
""" cdef xmlNode* c_next - cdef int c + cdef Py_ssize_t c if c_node is NULL: return NULL # now start deleting nodes @@ -1571,7 +1568,7 @@ return c_node cdef int isutf8(char* string): - cdef int i + cdef Py_ssize_t i i = 0 while 1: if string[i] == c'\0': @@ -1600,7 +1597,7 @@ """ cdef char* c_tag cdef char* c_pos - cdef int nslen + cdef Py_ssize_t nslen if isinstance(tag, QName): tag = (tag).text tag = _utf8(tag) Modified: lxml/branch/py_ssize_t/src/lxml/parser.pxi ============================================================================== --- lxml/branch/py_ssize_t/src/lxml/parser.pxi (original) +++ lxml/branch/py_ssize_t/src/lxml/parser.pxi Tue Apr 25 14:06:02 2006 @@ -254,7 +254,6 @@ cdef xmlDoc* result cdef xmlParserCtxt* pctxt cdef char* c_text - cdef int c_len self._error_log.connect() c_text = _cstr(text_utf) pctxt = self._memory_parser_ctxt Modified: lxml/branch/py_ssize_t/src/lxml/python.pxd ============================================================================== --- lxml/branch/py_ssize_t/src/lxml/python.pxd (original) +++ lxml/branch/py_ssize_t/src/lxml/python.pxd Tue Apr 25 14:06:02 2006 @@ -12,13 +12,13 @@ cdef object PyUnicode_FromEncodedObject(object s, char* encoding, char* errors) - cdef object PyUnicode_DecodeUTF8(char* s, int size, char* errors) + cdef object PyUnicode_DecodeUTF8(char* s, Py_ssize_t size, char* errors) cdef object PyUnicode_AsUTF8String(object ustring) - cdef object PyString_FromStringAndSize(char* s, int size) + cdef object PyString_FromStringAndSize(char* s, Py_ssize_t size) cdef object PyString_FromString(char* s) cdef object PyString_FromFormat(char* format, ...) 
- cdef int PyList_GET_SIZE(object l) + cdef Py_ssize_t PyList_GET_SIZE(object l) cdef int PyList_Append(object l, object obj) cdef int PyDict_SetItemString(object d, char* key, object value) cdef int PyDict_SetItem(object d, object key, object value) @@ -29,7 +29,7 @@ cdef object PyList_AsTuple(object o) cdef object PySequence_List(object o) cdef object PySequence_Tuple(object o) - cdef object PyTuple_GET_ITEM(object o, int pos) + cdef object PyTuple_GET_ITEM(object o, Py_ssize_t pos) cdef int PyNumber_Check(object instance) cdef int PyBool_Check(object instance) Modified: lxml/branch/py_ssize_t/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/py_ssize_t/src/lxml/xslt.pxi (original) +++ lxml/branch/py_ssize_t/src/lxml/xslt.pxi Tue Apr 25 14:06:02 2006 @@ -372,8 +372,7 @@ cdef xmlDoc* c_result cdef xmlDoc* c_doc cdef char** params - cdef int i - cdef int j + cdef Py_ssize_t i input_doc = _documentOrRaise(_input) root_node = _rootNodeOf(_input) @@ -816,6 +815,7 @@ cdef _Document doc cdef xpath.xmlXPathObject* obj cdef _BaseContext extensions + cdef int i rctxt = ctxt.context From scoder at codespeak.net Tue Apr 25 17:27:35 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Apr 25 17:27:37 2006 Subject: [Lxml-checkins] r26339 - lxml/trunk/src/lxml Message-ID: <20060425152735.BD0C81008B@code0.codespeak.net> Author: scoder Date: Tue Apr 25 17:27:34 2006 New Revision: 26339 Modified: lxml/trunk/src/lxml/xslt.pxi Log: always use unicode regexps (it's XML after all) Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Tue Apr 25 17:27:34 2006 @@ -221,10 +221,9 @@ c_result = python.PyDict_GetItem(self._compile_map, key) if c_result is not NULL: return c_result + py_flags = re.UNICODE if ignore_case: - py_flags = re.IGNORECASE - else: - py_flags = 0 + 
py_flags = py_flags | re.IGNORECASE rexp_compiled = RE_COMPILE(rexp, py_flags) python.PyDict_SetItem(self._compile_map, key, rexp_compiled) return rexp_compiled From scoder at codespeak.net Tue Apr 25 17:29:59 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Apr 25 17:30:03 2006 Subject: [Lxml-checkins] r26340 - lxml/branch/resolver-new/src/lxml Message-ID: <20060425152959.839101008B@code0.codespeak.net> Author: scoder Date: Tue Apr 25 17:29:55 2006 New Revision: 26340 Modified: lxml/branch/resolver-new/src/lxml/xslt.pxd lxml/branch/resolver-new/src/lxml/xslt.pxi Log: do not register all extra functions, just 'node-set()' to prevent potential security risks through write/output/debug extra elements Modified: lxml/branch/resolver-new/src/lxml/xslt.pxd ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxd (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxd Tue Apr 25 17:29:55 2006 @@ -18,8 +18,11 @@ cdef extern from "libxslt/extensions.h": cdef int xsltRegisterExtFunction(xsltTransformContext* ctxt, char* name, - char * URI, + char* URI, xmlXPathFunction function) + cdef int xsltRegisterExtModuleFunction(char* name, char* URI, + xmlXPathFunction function) + cdef int xsltUnregisterExtModuleFunction(char* name, char* URI) cdef extern from "libxslt/documents.h": ctypedef enum xsltLoadType: @@ -32,7 +35,7 @@ void* ctxt, xsltLoadType type) cdef xsltDocLoaderFunc xsltDocDefaultLoader - cdef void xsltSetLoaderFunc (xsltDocLoaderFunc f) + cdef void xsltSetLoaderFunc(xsltDocLoaderFunc f) cdef extern from "libxslt/transform.h": cdef xmlDoc* xsltApplyStylesheet(xsltStylesheet* style, xmlDoc* doc, @@ -57,6 +60,13 @@ void* ctxt, void (*handler)(void* ctxt, char* msg, ...)) +cdef extern from "libxslt/extra.h": + cdef char* XSLT_LIBXSLT_NAMESPACE + cdef char* XSLT_XALAN_NAMESPACE + cdef char* XSLT_SAXON_NAMESPACE + cdef char* XSLT_XT_NAMESPACE + + cdef xmlXPathFunction 
xsltFunctionNodeSet cdef void xsltRegisterAllExtras() cdef extern from "libexslt/exslt.h": Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Tue Apr 25 17:29:55 2006 @@ -482,10 +482,23 @@ result._xslt = xslt return result -# enable EXSLT and Saxon support for XSLT -xslt.xsltRegisterAllExtras() +# enable EXSLT support for XSLT xslt.exsltRegisterAll() +# do not register all libxslt extra function, provide only "node-set" +# functions like "output" and "write" are a potential security risk +#xslt.xsltRegisterAllExtras() +xslt.xsltRegisterExtModuleFunction("node-set", + xslt.XSLT_LIBXSLT_NAMESPACE, + xslt.xsltFunctionNodeSet); +xslt.xsltRegisterExtModuleFunction("node-set", + xslt.XSLT_SAXON_NAMESPACE, + xslt.xsltFunctionNodeSet); +xslt.xsltRegisterExtModuleFunction("node-set", + xslt.XSLT_XT_NAMESPACE, + xslt.xsltFunctionNodeSet); + + ################################################################################ # XPath From scoder at codespeak.net Tue Apr 25 18:07:30 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Tue Apr 25 18:07:33 2006 Subject: [Lxml-checkins] r26341 - lxml/trunk/src/lxml Message-ID: <20060425160730.D30581009F@code0.codespeak.net> Author: scoder Date: Tue Apr 25 18:07:28 2006 New Revision: 26341 Modified: lxml/trunk/src/lxml/xslt.pxi Log: clean up in XPath context/extension function registration Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Tue Apr 25 18:07:28 2006 @@ -36,7 +36,6 @@ cdef object _extensions cdef object _namespaces cdef object _registered_namespaces - cdef object _registered_extensions cdef object _extension_functions cdef object _utf_refs # for exception handling and temporary 
reference keeping: @@ -66,7 +65,6 @@ self._extensions = extensions self._namespaces = namespaces self._registered_namespaces = [] - self._registered_extensions = [] self._extension_functions = {} self._temp_elements = {} self._temp_docs = {} @@ -111,7 +109,7 @@ cdef _free_context(self): del self._registered_namespaces[:] - del self._registered_extensions[:] + python.PyDict_Clear(self._extension_functions) python.PyDict_Clear(self._utf_refs) self._doc = None if self._xpathCtxt is not NULL: @@ -161,10 +159,9 @@ cdef _registerExtensionFunction(self, ns_uri_utf, name_utf, function): self._contextRegisterExtensionFunction(ns_uri_utf, name_utf) self._extension_functions[(ns_uri_utf, name_utf)] = function - self._registered_extensions.append((ns_uri_utf, name_utf)) cdef _unregisterExtensionFunctions(self): - for ns_uri_utf, name_utf in self._registered_extensions: + for ns_uri_utf, name_utf in self._extension_functions: self._contextUnregisterExtensionFunction(ns_uri_utf, name_utf) def find_extension(self, ns_uri_utf, name_utf): From scoder at codespeak.net Wed Apr 26 12:38:25 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Apr 26 12:38:26 2006 Subject: [Lxml-checkins] r26357 - lxml/trunk/src/lxml Message-ID: <20060426103825.1874810084@code0.codespeak.net> Author: scoder Date: Wed Apr 26 12:38:22 2006 New Revision: 26357 Modified: lxml/trunk/src/lxml/nsclasses.pxi lxml/trunk/src/lxml/xslt.pxd lxml/trunk/src/lxml/xslt.pxi Log: rewrite of internal extension function registration * register XSLT functions at module level directly in FunctionNamespace with xsltRegisterExtModuleFunction * lookup XPath functions at XPath run-time rather than registering everything at call-time * new 'regexp' keyword for XSLT() to switch off regexp support Modified: lxml/trunk/src/lxml/nsclasses.pxi ============================================================================== --- lxml/trunk/src/lxml/nsclasses.pxi (original) +++ lxml/trunk/src/lxml/nsclasses.pxi Wed Apr 26 
12:38:22 2006 @@ -53,11 +53,19 @@ cdef class _NamespaceRegistry: "Dictionary-like registry for namespace implementations" cdef object _ns_uri + cdef object _ns_uri_utf cdef object _classes cdef object _extensions cdef object _xslt_elements + cdef char* _c_ns_uri_utf def __init__(self, ns_uri): self._ns_uri = ns_uri + if ns_uri is None: + self._ns_uri_utf = None + self._c_ns_uri_utf = NULL + else: + self._ns_uri_utf = _utf8(ns_uri) + self._c_ns_uri_utf = _cstr(self._ns_uri_utf) self._classes = {} self._extensions = {} self._xslt_elements = {} @@ -95,14 +103,11 @@ d[name_utf] = item def __getitem__(self, name): - name_utf = _utf8(name) - return self._get(name_utf) - - cdef object _get(self, object name): cdef python.PyObject* dict_result - dict_result = python.PyDict_GetItem(self._classes, name) + name_utf = _utf8(name) + dict_result = python.PyDict_GetItem(self._classes, name_utf) if dict_result is NULL: - dict_result = python.PyDict_GetItem(self._extensions, name) + dict_result = python.PyDict_GetItem(self._extensions, name_utf) if dict_result is NULL: raise KeyError, "Name not registered." return dict_result @@ -130,34 +135,34 @@ self._prefix_utf = _utf8(prefix) self._prefix = prefix - def __setitem__(self, name, item): - if not callable(item): + def __setitem__(self, name, function): + if not callable(function): raise NamespaceRegistryError, "Registered function must be callable." if name is None: name_utf = None else: name_utf = _utf8(name) - self._extensions[name_utf] = item + self._extensions[name_utf] = function + _register_global_xslt_function(self._c_ns_uri_utf, _cstr(name_utf)) - cdef object _get(self, object name): + def __getitem__(self, name): cdef python.PyObject* dict_result - dict_result = python.PyDict_GetItem(self._extensions, name) + name_utf = _utf8(name) + dict_result = python.PyDict_GetItem(self._extensions, name_utf) if dict_result is NULL: raise KeyError, "Name not registered." 
return dict_result + def clear(self): + cdef char* c_uri_utf + c_uri_utf = self._c_ns_uri_utf + for name_utf in self._extensions: + _unregister_global_xslt_function(c_uri_utf, _cstr(name_utf)) + _NamespaceRegistry.clear(self) + def __repr__(self): return "FunctionNamespace(%r)" % self._ns_uri -cdef object _find_all_extensions(): - "Internal lookup function to find all extension functions for XSLT/XPath." - cdef _NamespaceRegistry registry - ns_extensions = {} - for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.iteritems(): - if registry._extensions: - ns_extensions[ns_utf] = registry._extensions - return ns_extensions - cdef object _find_all_extension_prefixes(): "Internal lookup function to find all function prefixes for XSLT/XPath." cdef _FunctionNamespaceRegistry registry @@ -167,25 +172,18 @@ ns_prefixes[registry._prefix_utf] = ns_utf return ns_prefixes -cdef _find_extensions(namespaces): - """Returns a dictionary that maps each namespace in the provided list to a - dictionary of name-function mappings defined under that namespace.""" +cdef object _find_extension(ns_uri_utf, name_utf): cdef python.PyObject* dict_result - cdef char* c_ns_utf - extension_dict = {} - for ns_uri in namespaces: - if ns_uri is None: - ns_utf = None - else: - ns_utf = _utf8(ns_uri) - dict_result = python.PyDict_GetItem( - __FUNCTION_NAMESPACE_REGISTRIES, ns_utf) - if dict_result is NULL: - continue - extensions = (<_NamespaceRegistry>dict_result)._extensions - if extensions: - python.PyDict_SetItem(extension_dict, ns_utf, extensions) - return extension_dict + dict_result = python.PyDict_GetItem( + __FUNCTION_NAMESPACE_REGISTRIES, ns_uri_utf) + if dict_result is NULL: + return None + extensions = (<_NamespaceRegistry>dict_result)._extensions + dict_result = python.PyDict_GetItem(extensions, name_utf) + if dict_result is NULL: + return None + else: + return dict_result cdef object _find_element_class(char* c_namespace_utf, char* c_element_name_utf): Modified: 
lxml/trunk/src/lxml/xslt.pxd ============================================================================== --- lxml/trunk/src/lxml/xslt.pxd (original) +++ lxml/trunk/src/lxml/xslt.pxd Wed Apr 26 12:38:22 2006 @@ -19,6 +19,9 @@ char* name, char * URI, xmlXPathFunction function) + cdef int xsltRegisterExtModuleFunction(char* name, char* URI, + xmlXPathFunction function) + cdef int xsltUnregisterExtModuleFunction(char* name, char* URI) cdef extern from "libxslt/transform.h": cdef xmlDoc* xsltApplyStylesheet(xsltStylesheet* style, xmlDoc* doc, Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Wed Apr 26 12:38:22 2006 @@ -36,8 +36,8 @@ cdef object _extensions cdef object _namespaces cdef object _registered_namespaces - cdef object _extension_functions cdef object _utf_refs + cdef object _temp_last_function # for exception handling and temporary reference keeping: cdef object _temp_elements cdef object _temp_docs @@ -46,8 +46,9 @@ def __init__(self, namespaces, extensions): self._xpathCtxt = NULL self._utf_refs = {} + self._temp_last_function = (None, None, None) - # fix old format extensions + # convert old format extensions to UTF-8 if isinstance(extensions, (list, tuple)): new_extensions = {} for extension in extensions: @@ -65,7 +66,6 @@ self._extensions = extensions self._namespaces = namespaces self._registered_namespaces = [] - self._extension_functions = {} self._temp_elements = {} self._temp_docs = {} @@ -88,28 +88,19 @@ cdef _register_context(self, _Document doc, int allow_none_namespace): self._doc = doc self._exc_info = None + self._temp_last_function = (None, None, None) namespaces = self._namespaces if namespaces is not None: self.registerNamespaces(namespaces) - extensions = _find_extensions(namespaces.values()) - else: - extensions = _find_all_extensions() - if self._extensions is not None: - # add user provided 
extensions - extensions.update(self._extensions) - if extensions: - if not allow_none_namespace: - python.PyDict_DelItem(extensions, None) - self._registerExtensionFunctions(extensions) + xpath.xmlXPathRegisterFuncLookup(self._xpathCtxt, _function_check, + self) cdef _unregister_context(self): - self._unregisterExtensionFunctions() self._unregisterNamespaces() self._free_context() cdef _free_context(self): del self._registered_namespaces[:] - python.PyDict_Clear(self._extension_functions) python.PyDict_Clear(self._utf_refs) self._doc = None if self._xpathCtxt is not NULL: @@ -139,33 +130,25 @@ for prefix_utf in self._registered_namespaces: xpath.xmlXPathRegisterNs(xpathCtxt, prefix_utf, NULL) - # extension functions (internal UTF-8 methods with leading '_') - - def registerExtensionFunctions(self, extensions): - for ns_uri, extension in extensions.items(): - for name, function in extension.items(): - self._registerExtensionFunction( - self._to_utf(ns_uri), self._to_utf(name), function) - - def registerExtensionFunction(self, ns_uri, name, function): - self._registerExtensionFunction( - self._to_utf(ns_uri), self._to_utf(name), function) + # extension functions - cdef _registerExtensionFunctions(self, extensions_utf): - for ns_uri_utf, extension in extensions_utf.items(): - for name_utf, function in extension.items(): - self._registerExtensionFunction(ns_uri_utf, name_utf, function) + cdef _lookup_extension(self, ns_uri_utf, name_utf): + cdef python.PyObject* dict_result + if self._temp_last_function[0] == ns_uri_utf and \ + self._temp_last_function[1] == name_utf: + return self._temp_last_function[2] - cdef _registerExtensionFunction(self, ns_uri_utf, name_utf, function): - self._contextRegisterExtensionFunction(ns_uri_utf, name_utf) - self._extension_functions[(ns_uri_utf, name_utf)] = function - - cdef _unregisterExtensionFunctions(self): - for ns_uri_utf, name_utf in self._extension_functions: - self._contextUnregisterExtensionFunction(ns_uri_utf, name_utf) + 
dict_result = python.PyDict_GetItem(self._extensions, ns_uri_utf) + if dict_result is not NULL: + dict_result = python.PyDict_GetItem(dict_result, name_utf) + if dict_result is not NULL: + function = dict_result + else: + function = _find_extension(ns_uri_utf, name_utf) - def find_extension(self, ns_uri_utf, name_utf): - return self._extension_functions[(ns_uri_utf, name_utf)] + # store temporarily as it will be looked up again in the next call + self._temp_last_function = (ns_uri_utf, name_utf, function) + return function # Python reference keeping during XPath function evaluation @@ -194,86 +177,27 @@ #print "Holding document:", element._doc._c_doc python.PyDict_SetItem(self._temp_docs, id(element._doc), element._doc) -################################################################################ -# EXSLT regexp implementation - -cdef object RE_COMPILE -RE_COMPILE = re.compile - -cdef class _ExsltRegExp: - cdef object _compile_map - def __init__(self): - self._compile_map = {} - - cdef _make_string(self, value): - if python.PyString_Check(value) or python.PyUnicode_Check(value): - return value - else: - raise TypeError, "Invalid argument type %s" % type(value) - - cdef _compile(self, rexp, ignore_case): - cdef python.PyObject* c_result - rexp = self._make_string(rexp) - key = (rexp, ignore_case) - c_result = python.PyDict_GetItem(self._compile_map, key) - if c_result is not NULL: - return c_result - py_flags = re.UNICODE - if ignore_case: - py_flags = py_flags | re.IGNORECASE - rexp_compiled = RE_COMPILE(rexp, py_flags) - python.PyDict_SetItem(self._compile_map, key, rexp_compiled) - return rexp_compiled - - def test(self, ctxt, s, rexp, flags=''): - flags = self._make_string(flags) - s = self._make_string(s) - rexpc = self._compile(rexp, 'i' in flags) - if rexpc.search(s) is None: - return False - else: - return True - - def match(self, ctxt, s, rexp, flags=''): - flags = self._make_string(flags) - s = self._make_string(s) - rexpc = self._compile(rexp, 'i' in 
flags) - if 'g' in flags: - results = rexpc.findall(s) - if not results: - return () - result_list = [] - root = Element('matches') - for s_match in results: - elem = SubElement(root, 'match') - elem.text = s_match - python.PyList_Append(result_list, elem) - return result_list - else: - result = rexpc.search(s) - if result is None: - return () - root = Element('match') - root.text = result.group() - return (root,) +cdef xpath.xmlXPathFunction _function_check(void* ctxt, char* c_name, char* c_ns_uri): + cdef _BaseContext context + if c_name is NULL: + return NULL + if c_ns_uri is NULL: + ns_uri = None + else: + ns_uri = c_ns_uri + context = <_BaseContext>ctxt + if context._lookup_extension(ns_uri, c_name) is None: + return NULL + else: + return _xpath_function_call - def replace(self, ctxt, s, rexp, flags, replacement): - replacement = self._make_string(replacement) - flags = self._make_string(flags) - s = self._make_string(s) - rexpc = self._compile(rexp, 'i' in flags) - if 'g' in flags: - count = 0 - else: - count = 1 - return rexpc.sub(replacement, s, count) +cdef void _register_global_xslt_function(char* ns_uri, char* name): + xslt.xsltRegisterExtModuleFunction(ns_uri, name, _xpath_function_call) - cdef void _register_exslt_regexp(self, _BaseContext context): - ns = "http://exslt.org/regular-expressions" - context._registerExtensionFunction(ns, "test", self.test) - context._registerExtensionFunction(ns, "match", self.match) - context._registerExtensionFunction(ns, "replace", self.replace) +cdef void _unregister_global_xslt_function(char* ns_uri, char* name): + xslt.xsltUnRegisterExtModuleFunction(ns_uri, name) + ################################################################################ # XSLT @@ -281,13 +205,17 @@ cdef xslt.xsltTransformContext* _xsltCtxt def __init__(self, namespaces, extensions): self._xsltCtxt = NULL + if extensions and None in extensions: + raise XSLTExtensionError, "extensions must have non-empty namespaces" 
_BaseContext.__init__(self, namespaces, extensions) - cdef register_context(self, xslt.xsltTransformContext* xsltCtxt, _Document doc): + cdef register_context(self, xslt.xsltTransformContext* xsltCtxt, + _Document doc): self._xsltCtxt = xsltCtxt self._set_xpath_context(xsltCtxt.xpathCtxt) self._register_context(doc, 0) xsltCtxt.xpathCtxt.userData = self + self._registerLocalExtensionFunctions() cdef free_context(self): cdef xslt.xsltTransformContext* xsltCtxt @@ -298,19 +226,31 @@ self._xsltCtxt = NULL xslt.xsltFreeTransformContext(xsltCtxt) - def _contextRegisterExtensionFunction(self, ns_uri_utf, name_utf): - if ns_uri_utf is None: - raise XSLTExtensionError, "extensions must have non-empty namespaces" + cdef _registerLocalExtensionFunction(self, ns_utf, name_utf, function): + extensions = self._extensions + if self._extensions is None: + self._extensions = {ns_utf:{name_utf:function}} + else: + if ns_utf in self._extensions: + self._extensions[ns_utf][name_utf] = function + else: + self._extensions[ns_utf] = ns_extensions = {name_utf:function} xslt.xsltRegisterExtFunction( - self._xsltCtxt, _cstr(name_utf), _cstr(ns_uri_utf), - _xpathCallback) + self._xsltCtxt, _cstr(name_utf), _cstr(ns_utf), + _xpath_function_call) - def _contextUnregisterExtensionFunction(self, ns_uri_utf, name_utf): - if ns_uri_utf is not None: - xslt.xsltRegisterExtFunction( - self._xsltCtxt, _cstr(name_utf), _cstr(ns_uri_utf), - _xpathCallback) + cdef _registerLocalExtensionFunctions(self): + cdef xslt.xsltTransformContext* xsltCtxt + if self._extensions is None: + return + xsltCtxt = self._xsltCtxt + for ns_uri_utf, extension in self._extensions.items(): + for name_utf, function in extension.items(): + xslt.xsltRegisterExtFunction( + xsltCtxt, _cstr(name_utf), _cstr(ns_uri_utf), + _xpath_function_call) +cdef class _ExsltRegExp # forward declaration cdef class XSLT: """Turn a document into an XSLT object. 
@@ -320,7 +260,7 @@ cdef _ExsltRegExp _regexp cdef object _doc_url_utf - def __init__(self, xslt_input, extensions=None): + def __init__(self, xslt_input, extensions=None, regexp=True): # make a copy of the document as stylesheet needs to assume it # doesn't change cdef xslt.xsltStylesheet* c_style @@ -353,7 +293,10 @@ self._c_style = c_style self._context = _XSLTContext(None, extensions) - self._regexp = _ExsltRegExp() + if regexp: + self._regexp = _ExsltRegExp() + else: + self._regexp = None # XXX is it worthwile to use xsltPrecomputeStylesheet here? def __dealloc__(self): @@ -403,7 +346,8 @@ self._context._release_temp_refs() self._context.register_context(transform_ctxt, input_doc) - self._regexp._register_exslt_regexp(self._context) + if self._regexp is not None: + self._regexp._register_in_context(self._context) c_result = xslt.xsltApplyStylesheetUser(self._c_style, c_doc, params, NULL, NULL, transform_ctxt) @@ -452,6 +396,86 @@ return result ################################################################################ +# EXSLT regexp implementation + +cdef object RE_COMPILE +RE_COMPILE = re.compile + +cdef class _ExsltRegExp: + cdef object _compile_map + def __init__(self): + self._compile_map = {} + + cdef _make_string(self, value): + if python.PyString_Check(value) or python.PyUnicode_Check(value): + return value + else: + raise TypeError, "Invalid argument type %s" % type(value) + + cdef _compile(self, rexp, ignore_case): + cdef python.PyObject* c_result + rexp = self._make_string(rexp) + key = (rexp, ignore_case) + c_result = python.PyDict_GetItem(self._compile_map, key) + if c_result is not NULL: + return c_result + py_flags = re.UNICODE + if ignore_case: + py_flags = py_flags | re.IGNORECASE + rexp_compiled = RE_COMPILE(rexp, py_flags) + python.PyDict_SetItem(self._compile_map, key, rexp_compiled) + return rexp_compiled + + def test(self, ctxt, s, rexp, flags=''): + flags = self._make_string(flags) + s = self._make_string(s) + rexpc = 
self._compile(rexp, 'i' in flags) + if rexpc.search(s) is None: + return False + else: + return True + + def match(self, ctxt, s, rexp, flags=''): + flags = self._make_string(flags) + s = self._make_string(s) + rexpc = self._compile(rexp, 'i' in flags) + if 'g' in flags: + results = rexpc.findall(s) + if not results: + return () + result_list = [] + root = Element('matches') + for s_match in results: + elem = SubElement(root, 'match') + elem.text = s_match + python.PyList_Append(result_list, elem) + return result_list + else: + result = rexpc.search(s) + if result is None: + return () + root = Element('match') + root.text = result.group() + return (root,) + + def replace(self, ctxt, s, rexp, flags, replacement): + replacement = self._make_string(replacement) + flags = self._make_string(flags) + s = self._make_string(s) + rexpc = self._compile(rexp, 'i' in flags) + if 'g' in flags: + count = 0 + else: + count = 1 + return rexpc.sub(replacement, s, count) + + cdef _register_in_context(self, _XSLTContext context): + ns = "http://exslt.org/regular-expressions" + context._registerLocalExtensionFunction(ns, "test", self.test) + context._registerLocalExtensionFunction(ns, "match", self.match) + context._registerLocalExtensionFunction(ns, "replace", self.replace) + +################################################################################ # XPath cdef class _XPathContext(_BaseContext): @@ -507,24 +531,6 @@ xpath.xmlXPathRegisterVariable( self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value)) - def _contextRegisterExtensionFunction(self, ns_uri_utf, name_utf): - if ns_uri_utf is not None: - xpath.xmlXPathRegisterFuncNS( - self._xpathCtxt, _cstr(name_utf), _cstr(ns_uri_utf), - _xpathCallback) - else: - xpath.xmlXPathRegisterFunc( - self._xpathCtxt, _cstr(name_utf), - _xpathCallback) - - def _contextUnregisterExtensionFunction(self, ns_uri_utf, name_utf): - if ns_uri_utf is not None: - xpath.xmlXPathRegisterFuncNS( - self._xpathCtxt, _cstr(name_utf), 
_cstr(ns_uri_utf), NULL) - else: - xpath.xmlXPathRegisterFunc( - self._xpathCtxt, _cstr(name_utf), NULL) - cdef class XPathEvaluatorBase: cdef _XPathContext _context @@ -807,7 +813,7 @@ raise NotImplementedError return result -cdef void _xpathCallback(xpath.xmlXPathParserContext* ctxt, int nargs): +cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt, int nargs): cdef xpath.xmlXPathContext* rctxt cdef _Document doc cdef xpath.xmlXPathObject* obj @@ -826,7 +832,7 @@ extensions = <_BaseContext>(rctxt.userData) # lookup up the extension function in the context - f = extensions.find_extension(uri, name) + f = extensions._lookup_extension(uri, name) args = [] doc = extensions._doc From scoder at codespeak.net Wed Apr 26 12:42:27 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Apr 26 12:42:28 2006 Subject: [Lxml-checkins] r26358 - in lxml/trunk: . src/lxml Message-ID: <20060426104227.4454910084@code0.codespeak.net> Author: scoder Date: Wed Apr 26 12:42:26 2006 New Revision: 26358 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/xslt.pxi Log: clean up, updated CHANGES.txt Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Apr 26 12:42:26 2006 @@ -10,6 +10,7 @@ * Substantial speedup in XPath.evaluate() * Exslt:regexp implementation for XSLT based on the Python 're' module + on by default, can be switched off with 'regexp=False' keyword argument * HTMLParser for parsing (broken) HTML Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Wed Apr 26 12:42:26 2006 @@ -191,12 +191,6 @@ else: return _xpath_function_call -cdef void _register_global_xslt_function(char* ns_uri, char* name): - xslt.xsltRegisterExtModuleFunction(ns_uri, name, _xpath_function_call) - -cdef void 
_unregister_global_xslt_function(char* ns_uri, char* name): - xslt.xsltUnRegisterExtModuleFunction(ns_uri, name) - ################################################################################ # XSLT @@ -395,6 +389,13 @@ result._xslt = xslt return result +# used by FunctionNamespace(): +cdef void _register_global_xslt_function(char* ns_uri, char* name): + xslt.xsltRegisterExtModuleFunction(ns_uri, name, _xpath_function_call) + +cdef void _unregister_global_xslt_function(char* ns_uri, char* name): + xslt.xsltUnRegisterExtModuleFunction(ns_uri, name) + ################################################################################ # EXSLT regexp implementation From scoder at codespeak.net Wed Apr 26 13:39:57 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Apr 26 13:39:59 2006 Subject: [Lxml-checkins] r26360 - lxml/trunk Message-ID: <20060426113957.9609810084@code0.codespeak.net> Author: scoder Date: Wed Apr 26 13:39:56 2006 New Revision: 26360 Modified: lxml/trunk/bench.py Log: benchmarks for XPath/XSLT extension calls Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Wed Apr 26 13:39:56 2006 @@ -407,6 +407,44 @@ xpath = self.etree.XPathElementEvaluator(child) xpath.evaluate("./*[0]") + @onlylib('lxe') + def bench_xpath_extensions_old(self, root): + def return_child(_, element): + if element: + return element[0] + else: + return () + extensions = {None : {'child' : return_child}} + xpath = self.etree.XPath("child(.)", extensions=extensions) + for child in root: + xpath(child) + + @onlylib('lxe') + def bench_xslt_extensions_old(self, root): + tree = self.etree.XML("""\ + + TEST + + + + + + + + +""") + def return_child(_, elements): + return elements[0][0] + + extensions = {'testns' : {'child' : return_child}} + + transform = self.etree.XSLT(tree, extensions) + for i in range(10): + transform(root) + 
############################################################ # Main program ############################################################ From scoder at codespeak.net Wed Apr 26 13:48:41 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Apr 26 13:48:42 2006 Subject: [Lxml-checkins] r26362 - lxml/trunk/src/lxml Message-ID: <20060426114841.A977610084@code0.codespeak.net> Author: scoder Date: Wed Apr 26 13:48:40 2006 New Revision: 26362 Modified: lxml/trunk/src/lxml/xslt.pxi Log: rewrote fast path in _lookup_extension to use dictionary Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Wed Apr 26 13:48:40 2006 @@ -37,7 +37,7 @@ cdef object _namespaces cdef object _registered_namespaces cdef object _utf_refs - cdef object _temp_last_function + cdef object _temp_functions # for exception handling and temporary reference keeping: cdef object _temp_elements cdef object _temp_docs @@ -46,7 +46,7 @@ def __init__(self, namespaces, extensions): self._xpathCtxt = NULL self._utf_refs = {} - self._temp_last_function = (None, None, None) + self._temp_functions = {} # convert old format extensions to UTF-8 if isinstance(extensions, (list, tuple)): @@ -88,7 +88,7 @@ cdef _register_context(self, _Document doc, int allow_none_namespace): self._doc = doc self._exc_info = None - self._temp_last_function = (None, None, None) + self._temp_functions.clear() namespaces = self._namespaces if namespaces is not None: self.registerNamespaces(namespaces) @@ -134,9 +134,10 @@ cdef _lookup_extension(self, ns_uri_utf, name_utf): cdef python.PyObject* dict_result - if self._temp_last_function[0] == ns_uri_utf and \ - self._temp_last_function[1] == name_utf: - return self._temp_last_function[2] + key = (ns_uri_utf, name_utf) + dict_result = python.PyDict_GetItem(self._temp_functions, key) + if dict_result is not NULL: + return dict_result 
dict_result = python.PyDict_GetItem(self._extensions, ns_uri_utf) if dict_result is not NULL: @@ -146,8 +147,7 @@ else: function = _find_extension(ns_uri_utf, name_utf) - # store temporarily as it will be looked up again in the next call - self._temp_last_function = (ns_uri_utf, name_utf, function) + python.PyDict_SetItem(self._temp_functions, key, function) return function # Python reference keeping during XPath function evaluation @@ -182,6 +182,7 @@ if c_name is NULL: return NULL if c_ns_uri is NULL: + c_ns_uri = '' ns_uri = None else: ns_uri = c_ns_uri From scoder at codespeak.net Wed Apr 26 22:05:20 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Wed Apr 26 22:05:25 2006 Subject: [Lxml-checkins] r26384 - lxml/trunk/src/lxml Message-ID: <20060426200520.EF48210087@code0.codespeak.net> Author: scoder Date: Wed Apr 26 22:05:19 2006 New Revision: 26384 Modified: lxml/trunk/src/lxml/etree.pyx Log: about 30% faster isutf8() Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Apr 26 22:05:19 2006 @@ -1570,15 +1570,15 @@ c_node = c_next return c_node -cdef int isutf8(char* string): - cdef int i - i = 0 - while 1: - if string[i] == c'\0': - return 0 - if string[i] & 0x80: +cdef int isutf8(char* s): + cdef char c + c = s[0] + while c != c'\0': + if c & 0x80: return 1 - i = i + 1 + s = s + 1 + c = s[0] + return 0 cdef object funicode(char* s): if isutf8(s): From scoder at codespeak.net Thu Apr 27 07:24:40 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 27 07:24:42 2006 Subject: [Lxml-checkins] r26398 - in lxml/branch/resolver-new: doc src/lxml Message-ID: <20060427052440.958A510095@code0.codespeak.net> Author: scoder Date: Thu Apr 27 07:24:38 2006 New Revision: 26398 Modified: lxml/branch/resolver-new/doc/resolvers.txt lxml/branch/resolver-new/src/lxml/docloader.pxi 
lxml/branch/resolver-new/src/lxml/parser.pxi lxml/branch/resolver-new/src/lxml/xslt.pxi Log: doc updates, new resolver method 'resolve_file', fix semantics of 'resolve_empty' to be treated as empty document instead of 'not found' Modified: lxml/branch/resolver-new/doc/resolvers.txt ============================================================================== --- lxml/branch/resolver-new/doc/resolvers.txt (original) +++ lxml/branch/resolver-new/doc/resolvers.txt Thu Apr 27 07:24:38 2006 @@ -19,11 +19,17 @@ these may be None. The context object is not normally used by client code. Resolving is based on three methods of the Resolver object that build internal -representations of the result document. The method 'resolve_string' takes a -document as parsable string, 'resolve_filename' takes a filename and -'resolve_empty' resolves into an empty document. The 'resolve' method may -choose to return None, in which case the next registered resolver (or the -default resolver) is consulted. +representations of the result document. The following methods exist: + +* 'resolve_string' takes a parsable string as result document +* 'resolve_filename' takes a filename +* 'resolve_file' takes an open file-like object that has at least a read() method +* 'resolve_empty' resolves into an empty document + +The 'resolve' method may choose to return None, in which case the next +registered resolver (or the default resolver) is consulted. It is never +called if the resolver returns the result of any of the above 'resolve_*' +methods. 
Resolvers are registered local to a parser:: Modified: lxml/branch/resolver-new/src/lxml/docloader.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/docloader.pxi (original) +++ lxml/branch/resolver-new/src/lxml/docloader.pxi Thu Apr 27 07:24:38 2006 @@ -4,10 +4,12 @@ PARSER_DATA_EMPTY PARSER_DATA_STRING PARSER_DATA_FILENAME + PARSER_DATA_FILE cdef class _InputDocument: cdef _InputDocumentDataType _type cdef object _data_utf + cdef object _file cdef class Resolver: "This is the base class of all resolvers." @@ -15,12 +17,14 @@ return None def resolve_empty(self, context): + "Return an empty input document." cdef _InputDocument doc_ref doc_ref = _InputDocument() doc_ref._type = PARSER_DATA_EMPTY return doc_ref def resolve_string(self, string, context): + "Return a parsable string as input document." cdef _InputDocument doc_ref doc_ref = _InputDocument() doc_ref._type = PARSER_DATA_STRING @@ -28,12 +32,23 @@ return doc_ref def resolve_filename(self, filename, context): + "Return the name of a parsable file as input document." cdef _InputDocument doc_ref doc_ref = _ParserInput() doc_ref._type = PARSER_DATA_FILENAME doc_ref._data_utf = _utf8(filename) return doc_ref + def resolve_file(self, f, context): + "Return an open file-like object as input document." 
+ cdef _InputDocument doc_ref + if not hasattr(f, 'read'): + raise TypeError, "Argument is not a file-like object" + doc_ref = _ParserInput() + doc_ref._type = PARSER_DATA_FILE + doc_ref._file = f + return doc_ref + cdef class _ResolverRegistry: cdef object _resolvers cdef Resolver _default_resolver Modified: lxml/branch/resolver-new/src/lxml/parser.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/parser.pxi (original) +++ lxml/branch/resolver-new/src/lxml/parser.pxi Thu Apr 27 07:24:38 2006 @@ -95,15 +95,21 @@ return __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context) c_input = NULL + data = None if doc_ref._type == PARSER_DATA_STRING: + data = doc_ref._data_utf c_input = xmlparser.xmlNewStringInputStream( c_context, _cstr(doc_ref._data_utf)) elif doc_ref._type == PARSER_DATA_FILENAME: c_input = xmlparser.xmlNewInputFromFile( c_context, _cstr(doc_ref._data_utf)) + elif doc_ref._type == PARSER_DATA_FILE: + data = doc_ref._file.read() + c_input = xmlparser.xmlNewStringInputStream( + c_context, _cstr(data)) - if c_input != NULL: - context._storage.add(doc_ref._data_utf) + if data is not None: + context._storage.add(data) return c_input cdef xmlparser.xmlExternalEntityLoader __DEFAULT_ENTITY_LOADER @@ -269,7 +275,11 @@ pctxt._private = context c_doc = xmlparser.xmlCtxtReadDoc( pctxt, c_text, NULL, NULL, options) - return _handleParseResult(pctxt, c_doc, NULL) + try: + c_doc = _handleParseResult(pctxt, c_doc, NULL) + finally: + xmlparser.xmlFreeParserCtxt(pctxt) + return c_doc cdef xmlDoc* _internalParseDocFromFile(char* c_filename, int options, _ResolverContext context) except NULL: @@ -283,7 +293,11 @@ pctxt._private = context c_doc = xmlparser.xmlCtxtReadFile( pctxt, c_filename, NULL, options) - return _handleParseResult(pctxt, c_doc, c_filename) + try: + c_doc = _handleParseResult(pctxt, c_doc, c_filename) + finally: + xmlparser.xmlFreeParserCtxt(pctxt) + return c_doc cdef XMLParser 
__DEFAULT_XML_PARSER Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Thu Apr 27 07:24:38 2006 @@ -256,10 +256,14 @@ if doc_ref is not None: if doc_ref._type == PARSER_DATA_EMPTY: - return NULL + c_doc = _newDoc() if doc_ref._type == PARSER_DATA_STRING: c_doc = _internalParseDoc( _cstr(doc_ref._data_utf), parse_options, resolver_context) + elif doc_ref._type == PARSER_DATA_FILE: + data = doc_ref._file.read() + c_doc = _internalParseDoc( + _cstr(data), parse_options, resolver_context) elif doc_ref._type == PARSER_DATA_FILENAME: c_doc = _internalParseDocFromFile( _cstr(doc_ref._data_utf), parse_options, resolver_context) From scoder at codespeak.net Thu Apr 27 07:33:59 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 27 07:34:00 2006 Subject: [Lxml-checkins] r26399 - lxml/branch/resolver-new/src/lxml Message-ID: <20060427053359.14E131008F@code0.codespeak.net> Author: scoder Date: Thu Apr 27 07:33:58 2006 New Revision: 26399 Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi Log: handle resolver exceptions that were raised in the parser functions Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Thu Apr 27 07:33:58 2006 @@ -250,26 +250,27 @@ try: uri = funicode(c_uri) doc_ref = resolvers.resolve(uri, None, resolver_context) + + if doc_ref is not None: + if doc_ref._type == PARSER_DATA_EMPTY: + c_doc = _newDoc() + if doc_ref._type == PARSER_DATA_STRING: + c_doc = _internalParseDoc( + _cstr(doc_ref._data_utf), parse_options, resolver_context) + elif doc_ref._type == PARSER_DATA_FILE: + data = doc_ref._file.read() + c_doc = _internalParseDoc( + _cstr(data), parse_options, 
resolver_context) + elif doc_ref._type == PARSER_DATA_FILENAME: + c_doc = _internalParseDocFromFile( + _cstr(doc_ref._data_utf), parse_options, resolver_context) + if c_doc is not NULL and c_doc.URL is NULL: + c_doc.URL = tree.xmlStrdup(c_uri) + except Exception: resolver_context._store_raised() return NULL - if doc_ref is not None: - if doc_ref._type == PARSER_DATA_EMPTY: - c_doc = _newDoc() - if doc_ref._type == PARSER_DATA_STRING: - c_doc = _internalParseDoc( - _cstr(doc_ref._data_utf), parse_options, resolver_context) - elif doc_ref._type == PARSER_DATA_FILE: - data = doc_ref._file.read() - c_doc = _internalParseDoc( - _cstr(data), parse_options, resolver_context) - elif doc_ref._type == PARSER_DATA_FILENAME: - c_doc = _internalParseDocFromFile( - _cstr(doc_ref._data_utf), parse_options, resolver_context) - if c_doc is not NULL and c_doc.URL is NULL: - c_doc.URL = tree.xmlStrdup(c_uri) - if c_doc is NULL: c_doc = XSLT_DOC_DEFAULT_LOADER( c_uri, c_dict, parse_options, c_ctxt, c_type) From scoder at codespeak.net Thu Apr 27 09:03:10 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 27 09:03:11 2006 Subject: [Lxml-checkins] r26407 - lxml/branch/resolver-new/src/lxml Message-ID: <20060427070310.1071F1009D@code0.codespeak.net> Author: scoder Date: Thu Apr 27 09:03:08 2006 New Revision: 26407 Modified: lxml/branch/resolver-new/src/lxml/docloader.pxi Log: fix exception forwarding: clear after re-raising Modified: lxml/branch/resolver-new/src/lxml/docloader.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/docloader.pxi (original) +++ lxml/branch/resolver-new/src/lxml/docloader.pxi Thu Apr 27 09:03:08 2006 @@ -104,6 +104,7 @@ cdef _raise_if_stored(self): _exc_info = self._exc_info if _exc_info is not None: + self._exc_info = None type, value, traceback = _exc_info if traceback is None and value is None: raise type From scoder at codespeak.net Thu Apr 27 09:27:09 2006 From: 
scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 27 09:27:11 2006 Subject: [Lxml-checkins] r26410 - in lxml/branch/resolver-new: doc src/lxml Message-ID: <20060427072709.4CDF1100A8@code0.codespeak.net> Author: scoder Date: Thu Apr 27 09:27:07 2006 New Revision: 26410 Modified: lxml/branch/resolver-new/doc/resolvers.txt lxml/branch/resolver-new/src/lxml/xslt.pxd lxml/branch/resolver-new/src/lxml/xslt.pxi Log: fix XSLT document lookup for xsl:document() function * always use resolver context of XSL doc rather than transformed doc to get it right in most cases FIXME: need to determine special cases where the transformed doc is actually meant * infrastructure is there: xslt_resolver_context and doc_resolver_context are determined in _doc_loader function * which one to take depends on what context document() is called for! Does libxslt tell us? Modified: lxml/branch/resolver-new/doc/resolvers.txt ============================================================================== --- lxml/branch/resolver-new/doc/resolvers.txt (original) +++ lxml/branch/resolver-new/doc/resolvers.txt Thu Apr 27 09:27:07 2006 @@ -57,15 +57,8 @@ XML documents memorise their initial parser (and its resolvers) during their life-time. This means that a lookup process related to a document will use -the resolvers of the document's parser. - -This behaviour is most visible in XSLT, where two documents are used: the XSL -document and the transformed XML document. XSLT thus distinguishes between -the context when parsing the XSLT document (i.e. when resolving xsl:import and -xsl:include elements) and the context at transformation time (calls to the -'document' function). - -We start with a resolver that only responds to a specific prefix:: +the resolvers of the document's parser. We can demonstrate this with a +resolver that only responds to a specific prefix:: >>> class PrefixResolver(etree.Resolver): ... def __init__(self, prefix): @@ -83,7 +76,7 @@ ... 
return self.resolve_string(self.result_xml, context) ... print "failed" -We use the following stylesheet as an example:: +We demonstrate this in XSLT and use the following stylesheet as an example:: >>> xml_text = """\ ... ... """ -If we now register different resolvers with two different parsers, we can -parse our document twice in different resolver contexts:: +Note that it needs to resolve two URIs: 'honk:test' when compiling the XSLT +document (i.e. when resolving xsl:import and xsl:include elements) and +'hoi:test' at transformation time, when calls to the 'document' function are +resolved. If we now register different resolvers with two different parsers, +we can parse our document twice in different resolver contexts:: >>> hoi_parser = etree.XMLParser() >>> hoi_parser.resolvers.add( PrefixResolver("hoi") ) @@ -116,7 +112,7 @@ >>> transform = etree.XSLT(hoi_doc) Traceback (most recent call last): [...] - XSLTParseError: Cannot parse style sheet + XSLTParseError: Cannot resolve URI honk:test However, if we use the "honk" resolver associated with the second document, everything works fine:: @@ -124,19 +120,33 @@ >>> transform = etree.XSLT(honk_doc) Resolving url honk:test as prefix honk ... done -We can see that the "hoi" resolver was called to generate a document that was -then inserted into the result document by the XSLT transformation. Note that -the "hoi" resolver is attached to the transformed document, which defines the -context for the transformation process. +Running the transform accesses the same parser context again, but since it now +needs to resolve the "hoi" URI in the call to the document function, its +"honk" resolver will fail to do so:: + + >>> result = transform(hoi_doc) + Traceback (most recent call last): + [...] 
+ XSLTApplyError: Cannot resolve URI hoi:test +This can only be solved by adding a "hoi" resolver to the parser:: + + >>> honk_parser.resolvers.add( PrefixResolver("hoi") ) >>> result = transform(hoi_doc) + Resolving url hoi:test as prefix honk ... failed Resolving url hoi:test as prefix hoi ... done >>> print str(result), hoi-TEST -If we reverse the contexts, the other -resolver will get called. Obviously, the "honk" resolver does not know how to -resolve "hoi" URLs, so the default resolver is called in this case. If the -default resolver cannot find the requested document either, you will get an -exception:: +We can see that the "hoi" resolver was called to generate a document that was +then inserted into the result document by the XSLT transformation. Note that +this is completely independent of the XML file you transform, as the URI is +resolved from within the stylesheet context:: + + >>> result = transform(honk_doc) + Resolving url hoi:test as prefix honk ... failed + Resolving url hoi:test as prefix hoi ... 
done + >>> print str(result), + + hoi-TEST Modified: lxml/branch/resolver-new/src/lxml/xslt.pxd ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxd (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxd Thu Apr 27 09:27:07 2006 @@ -9,6 +9,7 @@ xmlDoc* doc ctypedef struct xsltTransformContext: + xsltStylesheet* style xmlXPathContext* xpathCtxt xsltDocument* document Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Thu Apr 27 09:27:07 2006 @@ -215,18 +215,17 @@ cdef xmlDoc* _doc_loader(char* c_uri, tree.xmlDict* c_dict, int parse_options, void* c_ctxt, xslt.xsltLoadType c_type): - cdef xslt.xsltTransformContext* transform_ctxt cdef xmlDoc* c_doc cdef _ResolverRegistry resolvers cdef _InputDocument doc_ref + cdef _XSLTResolverContext xslt_resolver_context + cdef _XSLTResolverContext doc_resolver_context cdef _XSLTResolverContext resolver_context cdef XMLParser parser - # find current xmlDoc to retrieve resolvers + # find stylesheet xmlDoc to retrieve resolvers c_doc = NULL if c_type == xslt.XSLT_LOAD_DOCUMENT: - transform_ctxt = c_ctxt - if transform_ctxt.document is not NULL: - c_doc = transform_ctxt.document.doc + c_doc = (c_ctxt).style.doc elif c_type == xslt.XSLT_LOAD_STYLESHEET: c_doc = (c_ctxt).doc @@ -236,16 +235,25 @@ return XSLT_DOC_DEFAULT_LOADER( c_uri, c_dict, parse_options, c_ctxt, c_type) - resolver_context = <_XSLTResolverContext>c_doc._private + xslt_resolver_context = <_XSLTResolverContext>c_doc._private + + # find resolver context for transformed document + doc_resolver_context = None + if c_type == xslt.XSLT_LOAD_DOCUMENT: + c_doc = (c_ctxt).document.doc + if c_doc is not NULL and c_doc._private is not NULL and \ + isinstance(c_doc._private, _XSLTResolverContext): + doc_resolver_context = 
<_XSLTResolverContext>c_doc._private # quick check if we are looking for the current stylesheet - c_doc = resolver_context._c_style_doc + c_doc = xslt_resolver_context._c_style_doc if c_doc is not NULL and c_doc.URL is not NULL: if tree.strcmp(c_uri, c_doc.URL) == 0: return tree.xmlCopyDoc(c_doc, 1) # call the Python document loaders c_doc = NULL + resolver_context = xslt_resolver_context # currently use only XSLT resolvers resolvers = resolver_context._resolvers try: uri = funicode(c_uri) @@ -256,30 +264,37 @@ c_doc = _newDoc() if doc_ref._type == PARSER_DATA_STRING: c_doc = _internalParseDoc( - _cstr(doc_ref._data_utf), parse_options, resolver_context) + _cstr(doc_ref._data_utf), parse_options, + resolver_context) elif doc_ref._type == PARSER_DATA_FILE: data = doc_ref._file.read() c_doc = _internalParseDoc( - _cstr(data), parse_options, resolver_context) + _cstr(data), parse_options, + resolver_context) elif doc_ref._type == PARSER_DATA_FILENAME: c_doc = _internalParseDocFromFile( - _cstr(doc_ref._data_utf), parse_options, resolver_context) + _cstr(doc_ref._data_utf), parse_options, + resolver_context) if c_doc is not NULL and c_doc.URL is NULL: c_doc.URL = tree.xmlStrdup(c_uri) - except Exception: - resolver_context._store_raised() + except Exception, e: + xslt_resolver_context._store_raised() return NULL if c_doc is NULL: c_doc = XSLT_DOC_DEFAULT_LOADER( c_uri, c_dict, parse_options, c_ctxt, c_type) if c_doc is NULL: - exception = XSLTError("Cannot resolve URI %s" % funicode(c_uri)) - resolver_context._store_exception(exception) + message = "Cannot resolve URI %s" % funicode(c_uri) + if c_type == xslt.XSLT_LOAD_DOCUMENT: + exception = XSLTApplyError(message) + else: + exception = XSLTParseError(message) + xslt_resolver_context._store_exception(exception) return NULL if c_doc is not NULL and c_doc._private is NULL: - c_doc._private = resolver_context + c_doc._private = xslt_resolver_context return c_doc cdef xslt.xsltDocLoaderFunc XSLT_DOC_DEFAULT_LOADER @@ 
-374,6 +389,7 @@ c_style = xslt.xsltParseStylesheetDoc(c_doc) if c_style is NULL: tree.xmlFreeDoc(c_doc) + self._xslt_resolver_context._raise_if_stored() raise XSLTParseError, "Cannot parse style sheet" self._c_style = c_style @@ -447,10 +463,10 @@ cstd.free(params) self._context.free_context() - c_doc._private = ptemp + c_doc._private = ptemp # restore _private before _destroyFakeDoc! _destroyFakeDoc(input_doc._c_doc, c_doc) - resolver_context._raise_if_stored() + self._xslt_resolver_context._raise_if_stored() if c_result is NULL: raise XSLTApplyError, "Error applying stylesheet" From scoder at codespeak.net Thu Apr 27 09:47:19 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 27 09:47:20 2006 Subject: [Lxml-checkins] r26411 - lxml/branch/resolver-new/src/lxml Message-ID: <20060427074719.6398B100A8@code0.codespeak.net> Author: scoder Date: Thu Apr 27 09:47:18 2006 New Revision: 26411 Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi Log: some cleanup in _doc_loader Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Thu Apr 27 09:47:18 2006 @@ -222,9 +222,14 @@ cdef _XSLTResolverContext doc_resolver_context cdef _XSLTResolverContext resolver_context cdef XMLParser parser - # find stylesheet xmlDoc to retrieve resolvers + # find resolver contexts of stylesheet and transformed doc c_doc = NULL + doc_resolver_context = None if c_type == xslt.XSLT_LOAD_DOCUMENT: + c_doc = (c_ctxt).document.doc + if c_doc is not NULL and c_doc._private is not NULL: + if isinstance(c_doc._private, _XSLTResolverContext): + doc_resolver_context = <_XSLTResolverContext>c_doc._private c_doc = (c_ctxt).style.doc elif c_type == xslt.XSLT_LOAD_STYLESHEET: c_doc = (c_ctxt).doc @@ -237,14 +242,6 @@ xslt_resolver_context = <_XSLTResolverContext>c_doc._private - # find resolver context for 
transformed document - doc_resolver_context = None - if c_type == xslt.XSLT_LOAD_DOCUMENT: - c_doc = (c_ctxt).document.doc - if c_doc is not NULL and c_doc._private is not NULL and \ - isinstance(c_doc._private, _XSLTResolverContext): - doc_resolver_context = <_XSLTResolverContext>c_doc._private - # quick check if we are looking for the current stylesheet c_doc = xslt_resolver_context._c_style_doc if c_doc is not NULL and c_doc.URL is not NULL: From scoder at codespeak.net Thu Apr 27 10:16:12 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 27 10:16:14 2006 Subject: [Lxml-checkins] r26416 - in lxml/branch/resolver-new: doc src/lxml Message-ID: <20060427081612.5116A10089@code0.codespeak.net> Author: scoder Date: Thu Apr 27 10:16:10 2006 New Revision: 26416 Modified: lxml/branch/resolver-new/doc/resolvers.txt lxml/branch/resolver-new/src/lxml/docloader.pxi lxml/branch/resolver-new/src/lxml/etree.pyx lxml/branch/resolver-new/src/lxml/parser.pxi Log: parser.copy() method to allow copying parsers (including their registered resolvers), store a copy of the parser in each document instead of a reference to memorise the resolver state at parse time Modified: lxml/branch/resolver-new/doc/resolvers.txt ============================================================================== --- lxml/branch/resolver-new/doc/resolvers.txt (original) +++ lxml/branch/resolver-new/doc/resolvers.txt Thu Apr 27 10:16:10 2006 @@ -97,6 +97,8 @@ we can parse our document twice in different resolver contexts:: >>> hoi_parser = etree.XMLParser() + >>> normal_doc = etree.parse(StringIO(xml_text), hoi_parser) + >>> hoi_parser.resolvers.add( PrefixResolver("hoi") ) >>> hoi_doc = etree.parse(StringIO(xml_text), hoi_parser) @@ -109,6 +111,11 @@ subsequent lookups. To compile the stylesheet, XSLT must resolve the honk:test URI in the xsl:include element. The "hoi" resolver cannot do that:: + >>> transform = etree.XSLT(normal_doc) + Traceback (most recent call last): + [...] 
+ XSLTParseError: Cannot resolve URI honk:test + >>> transform = etree.XSLT(hoi_doc) Traceback (most recent call last): [...] @@ -124,15 +131,38 @@ needs to resolve the "hoi" URI in the call to the document function, its "honk" resolver will fail to do so:: + >>> result = transform(normal_doc) + Traceback (most recent call last): + [...] + XSLTApplyError: Cannot resolve URI hoi:test + >>> result = transform(hoi_doc) Traceback (most recent call last): [...] XSLTApplyError: Cannot resolve URI hoi:test -This can only be solved by adding a "hoi" resolver to the parser:: + >>> result = transform(honk_doc) + Traceback (most recent call last): + [...] + XSLTApplyError: Cannot resolve URI hoi:test + +This can only be solved by adding a "hoi" resolver to the parser. Note that +adding it after parsing the XSL document will not work as parsed documents +remember the state of the parser at the time of their creation:: >>> honk_parser.resolvers.add( PrefixResolver("hoi") ) - >>> result = transform(hoi_doc) + >>> result = transform(honk_doc) + Traceback (most recent call last): + [...] + XSLTApplyError: Cannot resolve URI hoi:test + +You have to parse the document again with this updated parser to copy the +resolver state to the stylesheet:: + + >>> honk_doc = etree.parse(StringIO(xml_text), honk_parser) + >>> transform = etree.XSLT(honk_doc) + Resolving url honk:test as prefix honk ... done + >>> result = transform(honk_doc) Resolving url hoi:test as prefix honk ... failed Resolving url hoi:test as prefix hoi ... done >>> print str(result), @@ -144,7 +174,7 @@ this is completely independent of the XML file you transform, as the URI is resolved from within the stylesheet context:: - >>> result = transform(honk_doc) + >>> result = transform(normal_doc) Resolving url hoi:test as prefix honk ... failed Resolving url hoi:test as prefix hoi ... 
done >>> print str(result), Modified: lxml/branch/resolver-new/src/lxml/docloader.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/docloader.pxi (original) +++ lxml/branch/resolver-new/src/lxml/docloader.pxi Thu Apr 27 10:16:10 2006 @@ -74,6 +74,12 @@ def remove(self, resolver): self._resolvers.discard(resolver) + def copy(self): + cdef _ResolverRegistry registry + registry = _ResolverRegistry(self._default_resolver) + registry._resolvers = self._resolvers.copy() + return registry + def resolve(self, system_url, public_id, context): for resolver in self._resolvers: result = resolver.resolve(system_url, public_id, context) Modified: lxml/branch/resolver-new/src/lxml/etree.pyx ============================================================================== --- lxml/branch/resolver-new/src/lxml/etree.pyx (original) +++ lxml/branch/resolver-new/src/lxml/etree.pyx Thu Apr 27 10:16:10 2006 @@ -171,7 +171,7 @@ result._ns_counter = 0 if parser is None: parser = __DEFAULT_PARSER - result._parser = parser + result._parser = parser.copy() return result # to help with debugging Modified: lxml/branch/resolver-new/src/lxml/parser.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/parser.pxi (original) +++ lxml/branch/resolver-new/src/lxml/parser.pxi Thu Apr 27 10:16:10 2006 @@ -135,6 +135,13 @@ def __get__(self): return self._error_log.copy() + cdef _copy(self): + cdef BaseParser parser + parser = self.__class__() + parser.resolvers = self.resolvers.copy() + parser._context = _ResolverContext(parser.resolvers) + return parser + cdef _initContext(self, xmlParserCtxt* c_ctxt): __GLOBAL_PARSER_CONTEXT._initParserDict(c_ctxt) c_ctxt._private = self._context @@ -224,6 +231,12 @@ if self._memory_parser_ctxt != NULL: xmlparser.xmlFreeParserCtxt(self._memory_parser_ctxt) + def copy(self): + cdef XMLParser parser + parser = self._copy() + 
parser._parse_options = self._parse_options + return parser + cdef xmlParserCtxt* _createContext(self) except NULL: cdef xmlParserCtxt* pctxt pctxt = xmlparser.xmlNewParserCtxt() @@ -371,6 +384,12 @@ if self._memory_parser_ctxt != NULL: htmlparser.htmlFreeParserCtxt(self._memory_parser_ctxt) + def copy(self): + cdef HTMLParser parser + parser = self._copy() + parser._parse_options = self._parse_options + return parser + cdef xmlDoc* _parseDoc(self, char* c_text) except NULL: """Parse HTML document, share dictionary if possible. """ From scoder at codespeak.net Thu Apr 27 10:31:08 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 27 10:31:11 2006 Subject: [Lxml-checkins] r26418 - lxml/branch/resolver-new/doc Message-ID: <20060427083108.DD274100B3@code0.codespeak.net> Author: scoder Date: Thu Apr 27 10:31:06 2006 New Revision: 26418 Modified: lxml/branch/resolver-new/doc/resolvers.txt Log: state that XSLT generated documents inherit the parsers of their input document Modified: lxml/branch/resolver-new/doc/resolvers.txt ============================================================================== --- lxml/branch/resolver-new/doc/resolvers.txt (original) +++ lxml/branch/resolver-new/doc/resolvers.txt Thu Apr 27 10:31:06 2006 @@ -121,8 +121,8 @@ [...] XSLTParseError: Cannot resolve URI honk:test -However, if we use the "honk" resolver associated with the second document, -everything works fine:: +However, if we use the "honk" resolver associated with the respective +document, everything works fine:: >>> transform = etree.XSLT(honk_doc) Resolving url honk:test as prefix honk ... done @@ -180,3 +180,8 @@ >>> print str(result), hoi-TEST + +It may be seen as a matter of taste what resolvers the generated document +inherits. For XSLT, the output document inherits the resolvers of the input +document and not those of the stylesheet. Therefore, the last result does not +inherit any resolvers at all. 
From scoder at codespeak.net Thu Apr 27 11:56:39 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 27 11:56:40 2006 Subject: [Lxml-checkins] r26428 - lxml/branch/resolver-new/src/lxml Message-ID: <20060427095639.120CF100D2@code0.codespeak.net> Author: scoder Date: Thu Apr 27 11:56:36 2006 New Revision: 26428 Modified: lxml/branch/resolver-new/src/lxml/docloader.pxi lxml/branch/resolver-new/src/lxml/etree.pyx lxml/branch/resolver-new/src/lxml/xslt.pxi Log: refactoring of exception storage: new _ExceptionContext class, used by BaseContext (XSLT), inherited by _ResolverContext Modified: lxml/branch/resolver-new/src/lxml/docloader.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/docloader.pxi (original) +++ lxml/branch/resolver-new/src/lxml/docloader.pxi Thu Apr 27 11:56:36 2006 @@ -92,27 +92,10 @@ def __repr__(self): return repr(self._resolvers) -cdef class _ResolverContext: +cdef class _ResolverContext(_ExceptionContext): cdef _ResolverRegistry _resolvers cdef _TempStore _storage - cdef object _exc_info def __init__(self, _ResolverRegistry resolvers not None): + _ExceptionContext.__init__(self) self._resolvers = resolvers self._storage = _TempStore() - self._exc_info = None - - cdef void _store_raised(self): - self._exc_info = sys.exc_info() - - cdef void _store_exception(self, exception): - self._exc_info = (exception, None, None) - - cdef _raise_if_stored(self): - _exc_info = self._exc_info - if _exc_info is not None: - self._exc_info = None - type, value, traceback = _exc_info - if traceback is None and value is None: - raise type - else: - raise type, value, traceback Modified: lxml/branch/resolver-new/src/lxml/etree.pyx ============================================================================== --- lxml/branch/resolver-new/src/lxml/etree.pyx (original) +++ lxml/branch/resolver-new/src/lxml/etree.pyx Thu Apr 27 11:56:36 2006 @@ -56,6 +56,51 @@ class 
C14NError(LxmlError): pass + +# class for temporary storage of Python references +cdef class _TempStore: + cdef object _storage + def __init__(self): + self._storage = {} + + cdef void add(self, obj): + python.PyDict_SetItem(self._storage, id(obj), obj) + + cdef void clear(self): + python.PyDict_Clear(self._storage) + + cdef object dictcopy(self): + return self._storage.copy() + +# class for temporarily storing exceptions raised in extensions +cdef class _ExceptionContext: + cdef object _exc_info + def __init__(self): + self._exc_info = None + + cdef void clear(self): + self._exc_info = None + + cdef void _store_raised(self): + self._exc_info = sys.exc_info() + + cdef void _store_exception(self, exception): + self._exc_info = (exception, None, None) + + cdef _has_raised(self): + return self._exc_info is not None + + cdef _raise_if_stored(self): + _exc_info = self._exc_info + if _exc_info is not None: + self._exc_info = None + type, value, traceback = _exc_info + if traceback is None and value is None: + raise type + else: + raise type, value, traceback + + cdef class BaseParser # forward declaration cdef class _Document: @@ -1292,19 +1337,6 @@ return ElementTree(doc.getroot()) -# class for temporary storage of Python references -cdef class _TempStore: - cdef object _storage - def __init__(self): - self._storage = {} - - cdef void add(self, obj): - python.PyDict_SetItem(self._storage, id(obj), obj) - - cdef void clear(self): - python.PyDict_Clear(self._storage) - - # include submodules include "xmlerror.pxi" # error and log handling include "xmlid.pxi" # XMLID and IDDict Modified: lxml/branch/resolver-new/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/resolver-new/src/lxml/xslt.pxi (original) +++ lxml/branch/resolver-new/src/lxml/xslt.pxi Thu Apr 27 11:56:36 2006 @@ -40,9 +40,8 @@ cdef object _extension_functions cdef object _utf_refs # for exception handling and temporary reference keeping: - cdef 
_TempStore _temp_elements - cdef _TempStore _temp_docs - cdef object _exc_info + cdef _TempStore _temp_refs + cdef _ExceptionContext _exc def __init__(self, namespaces, extensions): self._xpathCtxt = NULL @@ -62,14 +61,13 @@ extensions = new_extensions or None self._doc = None - self._exc_info = None + self._exc = _ExceptionContext() self._extensions = extensions self._namespaces = namespaces self._registered_namespaces = [] self._registered_extensions = [] self._extension_functions = {} - self._temp_elements = _TempStore() - self._temp_docs = _TempStore() + self._temp_refs = _TempStore() cdef object _to_utf(self, s): "Convert to UTF-8 and keep a reference to the encoded string" @@ -88,8 +86,8 @@ xpathCtxt.userData = self cdef _register_context(self, _Document doc, int allow_none_namespace): - self._doc = doc - self._exc_info = None + self._doc = doc + self._exc.clear() namespaces = self._namespaces if namespaces is not None: self.registerNamespaces(namespaces) @@ -167,15 +165,14 @@ for ns_uri_utf, name_utf in self._registered_extensions: self._contextUnregisterExtensionFunction(ns_uri_utf, name_utf) - def find_extension(self, ns_uri_utf, name_utf): + cdef _find_extension(self, ns_uri_utf, name_utf): return self._extension_functions[(ns_uri_utf, name_utf)] # Python reference keeping during XPath function evaluation cdef _release_temp_refs(self): "Free temporarily referenced objects from this context." - self._temp_elements.clear() - self._temp_docs.clear() + self._temp_refs.clear() cdef _hold(self, obj): """A way to temporarily hold references to nodes in the evaluator. 
@@ -193,9 +190,9 @@ if isinstance(o, _NodeBase): element = <_NodeBase>o #print "Holding element:", element._c_node - self._temp_elements.add(element) + self._temp_refs.add(element) #print "Holding document:", element._doc._c_doc - self._temp_docs.add(element._doc) + self._temp_refs.add(element._doc) ################################################################################ @@ -463,7 +460,11 @@ c_doc._private = ptemp # restore _private before _destroyFakeDoc! _destroyFakeDoc(input_doc._c_doc, c_doc) - self._xslt_resolver_context._raise_if_stored() + if self._xslt_resolver_context._has_raised(): + if c_result is not NULL: + tree.xmlFreeDoc(c_result) + self._xslt_resolver_context._raise_if_stored() + if c_result is NULL: raise XSLTApplyError, "Error applying stylesheet" @@ -608,17 +609,21 @@ self._context = _XPathContext(namespaces, extensions, variables) cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, _Document doc): - _exc_info = self._context._exc_info - if _exc_info is not None: - type, value, traceback = _exc_info - raise type, value, traceback + if self._context._exc._has_raised(): + if xpathObj is not NULL: + xpath.xmlXPathFreeObject(xpathObj) + xpathObj = NULL + self._context._exc._raise_if_stored() + if xpathObj is NULL: raise XPathSyntaxError, "Error in xpath expression." 
+ try: result = _unwrapXPathObject(xpathObj, doc) except XPathResultError: xpath.xmlXPathFreeObject(xpathObj) raise + xpath.xmlXPathFreeObject(xpathObj) return result @@ -885,7 +890,7 @@ cdef xpath.xmlXPathContext* rctxt cdef _Document doc cdef xpath.xmlXPathObject* obj - cdef _BaseContext extensions + cdef _BaseContext context cdef int i rctxt = ctxt.context @@ -898,13 +903,13 @@ uri = None # get our evaluator - extensions = <_BaseContext>(rctxt.userData) + context = <_BaseContext>(rctxt.userData) # lookup up the extension function in the context - f = extensions.find_extension(uri, name) + f = context._find_extension(uri, name) args = [] - doc = extensions._doc + doc = context._doc for i from 0 <= i < nargs: python.PyList_Append(args, _unwrapXPathObject(xpath.valuePop(ctxt), doc)) args.reverse() @@ -914,13 +919,13 @@ res = f(None, *args) # hold python objects temporarily so that they won't get deallocated # during processing - extensions._hold(res) + context._hold(res) # now wrap for XPath consumption obj = _wrapXPathObject(res) except: xpath.xmlXPathErr( ctxt, xmlerror.XML_XPATH_EXPR_ERROR - xmlerror.XML_XPATH_EXPRESSION_OK) - extensions._exc_info = sys.exc_info() + context._exc._store_raised() return xpath.valuePush(ctxt, obj) From scoder at codespeak.net Thu Apr 27 22:51:47 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Thu Apr 27 22:51:49 2006 Subject: [Lxml-checkins] r26466 - in lxml/trunk: . 
doc src/lxml src/lxml/tests Message-ID: <20060427205147.87558100B8@code0.codespeak.net> Author: scoder Date: Thu Apr 27 22:51:43 2006 New Revision: 26466 Added: lxml/trunk/doc/resolvers.txt - copied unchanged from r26455, lxml/branch/resolver-new/doc/resolvers.txt lxml/trunk/src/lxml/docloader.pxi - copied unchanged from r26455, lxml/branch/resolver-new/src/lxml/docloader.pxi Modified: lxml/trunk/CHANGES.txt lxml/trunk/bench.py lxml/trunk/setup.py lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/tests/test_etree.py lxml/trunk/src/lxml/tests/test_xslt.py lxml/trunk/src/lxml/tree.pxd lxml/trunk/src/lxml/xmlparser.pxd lxml/trunk/src/lxml/xslt.pxd lxml/trunk/src/lxml/xslt.pxi Log: merge of resolver-new branch: support for custom URI resolvers in parsers and XSLT Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Apr 27 22:51:43 2006 @@ -7,11 +7,16 @@ Features added -------------- -* Substantial speedup in XPath.evaluate() +* Support for custom document loaders (URI resolvers) in parsers and XSLT, + resolvers are registered at parser level * Exslt:regexp implementation for XSLT based on the Python 're' module on by default, can be switched off with 'regexp=False' keyword argument +* Support for exslt extensions (libexslt) and node-set function + +* Substantial speedup in XPath.evaluate() + * HTMLParser for parsing (broken) HTML * XMLDTDID function parses XML into tuple (root node, ID dict) based on xml:id @@ -29,7 +34,7 @@ Bugs fixed ---------- -* document('') now works in XSLT documents parsed from strings +* document('') now works in all XSLT documents * Crash in XMLSchema and RelaxNG when passing non-schema documents Modified: lxml/trunk/bench.py ============================================================================== --- lxml/trunk/bench.py (original) +++ lxml/trunk/bench.py Thu Apr 27 22:51:43 2006 
@@ -445,6 +445,25 @@ for i in range(10): transform(root) + @onlylib('lxe') + def bench_xslt_document(self, root): + transform = self.etree.XSLT(self.etree.XML("""\ + + TEST + + + + + + + + +""")) + transform(root) + + ############################################################ # Main program ############################################################ Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Thu Apr 27 22:51:43 2006 @@ -11,6 +11,7 @@ try: from setuptools import setup from setuptools.extension import Extension + # prevent setuptools from making local etree.so copies: setup_args['zip_safe'] = False except ImportError: from distutils.core import setup @@ -47,6 +48,16 @@ changelog.close() +# compile also against libexslt! +xslt_libs = flags('xslt-config --libs') +xslt_libs.append('-lexslt') +for i, libname in (): # enumerate(xslt_libs): + if 'exslt' in libname: + break + if 'xslt' in libname: + xslt_libs.insert(i, libname.replace('xslt', 'exslt')) + break + setup( name = "lxml", version = version, @@ -85,7 +96,7 @@ "lxml.etree", sources = sources, extra_compile_args = ['-w'] + flags('xslt-config --cflags'), - extra_link_args = flags('xslt-config --libs') + extra_link_args = xslt_libs )], **setup_args ) Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Apr 27 22:51:43 2006 @@ -1,6 +1,7 @@ cimport tree, python from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement -from python cimport isinstance, issubclass, hasattr, callable, iter, str, _cstr +from python cimport isinstance, issubclass, hasattr, callable +from python cimport iter, str, _cstr cimport xpath cimport xslt cimport xmlerror @@ -55,6 +56,53 @@ class C14NError(LxmlError): pass + +# class for temporary storage of Python references 
+cdef class _TempStore: + cdef object _storage + def __init__(self): + self._storage = {} + + cdef void add(self, obj): + python.PyDict_SetItem(self._storage, id(obj), obj) + + cdef void clear(self): + python.PyDict_Clear(self._storage) + + cdef object dictcopy(self): + return self._storage.copy() + +# class for temporarily storing exceptions raised in extensions +cdef class _ExceptionContext: + cdef object _exc_info + def __init__(self): + self._exc_info = None + + cdef void clear(self): + self._exc_info = None + + cdef void _store_raised(self): + self._exc_info = sys.exc_info() + + cdef void _store_exception(self, exception): + self._exc_info = (exception, None, None) + + cdef _has_raised(self): + return self._exc_info is not None + + cdef _raise_if_stored(self): + _exc_info = self._exc_info + if _exc_info is not None: + self._exc_info = None + type, value, traceback = _exc_info + if traceback is None and value is None: + raise type + else: + raise type, value, traceback + + +cdef class BaseParser # forward declaration + cdef class _Document: """Internal base class to reference a libxml document. 
@@ -63,6 +111,7 @@ """ cdef int _ns_counter cdef xmlDoc* _c_doc + cdef BaseParser _parser def __dealloc__(self): # if there are no more references to the document, it is safe @@ -150,21 +199,24 @@ if filename is None: filename = source # open filename - c_doc = _parseDocFromFile(filename, parser) - return _documentFactory(c_doc) + c_doc = _parseDocFromFile(_utf8(filename), parser) + return _documentFactory(c_doc, parser) cdef _Document _parseMemoryDocument(text, parser): cdef xmlDoc* c_doc if python.PyUnicode_Check(text): text = _stripDeclaration(_utf8(text)) c_doc = _parseDoc(text, parser) - return _documentFactory(c_doc) + return _documentFactory(c_doc, parser) -cdef _Document _documentFactory(xmlDoc* c_doc): +cdef _Document _documentFactory(xmlDoc* c_doc, parser): cdef _Document result result = _Document() result._c_doc = c_doc result._ns_counter = 0 + if parser is None: + parser = __DEFAULT_PARSER + result._parser = parser.copy() return result # to help with debugging @@ -438,7 +490,7 @@ fake_c_doc = _fakeRootDoc(doc._c_doc, self._c_node) c_doc = tree.xmlCopyDoc(fake_c_doc, 1) # recursive copy _destroyFakeDoc(doc._c_doc, fake_c_doc) - doc = _documentFactory(c_doc) + doc = _documentFactory(c_doc, doc._parser) return doc.getroot() def set(self, key, value): @@ -1163,7 +1215,7 @@ c_doc = _newDoc() c_node = _createElement(c_doc, name_utf, attrib, _extra) tree.xmlDocSetRootElement(c_doc, c_node) - doc = _documentFactory(c_doc) + doc = _documentFactory(c_doc, None) # add namespaces to node if necessary doc._setNodeNamespaces(c_node, ns_utf, nsmap) return _elementFactory(doc, c_node) @@ -1171,13 +1223,15 @@ def Comment(text=None): cdef _Document doc cdef xmlNode* c_node + cdef xmlDoc* c_doc if text is None: text = ' ' else: text = ' %s ' % _utf8(text) - doc = _documentFactory( _newDoc() ) - c_node = _createComment(doc._c_doc, text) - tree.xmlAddChild(doc._c_doc, c_node) + c_doc = _newDoc() + doc = _documentFactory(c_doc, None) + c_node = _createComment(c_doc, text) + 
tree.xmlAddChild(c_doc, c_node) return _commentFactory(doc, c_node) def SubElement(_Element _parent not None, _tag, @@ -1196,6 +1250,7 @@ cdef xmlNode* c_next cdef xmlNode* c_node cdef xmlNode* c_node_copy + cdef xmlDoc* c_doc cdef _ElementTree etree cdef _Document doc @@ -1204,7 +1259,8 @@ elif file is not None: doc = _parseDocument(file, parser) else: - doc = _documentFactory( _newDoc() ) + c_doc = _newDoc() + doc = _documentFactory(c_doc, parser) etree = _elementTreeFactory(doc, element) @@ -1294,10 +1350,11 @@ include "xmlerror.pxi" # error and log handling include "xmlid.pxi" # XMLID and IDDict include "nsclasses.pxi" # Namespace implementation and registry +include "docloader.pxi" # Support for custom document loaders +include "parser.pxi" # XML Parser include "xslt.pxi" # XPath and XSLT include "relaxng.pxi" # RelaxNG include "xmlschema.pxi" # XMLSchema -include "parser.pxi" # XML Parser include "proxy.pxi" # Proxy handling (element backpointers/memory/etc.) Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Thu Apr 27 22:51:43 2006 @@ -58,28 +58,122 @@ __GLOBAL_PARSER_CONTEXT = _ParserContext() +############################################################ +## support for custom document loaders +############################################################ + +cdef xmlparser.xmlParserInput* _local_resolver(char* c_url, char* c_pubid, + xmlParserCtxt* c_context): + cdef _ResolverContext context + cdef _InputDocument doc_ref + cdef xmlparser.xmlParserInput* c_input + if c_context._private is NULL or \ + not isinstance(c_context._private, _ResolverContext): + if __DEFAULT_ENTITY_LOADER is NULL: + return NULL + return __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context) + + try: + if c_url is NULL: + url = None + else: + url = funicode(c_url) + if c_pubid is NULL: + pubid = None + else: + pubid = funicode(c_pubid) + + 
context = <_ResolverContext>c_context._private + doc_ref = context._resolvers.resolve(url, pubid, context) + except Exception: + context._store_raised() + return NULL + + if doc_ref is None: + if __DEFAULT_ENTITY_LOADER is NULL: + return NULL + return __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context) + + c_input = NULL + data = None + if doc_ref._type == PARSER_DATA_STRING: + data = doc_ref._data_utf + c_input = xmlparser.xmlNewStringInputStream( + c_context, _cstr(doc_ref._data_utf)) + elif doc_ref._type == PARSER_DATA_FILENAME: + c_input = xmlparser.xmlNewInputFromFile( + c_context, _cstr(doc_ref._data_utf)) + elif doc_ref._type == PARSER_DATA_FILE: + data = doc_ref._file.read() + c_input = xmlparser.xmlNewStringInputStream( + c_context, _cstr(data)) + + if data is not None: + context._storage.add(data) + return c_input + +cdef xmlparser.xmlExternalEntityLoader __DEFAULT_ENTITY_LOADER +__DEFAULT_ENTITY_LOADER = xmlparser.xmlGetExternalEntityLoader() + +xmlparser.xmlSetExternalEntityLoader(_local_resolver) + +############################################################ +## Parsers +############################################################ + cdef class BaseParser: cdef _ErrorLog _error_log + cdef readonly object resolvers + cdef _ResolverContext _context def __init__(self): + cdef _ResolverContext context self._error_log = _ErrorLog() + self.resolvers = _ResolverRegistry() + self._context = _ResolverContext(self.resolvers) property error_log: def __get__(self): return self._error_log.copy() - cdef xmlDoc* _handleResult(self, xmlParserCtxt* ctxt, - xmlDoc* result) except NULL: - if ctxt.wellFormed: - __GLOBAL_PARSER_CONTEXT._initDocDict(result) - elif result is not NULL: - # free broken document - tree.xmlFreeDoc(result) - result = NULL - self._error_log.disconnect() - if result is NULL: + cdef _copy(self): + cdef BaseParser parser + parser = self.__class__() + parser.resolvers = self.resolvers.copy() + parser._context = _ResolverContext(parser.resolvers) + 
return parser + + cdef _initContext(self, xmlParserCtxt* c_ctxt): + __GLOBAL_PARSER_CONTEXT._initParserDict(c_ctxt) + c_ctxt._private = self._context + +cdef xmlDoc* _handleParseResult(xmlParserCtxt* ctxt, xmlDoc* result, + char* c_filename) except NULL: + cdef _ResolverContext context + if ctxt.wellFormed: + __GLOBAL_PARSER_CONTEXT._initDocDict(result) + elif result is not NULL: + # free broken document + tree.xmlFreeDoc(result) + result = NULL + + if ctxt._private is not NULL: + context = <_ResolverContext>ctxt._private + context._raise_if_stored() + + if result is NULL: + if c_filename is not NULL and \ + ctxt.lastError.domain == xmlerror.XML_FROM_IO: + if ctxt.lastError.message is not NULL: + message = "Error reading file %s: %s" % ( + funicode(c_filename), funicode(ctxt.lastError.message)) + else: + message = "Error reading file %s" % funicode(c_filename) + raise IOError, message + elif ctxt.lastError.message is not NULL: + raise XMLSyntaxError, funicode(ctxt.lastError.message) + else: raise XMLSyntaxError - return result - + return result ############################################################ ## XML parser @@ -110,12 +204,14 @@ cdef xmlParserCtxt* _file_parser_ctxt cdef xmlParserCtxt* _memory_parser_ctxt def __init__(self, attribute_defaults=False, dtd_validation=False, - no_network=False, ns_clean=False): + load_dtd=False, no_network=False, ns_clean=False): cdef int parse_options self._file_parser_ctxt = NULL BaseParser.__init__(self) parse_options = _XML_DEFAULT_PARSE_OPTIONS + if load_dtd: + parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD if dtd_validation: parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD | \ xmlparser.XML_PARSE_DTDVALID @@ -135,6 +231,12 @@ if self._memory_parser_ctxt != NULL: xmlparser.xmlFreeParserCtxt(self._memory_parser_ctxt) + def copy(self): + cdef XMLParser parser + parser = self._copy() + parser._parse_options = self._parse_options + return parser + cdef xmlParserCtxt* _createContext(self) except NULL: 
cdef xmlParserCtxt* pctxt pctxt = xmlparser.xmlNewParserCtxt() @@ -143,7 +245,7 @@ raise ParserError, "Failed to create parser context" return pctxt - cdef xmlDoc* _parseDoc(self, text_utf) except NULL: + cdef xmlDoc* _parseDoc(self, char* c_text) except NULL: """Parse document, share dictionary if possible. """ cdef xmlDoc* result @@ -154,13 +256,13 @@ if pctxt is NULL: pctxt = self._createContext() self._memory_parser_ctxt = pctxt - - __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) + self._initContext(pctxt) result = xmlparser.xmlCtxtReadDoc( - pctxt, _cstr(text_utf), NULL, NULL, self._parse_options) - return self._handleResult(pctxt, result) + pctxt, c_text, NULL, NULL, self._parse_options) + self._error_log.disconnect() + return _handleParseResult(pctxt, result, NULL) - cdef xmlDoc* _parseDocFromFile(self, char* filename) except NULL: + cdef xmlDoc* _parseDocFromFile(self, char* c_filename) except NULL: cdef xmlDoc* result cdef xmlParserCtxt* pctxt self._error_log.connect() @@ -168,15 +270,47 @@ if pctxt is NULL: pctxt = self._createContext() self._file_parser_ctxt = pctxt - - __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) + self._initContext(pctxt) result = xmlparser.xmlCtxtReadFile( - pctxt, filename, NULL, self._parse_options) - if result is NULL: - if pctxt.lastError.domain == xmlerror.XML_FROM_IO: - self._error_log.disconnect() - raise IOError, "Could not open file %s" % filename - return self._handleResult(pctxt, result) + pctxt, c_filename, NULL, self._parse_options) + self._error_log.disconnect() + return _handleParseResult(pctxt, result, c_filename) + +cdef xmlDoc* _internalParseDoc(char* c_text, int options, + _ResolverContext context) except NULL: + # internal parser function for XSLT + cdef xmlParserCtxt* pctxt + cdef xmlDoc* c_doc + pctxt = xmlparser.xmlNewParserCtxt() + if pctxt is NULL: + return NULL + __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) + pctxt._private = context + c_doc = xmlparser.xmlCtxtReadDoc( + pctxt, c_text, NULL, NULL, options) 
+ try: + c_doc = _handleParseResult(pctxt, c_doc, NULL) + finally: + xmlparser.xmlFreeParserCtxt(pctxt) + return c_doc + +cdef xmlDoc* _internalParseDocFromFile(char* c_filename, int options, + _ResolverContext context) except NULL: + # internal parser function for XSLT + cdef xmlParserCtxt* pctxt + cdef xmlDoc* c_doc + pctxt = xmlparser.xmlNewParserCtxt() + if pctxt is NULL: + return NULL + __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) + pctxt._private = context + c_doc = xmlparser.xmlCtxtReadFile( + pctxt, c_filename, NULL, options) + try: + c_doc = _handleParseResult(pctxt, c_doc, c_filename) + finally: + xmlparser.xmlFreeParserCtxt(pctxt) + return c_doc cdef XMLParser __DEFAULT_XML_PARSER @@ -203,6 +337,8 @@ else: raise TypeError, "Invalid parser" +def get_default_parser(): + return __DEFAULT_PARSER ############################################################ ## HTML parser @@ -248,15 +384,19 @@ if self._memory_parser_ctxt != NULL: htmlparser.htmlFreeParserCtxt(self._memory_parser_ctxt) - cdef xmlDoc* _parseDoc(self, text_utf) except NULL: + def copy(self): + cdef HTMLParser parser + parser = self._copy() + parser._parse_options = self._parse_options + return parser + + cdef xmlDoc* _parseDoc(self, char* c_text) except NULL: """Parse HTML document, share dictionary if possible. 
""" cdef xmlDoc* result cdef xmlParserCtxt* pctxt - cdef char* c_text cdef int c_len self._error_log.connect() - c_text = _cstr(text_utf) pctxt = self._memory_parser_ctxt if pctxt is NULL: pctxt = htmlparser.htmlCreateMemoryParserCtxt('dummy', 5) @@ -264,30 +404,32 @@ self._error_log.disconnect() raise ParserError, "Failed to create parser context" self._memory_parser_ctxt = pctxt - __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) + self._initContext(pctxt) result = htmlparser.htmlCtxtReadDoc( pctxt, c_text, NULL, NULL, self._parse_options) - return self._handleResult(pctxt, result) + self._error_log.disconnect() + return _handleParseResult(pctxt, result, NULL) - cdef xmlDoc* _parseDocFromFile(self, char* filename) except NULL: + cdef xmlDoc* _parseDocFromFile(self, char* c_filename) except NULL: cdef xmlDoc* result cdef xmlParserCtxt* pctxt cdef int parser_error self._error_log.connect() pctxt = self._file_parser_ctxt if pctxt is NULL: - pctxt = htmlparser.htmlCreateFileParserCtxt(filename, NULL) + pctxt = htmlparser.htmlCreateFileParserCtxt(c_filename, NULL) if pctxt is NULL: self._error_log.disconnect() warnings = self._error_log.filter_from_warnings() if warnings and warnings[-1].domain == xmlerror.XML_FROM_IO: - raise IOError, "Could not open file %s" % filename + raise IOError, "Could not open file %s" % c_filename raise ParserError, "Failed to create parser context" self._file_parser_ctxt = pctxt - __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt) + self._initContext(pctxt) result = htmlparser.htmlCtxtReadFile( - pctxt, filename, NULL, self._parse_options) - return self._handleResult(pctxt, result) + pctxt, c_filename, NULL, self._parse_options) + self._error_log.disconnect() + return _handleParseResult(pctxt, result, c_filename) cdef HTMLParser __DEFAULT_HTML_PARSER __DEFAULT_HTML_PARSER = HTMLParser() @@ -301,9 +443,9 @@ parser = __DEFAULT_PARSER __GLOBAL_PARSER_CONTEXT._initParser() if isinstance(parser, XMLParser): - return (parser)._parseDoc(text_utf) + 
return (parser)._parseDoc(_cstr(text_utf)) elif isinstance(parser, HTMLParser): - return (parser)._parseDoc(text_utf) + return (parser)._parseDoc(_cstr(text_utf)) else: raise TypeError, "invalid parser" Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Thu Apr 27 22:51:43 2006 @@ -49,7 +49,67 @@ f = open(fileInTestDir('test_broken.xml'), 'r') self.assertRaises(SyntaxError, parse, f) f.close() - + + def test_resolve_string_dtd(self): + parse = self.etree.parse + parser = self.etree.XMLParser(dtd_validation=True) + assertEqual = self.assertEqual + test_url = u"__nosuch.dtd" + + class MyResolver(self.etree.Resolver): + def resolve(self, url, id, context): + assertEqual(url, test_url) + return self.resolve_string( + u'' % url, context) + + parser.resolvers.add(MyResolver()) + + xml = u'&myentity;' % test_url + tree = parse(StringIO(xml), parser) + root = tree.getroot() + self.assertEquals(root.text, test_url) + + def test_resolve_empty(self): + parse = self.etree.parse + parser = self.etree.XMLParser(dtd_validation=True) + assertEqual = self.assertEqual + test_url = u"__nosuch.dtd" + + class check(object): + resolved = False + + class MyResolver(self.etree.Resolver): + def resolve(self, url, id, context): + assertEqual(url, test_url) + check.resolved = True + return self.resolve_empty(context) + + parser.resolvers.add(MyResolver()) + + xml = u'&myentity;' % test_url + tree = parse(StringIO(xml), parser) + self.assert_(check.resolved) + + root = tree.getroot() + self.assertEquals(root.text, None) + + def test_resolve_error(self): + parse = self.etree.parse + parser = self.etree.XMLParser(dtd_validation=True) + test_url = u"__nosuch.dtd" + + class _LocalException(Exception): + pass + + class MyResolver(self.etree.Resolver): + def resolve(self, url, id, context): + raise _LocalException + + 
parser.resolvers.add(MyResolver()) + + xml = u'&myentity;' + self.assertRaises(_LocalException, parse, StringIO(xml), parser) + # TypeError in etree, AssertionError in ElementTree; def test_setitem_assert(self): Element = self.etree.Element @@ -357,6 +417,8 @@ suite.addTests([unittest.makeSuite(ETreeC14NTestCase)]) suite.addTests( [doctest.DocFileSuite('../../../doc/api.txt')]) + suite.addTests( + [doctest.DocFileSuite('../../../doc/resolvers.txt')]) return suite if __name__ == '__main__': Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Thu Apr 27 22:51:43 2006 @@ -29,6 +29,36 @@ B ''', st.tostring(res)) + + def test_exslt(self): + tree = self.parse('BC') + style = self.parse('''\ + + + + + + + + + + + + +''') + + st = etree.XSLT(style) + res = st(tree) + self.assertEquals('''\ + +-B--C- +''', + st.tostring(res)) + def test_xslt_input(self): tree = self.parse('BC') style = self.parse('''\ @@ -336,12 +366,12 @@ 'X') def test_xslt_document_XML(self): - # make sure document('') works from loaded files + # make sure document('') works from parsed strings xslt = etree.XSLT(etree.XML("""\ - + TEXT """)) @@ -351,6 +381,8 @@ 'test') self.assertEquals(root[0].tag, 'test') + self.assertEquals(root[0].text, + 'TEXT') self.assertEquals(root[0][0].tag, '{http://www.w3.org/1999/XSL/Transform}copy-of') Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Thu Apr 27 22:51:43 2006 @@ -77,6 +77,7 @@ xmlDict* dict xmlHashTable* ids char* URL + void* _private ctypedef struct xmlAttr: void* _private Modified: lxml/trunk/src/lxml/xmlparser.pxd ============================================================================== --- lxml/trunk/src/lxml/xmlparser.pxd (original) +++ 
lxml/trunk/src/lxml/xmlparser.pxd Thu Apr 27 22:51:43 2006 @@ -1,6 +1,9 @@ from tree cimport xmlDoc, xmlDict from xmlerror cimport xmlError +cdef extern from "libxml/tree.h": + ctypedef struct xmlParserInput + cdef extern from "libxml/parser.h": cdef xmlDict* xmlDictCreate() @@ -10,6 +13,7 @@ ctypedef struct xmlParserCtxt: xmlDoc* myDoc xmlDict* dict + void* _private int wellFormed xmlError lastError @@ -30,8 +34,8 @@ XML_PARSE_NSCLEAN = 8192 # remove redundant namespaces declarations XML_PARSE_NOCDATA = 16384 # merge CDATA as text nodes XML_PARSE_NOXINCNODE = 32768 # do not generate XINCLUDE START/END nodes -# libxml2 2.6.21+ only: -# XML_PARSE_COMPACT = 65536 # compact small text nodes + # libxml2 2.6.21+ only: + #XML_PARSE_COMPACT = 65536 # compact small text nodes cdef void xmlInitParser() cdef xmlParserCtxt* xmlNewParserCtxt() @@ -42,3 +46,18 @@ int options) cdef xmlDoc* xmlCtxtReadFile(xmlParserCtxt* ctxt, char* filename, char* encoding, int options) + +# entity loaders: + + ctypedef xmlParserInput* (*xmlExternalEntityLoader)(char * URL, + char * ID, + xmlParserCtxt* context) + cdef xmlExternalEntityLoader xmlGetExternalEntityLoader() + cdef void xmlSetExternalEntityLoader(xmlExternalEntityLoader f) + +cdef extern from "libxml/parserInternals.h": + cdef xmlParserInput* xmlNewStringInputStream(xmlParserCtxt* ctxt, + char* buffer) + cdef xmlParserInput* xmlNewInputFromFile(xmlParserCtxt* ctxt, + char* filename) + cdef void xmlFreeInputStream(xmlParserInput* input) Modified: lxml/trunk/src/lxml/xslt.pxd ============================================================================== --- lxml/trunk/src/lxml/xslt.pxd (original) +++ lxml/trunk/src/lxml/xslt.pxd Thu Apr 27 22:51:43 2006 @@ -1,27 +1,43 @@ -from tree cimport xmlDoc +from tree cimport xmlDoc, xmlDict from xpath cimport xmlXPathContext, xmlXPathFunction cdef extern from "libxslt/xsltInternals.h": + ctypedef struct xsltDocument: + xmlDoc* doc + ctypedef struct xsltStylesheet: - pass + xmlDoc* doc ctypedef 
struct xsltTransformContext: + xsltStylesheet* style xmlXPathContext* xpathCtxt - + xsltDocument* document + cdef xsltStylesheet* xsltParseStylesheetDoc(xmlDoc* doc) cdef void xsltFreeStylesheet(xsltStylesheet* sheet) - -#cdef extern from "libxslt/xslt.h": -# pass cdef extern from "libxslt/extensions.h": cdef int xsltRegisterExtFunction(xsltTransformContext* ctxt, char* name, - char * URI, + char* URI, xmlXPathFunction function) cdef int xsltRegisterExtModuleFunction(char* name, char* URI, xmlXPathFunction function) cdef int xsltUnregisterExtModuleFunction(char* name, char* URI) + cdef xmlXPathFunction xsltExtModuleFunctionLookup(char* name, char* URI) + +cdef extern from "libxslt/documents.h": + ctypedef enum xsltLoadType: + XSLT_LOAD_START + XSLT_LOAD_STYLESHEET + XSLT_LOAD_DOCUMENT + + ctypedef xmlDoc* (*xsltDocLoaderFunc)(char* URI, xmlDict* dict, + int options, + void* ctxt, + xsltLoadType type) + cdef xsltDocLoaderFunc xsltDocDefaultLoader + cdef void xsltSetLoaderFunc(xsltDocLoaderFunc f) cdef extern from "libxslt/transform.h": cdef xmlDoc* xsltApplyStylesheet(xsltStylesheet* style, xmlDoc* doc, @@ -45,4 +61,15 @@ cdef void xsltSetTransformErrorFunc(xsltTransformContext*, void* ctxt, void (*handler)(void* ctxt, char* msg, ...)) - + +cdef extern from "libxslt/extra.h": + cdef char* XSLT_LIBXSLT_NAMESPACE + cdef char* XSLT_XALAN_NAMESPACE + cdef char* XSLT_SAXON_NAMESPACE + cdef char* XSLT_XT_NAMESPACE + + cdef xmlXPathFunction xsltFunctionNodeSet + cdef void xsltRegisterAllExtras() + +cdef extern from "libexslt/exslt.h": + cdef void exsltRegisterAll() Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Thu Apr 27 22:51:43 2006 @@ -39,9 +39,8 @@ cdef object _utf_refs cdef object _temp_functions # for exception handling and temporary reference keeping: - cdef object _temp_elements - cdef object _temp_docs - cdef object 
_exc_info + cdef _TempStore _temp_refs + cdef _ExceptionContext _exc def __init__(self, namespaces, extensions): self._xpathCtxt = NULL @@ -62,12 +61,11 @@ extensions = new_extensions or None self._doc = None - self._exc_info = None + self._exc = _ExceptionContext() self._extensions = extensions self._namespaces = namespaces self._registered_namespaces = [] - self._temp_elements = {} - self._temp_docs = {} + self._temp_refs = _TempStore() cdef object _to_utf(self, s): "Convert to UTF-8 and keep a reference to the encoded string" @@ -86,9 +84,9 @@ xpathCtxt.userData = self cdef _register_context(self, _Document doc, int allow_none_namespace): - self._doc = doc - self._exc_info = None - self._temp_functions.clear() + self._doc = doc + self._exc.clear() + python.PyDict_Clear(self._temp_functions) namespaces = self._namespaces if namespaces is not None: self.registerNamespaces(namespaces) @@ -154,8 +152,7 @@ cdef _release_temp_refs(self): "Free temporarily referenced objects from this context." - python.PyDict_Clear(self._temp_elements) - python.PyDict_Clear(self._temp_docs) + self._temp_refs.clear() cdef _hold(self, obj): """A way to temporarily hold references to nodes in the evaluator. 
@@ -173,27 +170,128 @@ if isinstance(o, _NodeBase): element = <_NodeBase>o #print "Holding element:", element._c_node - python.PyDict_SetItem(self._temp_elements, id(element), element) + self._temp_refs.add(element) #print "Holding document:", element._doc._c_doc - python.PyDict_SetItem(self._temp_docs, id(element._doc), element._doc) + self._temp_refs.add(element._doc) cdef xpath.xmlXPathFunction _function_check(void* ctxt, char* c_name, char* c_ns_uri): cdef _BaseContext context if c_name is NULL: return NULL if c_ns_uri is NULL: - c_ns_uri = '' ns_uri = None else: ns_uri = c_ns_uri context = <_BaseContext>ctxt - if context._lookup_extension(ns_uri, c_name) is None: - return NULL - else: + if context._lookup_extension(ns_uri, c_name) is not None: return _xpath_function_call + else: + return xslt.xsltExtModuleFunctionLookup(c_name, c_ns_uri) ################################################################################ +# XSLT document loaders + +cdef class _XSLTResolverContext(_ResolverContext): + cdef xmlDoc* _c_style_doc + cdef object _style_url_utf + cdef object _style_doc_utf + cdef BaseParser _parser + def __init__(self, BaseParser parser not None): + _ResolverContext.__init__(self, parser.resolvers) + self._parser = parser + self._c_style_doc = NULL + self._style_url_utf = None + self._style_doc_utf = None + +cdef xmlDoc* _doc_loader(char* c_uri, tree.xmlDict* c_dict, int parse_options, + void* c_ctxt, xslt.xsltLoadType c_type): + cdef xmlDoc* c_doc + cdef _ResolverRegistry resolvers + cdef _InputDocument doc_ref + cdef _XSLTResolverContext xslt_resolver_context + cdef _XSLTResolverContext doc_resolver_context + cdef _XSLTResolverContext resolver_context + cdef XMLParser parser + # find resolver contexts of stylesheet and transformed doc + c_doc = NULL + doc_resolver_context = None + if c_type == xslt.XSLT_LOAD_DOCUMENT: + c_doc = (c_ctxt).document.doc + if c_doc is not NULL and c_doc._private is not NULL: + if isinstance(c_doc._private, 
_XSLTResolverContext): + doc_resolver_context = <_XSLTResolverContext>c_doc._private + c_doc = (c_ctxt).style.doc + elif c_type == xslt.XSLT_LOAD_STYLESHEET: + c_doc = (c_ctxt).doc + + if c_doc is NULL or c_doc._private is NULL or \ + not isinstance(c_doc._private, _XSLTResolverContext): + # can't call Python without context, fall back to default loader + return XSLT_DOC_DEFAULT_LOADER( + c_uri, c_dict, parse_options, c_ctxt, c_type) + + xslt_resolver_context = <_XSLTResolverContext>c_doc._private + + # quick check if we are looking for the current stylesheet + c_doc = xslt_resolver_context._c_style_doc + if c_doc is not NULL and c_doc.URL is not NULL: + if tree.strcmp(c_uri, c_doc.URL) == 0: + return tree.xmlCopyDoc(c_doc, 1) + + # call the Python document loaders + c_doc = NULL + resolver_context = xslt_resolver_context # currently use only XSLT resolvers + resolvers = resolver_context._resolvers + try: + uri = funicode(c_uri) + doc_ref = resolvers.resolve(uri, None, resolver_context) + + if doc_ref is not None: + if doc_ref._type == PARSER_DATA_EMPTY: + c_doc = _newDoc() + if doc_ref._type == PARSER_DATA_STRING: + c_doc = _internalParseDoc( + _cstr(doc_ref._data_utf), parse_options, + resolver_context) + elif doc_ref._type == PARSER_DATA_FILE: + data = doc_ref._file.read() + c_doc = _internalParseDoc( + _cstr(data), parse_options, + resolver_context) + elif doc_ref._type == PARSER_DATA_FILENAME: + c_doc = _internalParseDocFromFile( + _cstr(doc_ref._data_utf), parse_options, + resolver_context) + if c_doc is not NULL and c_doc.URL is NULL: + c_doc.URL = tree.xmlStrdup(c_uri) + + except Exception, e: + xslt_resolver_context._store_raised() + return NULL + + if c_doc is NULL: + c_doc = XSLT_DOC_DEFAULT_LOADER( + c_uri, c_dict, parse_options, c_ctxt, c_type) + if c_doc is NULL: + message = "Cannot resolve URI %s" % funicode(c_uri) + if c_type == xslt.XSLT_LOAD_DOCUMENT: + exception = XSLTApplyError(message) + else: + exception = XSLTParseError(message) + 
xslt_resolver_context._store_exception(exception) + return NULL + if c_doc is not NULL and c_doc._private is NULL: + c_doc._private = xslt_resolver_context + return c_doc + +cdef xslt.xsltDocLoaderFunc XSLT_DOC_DEFAULT_LOADER +XSLT_DOC_DEFAULT_LOADER = xslt.xsltDocDefaultLoader + +xslt.xsltSetLoaderFunc(_doc_loader) + + +################################################################################ # XSLT cdef class _XSLTContext(_BaseContext): @@ -223,13 +321,14 @@ cdef _registerLocalExtensionFunction(self, ns_utf, name_utf, function): extensions = self._extensions - if self._extensions is None: + if extensions is None: self._extensions = {ns_utf:{name_utf:function}} else: - if ns_utf in self._extensions: - self._extensions[ns_utf][name_utf] = function + if ns_utf in extensions: + ns_extensions = extensions[ns_utf] else: - self._extensions[ns_utf] = ns_extensions = {name_utf:function} + ns_extensions = extensions[ns_utf] = {} + python.PyDict_SetItem(ns_extensions, name_utf, function) xslt.xsltRegisterExtFunction( self._xsltCtxt, _cstr(name_utf), _cstr(ns_utf), _xpath_function_call) @@ -252,12 +351,10 @@ """ cdef _XSLTContext _context cdef xslt.xsltStylesheet* _c_style + cdef _XSLTResolverContext _xslt_resolver_context cdef _ExsltRegExp _regexp - cdef object _doc_url_utf - + def __init__(self, xslt_input, extensions=None, regexp=True): - # make a copy of the document as stylesheet needs to assume it - # doesn't change cdef xslt.xsltStylesheet* c_style cdef xmlDoc* c_doc cdef xmlDoc* fake_c_doc @@ -267,23 +364,30 @@ doc = _documentOrRaise(xslt_input) root_node = _rootNodeOf(xslt_input) + # make a copy of the document as stylesheet parsing modifies it fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) c_doc = tree.xmlCopyDoc(fake_c_doc, 1) _destroyFakeDoc(doc._c_doc, fake_c_doc) - # XXX work around bug in xmlCopyDoc (fix is upcoming in new release - # of libxml2) - if c_doc.URL is not NULL and c_doc.URL != doc._c_doc.URL: + # make sure we always have a 
stylesheet URL + if c_doc.URL is not NULL: + # handle a bug in older libxml2 versions tree.xmlFree(c_doc.URL) if doc._c_doc.URL is not NULL: - self._doc_url_utf = doc._c_doc.URL c_doc.URL = tree.xmlStrdup(doc._c_doc.URL) else: - self._doc_url_utf = "__STRING__XSLT__%s" % id(self) - c_doc.URL = tree.xmlStrdup(_cstr(self._doc_url_utf)) + doc_url_utf = "XSLT:__STRING__XSLT__%s" % id(self) + c_doc.URL = tree.xmlStrdup(_cstr(doc_url_utf)) + + self._xslt_resolver_context = _XSLTResolverContext(doc._parser) + # keep a copy in case we need to access the stylesheet via 'document()' + self._xslt_resolver_context._c_style_doc = tree.xmlCopyDoc(c_doc, 1) + c_doc._private = self._xslt_resolver_context c_style = xslt.xsltParseStylesheetDoc(c_doc) if c_style is NULL: + tree.xmlFreeDoc(c_doc) + self._xslt_resolver_context._raise_if_stored() raise XSLTParseError, "Cannot parse style sheet" self._c_style = c_style @@ -295,6 +399,9 @@ # XXX is it worthwile to use xsltPrecomputeStylesheet here? def __dealloc__(self): + if self._xslt_resolver_context is not None and \ + self._xslt_resolver_context._c_style_doc is not NULL: + tree.xmlFreeDoc(self._xslt_resolver_context._c_style_doc) # this cleans up copy of doc as well xslt.xsltFreeStylesheet(self._c_style) @@ -302,16 +409,20 @@ cdef _Document input_doc cdef _NodeBase root_node cdef _Document result_doc + cdef _XSLTResolverContext resolver_context cdef xslt.xsltTransformContext* transform_ctxt cdef xmlDoc* c_result cdef xmlDoc* c_doc cdef char** params + cdef void* ptemp cdef int i - cdef int j input_doc = _documentOrRaise(_input) root_node = _rootNodeOf(_input) + resolver_context = _XSLTResolverContext(input_doc._parser) + resolver_context._c_style_doc = self._xslt_resolver_context._c_style_doc + c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node) transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc) @@ -319,6 +430,9 @@ _destroyFakeDoc(input_doc._c_doc, c_doc) raise XSLTApplyError, "Error preparing stylesheet run" + 
ptemp = c_doc._private + c_doc._private = resolver_context + if _kw: # allocate space for parameters # * 2 as we want an entry for both key and value, @@ -352,19 +466,25 @@ cstd.free(params) self._context.free_context() + c_doc._private = ptemp # restore _private before _destroyFakeDoc! _destroyFakeDoc(input_doc._c_doc, c_doc) + if self._xslt_resolver_context._has_raised(): + if c_result is not NULL: + tree.xmlFreeDoc(c_result) + self._xslt_resolver_context._raise_if_stored() + if c_result is NULL: raise XSLTApplyError, "Error applying stylesheet" - result_doc = _documentFactory(c_result) + result_doc = _documentFactory(c_result, input_doc._parser) return _xsltResultTreeFactory(result_doc, self) def apply(self, _input, **_kw): - return self(_input, **_kw) + return self.__call__(_input, **_kw) def tostring(self, _ElementTree result_tree): - """Save result doc to string using stylesheet as guidance. + """Save result doc to string based on stylesheet output method. """ return str(result_tree) @@ -397,6 +517,23 @@ cdef void _unregister_global_xslt_function(char* ns_uri, char* name): xslt.xsltUnRegisterExtModuleFunction(ns_uri, name) +# do not register all libxslt extra function, provide only "node-set" +# functions like "output" and "write" are a potential security risk +#xslt.xsltRegisterAllExtras() +xslt.xsltRegisterExtModuleFunction("node-set", + xslt.XSLT_LIBXSLT_NAMESPACE, + xslt.xsltFunctionNodeSet) +xslt.xsltRegisterExtModuleFunction("node-set", + xslt.XSLT_SAXON_NAMESPACE, + xslt.xsltFunctionNodeSet) +xslt.xsltRegisterExtModuleFunction("node-set", + xslt.XSLT_XT_NAMESPACE, + xslt.xsltFunctionNodeSet) + +# enable EXSLT support for XSLT +xslt.exsltRegisterAll() + + ################################################################################ # EXSLT regexp implementation @@ -541,17 +678,21 @@ self._context = _XPathContext(namespaces, extensions, variables) cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, _Document doc): - _exc_info = 
self._context._exc_info - if _exc_info is not None: - type, value, traceback = _exc_info - raise type, value, traceback + if self._context._exc._has_raised(): + if xpathObj is not NULL: + xpath.xmlXPathFreeObject(xpathObj) + xpathObj = NULL + self._context._exc._raise_if_stored() + if xpathObj is NULL: raise XPathSyntaxError, "Error in xpath expression." + try: result = _unwrapXPathObject(xpathObj, doc) except XPathResultError: xpath.xmlXPathFreeObject(xpathObj) raise + xpath.xmlXPathFreeObject(xpathObj) return result @@ -795,6 +936,7 @@ cdef xmlNode* c_node cdef char* s cdef _NodeBase element + cdef int i result = [] if xpathObj.nodesetval is NULL: return result @@ -819,7 +961,8 @@ cdef xpath.xmlXPathContext* rctxt cdef _Document doc cdef xpath.xmlXPathObject* obj - cdef _BaseContext extensions + cdef _BaseContext context + cdef int i rctxt = ctxt.context @@ -831,15 +974,15 @@ uri = None # get our evaluator - extensions = <_BaseContext>(rctxt.userData) + context = <_BaseContext>(rctxt.userData) # lookup up the extension function in the context - f = extensions._lookup_extension(uri, name) + f = context._lookup_extension(uri, name) args = [] - doc = extensions._doc + doc = context._doc for i from 0 <= i < nargs: - args.append(_unwrapXPathObject(xpath.valuePop(ctxt), doc)) + python.PyList_Append(args, _unwrapXPathObject(xpath.valuePop(ctxt), doc)) args.reverse() try: @@ -847,13 +990,13 @@ res = f(None, *args) # hold python objects temporarily so that they won't get deallocated # during processing - extensions._hold(res) + context._hold(res) # now wrap for XPath consumption obj = _wrapXPathObject(res) except: xpath.xmlXPathErr( ctxt, xmlerror.XML_XPATH_EXPR_ERROR - xmlerror.XML_XPATH_EXPRESSION_OK) - extensions._exc_info = sys.exc_info() + context._exc._store_raised() return xpath.valuePush(ctxt, obj) From scoder at codespeak.net Fri Apr 28 08:14:41 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 08:14:42 2006 Subject: [Lxml-checkins] 
r26492 - lxml/trunk/src/lxml Message-ID: <20060428061441.BC2E5100A7@code0.codespeak.net> Author: scoder Date: Fri Apr 28 08:14:40 2006 New Revision: 26492 Modified: lxml/trunk/src/lxml/nsclasses.pxi lxml/trunk/src/lxml/xslt.pxi Log: large cleanup in XSLT function calling: store function for next call after lookup, do not xsltExtModuleRegister XSLT functions (looked up by Python anyway) Modified: lxml/trunk/src/lxml/nsclasses.pxi ============================================================================== --- lxml/trunk/src/lxml/nsclasses.pxi (original) +++ lxml/trunk/src/lxml/nsclasses.pxi Fri Apr 28 08:14:40 2006 @@ -143,7 +143,6 @@ else: name_utf = _utf8(name) self._extensions[name_utf] = function - _register_global_xslt_function(self._c_ns_uri_utf, _cstr(name_utf)) def __getitem__(self, name): cdef python.PyObject* dict_result @@ -153,13 +152,6 @@ raise KeyError, "Name not registered." return dict_result - def clear(self): - cdef char* c_uri_utf - c_uri_utf = self._c_ns_uri_utf - for name_utf in self._extensions: - _unregister_global_xslt_function(c_uri_utf, _cstr(name_utf)) - _NamespaceRegistry.clear(self) - def __repr__(self): return "FunctionNamespace(%r)" % self._ns_uri Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Fri Apr 28 08:14:40 2006 @@ -37,7 +37,8 @@ cdef object _namespaces cdef object _registered_namespaces cdef object _utf_refs - cdef object _temp_functions + cdef object _function_cache + cdef object _called_function # for exception handling and temporary reference keeping: cdef _TempStore _temp_refs cdef _ExceptionContext _exc @@ -45,7 +46,8 @@ def __init__(self, namespaces, extensions): self._xpathCtxt = NULL self._utf_refs = {} - self._temp_functions = {} + self._function_cache = {} + self._called_function = None # convert old format extensions to UTF-8 if isinstance(extensions, (list, tuple)): @@ 
-86,7 +88,7 @@ cdef _register_context(self, _Document doc, int allow_none_namespace): self._doc = doc self._exc.clear() - python.PyDict_Clear(self._temp_functions) + python.PyDict_Clear(self._function_cache) namespaces = self._namespaces if namespaces is not None: self.registerNamespaces(namespaces) @@ -130,12 +132,14 @@ # extension functions - cdef _lookup_extension(self, ns_uri_utf, name_utf): + cdef _prepare_function_call(self, ns_uri_utf, name_utf): cdef python.PyObject* dict_result key = (ns_uri_utf, name_utf) - dict_result = python.PyDict_GetItem(self._temp_functions, key) + dict_result = python.PyDict_GetItem(self._function_cache, key) if dict_result is not NULL: - return dict_result + function = dict_result + self._called_function = function + return function dict_result = python.PyDict_GetItem(self._extensions, ns_uri_utf) if dict_result is not NULL: @@ -145,7 +149,8 @@ else: function = _find_extension(ns_uri_utf, name_utf) - python.PyDict_SetItem(self._temp_functions, key, function) + python.PyDict_SetItem(self._function_cache, key, function) + self._called_function = function return function # Python reference keeping during XPath function evaluation @@ -183,8 +188,9 @@ else: ns_uri = c_ns_uri context = <_BaseContext>ctxt - if context._lookup_extension(ns_uri, c_name) is not None: - return _xpath_function_call + function = context._prepare_function_call(ns_uri, c_name) + if function is not None: + return _call_prepared_function else: return xslt.xsltExtModuleFunctionLookup(c_name, c_ns_uri) @@ -194,15 +200,11 @@ cdef class _XSLTResolverContext(_ResolverContext): cdef xmlDoc* _c_style_doc - cdef object _style_url_utf - cdef object _style_doc_utf cdef BaseParser _parser def __init__(self, BaseParser parser not None): _ResolverContext.__init__(self, parser.resolvers) self._parser = parser self._c_style_doc = NULL - self._style_url_utf = None - self._style_doc_utf = None cdef xmlDoc* _doc_loader(char* c_uri, tree.xmlDict* c_dict, int parse_options, void* 
c_ctxt, xslt.xsltLoadType c_type): @@ -308,7 +310,6 @@ self._set_xpath_context(xsltCtxt.xpathCtxt) self._register_context(doc, 0) xsltCtxt.xpathCtxt.userData = self - self._registerLocalExtensionFunctions() cdef free_context(self): cdef xslt.xsltTransformContext* xsltCtxt @@ -318,6 +319,7 @@ self._free_context() self._xsltCtxt = NULL xslt.xsltFreeTransformContext(xsltCtxt) + self._release_temp_refs() cdef _registerLocalExtensionFunction(self, ns_utf, name_utf, function): extensions = self._extensions @@ -329,20 +331,6 @@ else: ns_extensions = extensions[ns_utf] = {} python.PyDict_SetItem(ns_extensions, name_utf, function) - xslt.xsltRegisterExtFunction( - self._xsltCtxt, _cstr(name_utf), _cstr(ns_utf), - _xpath_function_call) - - cdef _registerLocalExtensionFunctions(self): - cdef xslt.xsltTransformContext* xsltCtxt - if self._extensions is None: - return - xsltCtxt = self._xsltCtxt - for ns_uri_utf, extension in self._extensions.items(): - for name_utf, function in extension.items(): - xslt.xsltRegisterExtFunction( - xsltCtxt, _cstr(name_utf), _cstr(ns_uri_utf), - _xpath_function_call) cdef class _ExsltRegExp # forward declaration @@ -453,7 +441,6 @@ else: params = NULL - self._context._release_temp_refs() self._context.register_context(transform_ctxt, input_doc) if self._regexp is not None: self._regexp._register_in_context(self._context) @@ -510,13 +497,6 @@ result._xslt = xslt return result -# used by FunctionNamespace(): -cdef void _register_global_xslt_function(char* ns_uri, char* name): - xslt.xsltRegisterExtModuleFunction(ns_uri, name, _xpath_function_call) - -cdef void _unregister_global_xslt_function(char* ns_uri, char* name): - xslt.xsltUnRegisterExtModuleFunction(ns_uri, name) - # do not register all libxslt extra function, provide only "node-set" # functions like "output" and "write" are a potential security risk #xslt.xsltRegisterAllExtras() @@ -774,7 +754,6 @@ """Create an XPath evaluator for an element. 
""" cdef _Element _element - def __init__(self, _Element element not None, namespaces=None, extensions=None): XPathDocumentEvaluator.__init__( self, element._doc, namespaces, extensions) @@ -957,28 +936,32 @@ raise NotImplementedError return result + cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt, int nargs): cdef xpath.xmlXPathContext* rctxt - cdef _Document doc - cdef xpath.xmlXPathObject* obj cdef _BaseContext context - cdef int i - rctxt = ctxt.context - - # get information on what function is called + context = <_BaseContext>(rctxt.userData) name = rctxt.function if rctxt.functionURI is not NULL: uri = rctxt.functionURI else: uri = None + context._prepare_function_call(uri, name) + _extension_function_call(context, ctxt, nargs) - # get our evaluator +cdef void _call_prepared_function(xpath.xmlXPathParserContext* ctxt, int nargs): + cdef xpath.xmlXPathContext* rctxt + cdef _BaseContext context + rctxt = ctxt.context context = <_BaseContext>(rctxt.userData) + _extension_function_call(context, ctxt, nargs) - # lookup up the extension function in the context - f = context._lookup_extension(uri, name) - +cdef void _extension_function_call(_BaseContext context, + xpath.xmlXPathParserContext* ctxt, int nargs): + cdef _Document doc + cdef xpath.xmlXPathObject* obj + cdef int i args = [] doc = context._doc for i from 0 <= i < nargs: @@ -987,16 +970,16 @@ try: # call the function - res = f(None, *args) + res = context._called_function(None, *args) # hold python objects temporarily so that they won't get deallocated # during processing context._hold(res) # now wrap for XPath consumption obj = _wrapXPathObject(res) + xpath.valuePush(ctxt, obj) except: xpath.xmlXPathErr( ctxt, xmlerror.XML_XPATH_EXPR_ERROR - xmlerror.XML_XPATH_EXPRESSION_OK) context._exc._store_raised() return - xpath.valuePush(ctxt, obj) From scoder at codespeak.net Fri Apr 28 08:54:10 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 08:54:12 2006 Subject: 
[Lxml-checkins] r26494 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20060428065410.7D3A0100B0@code0.codespeak.net> Author: scoder Date: Fri Apr 28 08:54:07 2006 New Revision: 26494 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tests/test_xpathevaluator.py lxml/trunk/src/lxml/xslt.pxi Log: refactoring of XPath(Document|Element)Evaluator: fix document evaluator by using the _ElementTree context node Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Apr 28 08:54:07 2006 @@ -34,7 +34,10 @@ Bugs fixed ---------- -* document('') now works in all XSLT documents +* ElementTree.xpath() and XPathDocumentEvaluator were not using the + ElementTree root node as reference point + +* Calling document('') in XSLT failed to return the stylesheet * Crash in XMLSchema and RelaxNG when passing non-schema documents Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Fri Apr 28 08:54:07 2006 @@ -320,7 +320,8 @@ against the same document, it is more efficient to use XPathEvaluator directly. """ - return XPathDocumentEvaluator(self._doc, namespaces).evaluate(_path, **_variables) + evaluator = XPathElementEvaluator(self._context_node, namespaces) + return evaluator.evaluate(_path, **_variables) def xslt(self, _xslt, extensions=None, **_kw): """Transform this document using other document. 
@@ -827,7 +828,8 @@ return _elementpath.findall(self, path) def xpath(self, _path, namespaces=None, **_variables): - return XPathElementEvaluator(self, namespaces).evaluate(_path, **_variables) + evaluator = XPathElementEvaluator(self, namespaces) + return evaluator.evaluate(_path, **_variables) cdef _Element _elementFactory(_Document doc, xmlNode* c_node): cdef _Element result Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Fri Apr 28 08:54:07 2006 @@ -105,6 +105,18 @@ [root], e.evaluate('//a')) + def test_xpath_evaluator_tree(self): + tree = self.parse('') + child_tree = etree.ElementTree(tree.getroot()[0]) + e = etree.XPathEvaluator(child_tree) + self.assertEquals( + [], + e.evaluate('.//a')) + root = child_tree.getroot() + self.assertEquals( + [root[0]], + e.evaluate('.//c')) + def test_xpath_evaluator_element(self): tree = self.parse('') root = tree.getroot() Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Fri Apr 28 08:54:07 2006 @@ -677,32 +677,23 @@ return result -cdef class XPathDocumentEvaluator(XPathEvaluatorBase): - """Create an XPath evaluator for a document. +cdef class XPathElementEvaluator(XPathEvaluatorBase): + """Create an XPath evaluator for an element. + + XPath evaluators must not be shared between threads. 
""" cdef xpath.xmlXPathContext* _c_ctxt - cdef _Document _doc - - def __init__(self, etree, namespaces=None, extensions=None): + cdef _Element _element + def __init__(self, _NodeBase element not None, namespaces=None, extensions=None): cdef xpath.xmlXPathContext* xpathCtxt cdef int ns_register_status cdef _Document doc - - if isinstance(etree, _Document): - doc = <_Document>etree # for internal use only! - elif isinstance(etree, _ElementTree): - doc = (<_ElementTree>etree)._doc - else: - raise TypeError, "XPathDocumentEvaluator can only work on ElementTree objects" - + doc = element._doc xpathCtxt = xpath.xmlXPathNewContext(doc._c_doc) if xpathCtxt is NULL: - # XXX what triggers this exception? raise XPathContextError, "Unable to create new XPath context" - - self._doc = doc + self._element = element self._c_ctxt = xpathCtxt - XPathEvaluatorBase.__init__(self, namespaces, extensions) def __dealloc__(self): @@ -717,55 +708,51 @@ def registerNamespaces(self, namespaces): """Register a prefix -> uri dict. """ + add = self._context.addNamespace for prefix, uri in namespaces.items(): - self.registerNamespace(prefix, uri) + add(prefix, uri) def evaluate(self, _path, **_variables): - """Evaluate an XPath expression on the document. Variables - may be given as keyword arguments. Note that namespaces are - currently not supported for variables.""" - return self._evaluate(_path, NULL, _variables) - - cdef object _evaluate(self, path, xmlNode* c_ctxt_node, variable_dict): + """Evaluate an XPath expression on the document. Variables may be + provided as keyword arguments. 
Note that namespaces are currently not + supported for variables.""" cdef xpath.xmlXPathContext* xpathCtxt cdef xpath.xmlXPathObject* xpathObj cdef xmlNode* c_node - + cdef _Document doc xpathCtxt = self._c_ctxt - # if element context is requested; unfortunately need to modify ctxt - xpathCtxt.node = c_ctxt_node + xpathCtxt.node = self._element._c_node + doc = self._element._doc self._context._release_temp_refs() - self._context.register_context(xpathCtxt, self._doc) - self._context.registerVariables(variable_dict) + self._context.register_context(xpathCtxt, doc) + self._context.registerVariables(_variables) - path = _utf8(path) + path = _utf8(_path) xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt) self._context.unregister_context() - return self._handle_result(xpathObj, self._doc) + return self._handle_result(xpathObj, doc) #def clone(self): # # XXX pretty expensive so calling this from callback is probably # # not desirable # return XPathEvaluator(self._doc, self._namespaces, self._extensions) -cdef class XPathElementEvaluator(XPathDocumentEvaluator): - """Create an XPath evaluator for an element. - """ - cdef _Element _element - def __init__(self, _Element element not None, namespaces=None, extensions=None): - XPathDocumentEvaluator.__init__( - self, element._doc, namespaces, extensions) - self._element = element +cdef class XPathDocumentEvaluator(XPathElementEvaluator): + """Create an XPath evaluator for an ElementTree. - def evaluate(self, _path, **_variables): - """Evaluate an XPath expression on the element. Variables may - be given as keyword arguments. Note that namespaces are - currently not supported for variables.""" - return self._evaluate(_path, self._element._c_node, _variables) + XPath evaluators must not be shared between threads. 
+ """ + def __init__(self, _ElementTree etree not None, namespaces=None, extensions=None): + XPathElementEvaluator.__init__( + self, etree._context_node, namespaces, extensions) def XPathEvaluator(etree_or_element, namespaces=None, extensions=None): + """Creates an XPath evaluator for an ElementTree or an Element. + + XPath evaluators must not be shared between threads. + """ if isinstance(etree_or_element, _ElementTree): return XPathDocumentEvaluator(etree_or_element, namespaces, extensions) else: From scoder at codespeak.net Fri Apr 28 08:55:49 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 08:55:50 2006 Subject: [Lxml-checkins] r26495 - lxml/trunk/src/lxml/tests Message-ID: <20060428065549.6BC90100B0@code0.codespeak.net> Author: scoder Date: Fri Apr 28 08:55:48 2006 New Revision: 26495 Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py Log: better test case for last bug Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Fri Apr 28 08:55:48 2006 @@ -111,11 +111,11 @@ e = etree.XPathEvaluator(child_tree) self.assertEquals( [], - e.evaluate('.//a')) + e.evaluate('a')) root = child_tree.getroot() self.assertEquals( [root[0]], - e.evaluate('.//c')) + e.evaluate('c')) def test_xpath_evaluator_element(self): tree = self.parse('') From scoder at codespeak.net Fri Apr 28 10:33:18 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 10:33:20 2006 Subject: [Lxml-checkins] r26498 - lxml/trunk/doc Message-ID: <20060428083318.ADD77100A4@code0.codespeak.net> Author: scoder Date: Fri Apr 28 10:33:17 2006 New Revision: 26498 Modified: lxml/trunk/doc/valgrind.txt Log: changed cmd line in doc/valgrind.txt to let valgrind do leak-check Modified: lxml/trunk/doc/valgrind.txt
============================================================================== --- lxml/trunk/doc/valgrind.txt (original) +++ lxml/trunk/doc/valgrind.txt Fri Apr 28 10:33:17 2006 @@ -1,3 +1,3 @@ The command used to run the tests with valgrind: -valgrind --tool=memcheck --suppressions=valgrind-python.supp python2.3 test.py +valgrind --tool=memcheck --leak-check=full --suppressions=valgrind-python.supp python2.3 test.py From scoder at codespeak.net Fri Apr 28 10:35:54 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 10:35:56 2006 Subject: [Lxml-checkins] r26500 - lxml/trunk/src/lxml/tests Message-ID: <20060428083554.D8B3F100A4@code0.codespeak.net> Author: scoder Date: Fri Apr 28 10:35:53 2006 New Revision: 26500 Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py Log: cosmetics in test case Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Fri Apr 28 10:35:53 2006 @@ -139,7 +139,7 @@ return "hello %s and %s" % (a, b) extension = {(None, 'foo'): foo} tree = self.parse('') - e = etree.XPathEvaluator(tree, None, [extension]) + e = etree.XPathEvaluator(tree, extensions=[extension]) self.assertRaises(TypeError, e.evaluate, "foo('you')") def test_xpath_extensions_error(self): From scoder at codespeak.net Fri Apr 28 11:36:47 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 11:36:48 2006 Subject: [Lxml-checkins] r26508 - lxml/trunk/src/lxml Message-ID: <20060428093647.35A75100AD@code0.codespeak.net> Author: scoder Date: Fri Apr 28 11:36:46 2006 New Revision: 26508 Modified: lxml/trunk/src/lxml/xslt.pxi Log: rewrote buggy handling of nodes generated in Python extensions * now cleans up node-sets the way they were created (instead of a straight 'xmlXPathFreeObject') * temporary Python refs are cleared after XSLT resp. 
XPath execution, /before/ returning from user API call * result node-sets are freed when raising exceptions Elements from result node sets are now deep-copied if they came from other documents than the input document in XSLT or the context document in XPath. This prevents possible multiple-free crashes that could occur before. It fixes the case where elements were created by extension functions, passed through the XPath machinery and then garbage collected twice. Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Fri Apr 28 11:36:46 2006 @@ -620,7 +620,7 @@ if xpathCtxt is NULL: return self._unregisterVariables() - self._registered_variables = [] + del self._registered_variables[:] self._unregister_context() cdef void _unregisterVariables(self): @@ -633,7 +633,7 @@ xpathVarValue = xpath.xmlXPathVariableLookup(xpathCtxt, c_name) if xpathVarValue is not NULL: xpath.xmlXPathRegisterVariable(xpathCtxt, c_name, NULL) - xpath.xmlXPathFreeObject(xpathVarValue) + _freeXPathObject(xpathVarValue) def registerVariables(self, variable_dict): for name, value in variable_dict.items(): @@ -660,20 +660,24 @@ cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, _Document doc): if self._context._exc._has_raised(): if xpathObj is not NULL: - xpath.xmlXPathFreeObject(xpathObj) + _freeXPathObject(xpathObj) xpathObj = NULL + self._context._release_temp_refs() self._context._exc._raise_if_stored() if xpathObj is NULL: + self._context._release_temp_refs() raise XPathSyntaxError, "Error in xpath expression." 
try: result = _unwrapXPathObject(xpathObj, doc) except XPathResultError: - xpath.xmlXPathFreeObject(xpathObj) + _freeXPathObject(xpathObj) + self._context._release_temp_refs() raise - xpath.xmlXPathFreeObject(xpathObj) + _freeXPathObject(xpathObj) + self._context._release_temp_refs() return result @@ -724,7 +728,6 @@ xpathCtxt.node = self._element._c_node doc = self._element._doc - self._context._release_temp_refs() self._context.register_context(xpathCtxt, doc) self._context.registerVariables(_variables) @@ -851,7 +854,6 @@ if python.PyUnicode_Check(obj): obj = _utf8(obj) if python.PyString_Check(obj): - # XXX use the Wrap variant? Or leak... return xpath.xmlXPathNewCString(_cstr(obj)) if python.PyBool_Check(obj): return xpath.xmlXPathNewBoolean(obj) @@ -866,6 +868,7 @@ node = <_NodeBase>element xpath.xmlXPathNodeSetAdd(resultSet, node._c_node) else: + xpath.xmlXPathFreeNodeSet(resultSet) raise XPathResultError, "This is not a node: %s" % element return xpath.xmlXPathWrapNodeSet(resultSet) else: @@ -877,7 +880,7 @@ if xpathObj.type == xpath.XPATH_UNDEFINED: raise XPathResultError, "Undefined xpath result" elif xpathObj.type == xpath.XPATH_NODESET: - return _createNodeSetResult(doc, xpathObj) + return _createNodeSetResult(xpathObj, doc) elif xpathObj.type == xpath.XPATH_BOOLEAN: return bool(xpathObj.boolval) elif xpathObj.type == xpath.XPATH_NUMBER: @@ -897,8 +900,7 @@ else: raise XPathResultError, "Unknown xpath result %s" % str(xpathObj.type) -cdef object _createNodeSetResult(_Document doc, - xpath.xmlXPathObject* xpathObj): +cdef object _createNodeSetResult(xpath.xmlXPathObject* xpathObj, _Document doc): cdef xmlNode* c_node cdef char* s cdef _NodeBase element @@ -909,6 +911,12 @@ for i from 0 <= i < xpathObj.nodesetval.nodeNr: c_node = xpathObj.nodesetval.nodeTab[i] if _isElement(c_node): + if c_node.doc != doc._c_doc: + # XXX: works, but maybe not always the right thing to do? 
+ # XPath: only runs when extensions create or copy trees + # -> we store Python refs to these, so that is OK + # XSLT: can it leak when merging trees from multiple sources? + c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1) element = _elementFactory(doc, c_node) result.append(element) elif c_node.type == tree.XML_TEXT_NODE: @@ -923,6 +931,14 @@ raise NotImplementedError return result +cdef void _freeXPathObject(xpath.xmlXPathObject* xpathObj): + """Free the XPath object, but *never* free the *content* of node sets. + Python dealloc will do that for us. + """ + if xpathObj.nodesetval is not NULL: + xpath.xmlXPathFreeNodeSet(xpathObj.nodesetval) + xpathObj.nodesetval = NULL + xpath.xmlXPathFreeObject(xpathObj) cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt, int nargs): cdef xpath.xmlXPathContext* rctxt @@ -946,6 +962,7 @@ cdef void _extension_function_call(_BaseContext context, xpath.xmlXPathParserContext* ctxt, int nargs): + cdef _NodeBase node cdef _Document doc cdef xpath.xmlXPathObject* obj cdef int i @@ -956,17 +973,14 @@ args.reverse() try: - # call the function res = context._called_function(None, *args) - # hold python objects temporarily so that they won't get deallocated - # during processing - context._hold(res) - # now wrap for XPath consumption + # wrap result for XPath consumption obj = _wrapXPathObject(res) + # prevent Python from deallocating elements handed to libxml2 + context._hold(res) xpath.valuePush(ctxt, obj) except: xpath.xmlXPathErr( ctxt, xmlerror.XML_XPATH_EXPR_ERROR - xmlerror.XML_XPATH_EXPRESSION_OK) context._exc._store_raised() - return From scoder at codespeak.net Fri Apr 28 11:42:02 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 11:42:03 2006 Subject: [Lxml-checkins] r26509 - lxml/trunk/src/lxml Message-ID: <20060428094202.A3790100AD@code0.codespeak.net> Author: scoder Date: Fri Apr 28 11:42:01 2006 New Revision: 26509 Modified: lxml/trunk/src/lxml/xslt.pxi Log: allow None to be 
returned from extension function: convert to empty node set Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Fri Apr 28 11:42:01 2006 @@ -859,7 +859,9 @@ return xpath.xmlXPathNewBoolean(obj) if python.PyNumber_Check(obj): return xpath.xmlXPathNewFloat(obj) - if isinstance(obj, _NodeBase): + if obj is None: + obj = () + elif isinstance(obj, _NodeBase): obj = (obj,) if python.PySequence_Check(obj): resultSet = xpath.xmlXPathNodeSetCreate(NULL) From scoder at codespeak.net Fri Apr 28 11:45:05 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 11:45:05 2006 Subject: [Lxml-checkins] r26510 - lxml/trunk/src/lxml/tests Message-ID: <20060428094505.3D97F100AD@code0.codespeak.net> Author: scoder Date: Fri Apr 28 11:45:04 2006 New Revision: 26510 Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py Log: lengthy test case for mixing XPath extensions, variables and exceptions Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Fri Apr 28 11:45:04 2006 @@ -182,6 +182,68 @@ self.assertEquals(1, len(r)) self.assertEquals("true", r[0].get('attr')) + def test_xpath_extensions_mix(self): + x = self.parse('') + + class LocalException(Exception): + pass + + def foo(evaluator, a, varval): + etree.Element("DUMMY") + if varval == 0: + raise LocalException + elif varval == 1: + return () + elif varval == 2: + return None + elif varval == 3: + return a[0][0] + a = a[0] + if a.get("attr") == str(varval): + return a + else: + return etree.Element("NODE") + + extension = {(None, 'foo'): foo} + e = etree.XPathEvaluator(x, extensions=[extension]) + del x + + self.assertRaises(LocalException, e.evaluate, "foo(., 0)") + 
self.assertRaises(LocalException, e.evaluate, "foo(., $value)", value=0) + + r = e.evaluate("foo(., $value)", value=1) + self.assertEqual(len(r), 0) + + r = e.evaluate("foo(., 1)") + self.assertEqual(len(r), 0) + + r = e.evaluate("foo(., $value)", value=2) + self.assertEqual(len(r), 0) + + r = e.evaluate("foo(., $value)", value=3) + self.assertEqual(len(r), 1) + self.assertEqual(r[0].tag, "test") + + r = e.evaluate("foo(., $value)", value="false") + self.assertEqual(len(r), 1) + self.assertEqual(r[0].tag, "NODE") + + r = e.evaluate("foo(., 'false')") + self.assertEqual(len(r), 1) + self.assertEqual(r[0].tag, "NODE") + + r = e.evaluate("foo(., 'true')") + self.assertEqual(len(r), 1) + self.assertEqual(r[0].tag, "a") + self.assertEqual(r[0][0].tag, "test") + + r = e.evaluate("foo(., $value)", value="true") + self.assertEqual(len(r), 1) + self.assertEqual(r[0].tag, "a") + + self.assertRaises(LocalException, e.evaluate, "foo(., 0)") + self.assertRaises(LocalException, e.evaluate, "foo(., $value)", value=0) + class ETreeXPathClassTestCase(HelperTestCase): "Tests for the XPath class" From scoder at codespeak.net Fri Apr 28 11:50:23 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 11:50:24 2006 Subject: [Lxml-checkins] r26513 - lxml/trunk/src/lxml Message-ID: <20060428095023.9F1F010079@code0.codespeak.net> Author: scoder Date: Fri Apr 28 11:50:22 2006 New Revision: 26513 Modified: lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/xslt.pxi Log: small C-ification Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Fri Apr 28 11:50:22 2006 @@ -20,6 +20,7 @@ cdef int PyList_GET_SIZE(object l) cdef int PyList_Append(object l, object obj) + cdef int PyList_Reverse(object l) cdef int PyDict_SetItemString(object d, char* key, object value) cdef int PyDict_SetItem(object d, object key, object value) cdef 
PyObject* PyDict_GetItemString(object d, char* key) Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Fri Apr 28 11:50:22 2006 @@ -972,7 +972,7 @@ doc = context._doc for i from 0 <= i < nargs: python.PyList_Append(args, _unwrapXPathObject(xpath.valuePop(ctxt), doc)) - args.reverse() + python.PyList_Reverse(args) try: res = context._called_function(None, *args) From scoder at codespeak.net Fri Apr 28 11:57:28 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 11:57:30 2006 Subject: [Lxml-checkins] r26514 - lxml/trunk/src/lxml Message-ID: <20060428095728.CDE5F10079@code0.codespeak.net> Author: scoder Date: Fri Apr 28 11:57:27 2006 New Revision: 26514 Modified: lxml/trunk/src/lxml/xslt.pxi Log: call XSLT function lookup only from within XSLT, not from XPath evaluation Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Fri Apr 28 11:57:27 2006 @@ -191,8 +191,10 @@ function = context._prepare_function_call(ns_uri, c_name) if function is not None: return _call_prepared_function - else: + elif isinstance(context, _XSLTContext): return xslt.xsltExtModuleFunctionLookup(c_name, c_ns_uri) + else: + return NULL ################################################################################ From scoder at codespeak.net Fri Apr 28 12:47:32 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 12:47:33 2006 Subject: [Lxml-checkins] r26518 - lxml/trunk/src/lxml Message-ID: <20060428104732.951BD1009F@code0.codespeak.net> Author: scoder Date: Fri Apr 28 12:47:31 2006 New Revision: 26518 Modified: lxml/trunk/src/lxml/xpath.pxd lxml/trunk/src/lxml/xslt.pxi Log: cleanup in extension functions Modified: lxml/trunk/src/lxml/xpath.pxd 
============================================================================== --- lxml/trunk/src/lxml/xpath.pxd (original) +++ lxml/trunk/src/lxml/xpath.pxd Fri Apr 28 12:47:31 2006 @@ -14,6 +14,31 @@ XPATH_USERS = 8 XPATH_XSLT_TREE = 9 + ctypedef enum xmlXPathError: + XPATH_EXPRESSION_OK = 0 + XPATH_NUMBER_ERROR = 1 + XPATH_UNFINISHED_LITERAL_ERROR = 2 + XPATH_START_LITERAL_ERROR = 3 + XPATH_VARIABLE_REF_ERROR = 4 + XPATH_UNDEF_VARIABLE_ERROR = 5 + XPATH_INVALID_PREDICATE_ERROR = 6 + XPATH_EXPR_ERROR = 7 + XPATH_UNCLOSED_ERROR = 8 + XPATH_UNKNOWN_FUNC_ERROR = 9 + XPATH_INVALID_OPERAND = 10 + XPATH_INVALID_TYPE = 11 + XPATH_INVALID_ARITY = 12 + XPATH_INVALID_CTXT_SIZE = 13 + XPATH_INVALID_CTXT_POSITION = 14 + XPATH_MEMORY_ERROR = 15 + XPTR_SYNTAX_ERROR = 16 + XPTR_RESOURCE_ERROR = 17 + XPTR_SUB_RESOURCE_ERROR = 18 + XPATH_UNDEF_PREFIX_ERROR = 19 + XPATH_ENCODING_ERROR = 20 + XPATH_INVALID_CHAR_ERROR = 21 + XPATH_INVALID_CTXT = 22 + ctypedef struct xmlNodeSet: int nodeNr int nodeMax Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Fri Apr 28 12:47:31 2006 @@ -21,6 +21,9 @@ class XPathContextError(XPathError): pass +class XPathFunctionError(XPathError): + pass + class XPathResultError(XPathError): pass @@ -32,6 +35,7 @@ cdef class _BaseContext: cdef xpath.xmlXPathContext* _xpathCtxt + cdef xpath.xmlXPathFuncLookupFunc _ext_lookup_function cdef _Document _doc cdef object _extensions cdef object _namespaces @@ -92,8 +96,8 @@ namespaces = self._namespaces if namespaces is not None: self.registerNamespaces(namespaces) - xpath.xmlXPathRegisterFuncLookup(self._xpathCtxt, _function_check, - self) + xpath.xmlXPathRegisterFuncLookup( + self._xpathCtxt, self._ext_lookup_function, self) cdef _unregister_context(self): self._unregisterNamespaces() @@ -132,14 +136,14 @@ # extension functions - cdef _prepare_function_call(self, 
ns_uri_utf, name_utf): + cdef int _prepare_function_call(self, ns_uri_utf, name_utf): cdef python.PyObject* dict_result key = (ns_uri_utf, name_utf) dict_result = python.PyDict_GetItem(self._function_cache, key) if dict_result is not NULL: function = dict_result self._called_function = function - return function + return function is not None dict_result = python.PyDict_GetItem(self._extensions, ns_uri_utf) if dict_result is not NULL: @@ -151,7 +155,7 @@ python.PyDict_SetItem(self._function_cache, key, function) self._called_function = function - return function + return function is not None # Python reference keeping during XPath function evaluation @@ -179,7 +183,8 @@ #print "Holding document:", element._doc._c_doc self._temp_refs.add(element._doc) -cdef xpath.xmlXPathFunction _function_check(void* ctxt, char* c_name, char* c_ns_uri): +cdef xpath.xmlXPathFunction _function_check(void* ctxt, + char* c_name, char* c_ns_uri): cdef _BaseContext context if c_name is NULL: return NULL @@ -188,15 +193,21 @@ else: ns_uri = c_ns_uri context = <_BaseContext>ctxt - function = context._prepare_function_call(ns_uri, c_name) - if function is not None: + if context._prepare_function_call(ns_uri, c_name): return _call_prepared_function - elif isinstance(context, _XSLTContext): - return xslt.xsltExtModuleFunctionLookup(c_name, c_ns_uri) else: return NULL - +cdef xpath.xmlXPathFunction _xslt_function_check(void* ctxt, + char* c_name, char* c_ns_uri): + cdef xpath.xmlXPathFunction result + result = _function_check(ctxt, c_name, c_ns_uri) + if result is NULL: + return xslt.xsltExtModuleFunctionLookup(c_name, c_ns_uri) + else: + return result + + ################################################################################ # XSLT document loaders @@ -302,8 +313,9 @@ cdef xslt.xsltTransformContext* _xsltCtxt def __init__(self, namespaces, extensions): self._xsltCtxt = NULL + self._ext_lookup_function = _xslt_function_check if extensions and None in extensions: - raise 
XSLTExtensionError, "extensions must have non-empty namespaces" + raise XSLTExtensionError, "extensions must not have empty namespaces" _BaseContext.__init__(self, namespaces, extensions) cdef register_context(self, xslt.xsltTransformContext* xsltCtxt, @@ -603,9 +615,10 @@ cdef object _variables cdef object _registered_variables def __init__(self, namespaces, extensions, variables): - _BaseContext.__init__(self, namespaces, extensions) + self._ext_lookup_function = _function_check self._variables = variables self._registered_variables = [] + _BaseContext.__init__(self, namespaces, extensions) cdef register_context(self, xpath.xmlXPathContext* xpathCtxt, _Document doc): self._set_xpath_context(xpathCtxt) @@ -781,7 +794,7 @@ path = _utf8(path) self._xpath = xpath.xmlXPathCompile(_cstr(path)) if self._xpath is NULL: - raise XPathSyntaxError, "Error in xpath expression." + raise XPathSyntaxError, "Error in XPath expression" self._xpathCtxt = xpath.xmlXPathNewContext(NULL) def __call__(self, _etree_or_element, **_variables): @@ -954,8 +967,12 @@ uri = rctxt.functionURI else: uri = None - context._prepare_function_call(uri, name) - _extension_function_call(context, ctxt, nargs) + if context._prepare_function_call(uri, name): + _extension_function_call(context, ctxt, nargs) + else: + xpath.xmlXPathErr(ctxt, xpath.XPATH_EXPR_ERROR) + exception = XPathFunctionError("XPath function {%s}%s not found" % (uri, name)) + context._exc._store_exception(exception) cdef void _call_prepared_function(xpath.xmlXPathParserContext* ctxt, int nargs): cdef xpath.xmlXPathContext* rctxt @@ -970,13 +987,14 @@ cdef _Document doc cdef xpath.xmlXPathObject* obj cdef int i - args = [] doc = context._doc - for i from 0 <= i < nargs: - python.PyList_Append(args, _unwrapXPathObject(xpath.valuePop(ctxt), doc)) - python.PyList_Reverse(args) - try: + args = [] + for i from 0 <= i < nargs: + o = _unwrapXPathObject(xpath.valuePop(ctxt), doc) + python.PyList_Append(args, o) + python.PyList_Reverse(args) + 
res = context._called_function(None, *args) # wrap result for XPath consumption obj = _wrapXPathObject(res) @@ -984,7 +1002,5 @@ context._hold(res) xpath.valuePush(ctxt, obj) except: - xpath.xmlXPathErr( - ctxt, - xmlerror.XML_XPATH_EXPR_ERROR - xmlerror.XML_XPATH_EXPRESSION_OK) + xpath.xmlXPathErr(ctxt, xpath.XPATH_EXPR_ERROR) context._exc._store_raised() From scoder at codespeak.net Fri Apr 28 12:51:47 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 12:51:50 2006 Subject: [Lxml-checkins] r26521 - lxml/trunk/src/lxml Message-ID: <20060428105147.11D49100B5@code0.codespeak.net> Author: scoder Date: Fri Apr 28 12:51:43 2006 New Revision: 26521 Modified: lxml/trunk/src/lxml/nsclasses.pxi Log: remove unused XSLTElement class Modified: lxml/trunk/src/lxml/nsclasses.pxi ============================================================================== --- lxml/trunk/src/lxml/nsclasses.pxi (original) +++ lxml/trunk/src/lxml/nsclasses.pxi Fri Apr 28 12:51:43 2006 @@ -10,10 +10,6 @@ persistent state of elements must be stored in the underlying XML.""" pass -cdef class XSLTElement: - "NOT IMPLEMENTED YET!" - pass - cdef object __NAMESPACE_REGISTRIES __NAMESPACE_REGISTRIES = {} @@ -56,7 +52,6 @@ cdef object _ns_uri_utf cdef object _classes cdef object _extensions - cdef object _xslt_elements cdef char* _c_ns_uri_utf def __init__(self, ns_uri): self._ns_uri = ns_uri @@ -68,7 +63,6 @@ self._c_ns_uri_utf = _cstr(self._ns_uri_utf) self._classes = {} self._extensions = {} - self._xslt_elements = {} def update(self, class_dict_iterable): """Forgivingly update the registry. If registered values are @@ -89,8 +83,6 @@ d = self._classes elif name is None: raise NamespaceRegistryError, "Registered name can only be None for elements." 
- elif python.PyType_Check(item) and issubclass(item, XSLTElement): - d = self._xslt_elements elif callable(item): d = self._extensions else: @@ -115,7 +107,6 @@ def clear(self): self._classes.clear() self._extensions.clear() - #self.self._xslt_elements.clear() def __repr__(self): return "Namespace(%r)" % self._ns_uri From scoder at codespeak.net Fri Apr 28 19:57:28 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 19:57:29 2006 Subject: [Lxml-checkins] r26532 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20060428175728.44BF4100A3@code0.codespeak.net> Author: scoder Date: Fri Apr 28 19:57:25 2006 New Revision: 26532 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/cstd.pxd lxml/trunk/src/lxml/etree.h lxml/trunk/src/lxml/tests/test_xslt.py lxml/trunk/src/lxml/tree.pxd lxml/trunk/src/lxml/xmlerror.pxi lxml/trunk/src/lxml/xslt.pxi Log: XSLT error reporting Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Apr 28 19:57:25 2006 @@ -7,6 +7,8 @@ Features added -------------- +* Error reporting now also works in XSLT + * Support for custom document loaders (URI resolvers) in parsers and XSLT, resolvers are registered at parser level Modified: lxml/trunk/src/lxml/cstd.pxd ============================================================================== --- lxml/trunk/src/lxml/cstd.pxd (original) +++ lxml/trunk/src/lxml/cstd.pxd Fri Apr 28 19:57:25 2006 @@ -3,3 +3,12 @@ cdef void* malloc(int size) void free(void* ptr) +cdef extern from "stdarg.h": + ctypedef void *va_list + void va_start(va_list ap, void *last) + void va_end(va_list ap) + +cdef extern from "etree.h": + cdef int va_int(va_list ap) + cdef char *va_charptr(va_list ap) + Modified: lxml/trunk/src/lxml/etree.h ============================================================================== --- lxml/trunk/src/lxml/etree.h (original) +++ 
lxml/trunk/src/lxml/etree.h Fri Apr 28 19:57:25 2006 @@ -12,4 +12,8 @@ ((c_node)->type == XML_ELEMENT_NODE || \ (c_node)->type == XML_COMMENT_NODE) +/* v_arg functions */ +#define va_int(ap) va_arg(ap, int) +#define va_charptr(ap) va_arg(ap, char *) + #endif /*HAS_ETREE_H*/ Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Fri Apr 28 19:57:25 2006 @@ -75,7 +75,6 @@ self.assertRaises(TypeError, etree.XSLT, None) def test_xslt_input_partial_doc(self): - tree = self.parse('BC') style = self.parse('''\ BC') @@ -406,6 +406,18 @@ self.assertEquals(root[0].tag, '{http://www.w3.org/1999/XSL/Transform}stylesheet') + def test_xslt_document_error(self): + # make sure document('') works from parsed strings + xslt = etree.XSLT(etree.XML("""\ + + + TEXT + + +""")) + self.assertRaises(etree.XSLTApplyError, xslt, etree.XML('')) + def test_exslt_regexp_test(self): xslt = etree.XSLT(etree.XML("""\ self, _localReceiveError) + xmlerror.xmlSetStructuredErrorFunc(self, _receiveError) cdef void disconnect(self): - xmlerror.xmlSetStructuredErrorFunc(NULL, _globalReceiveError) + xmlerror.xmlSetStructuredErrorFunc(NULL, _receiveError) cdef void _receive(self, xmlerror.xmlError* error): cdef _LogEntry entry @@ -159,6 +168,15 @@ __GLOBAL_ERROR_LOG.receive(entry) self.receive(entry) + cdef void _receiveGeneric(self, int domain, int type, int level, int line, + message, filename): + cdef _LogEntry entry + entry = _LogEntry() + entry._setGeneric(domain, type, level, line, message, filename) + if __GLOBAL_ERROR_LOG is not self: + __GLOBAL_ERROR_LOG.receive(entry) + self.receive(entry) + def receive(self, entry): python.PyList_Append(self._entries, entry) @@ -198,7 +216,7 @@ logger = logging.getLogger(name) else: logger = logging.getLogger() - self._log = logger.log + self._log = logger.log def copy(self): return self @@ 
-220,16 +238,56 @@ return __GLOBAL_ERROR_LOG.copy() # local log function: forward error to logger object -cdef void _localReceiveError(void* c_log_handler, xmlerror.xmlError* error): +cdef void _receiveError(void* c_log_handler, xmlerror.xmlError* error): cdef _ErrorLog log_handler if __DEBUG != 0: - log_handler = <_ErrorLog>c_log_handler + if c_log_handler is not NULL: + log_handler = <_ErrorLog>c_log_handler + else: + log_handler = __GLOBAL_ERROR_LOG log_handler._receive(error) -# global log functions: overridden by local functions -cdef void _globalReceiveError(void* userData, xmlerror.xmlError* error): - if __DEBUG != 0: - __GLOBAL_ERROR_LOG._receive(error) +cdef void _receiveGenericError(void* c_log_handler, char* msg, ...): + cdef cstd.va_list args + cdef _ErrorLog log_handler + cdef char* c_text + cdef char* c_filename + cdef char* c_element + cdef int c_line + if __DEBUG == 0 or msg == NULL or tree.strlen(msg) < 10: + return + if c_log_handler is not NULL: + log_handler = <_ErrorLog>c_log_handler + else: + log_handler = __GLOBAL_ERROR_LOG + + cstd.va_start(args, msg) + c_text = cstd.va_charptr(args) + c_filename = cstd.va_charptr(args) + c_line = cstd.va_int(args) + c_element = cstd.va_charptr(args) + cstd.va_end(args) + + if c_text is NULL: + message = None + elif c_element is NULL: + message = funicode(c_text) + else: + message = "%s (element '%s')" % ( + funicode(c_text), funicode(c_element)) + + if c_filename is not NULL and tree.strlen(c_filename) > 0: + if tree.strncmp(c_filename, 'XSLT:', 5) == 0: + filename = '' + else: + filename = funicode(c_filename) + else: + filename = None + + log_handler._receiveGeneric(xmlerror.XML_FROM_XSLT, + xmlerror.XML_ERR_OK, + xmlerror.XML_ERR_ERROR, + c_line, message, filename) # dummy function: no debug output at all cdef void _nullGenericErrorFunc(void* ctxt, char* msg, ...): @@ -238,11 +296,10 @@ # setup for global log: cdef void _logLibxmlErrors(): xmlerror.xmlSetGenericErrorFunc(NULL, _nullGenericErrorFunc) - 
xmlerror.xmlSetStructuredErrorFunc(NULL, _globalReceiveError) + xmlerror.xmlSetStructuredErrorFunc(NULL, _receiveError) cdef void _logLibxsltErrors(): - xslt.xsltSetGenericErrorFunc(NULL, _nullGenericErrorFunc) - # xslt.xsltSetTransformErrorFunc + xslt.xsltSetGenericErrorFunc(NULL, _receiveGenericError) # init global logging initThreadLogging() Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Fri Apr 28 19:57:25 2006 @@ -355,6 +355,7 @@ cdef xslt.xsltStylesheet* _c_style cdef _XSLTResolverContext _xslt_resolver_context cdef _ExsltRegExp _regexp + cdef _ErrorLog _error_log def __init__(self, xslt_input, extensions=None, regexp=True): cdef xslt.xsltStylesheet* c_style @@ -394,12 +395,13 @@ self._c_style = c_style self._context = _XSLTContext(None, extensions) + self._error_log = _ErrorLog() if regexp: self._regexp = _ExsltRegExp() else: self._regexp = None # XXX is it worthwile to use xsltPrecomputeStylesheet here? 
- + def __dealloc__(self): if self._xslt_resolver_context is not None and \ self._xslt_resolver_context._c_style_doc is not NULL: @@ -407,6 +409,10 @@ # this cleans up copy of doc as well xslt.xsltFreeStylesheet(self._c_style) + property error_log: + def __get__(self): + return self._error_log.copy() + def __call__(self, _input, **_kw): cdef _Document input_doc cdef _NodeBase root_node @@ -432,6 +438,10 @@ _destroyFakeDoc(input_doc._c_doc, c_doc) raise XSLTApplyError, "Error preparing stylesheet run" + self._error_log.clear() + xslt.xsltSetTransformErrorFunc(transform_ctxt, self._error_log, + _receiveGenericError) + ptemp = c_doc._private c_doc._private = resolver_context @@ -527,7 +537,6 @@ # enable EXSLT support for XSLT xslt.exsltRegisterAll() - ################################################################################ # EXSLT regexp implementation From scoder at codespeak.net Fri Apr 28 21:11:26 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 21:11:28 2006 Subject: [Lxml-checkins] r26534 - lxml/trunk/src/lxml Message-ID: <20060428191126.4C0DD1008E@code0.codespeak.net> Author: scoder Date: Fri Apr 28 21:11:25 2006 New Revision: 26534 Modified: lxml/trunk/src/lxml/xslt.pxi Log: connect XSLT error log during transformation to receive also libxml2 errors Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Fri Apr 28 21:11:25 2006 @@ -438,7 +438,7 @@ _destroyFakeDoc(input_doc._c_doc, c_doc) raise XSLTApplyError, "Error preparing stylesheet run" - self._error_log.clear() + self._error_log.connect() xslt.xsltSetTransformErrorFunc(transform_ctxt, self._error_log, _receiveGenericError) @@ -480,6 +480,7 @@ c_doc._private = ptemp # restore _private before _destroyFakeDoc! 
_destroyFakeDoc(input_doc._c_doc, c_doc) + self._error_log.disconnect() if self._xslt_resolver_context._has_raised(): if c_result is not NULL: tree.xmlFreeDoc(c_result) From scoder at codespeak.net Fri Apr 28 21:11:40 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 21:11:42 2006 Subject: [Lxml-checkins] r26535 - in lxml/trunk: doc src/lxml src/lxml/tests Message-ID: <20060428191140.95D9D1008E@code0.codespeak.net> Author: scoder Date: Fri Apr 28 21:11:37 2006 New Revision: 26535 Added: lxml/trunk/src/lxml/tests/test.xsd Modified: lxml/trunk/doc/api.txt lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/relaxng.pxi lxml/trunk/src/lxml/tests/test_xmlschema.py lxml/trunk/src/lxml/xmlschema.pxi Log: merged APIs of RelaxNG and XMLSchema into new _Validator superclass, updated doctests Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Fri Apr 28 21:11:37 2006 @@ -67,8 +67,6 @@ >>> print entry.domain_name, entry.type_name, entry.filename PARSER ERR_TAG_NOT_FINISHED -XSLT error messages are not currently available through the lxml API. - xpath method on ElementTree, Element ------------------------------------ @@ -221,9 +219,9 @@ >>> relaxng_doc = lxml.etree.parse(f) >>> relaxng = lxml.etree.RelaxNG(relaxng_doc) -You can then validate some ElementTree document with this. You'll get -back true if the document is valid against the Relax NG schema, and -false if not:: +You can then validate some ElementTree document against the schema. You'll get +back True if the document is valid against the Relax NG schema, and False if +not:: >>> valid = StringIO('') >>> doc = lxml.etree.parse(valid) @@ -235,6 +233,23 @@ >>> relaxng.validate(doc2) 0 +Calling the schema object has the same effect as calling its validate +method. 
This is sometimes used in conditional statements:: + + >>> invalid = StringIO('') + >>> doc2 = lxml.etree.parse(invalid) + >>> if not relaxng(doc2): + ... print "invalid!" + invalid! + +If you prefer getting an exception when validating, you can use the +assertValid method:: + + >>> relaxng.assertValid(doc2) + Traceback (most recent call last): + [...] + DocumentInvalid: Document does not comply with schema + Starting with version 0.9, lxml now has a simple API to report the errors generated by libxml2. If you want to find out why the validation failed in the second case, you can look up the error log of the validation process and check @@ -250,7 +265,7 @@ appeares during the validation. Similar to XSLT, there's also a less efficient but easier shortcut method to -do RelaxNG validation:: +do one-shot RelaxNG validation:: >>> doc.relaxng(relaxng_doc) 1 @@ -293,6 +308,23 @@ >>> xmlschema.validate(doc2) 0 +Calling the schema object has the same effect as calling its validate +method. This is sometimes used in conditional statements:: + + >>> invalid = StringIO('') + >>> doc2 = lxml.etree.parse(invalid) + >>> if not xmlschema(doc2): + ... print "invalid!" + invalid! + +If you prefer getting an exception when validating, you can use the +assertValid method:: + + >>> xmlschema.assertValid(doc2) + Traceback (most recent call last): + [...] + DocumentInvalid: Document does not comply with schema + Error reporting works like for the RelaxNG class:: >>> log = xmlschema.error_log Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Fri Apr 28 21:11:37 2006 @@ -50,6 +50,9 @@ class LxmlSyntaxError(LxmlError, SyntaxError): pass +class DocumentInvalid(LxmlError): + pass + class XIncludeError(LxmlError): pass @@ -1349,18 +1352,51 @@ # include submodules +include "proxy.pxi" # Proxy handling (element backpointers/memory/etc.) 
include "xmlerror.pxi" # error and log handling -include "xmlid.pxi" # XMLID and IDDict include "nsclasses.pxi" # Namespace implementation and registry include "docloader.pxi" # Support for custom document loaders include "parser.pxi" # XML Parser +include "xmlid.pxi" # XMLID and IDDict include "xslt.pxi" # XPath and XSLT + + +################################################################################ +# Validation + +cdef class _Validator: + "Base class for XML validators." + cdef _ErrorLog _error_log + def __init__(self): + self._error_log = _ErrorLog() + + def validate(self, etree): + """Validate the document using this schema. + + Returns true if document is valid, false if not.""" + return self(etree) + + def assertValid(self, etree): + "Raises DocumentInvalid if the document does not comply with the schema." + if not self(etree): + raise DocumentInvalid, "Document does not comply with schema" + + def assert_(self, etree): + "Raises AssertionError if the document does not comply with the schema." + if not self(etree): + raise AssertionError, "Document does not comply with schema" + + property error_log: + def __get__(self): + return self._error_log.copy() + include "relaxng.pxi" # RelaxNG include "xmlschema.pxi" # XMLSchema -include "proxy.pxi" # Proxy handling (element backpointers/memory/etc.) +################################################################################ # Private helper functions + cdef _Document _documentOrRaise(object input): cdef _Document doc doc = _documentOf(input) Modified: lxml/trunk/src/lxml/relaxng.pxi ============================================================================== --- lxml/trunk/src/lxml/relaxng.pxi (original) +++ lxml/trunk/src/lxml/relaxng.pxi Fri Apr 28 21:11:37 2006 @@ -13,13 +13,11 @@ ################################################################################ # RelaxNG -cdef class RelaxNG: +cdef class RelaxNG(_Validator): """Turn a document into a Relax NG validator. 
Can also load from filesystem directly given file object or filename. """ cdef relaxng.xmlRelaxNG* _c_schema - cdef _ErrorLog _error_log - def __init__(self, etree=None, file=None): cdef _Document doc cdef _NodeBase root_node @@ -64,12 +62,12 @@ relaxng.xmlRelaxNGFreeParserCtxt(parser_ctxt) if fake_c_doc is not NULL: _destroyFakeDoc(doc._c_doc, fake_c_doc) - self._error_log = _ErrorLog() - + _Validator.__init__(self) + def __dealloc__(self): relaxng.xmlRelaxNGFree(self._c_schema) - - def validate(self, etree): + + def __call__(self, etree): """Validate doc using Relax NG. Returns true if document is valid, false if not.""" @@ -94,7 +92,3 @@ if ret == -1: raise RelaxNGValidateError, "Internal error in Relax NG validation" return ret == 0 - - property error_log: - def __get__(self): - return self._error_log.copy() Added: lxml/trunk/src/lxml/tests/test.xsd ============================================================================== --- (empty file) +++ lxml/trunk/src/lxml/tests/test.xsd Fri Apr 28 21:11:37 2006 @@ -0,0 +1,8 @@ + + + + + + + + Modified: lxml/trunk/src/lxml/tests/test_xmlschema.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xmlschema.py (original) +++ lxml/trunk/src/lxml/tests/test_xmlschema.py Fri Apr 28 21:11:37 2006 @@ -6,7 +6,7 @@ import unittest -from common_imports import etree, HelperTestCase +from common_imports import etree, HelperTestCase, fileInTestDir class ETreeXMLSchemaTestCase(HelperTestCase): def test_xmlschema(self): @@ -45,11 +45,13 @@ self.assertRaises(etree.XMLSchemaParseError, etree.XMLSchema, schema) -## def test_xmlschema_include(self): -## # this will only work if we access the file through path or -## # file object.. -## f = open(fileInTestDir('test1.rng'), 'r') -## schema = etree.RelaxNG(file=f) + def test_xmlschema_file(self): + # this will only work if we access the file through path or + # file object.. 
+ f = open(fileInTestDir('test.xsd'), 'r') + schema = etree.XMLSchema(file=f) + tree_valid = self.parse('') + self.assert_(schema.validate(tree_valid)) def test_xmlschema_shortcut(self): tree_valid = self.parse('') Modified: lxml/trunk/src/lxml/xmlschema.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlschema.pxi (original) +++ lxml/trunk/src/lxml/xmlschema.pxi Fri Apr 28 21:11:37 2006 @@ -13,46 +13,55 @@ ################################################################################ # XMLSchema -cdef class XMLSchema: +cdef class XMLSchema(_Validator): """Turn a document into an XML Schema validator. """ cdef xmlschema.xmlSchema* _c_schema - cdef _ErrorLog _error_log - - def __init__(self, etree): + def __init__(self, etree=None, file=None): cdef _Document doc cdef _NodeBase root_node cdef xmlDoc* fake_c_doc cdef xmlNode* c_node cdef xmlschema.xmlSchemaParserCtxt* parser_ctxt - doc = _documentOrRaise(etree) - root_node = _rootNodeOf(etree) + if etree is not None: + doc = _documentOrRaise(etree) + root_node = _rootNodeOf(etree) + + # work around for libxml2 bug if document is not XML schema at all + c_node = root_node._c_node + if c_node.ns is NULL or c_node.ns.href is NULL or \ + tree.strcmp(c_node.ns.href, 'http://www.w3.org/2001/XMLSchema') != 0: + raise XMLSchemaParseError, "Document is not XML Schema" + + fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) + parser_ctxt = xmlschema.xmlSchemaNewDocParserCtxt(fake_c_doc) + if parser_ctxt is NULL: + _destroyFakeDoc(doc._c_doc, fake_c_doc) + raise XMLSchemaParseError, "Document is not parsable as XML Schema" + self._c_schema = xmlschema.xmlSchemaParse(parser_ctxt) - # work around for libxml2 bug if document is not XML schema at all - c_node = root_node._c_node - if c_node.ns is NULL or c_node.ns.href is NULL or \ - tree.strcmp(c_node.ns.href, 'http://www.w3.org/2001/XMLSchema') != 0: - raise XMLSchemaParseError, "Document is not XML Schema" - - 
fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) - parser_ctxt = xmlschema.xmlSchemaNewDocParserCtxt(fake_c_doc) - if parser_ctxt is NULL: + xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt) _destroyFakeDoc(doc._c_doc, fake_c_doc) - raise XMLSchemaParseError, "Document is not parsable as XML Schema" - self._c_schema = xmlschema.xmlSchemaParse(parser_ctxt) - - xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt) - _destroyFakeDoc(doc._c_doc, fake_c_doc) + elif file is not None: + filename = _getFilenameForFile(file) + if filename is None: + # XXX assume a string object + filename = file + parser_ctxt = xmlschema.xmlSchemaNewParserCtxt(filename) + self._c_schema = xmlschema.xmlSchemaParse(parser_ctxt) + xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt) + else: + raise XMLSchemaParseError, "No tree or file given" if self._c_schema is NULL: raise XMLSchemaParseError, "Document is not valid XML Schema" - self._error_log = _ErrorLog() + _Validator.__init__(self) def __dealloc__(self): xmlschema.xmlSchemaFree(self._c_schema) - def validate(self, etree): + def __call__(self, etree): """Validate doc using XML Schema. Returns true if document is valid, false if not. @@ -78,7 +87,3 @@ if ret == -1: raise XMLSchemaValidateError, "Internal error in XML Schema validation." 
return ret == 0 - - property error_log: - def __get__(self): - return self._error_log.copy() From scoder at codespeak.net Fri Apr 28 21:15:49 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 21:15:50 2006 Subject: [Lxml-checkins] r26536 - lxml/trunk/src/lxml Message-ID: <20060428191549.980FC10094@code0.codespeak.net> Author: scoder Date: Fri Apr 28 21:15:48 2006 New Revision: 26536 Added: lxml/trunk/src/lxml/xpath.pxi - copied unchanged from r26535, lxml/trunk/src/lxml/xslt.pxi Log: new source file xpath.pxi to split xslt.pxi into XSLT and XPath part From scoder at codespeak.net Fri Apr 28 21:17:38 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 21:17:40 2006 Subject: [Lxml-checkins] r26537 - lxml/trunk/src/lxml Message-ID: <20060428191738.8017210094@code0.codespeak.net> Author: scoder Date: Fri Apr 28 21:17:37 2006 New Revision: 26537 Added: lxml/trunk/src/lxml/extensions.pxi - copied unchanged from r26536, lxml/trunk/src/lxml/xslt.pxi Log: new source file extensions.pxi to factor extensions out of xslt.pxi/xpath.pxi From scoder at codespeak.net Fri Apr 28 21:36:11 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 21:36:14 2006 Subject: [Lxml-checkins] r26538 - lxml/trunk/src/lxml Message-ID: <20060428193611.B091010094@code0.codespeak.net> Author: scoder Date: Fri Apr 28 21:36:09 2006 New Revision: 26538 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/extensions.pxi lxml/trunk/src/lxml/xpath.pxi lxml/trunk/src/lxml/xslt.pxi Log: major split of xslt.pxi: XSLT -> xslt.pxi, XPath -> xpath.pxi, extension functions -> extensions.pxi Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Fri Apr 28 21:36:09 2006 @@ -1352,13 +1352,15 @@ # include submodules -include "proxy.pxi" # Proxy handling (element backpointers/memory/etc.) 
-include "xmlerror.pxi" # error and log handling -include "nsclasses.pxi" # Namespace implementation and registry -include "docloader.pxi" # Support for custom document loaders -include "parser.pxi" # XML Parser -include "xmlid.pxi" # XMLID and IDDict -include "xslt.pxi" # XPath and XSLT +include "proxy.pxi" # Proxy handling (element backpointers/memory/etc.) +include "xmlerror.pxi" # error and log handling +include "nsclasses.pxi" # Namespace implementation and registry +include "docloader.pxi" # Support for custom document loaders +include "parser.pxi" # XML Parser +include "xmlid.pxi" # XMLID and IDDict +include "extensions.pxi" # XPath/XSLT extension functions +include "xpath.pxi" # XPath evaluation +include "xslt.pxi" # XSL transformations ################################################################################ Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Fri Apr 28 21:36:09 2006 @@ -1,37 +1,16 @@ -# XSLT and XPath classes, supports for extension functions - -class XSLTError(LxmlError): - pass - -class XSLTParseError(XSLTError): - pass - -class XSLTApplyError(XSLTError): - pass - -class XSLTSaveError(XSLTError): - pass - -class XSLTExtensionError(XSLTError): - pass +# supports for extension functions in XPath and XSLT class XPathError(LxmlError): pass -class XPathContextError(XPathError): - pass - class XPathFunctionError(XPathError): pass class XPathResultError(XPathError): pass -class XPathSyntaxError(LxmlSyntaxError): - pass - ################################################################################ -# support for extension functions in XPath/XSLT +# Base class for XSLT and XPath evaluation contexts: functions, namespaces, ... 
cdef class _BaseContext: cdef xpath.xmlXPathContext* _xpathCtxt @@ -183,8 +162,20 @@ #print "Holding document:", element._doc._c_doc self._temp_refs.add(element._doc) + +def Extension(module, function_mapping, ns_uri=None): + functions = [] + for function_name, xpath_name in function_mapping.items(): + functions[xpath_name] = getattr(module, function_name) + return {ns_uri : functions} + + +################################################################################ +# helper functions + cdef xpath.xmlXPathFunction _function_check(void* ctxt, char* c_name, char* c_ns_uri): + "Module level lookup function for XPath/XSLT functions" cdef _BaseContext context if c_name is NULL: return NULL @@ -198,681 +189,6 @@ else: return NULL -cdef xpath.xmlXPathFunction _xslt_function_check(void* ctxt, - char* c_name, char* c_ns_uri): - cdef xpath.xmlXPathFunction result - result = _function_check(ctxt, c_name, c_ns_uri) - if result is NULL: - return xslt.xsltExtModuleFunctionLookup(c_name, c_ns_uri) - else: - return result - - -################################################################################ -# XSLT document loaders - -cdef class _XSLTResolverContext(_ResolverContext): - cdef xmlDoc* _c_style_doc - cdef BaseParser _parser - def __init__(self, BaseParser parser not None): - _ResolverContext.__init__(self, parser.resolvers) - self._parser = parser - self._c_style_doc = NULL - -cdef xmlDoc* _doc_loader(char* c_uri, tree.xmlDict* c_dict, int parse_options, - void* c_ctxt, xslt.xsltLoadType c_type): - cdef xmlDoc* c_doc - cdef _ResolverRegistry resolvers - cdef _InputDocument doc_ref - cdef _XSLTResolverContext xslt_resolver_context - cdef _XSLTResolverContext doc_resolver_context - cdef _XSLTResolverContext resolver_context - cdef XMLParser parser - # find resolver contexts of stylesheet and transformed doc - c_doc = NULL - doc_resolver_context = None - if c_type == xslt.XSLT_LOAD_DOCUMENT: - c_doc = (c_ctxt).document.doc - if c_doc is not NULL and c_doc._private 
is not NULL: - if isinstance(c_doc._private, _XSLTResolverContext): - doc_resolver_context = <_XSLTResolverContext>c_doc._private - c_doc = (c_ctxt).style.doc - elif c_type == xslt.XSLT_LOAD_STYLESHEET: - c_doc = (c_ctxt).doc - - if c_doc is NULL or c_doc._private is NULL or \ - not isinstance(c_doc._private, _XSLTResolverContext): - # can't call Python without context, fall back to default loader - return XSLT_DOC_DEFAULT_LOADER( - c_uri, c_dict, parse_options, c_ctxt, c_type) - - xslt_resolver_context = <_XSLTResolverContext>c_doc._private - - # quick check if we are looking for the current stylesheet - c_doc = xslt_resolver_context._c_style_doc - if c_doc is not NULL and c_doc.URL is not NULL: - if tree.strcmp(c_uri, c_doc.URL) == 0: - return tree.xmlCopyDoc(c_doc, 1) - - # call the Python document loaders - c_doc = NULL - resolver_context = xslt_resolver_context # currently use only XSLT resolvers - resolvers = resolver_context._resolvers - try: - uri = funicode(c_uri) - doc_ref = resolvers.resolve(uri, None, resolver_context) - - if doc_ref is not None: - if doc_ref._type == PARSER_DATA_EMPTY: - c_doc = _newDoc() - if doc_ref._type == PARSER_DATA_STRING: - c_doc = _internalParseDoc( - _cstr(doc_ref._data_utf), parse_options, - resolver_context) - elif doc_ref._type == PARSER_DATA_FILE: - data = doc_ref._file.read() - c_doc = _internalParseDoc( - _cstr(data), parse_options, - resolver_context) - elif doc_ref._type == PARSER_DATA_FILENAME: - c_doc = _internalParseDocFromFile( - _cstr(doc_ref._data_utf), parse_options, - resolver_context) - if c_doc is not NULL and c_doc.URL is NULL: - c_doc.URL = tree.xmlStrdup(c_uri) - - except Exception, e: - xslt_resolver_context._store_raised() - return NULL - - if c_doc is NULL: - c_doc = XSLT_DOC_DEFAULT_LOADER( - c_uri, c_dict, parse_options, c_ctxt, c_type) - if c_doc is NULL: - message = "Cannot resolve URI %s" % funicode(c_uri) - if c_type == xslt.XSLT_LOAD_DOCUMENT: - exception = XSLTApplyError(message) - else: - 
exception = XSLTParseError(message) - xslt_resolver_context._store_exception(exception) - return NULL - if c_doc is not NULL and c_doc._private is NULL: - c_doc._private = xslt_resolver_context - return c_doc - -cdef xslt.xsltDocLoaderFunc XSLT_DOC_DEFAULT_LOADER -XSLT_DOC_DEFAULT_LOADER = xslt.xsltDocDefaultLoader - -xslt.xsltSetLoaderFunc(_doc_loader) - - -################################################################################ -# XSLT - -cdef class _XSLTContext(_BaseContext): - cdef xslt.xsltTransformContext* _xsltCtxt - def __init__(self, namespaces, extensions): - self._xsltCtxt = NULL - self._ext_lookup_function = _xslt_function_check - if extensions and None in extensions: - raise XSLTExtensionError, "extensions must not have empty namespaces" - _BaseContext.__init__(self, namespaces, extensions) - - cdef register_context(self, xslt.xsltTransformContext* xsltCtxt, - _Document doc): - self._xsltCtxt = xsltCtxt - self._set_xpath_context(xsltCtxt.xpathCtxt) - self._register_context(doc, 0) - xsltCtxt.xpathCtxt.userData = self - - cdef free_context(self): - cdef xslt.xsltTransformContext* xsltCtxt - xsltCtxt = self._xsltCtxt - if xsltCtxt is NULL: - return - self._free_context() - self._xsltCtxt = NULL - xslt.xsltFreeTransformContext(xsltCtxt) - self._release_temp_refs() - - cdef _registerLocalExtensionFunction(self, ns_utf, name_utf, function): - extensions = self._extensions - if extensions is None: - self._extensions = {ns_utf:{name_utf:function}} - else: - if ns_utf in extensions: - ns_extensions = extensions[ns_utf] - else: - ns_extensions = extensions[ns_utf] = {} - python.PyDict_SetItem(ns_extensions, name_utf, function) - -cdef class _ExsltRegExp # forward declaration - -cdef class XSLT: - """Turn a document into an XSLT object. 
- """ - cdef _XSLTContext _context - cdef xslt.xsltStylesheet* _c_style - cdef _XSLTResolverContext _xslt_resolver_context - cdef _ExsltRegExp _regexp - cdef _ErrorLog _error_log - - def __init__(self, xslt_input, extensions=None, regexp=True): - cdef xslt.xsltStylesheet* c_style - cdef xmlDoc* c_doc - cdef xmlDoc* fake_c_doc - cdef _Document doc - cdef _NodeBase root_node - - doc = _documentOrRaise(xslt_input) - root_node = _rootNodeOf(xslt_input) - - # make a copy of the document as stylesheet parsing modifies it - fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) - c_doc = tree.xmlCopyDoc(fake_c_doc, 1) - _destroyFakeDoc(doc._c_doc, fake_c_doc) - - # make sure we always have a stylesheet URL - if c_doc.URL is not NULL: - # handle a bug in older libxml2 versions - tree.xmlFree(c_doc.URL) - if doc._c_doc.URL is not NULL: - c_doc.URL = tree.xmlStrdup(doc._c_doc.URL) - else: - doc_url_utf = "XSLT:__STRING__XSLT__%s" % id(self) - c_doc.URL = tree.xmlStrdup(_cstr(doc_url_utf)) - - self._xslt_resolver_context = _XSLTResolverContext(doc._parser) - # keep a copy in case we need to access the stylesheet via 'document()' - self._xslt_resolver_context._c_style_doc = tree.xmlCopyDoc(c_doc, 1) - c_doc._private = self._xslt_resolver_context - - c_style = xslt.xsltParseStylesheetDoc(c_doc) - if c_style is NULL: - tree.xmlFreeDoc(c_doc) - self._xslt_resolver_context._raise_if_stored() - raise XSLTParseError, "Cannot parse style sheet" - self._c_style = c_style - - self._context = _XSLTContext(None, extensions) - self._error_log = _ErrorLog() - if regexp: - self._regexp = _ExsltRegExp() - else: - self._regexp = None - # XXX is it worthwile to use xsltPrecomputeStylesheet here? 
- - def __dealloc__(self): - if self._xslt_resolver_context is not None and \ - self._xslt_resolver_context._c_style_doc is not NULL: - tree.xmlFreeDoc(self._xslt_resolver_context._c_style_doc) - # this cleans up copy of doc as well - xslt.xsltFreeStylesheet(self._c_style) - - property error_log: - def __get__(self): - return self._error_log.copy() - - def __call__(self, _input, **_kw): - cdef _Document input_doc - cdef _NodeBase root_node - cdef _Document result_doc - cdef _XSLTResolverContext resolver_context - cdef xslt.xsltTransformContext* transform_ctxt - cdef xmlDoc* c_result - cdef xmlDoc* c_doc - cdef char** params - cdef void* ptemp - cdef int i - - input_doc = _documentOrRaise(_input) - root_node = _rootNodeOf(_input) - - resolver_context = _XSLTResolverContext(input_doc._parser) - resolver_context._c_style_doc = self._xslt_resolver_context._c_style_doc - - c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node) - - transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc) - if transform_ctxt is NULL: - _destroyFakeDoc(input_doc._c_doc, c_doc) - raise XSLTApplyError, "Error preparing stylesheet run" - - self._error_log.connect() - xslt.xsltSetTransformErrorFunc(transform_ctxt, self._error_log, - _receiveGenericError) - - ptemp = c_doc._private - c_doc._private = resolver_context - - if _kw: - # allocate space for parameters - # * 2 as we want an entry for both key and value, - # and + 1 as array is NULL terminated - params = cstd.malloc(sizeof(char*) * (len(_kw) * 2 + 1)) - i = 0 - keep_ref = [] - for key, value in _kw.items(): - k = _utf8(key) - python.PyList_Append(keep_ref, k) - v = _utf8(value) - python.PyList_Append(keep_ref, v) - params[i] = _cstr(k) - i = i + 1 - params[i] = _cstr(v) - i = i + 1 - params[i] = NULL - else: - params = NULL - - self._context.register_context(transform_ctxt, input_doc) - if self._regexp is not None: - self._regexp._register_in_context(self._context) - - c_result = 
xslt.xsltApplyStylesheetUser(self._c_style, c_doc, params, - NULL, NULL, transform_ctxt) - - if params is not NULL: - # deallocate space for parameters - cstd.free(params) - - self._context.free_context() - c_doc._private = ptemp # restore _private before _destroyFakeDoc! - _destroyFakeDoc(input_doc._c_doc, c_doc) - - self._error_log.disconnect() - if self._xslt_resolver_context._has_raised(): - if c_result is not NULL: - tree.xmlFreeDoc(c_result) - self._xslt_resolver_context._raise_if_stored() - - if c_result is NULL: - raise XSLTApplyError, "Error applying stylesheet" - - result_doc = _documentFactory(c_result, input_doc._parser) - return _xsltResultTreeFactory(result_doc, self) - - def apply(self, _input, **_kw): - return self.__call__(_input, **_kw) - - def tostring(self, _ElementTree result_tree): - """Save result doc to string based on stylesheet output method. - """ - return str(result_tree) - -cdef class _XSLTResultTree(_ElementTree): - cdef XSLT _xslt - def __str__(self): - cdef char* s - cdef int l - cdef int r - r = xslt.xsltSaveResultToString(&s, &l, self._doc._c_doc, - self._xslt._c_style) - if r == -1: - raise XSLTSaveError, "Error saving XSLT result to string" - if s is NULL: - return '' - result = funicode(s) - tree.xmlFree(s) - return result - -cdef _xsltResultTreeFactory(_Document doc, XSLT xslt): - cdef _XSLTResultTree result - result = <_XSLTResultTree>_newElementTree(doc, None, _XSLTResultTree) - result._xslt = xslt - return result - -# do not register all libxslt extra function, provide only "node-set" -# functions like "output" and "write" are a potential security risk -#xslt.xsltRegisterAllExtras() -xslt.xsltRegisterExtModuleFunction("node-set", - xslt.XSLT_LIBXSLT_NAMESPACE, - xslt.xsltFunctionNodeSet) -xslt.xsltRegisterExtModuleFunction("node-set", - xslt.XSLT_SAXON_NAMESPACE, - xslt.xsltFunctionNodeSet) -xslt.xsltRegisterExtModuleFunction("node-set", - xslt.XSLT_XT_NAMESPACE, - xslt.xsltFunctionNodeSet) - -# enable EXSLT support for XSLT 
-xslt.exsltRegisterAll() - -################################################################################ -# EXSLT regexp implementation - -cdef object RE_COMPILE -RE_COMPILE = re.compile - -cdef class _ExsltRegExp: - cdef object _compile_map - def __init__(self): - self._compile_map = {} - - cdef _make_string(self, value): - if python.PyString_Check(value) or python.PyUnicode_Check(value): - return value - else: - raise TypeError, "Invalid argument type %s" % type(value) - - cdef _compile(self, rexp, ignore_case): - cdef python.PyObject* c_result - rexp = self._make_string(rexp) - key = (rexp, ignore_case) - c_result = python.PyDict_GetItem(self._compile_map, key) - if c_result is not NULL: - return c_result - py_flags = re.UNICODE - if ignore_case: - py_flags = py_flags | re.IGNORECASE - rexp_compiled = RE_COMPILE(rexp, py_flags) - python.PyDict_SetItem(self._compile_map, key, rexp_compiled) - return rexp_compiled - - def test(self, ctxt, s, rexp, flags=''): - flags = self._make_string(flags) - s = self._make_string(s) - rexpc = self._compile(rexp, 'i' in flags) - if rexpc.search(s) is None: - return False - else: - return True - - def match(self, ctxt, s, rexp, flags=''): - flags = self._make_string(flags) - s = self._make_string(s) - rexpc = self._compile(rexp, 'i' in flags) - if 'g' in flags: - results = rexpc.findall(s) - if not results: - return () - result_list = [] - root = Element('matches') - for s_match in results: - elem = SubElement(root, 'match') - elem.text = s_match - python.PyList_Append(result_list, elem) - return result_list - else: - result = rexpc.search(s) - if result is None: - return () - root = Element('match') - root.text = result.group() - return (root,) - - def replace(self, ctxt, s, rexp, flags, replacement): - replacement = self._make_string(replacement) - flags = self._make_string(flags) - s = self._make_string(s) - rexpc = self._compile(rexp, 'i' in flags) - if 'g' in flags: - count = 0 - else: - count = 1 - return 
rexpc.sub(replacement, s, count) - - cdef _register_in_context(self, _XSLTContext context): - ns = "http://exslt.org/regular-expressions" - context._registerLocalExtensionFunction(ns, "test", self.test) - context._registerLocalExtensionFunction(ns, "match", self.match) - context._registerLocalExtensionFunction(ns, "replace", self.replace) - -################################################################################ -# XPath - -cdef class _XPathContext(_BaseContext): - cdef object _variables - cdef object _registered_variables - def __init__(self, namespaces, extensions, variables): - self._ext_lookup_function = _function_check - self._variables = variables - self._registered_variables = [] - _BaseContext.__init__(self, namespaces, extensions) - - cdef register_context(self, xpath.xmlXPathContext* xpathCtxt, _Document doc): - self._set_xpath_context(xpathCtxt) - ns_prefixes = _find_all_extension_prefixes() - if ns_prefixes: - self.registerNamespaces(ns_prefixes) - self._register_context(doc, 1) - if self._variables is not None: - self.registerVariables(self._variables) - - cdef unregister_context(self): - cdef xpath.xmlXPathContext* xpathCtxt - xpathCtxt = self._xpathCtxt - if xpathCtxt is NULL: - return - self._unregisterVariables() - del self._registered_variables[:] - self._unregister_context() - - cdef void _unregisterVariables(self): - cdef xpath.xmlXPathContext* xpathCtxt - cdef xpath.xmlXPathObject* xpathVarValue - cdef char* c_name - xpathCtxt = self._xpathCtxt - for name_utf in self._registered_variables: - c_name = _cstr(name_utf) - xpathVarValue = xpath.xmlXPathVariableLookup(xpathCtxt, c_name) - if xpathVarValue is not NULL: - xpath.xmlXPathRegisterVariable(xpathCtxt, c_name, NULL) - _freeXPathObject(xpathVarValue) - - def registerVariables(self, variable_dict): - for name, value in variable_dict.items(): - name_utf = self._to_utf(name) - self._registerVariable(name_utf, value) - python.PyList_Append(self._registered_variables, name_utf) - - def 
registerVariable(self, name, value): - name_utf = self._to_utf(name) - self._registerVariable(name_utf, value) - python.PyList_Append(self._registered_variables, name_utf) - - cdef void _registerVariable(self, name_utf, value): - xpath.xmlXPathRegisterVariable( - self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value)) - - -cdef class XPathEvaluatorBase: - cdef _XPathContext _context - - def __init__(self, namespaces, extensions, variables=None): - self._context = _XPathContext(namespaces, extensions, variables) - - cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, _Document doc): - if self._context._exc._has_raised(): - if xpathObj is not NULL: - _freeXPathObject(xpathObj) - xpathObj = NULL - self._context._release_temp_refs() - self._context._exc._raise_if_stored() - - if xpathObj is NULL: - self._context._release_temp_refs() - raise XPathSyntaxError, "Error in xpath expression." - - try: - result = _unwrapXPathObject(xpathObj, doc) - except XPathResultError: - _freeXPathObject(xpathObj) - self._context._release_temp_refs() - raise - - _freeXPathObject(xpathObj) - self._context._release_temp_refs() - return result - - -cdef class XPathElementEvaluator(XPathEvaluatorBase): - """Create an XPath evaluator for an element. - - XPath evaluators must not be shared between threads. 
- """ - cdef xpath.xmlXPathContext* _c_ctxt - cdef _Element _element - def __init__(self, _NodeBase element not None, namespaces=None, extensions=None): - cdef xpath.xmlXPathContext* xpathCtxt - cdef int ns_register_status - cdef _Document doc - doc = element._doc - xpathCtxt = xpath.xmlXPathNewContext(doc._c_doc) - if xpathCtxt is NULL: - raise XPathContextError, "Unable to create new XPath context" - self._element = element - self._c_ctxt = xpathCtxt - XPathEvaluatorBase.__init__(self, namespaces, extensions) - - def __dealloc__(self): - if self._c_ctxt is not NULL: - xpath.xmlXPathFreeContext(self._c_ctxt) - - def registerNamespace(self, prefix, uri): - """Register a namespace with the XPath context. - """ - self._context.addNamespace(prefix, uri) - - def registerNamespaces(self, namespaces): - """Register a prefix -> uri dict. - """ - add = self._context.addNamespace - for prefix, uri in namespaces.items(): - add(prefix, uri) - - def evaluate(self, _path, **_variables): - """Evaluate an XPath expression on the document. Variables may be - provided as keyword arguments. Note that namespaces are currently not - supported for variables.""" - cdef xpath.xmlXPathContext* xpathCtxt - cdef xpath.xmlXPathObject* xpathObj - cdef xmlNode* c_node - cdef _Document doc - xpathCtxt = self._c_ctxt - xpathCtxt.node = self._element._c_node - doc = self._element._doc - - self._context.register_context(xpathCtxt, doc) - self._context.registerVariables(_variables) - - path = _utf8(_path) - xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt) - self._context.unregister_context() - - return self._handle_result(xpathObj, doc) - - #def clone(self): - # # XXX pretty expensive so calling this from callback is probably - # # not desirable - # return XPathEvaluator(self._doc, self._namespaces, self._extensions) - -cdef class XPathDocumentEvaluator(XPathElementEvaluator): - """Create an XPath evaluator for an ElementTree. - - XPath evaluators must not be shared between threads. 
- """ - def __init__(self, _ElementTree etree not None, namespaces=None, extensions=None): - XPathElementEvaluator.__init__( - self, etree._context_node, namespaces, extensions) - -def XPathEvaluator(etree_or_element, namespaces=None, extensions=None): - """Creates and XPath evaluator for an ElementTree or an Element. - - XPath evaluators must not be shared between threads. - """ - if isinstance(etree_or_element, _ElementTree): - return XPathDocumentEvaluator(etree_or_element, namespaces, extensions) - else: - return XPathElementEvaluator(etree_or_element, namespaces, extensions) - -def Extension(module, function_mapping, ns_uri=None): - functions = [] - for function_name, xpath_name in function_mapping.items(): - functions[xpath_name] = getattr(module, function_name) - return {ns_uri : functions} - -cdef class XPath(XPathEvaluatorBase): - cdef xpath.xmlXPathContext* _xpathCtxt - cdef xpath.xmlXPathCompExpr* _xpath - cdef object _prefix_map - cdef readonly object path - - def __init__(self, path, namespaces=None, extensions=None): - XPathEvaluatorBase.__init__(self, namespaces, extensions, None) - self.path = path - path = _utf8(path) - self._xpath = xpath.xmlXPathCompile(_cstr(path)) - if self._xpath is NULL: - raise XPathSyntaxError, "Error in XPath expression" - self._xpathCtxt = xpath.xmlXPathNewContext(NULL) - - def __call__(self, _etree_or_element, **_variables): - cdef xpath.xmlXPathContext* xpathCtxt - cdef xpath.xmlXPathObject* xpathObj - cdef _Document document - cdef _NodeBase element - cdef _XPathContext context - - document = _documentOrRaise(_etree_or_element) - element = _rootNodeOf(_etree_or_element) - - xpathCtxt = self._xpathCtxt - xpathCtxt.doc = document._c_doc - xpathCtxt.node = element._c_node - - context = self._context - context._release_temp_refs() - context.register_context(xpathCtxt, document) - context.registerVariables(_variables) - - xpathObj = xpath.xmlXPathCompiledEval(self._xpath, xpathCtxt) - context.unregister_context() - return 
self._handle_result(xpathObj, document) - - def evaluate(self, _tree, **_variables): - return self(_tree, **_variables) - - def __dealloc__(self): - if self._xpathCtxt is not NULL: - xpath.xmlXPathFreeContext(self._xpathCtxt) - if self._xpath is not NULL: - xpath.xmlXPathFreeCompExpr(self._xpath) - -cdef object _replace_strings -cdef object _find_namespaces -_replace_strings = re.compile('("[^"]*")|(\'[^\']*\')').sub -_find_namespaces = re.compile('({[^}]+})').findall - -cdef class ETXPath(XPath): - """Special XPath class that supports the ElementTree {uri} notation for - namespaces.""" - def __init__(self, path, extensions=None): - path_utf, namespaces = self._nsextract_path(_utf8(path)) - XPath.__init__(self, funicode(path_utf), namespaces, extensions) - - cdef _nsextract_path(self, path_utf): - # replace {namespaces} by new prefixes - cdef int i - namespaces = {} - stripped_path = _replace_strings('', path_utf) # remove string literals - namespace_defs = [] - i = 1 - for namespace_def in _find_namespaces(stripped_path): - if namespace_def not in namespace_defs: - prefix = python.PyString_FromFormat("xpp%02d", i) - i = i+1 - python.PyList_Append(namespace_defs, namespace_def) - namespace = namespace_def[1:-1] # remove '{}' - python.PyDict_SetItem(namespaces, prefix, namespace) - prefix_str = prefix + ':' - # FIXME: this also replaces {namespaces} within strings! 
- path_utf = path_utf.replace(namespace_def, prefix_str) - return path_utf, namespaces - -################################################################################ -# helper functions - cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL: cdef xpath.xmlNodeSet* resultSet cdef _NodeBase node Modified: lxml/trunk/src/lxml/xpath.pxi ============================================================================== --- lxml/trunk/src/lxml/xpath.pxi (original) +++ lxml/trunk/src/lxml/xpath.pxi Fri Apr 28 21:36:09 2006 @@ -1,624 +1,12 @@ # XSLT and XPath classes, supports for extension functions -class XSLTError(LxmlError): - pass - -class XSLTParseError(XSLTError): - pass - -class XSLTApplyError(XSLTError): - pass - -class XSLTSaveError(XSLTError): - pass - -class XSLTExtensionError(XSLTError): - pass - -class XPathError(LxmlError): - pass - class XPathContextError(XPathError): pass -class XPathFunctionError(XPathError): - pass - -class XPathResultError(XPathError): - pass - class XPathSyntaxError(LxmlSyntaxError): pass ################################################################################ -# support for extension functions in XPath/XSLT - -cdef class _BaseContext: - cdef xpath.xmlXPathContext* _xpathCtxt - cdef xpath.xmlXPathFuncLookupFunc _ext_lookup_function - cdef _Document _doc - cdef object _extensions - cdef object _namespaces - cdef object _registered_namespaces - cdef object _utf_refs - cdef object _function_cache - cdef object _called_function - # for exception handling and temporary reference keeping: - cdef _TempStore _temp_refs - cdef _ExceptionContext _exc - - def __init__(self, namespaces, extensions): - self._xpathCtxt = NULL - self._utf_refs = {} - self._function_cache = {} - self._called_function = None - - # convert old format extensions to UTF-8 - if isinstance(extensions, (list, tuple)): - new_extensions = {} - for extension in extensions: - for (ns_uri, name), function in extension.items(): - ns_utf = 
self._to_utf(ns_uri) - name_utf = self._to_utf(name) - try: - new_extensions[ns_utf][name_utf] = function - except KeyError: - new_extensions[ns_utf] = {name_utf : function} - extensions = new_extensions or None - - self._doc = None - self._exc = _ExceptionContext() - self._extensions = extensions - self._namespaces = namespaces - self._registered_namespaces = [] - self._temp_refs = _TempStore() - - cdef object _to_utf(self, s): - "Convert to UTF-8 and keep a reference to the encoded string" - cdef python.PyObject* dict_result - if s is None: - return None - dict_result = python.PyDict_GetItem(self._utf_refs, s) - if dict_result is not NULL: - return dict_result - utf = _utf8(s) - python.PyDict_SetItem(self._utf_refs, s, utf) - return utf - - cdef void _set_xpath_context(self, xpath.xmlXPathContext* xpathCtxt): - self._xpathCtxt = xpathCtxt - xpathCtxt.userData = self - - cdef _register_context(self, _Document doc, int allow_none_namespace): - self._doc = doc - self._exc.clear() - python.PyDict_Clear(self._function_cache) - namespaces = self._namespaces - if namespaces is not None: - self.registerNamespaces(namespaces) - xpath.xmlXPathRegisterFuncLookup( - self._xpathCtxt, self._ext_lookup_function, self) - - cdef _unregister_context(self): - self._unregisterNamespaces() - self._free_context() - - cdef _free_context(self): - del self._registered_namespaces[:] - python.PyDict_Clear(self._utf_refs) - self._doc = None - if self._xpathCtxt is not NULL: - self._xpathCtxt.userData = NULL - self._xpathCtxt = NULL - - # namespaces (internal UTF-8 methods with leading '_') - - def addNamespace(self, prefix, uri): - if self._namespaces is None: - self._namespaces = {} - python.PyDict_SetItem(self._namespaces, prefix, uri) - - def registerNamespaces(self, namespaces): - for prefix, uri in namespaces.items(): - self.registerNamespace(prefix, uri) - - def registerNamespace(self, prefix, ns_uri): - prefix_utf = self._to_utf(prefix) - ns_uri_utf = self._to_utf(ns_uri) - 
xpath.xmlXPathRegisterNs(self._xpathCtxt, prefix_utf, ns_uri_utf) - python.PyList_Append(self._registered_namespaces, prefix_utf) - - cdef _unregisterNamespaces(self): - cdef xpath.xmlXPathContext* xpathCtxt - xpathCtxt = self._xpathCtxt - for prefix_utf in self._registered_namespaces: - xpath.xmlXPathRegisterNs(xpathCtxt, prefix_utf, NULL) - - # extension functions - - cdef int _prepare_function_call(self, ns_uri_utf, name_utf): - cdef python.PyObject* dict_result - key = (ns_uri_utf, name_utf) - dict_result = python.PyDict_GetItem(self._function_cache, key) - if dict_result is not NULL: - function = dict_result - self._called_function = function - return function is not None - - dict_result = python.PyDict_GetItem(self._extensions, ns_uri_utf) - if dict_result is not NULL: - dict_result = python.PyDict_GetItem(dict_result, name_utf) - if dict_result is not NULL: - function = dict_result - else: - function = _find_extension(ns_uri_utf, name_utf) - - python.PyDict_SetItem(self._function_cache, key, function) - self._called_function = function - return function is not None - - # Python reference keeping during XPath function evaluation - - cdef _release_temp_refs(self): - "Free temporarily referenced objects from this context." - self._temp_refs.clear() - - cdef _hold(self, obj): - """A way to temporarily hold references to nodes in the evaluator. - - This is needed because otherwise nodes created in XPath extension - functions would be reference counted too soon, during the XPath - evaluation. This is most important in the case of exceptions. 
- """ - cdef _NodeBase element - if isinstance(obj, _NodeBase): - obj = (obj,) - elif not python.PySequence_Check(obj): - return - for o in obj: - if isinstance(o, _NodeBase): - element = <_NodeBase>o - #print "Holding element:", element._c_node - self._temp_refs.add(element) - #print "Holding document:", element._doc._c_doc - self._temp_refs.add(element._doc) - -cdef xpath.xmlXPathFunction _function_check(void* ctxt, - char* c_name, char* c_ns_uri): - cdef _BaseContext context - if c_name is NULL: - return NULL - if c_ns_uri is NULL: - ns_uri = None - else: - ns_uri = c_ns_uri - context = <_BaseContext>ctxt - if context._prepare_function_call(ns_uri, c_name): - return _call_prepared_function - else: - return NULL - -cdef xpath.xmlXPathFunction _xslt_function_check(void* ctxt, - char* c_name, char* c_ns_uri): - cdef xpath.xmlXPathFunction result - result = _function_check(ctxt, c_name, c_ns_uri) - if result is NULL: - return xslt.xsltExtModuleFunctionLookup(c_name, c_ns_uri) - else: - return result - - -################################################################################ -# XSLT document loaders - -cdef class _XSLTResolverContext(_ResolverContext): - cdef xmlDoc* _c_style_doc - cdef BaseParser _parser - def __init__(self, BaseParser parser not None): - _ResolverContext.__init__(self, parser.resolvers) - self._parser = parser - self._c_style_doc = NULL - -cdef xmlDoc* _doc_loader(char* c_uri, tree.xmlDict* c_dict, int parse_options, - void* c_ctxt, xslt.xsltLoadType c_type): - cdef xmlDoc* c_doc - cdef _ResolverRegistry resolvers - cdef _InputDocument doc_ref - cdef _XSLTResolverContext xslt_resolver_context - cdef _XSLTResolverContext doc_resolver_context - cdef _XSLTResolverContext resolver_context - cdef XMLParser parser - # find resolver contexts of stylesheet and transformed doc - c_doc = NULL - doc_resolver_context = None - if c_type == xslt.XSLT_LOAD_DOCUMENT: - c_doc = (c_ctxt).document.doc - if c_doc is not NULL and c_doc._private is not NULL: - 
if isinstance(c_doc._private, _XSLTResolverContext): - doc_resolver_context = <_XSLTResolverContext>c_doc._private - c_doc = (c_ctxt).style.doc - elif c_type == xslt.XSLT_LOAD_STYLESHEET: - c_doc = (c_ctxt).doc - - if c_doc is NULL or c_doc._private is NULL or \ - not isinstance(c_doc._private, _XSLTResolverContext): - # can't call Python without context, fall back to default loader - return XSLT_DOC_DEFAULT_LOADER( - c_uri, c_dict, parse_options, c_ctxt, c_type) - - xslt_resolver_context = <_XSLTResolverContext>c_doc._private - - # quick check if we are looking for the current stylesheet - c_doc = xslt_resolver_context._c_style_doc - if c_doc is not NULL and c_doc.URL is not NULL: - if tree.strcmp(c_uri, c_doc.URL) == 0: - return tree.xmlCopyDoc(c_doc, 1) - - # call the Python document loaders - c_doc = NULL - resolver_context = xslt_resolver_context # currently use only XSLT resolvers - resolvers = resolver_context._resolvers - try: - uri = funicode(c_uri) - doc_ref = resolvers.resolve(uri, None, resolver_context) - - if doc_ref is not None: - if doc_ref._type == PARSER_DATA_EMPTY: - c_doc = _newDoc() - if doc_ref._type == PARSER_DATA_STRING: - c_doc = _internalParseDoc( - _cstr(doc_ref._data_utf), parse_options, - resolver_context) - elif doc_ref._type == PARSER_DATA_FILE: - data = doc_ref._file.read() - c_doc = _internalParseDoc( - _cstr(data), parse_options, - resolver_context) - elif doc_ref._type == PARSER_DATA_FILENAME: - c_doc = _internalParseDocFromFile( - _cstr(doc_ref._data_utf), parse_options, - resolver_context) - if c_doc is not NULL and c_doc.URL is NULL: - c_doc.URL = tree.xmlStrdup(c_uri) - - except Exception, e: - xslt_resolver_context._store_raised() - return NULL - - if c_doc is NULL: - c_doc = XSLT_DOC_DEFAULT_LOADER( - c_uri, c_dict, parse_options, c_ctxt, c_type) - if c_doc is NULL: - message = "Cannot resolve URI %s" % funicode(c_uri) - if c_type == xslt.XSLT_LOAD_DOCUMENT: - exception = XSLTApplyError(message) - else: - exception = 
XSLTParseError(message) - xslt_resolver_context._store_exception(exception) - return NULL - if c_doc is not NULL and c_doc._private is NULL: - c_doc._private = xslt_resolver_context - return c_doc - -cdef xslt.xsltDocLoaderFunc XSLT_DOC_DEFAULT_LOADER -XSLT_DOC_DEFAULT_LOADER = xslt.xsltDocDefaultLoader - -xslt.xsltSetLoaderFunc(_doc_loader) - - -################################################################################ -# XSLT - -cdef class _XSLTContext(_BaseContext): - cdef xslt.xsltTransformContext* _xsltCtxt - def __init__(self, namespaces, extensions): - self._xsltCtxt = NULL - self._ext_lookup_function = _xslt_function_check - if extensions and None in extensions: - raise XSLTExtensionError, "extensions must not have empty namespaces" - _BaseContext.__init__(self, namespaces, extensions) - - cdef register_context(self, xslt.xsltTransformContext* xsltCtxt, - _Document doc): - self._xsltCtxt = xsltCtxt - self._set_xpath_context(xsltCtxt.xpathCtxt) - self._register_context(doc, 0) - xsltCtxt.xpathCtxt.userData = self - - cdef free_context(self): - cdef xslt.xsltTransformContext* xsltCtxt - xsltCtxt = self._xsltCtxt - if xsltCtxt is NULL: - return - self._free_context() - self._xsltCtxt = NULL - xslt.xsltFreeTransformContext(xsltCtxt) - self._release_temp_refs() - - cdef _registerLocalExtensionFunction(self, ns_utf, name_utf, function): - extensions = self._extensions - if extensions is None: - self._extensions = {ns_utf:{name_utf:function}} - else: - if ns_utf in extensions: - ns_extensions = extensions[ns_utf] - else: - ns_extensions = extensions[ns_utf] = {} - python.PyDict_SetItem(ns_extensions, name_utf, function) - -cdef class _ExsltRegExp # forward declaration - -cdef class XSLT: - """Turn a document into an XSLT object. 
- """ - cdef _XSLTContext _context - cdef xslt.xsltStylesheet* _c_style - cdef _XSLTResolverContext _xslt_resolver_context - cdef _ExsltRegExp _regexp - cdef _ErrorLog _error_log - - def __init__(self, xslt_input, extensions=None, regexp=True): - cdef xslt.xsltStylesheet* c_style - cdef xmlDoc* c_doc - cdef xmlDoc* fake_c_doc - cdef _Document doc - cdef _NodeBase root_node - - doc = _documentOrRaise(xslt_input) - root_node = _rootNodeOf(xslt_input) - - # make a copy of the document as stylesheet parsing modifies it - fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) - c_doc = tree.xmlCopyDoc(fake_c_doc, 1) - _destroyFakeDoc(doc._c_doc, fake_c_doc) - - # make sure we always have a stylesheet URL - if c_doc.URL is not NULL: - # handle a bug in older libxml2 versions - tree.xmlFree(c_doc.URL) - if doc._c_doc.URL is not NULL: - c_doc.URL = tree.xmlStrdup(doc._c_doc.URL) - else: - doc_url_utf = "XSLT:__STRING__XSLT__%s" % id(self) - c_doc.URL = tree.xmlStrdup(_cstr(doc_url_utf)) - - self._xslt_resolver_context = _XSLTResolverContext(doc._parser) - # keep a copy in case we need to access the stylesheet via 'document()' - self._xslt_resolver_context._c_style_doc = tree.xmlCopyDoc(c_doc, 1) - c_doc._private = self._xslt_resolver_context - - c_style = xslt.xsltParseStylesheetDoc(c_doc) - if c_style is NULL: - tree.xmlFreeDoc(c_doc) - self._xslt_resolver_context._raise_if_stored() - raise XSLTParseError, "Cannot parse style sheet" - self._c_style = c_style - - self._context = _XSLTContext(None, extensions) - self._error_log = _ErrorLog() - if regexp: - self._regexp = _ExsltRegExp() - else: - self._regexp = None - # XXX is it worthwile to use xsltPrecomputeStylesheet here? 
- - def __dealloc__(self): - if self._xslt_resolver_context is not None and \ - self._xslt_resolver_context._c_style_doc is not NULL: - tree.xmlFreeDoc(self._xslt_resolver_context._c_style_doc) - # this cleans up copy of doc as well - xslt.xsltFreeStylesheet(self._c_style) - - property error_log: - def __get__(self): - return self._error_log.copy() - - def __call__(self, _input, **_kw): - cdef _Document input_doc - cdef _NodeBase root_node - cdef _Document result_doc - cdef _XSLTResolverContext resolver_context - cdef xslt.xsltTransformContext* transform_ctxt - cdef xmlDoc* c_result - cdef xmlDoc* c_doc - cdef char** params - cdef void* ptemp - cdef int i - - input_doc = _documentOrRaise(_input) - root_node = _rootNodeOf(_input) - - resolver_context = _XSLTResolverContext(input_doc._parser) - resolver_context._c_style_doc = self._xslt_resolver_context._c_style_doc - - c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node) - - transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc) - if transform_ctxt is NULL: - _destroyFakeDoc(input_doc._c_doc, c_doc) - raise XSLTApplyError, "Error preparing stylesheet run" - - self._error_log.connect() - xslt.xsltSetTransformErrorFunc(transform_ctxt, self._error_log, - _receiveGenericError) - - ptemp = c_doc._private - c_doc._private = resolver_context - - if _kw: - # allocate space for parameters - # * 2 as we want an entry for both key and value, - # and + 1 as array is NULL terminated - params = cstd.malloc(sizeof(char*) * (len(_kw) * 2 + 1)) - i = 0 - keep_ref = [] - for key, value in _kw.items(): - k = _utf8(key) - python.PyList_Append(keep_ref, k) - v = _utf8(value) - python.PyList_Append(keep_ref, v) - params[i] = _cstr(k) - i = i + 1 - params[i] = _cstr(v) - i = i + 1 - params[i] = NULL - else: - params = NULL - - self._context.register_context(transform_ctxt, input_doc) - if self._regexp is not None: - self._regexp._register_in_context(self._context) - - c_result = 
xslt.xsltApplyStylesheetUser(self._c_style, c_doc, params, - NULL, NULL, transform_ctxt) - - if params is not NULL: - # deallocate space for parameters - cstd.free(params) - - self._context.free_context() - c_doc._private = ptemp # restore _private before _destroyFakeDoc! - _destroyFakeDoc(input_doc._c_doc, c_doc) - - self._error_log.disconnect() - if self._xslt_resolver_context._has_raised(): - if c_result is not NULL: - tree.xmlFreeDoc(c_result) - self._xslt_resolver_context._raise_if_stored() - - if c_result is NULL: - raise XSLTApplyError, "Error applying stylesheet" - - result_doc = _documentFactory(c_result, input_doc._parser) - return _xsltResultTreeFactory(result_doc, self) - - def apply(self, _input, **_kw): - return self.__call__(_input, **_kw) - - def tostring(self, _ElementTree result_tree): - """Save result doc to string based on stylesheet output method. - """ - return str(result_tree) - -cdef class _XSLTResultTree(_ElementTree): - cdef XSLT _xslt - def __str__(self): - cdef char* s - cdef int l - cdef int r - r = xslt.xsltSaveResultToString(&s, &l, self._doc._c_doc, - self._xslt._c_style) - if r == -1: - raise XSLTSaveError, "Error saving XSLT result to string" - if s is NULL: - return '' - result = funicode(s) - tree.xmlFree(s) - return result - -cdef _xsltResultTreeFactory(_Document doc, XSLT xslt): - cdef _XSLTResultTree result - result = <_XSLTResultTree>_newElementTree(doc, None, _XSLTResultTree) - result._xslt = xslt - return result - -# do not register all libxslt extra function, provide only "node-set" -# functions like "output" and "write" are a potential security risk -#xslt.xsltRegisterAllExtras() -xslt.xsltRegisterExtModuleFunction("node-set", - xslt.XSLT_LIBXSLT_NAMESPACE, - xslt.xsltFunctionNodeSet) -xslt.xsltRegisterExtModuleFunction("node-set", - xslt.XSLT_SAXON_NAMESPACE, - xslt.xsltFunctionNodeSet) -xslt.xsltRegisterExtModuleFunction("node-set", - xslt.XSLT_XT_NAMESPACE, - xslt.xsltFunctionNodeSet) - -# enable EXSLT support for XSLT 
-xslt.exsltRegisterAll() - -################################################################################ -# EXSLT regexp implementation - -cdef object RE_COMPILE -RE_COMPILE = re.compile - -cdef class _ExsltRegExp: - cdef object _compile_map - def __init__(self): - self._compile_map = {} - - cdef _make_string(self, value): - if python.PyString_Check(value) or python.PyUnicode_Check(value): - return value - else: - raise TypeError, "Invalid argument type %s" % type(value) - - cdef _compile(self, rexp, ignore_case): - cdef python.PyObject* c_result - rexp = self._make_string(rexp) - key = (rexp, ignore_case) - c_result = python.PyDict_GetItem(self._compile_map, key) - if c_result is not NULL: - return c_result - py_flags = re.UNICODE - if ignore_case: - py_flags = py_flags | re.IGNORECASE - rexp_compiled = RE_COMPILE(rexp, py_flags) - python.PyDict_SetItem(self._compile_map, key, rexp_compiled) - return rexp_compiled - - def test(self, ctxt, s, rexp, flags=''): - flags = self._make_string(flags) - s = self._make_string(s) - rexpc = self._compile(rexp, 'i' in flags) - if rexpc.search(s) is None: - return False - else: - return True - - def match(self, ctxt, s, rexp, flags=''): - flags = self._make_string(flags) - s = self._make_string(s) - rexpc = self._compile(rexp, 'i' in flags) - if 'g' in flags: - results = rexpc.findall(s) - if not results: - return () - result_list = [] - root = Element('matches') - for s_match in results: - elem = SubElement(root, 'match') - elem.text = s_match - python.PyList_Append(result_list, elem) - return result_list - else: - result = rexpc.search(s) - if result is None: - return () - root = Element('match') - root.text = result.group() - return (root,) - - def replace(self, ctxt, s, rexp, flags, replacement): - replacement = self._make_string(replacement) - flags = self._make_string(flags) - s = self._make_string(s) - rexpc = self._compile(rexp, 'i' in flags) - if 'g' in flags: - count = 0 - else: - count = 1 - return 
rexpc.sub(replacement, s, count) - - cdef _register_in_context(self, _XSLTContext context): - ns = "http://exslt.org/regular-expressions" - context._registerLocalExtensionFunction(ns, "test", self.test) - context._registerLocalExtensionFunction(ns, "match", self.match) - context._registerLocalExtensionFunction(ns, "replace", self.replace) - -################################################################################ # XPath cdef class _XPathContext(_BaseContext): @@ -762,10 +150,6 @@ return self._handle_result(xpathObj, doc) - #def clone(self): - # # XXX pretty expensive so calling this from callback is probably - # # not desirable - # return XPathEvaluator(self._doc, self._namespaces, self._extensions) cdef class XPathDocumentEvaluator(XPathElementEvaluator): """Create an XPath evaluator for an ElementTree. @@ -776,6 +160,7 @@ XPathElementEvaluator.__init__( self, etree._context_node, namespaces, extensions) + def XPathEvaluator(etree_or_element, namespaces=None, extensions=None): """Creates and XPath evaluator for an ElementTree or an Element. @@ -786,11 +171,6 @@ else: return XPathElementEvaluator(etree_or_element, namespaces, extensions) -def Extension(module, function_mapping, ns_uri=None): - functions = [] - for function_name, xpath_name in function_mapping.items(): - functions[xpath_name] = getattr(module, function_name) - return {ns_uri : functions} cdef class XPath(XPathEvaluatorBase): cdef xpath.xmlXPathContext* _xpathCtxt @@ -839,6 +219,7 @@ if self._xpath is not NULL: xpath.xmlXPathFreeCompExpr(self._xpath) + cdef object _replace_strings cdef object _find_namespaces _replace_strings = re.compile('("[^"]*")|(\'[^\']*\')').sub @@ -869,148 +250,3 @@ # FIXME: this also replaces {namespaces} within strings! 
path_utf = path_utf.replace(namespace_def, prefix_str) return path_utf, namespaces - -################################################################################ -# helper functions - -cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL: - cdef xpath.xmlNodeSet* resultSet - cdef _NodeBase node - if python.PyUnicode_Check(obj): - obj = _utf8(obj) - if python.PyString_Check(obj): - return xpath.xmlXPathNewCString(_cstr(obj)) - if python.PyBool_Check(obj): - return xpath.xmlXPathNewBoolean(obj) - if python.PyNumber_Check(obj): - return xpath.xmlXPathNewFloat(obj) - if obj is None: - obj = () - elif isinstance(obj, _NodeBase): - obj = (obj,) - if python.PySequence_Check(obj): - resultSet = xpath.xmlXPathNodeSetCreate(NULL) - for element in obj: - if isinstance(element, _NodeBase): - node = <_NodeBase>element - xpath.xmlXPathNodeSetAdd(resultSet, node._c_node) - else: - xpath.xmlXPathFreeNodeSet(resultSet) - raise XPathResultError, "This is not a node: %s" % element - return xpath.xmlXPathWrapNodeSet(resultSet) - else: - raise XPathResultError, "Unknown return type: %s" % obj - return NULL - -cdef object _unwrapXPathObject(xpath.xmlXPathObject* xpathObj, - _Document doc): - if xpathObj.type == xpath.XPATH_UNDEFINED: - raise XPathResultError, "Undefined xpath result" - elif xpathObj.type == xpath.XPATH_NODESET: - return _createNodeSetResult(xpathObj, doc) - elif xpathObj.type == xpath.XPATH_BOOLEAN: - return bool(xpathObj.boolval) - elif xpathObj.type == xpath.XPATH_NUMBER: - return xpathObj.floatval - elif xpathObj.type == xpath.XPATH_STRING: - return funicode(xpathObj.stringval) - elif xpathObj.type == xpath.XPATH_POINT: - raise NotImplementedError - elif xpathObj.type == xpath.XPATH_RANGE: - raise NotImplementedError - elif xpathObj.type == xpath.XPATH_LOCATIONSET: - raise NotImplementedError - elif xpathObj.type == xpath.XPATH_USERS: - raise NotImplementedError - elif xpathObj.type == xpath.XPATH_XSLT_TREE: - raise NotImplementedError - else: - 
raise XPathResultError, "Unknown xpath result %s" % str(xpathObj.type) - -cdef object _createNodeSetResult(xpath.xmlXPathObject* xpathObj, _Document doc): - cdef xmlNode* c_node - cdef char* s - cdef _NodeBase element - cdef int i - result = [] - if xpathObj.nodesetval is NULL: - return result - for i from 0 <= i < xpathObj.nodesetval.nodeNr: - c_node = xpathObj.nodesetval.nodeTab[i] - if _isElement(c_node): - if c_node.doc != doc._c_doc: - # XXX: works, but maybe not always the right thing to do? - # XPath: only runs when extensions create or copy trees - # -> we store Python refs to these, so that is OK - # XSLT: can it leak when merging trees from multiple sources? - c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1) - element = _elementFactory(doc, c_node) - result.append(element) - elif c_node.type == tree.XML_TEXT_NODE: - result.append(funicode(c_node.content)) - elif c_node.type == tree.XML_ATTRIBUTE_NODE: - s = tree.xmlNodeGetContent(c_node) - attr_value = funicode(s) - tree.xmlFree(s) - result.append(attr_value) - else: - print "Not yet implemented result node type:", c_node.type - raise NotImplementedError - return result - -cdef void _freeXPathObject(xpath.xmlXPathObject* xpathObj): - """Free the XPath object, but *never* free the *content* of node sets. - Python dealloc will do that for us. 
- """ - if xpathObj.nodesetval is not NULL: - xpath.xmlXPathFreeNodeSet(xpathObj.nodesetval) - xpathObj.nodesetval = NULL - xpath.xmlXPathFreeObject(xpathObj) - -cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt, int nargs): - cdef xpath.xmlXPathContext* rctxt - cdef _BaseContext context - rctxt = ctxt.context - context = <_BaseContext>(rctxt.userData) - name = rctxt.function - if rctxt.functionURI is not NULL: - uri = rctxt.functionURI - else: - uri = None - if context._prepare_function_call(uri, name): - _extension_function_call(context, ctxt, nargs) - else: - xpath.xmlXPathErr(ctxt, xpath.XPATH_EXPR_ERROR) - exception = XPathFunctionError("XPath function {%s}%s not found" % (uri, name)) - context._exc._store_exception(exception) - -cdef void _call_prepared_function(xpath.xmlXPathParserContext* ctxt, int nargs): - cdef xpath.xmlXPathContext* rctxt - cdef _BaseContext context - rctxt = ctxt.context - context = <_BaseContext>(rctxt.userData) - _extension_function_call(context, ctxt, nargs) - -cdef void _extension_function_call(_BaseContext context, - xpath.xmlXPathParserContext* ctxt, int nargs): - cdef _NodeBase node - cdef _Document doc - cdef xpath.xmlXPathObject* obj - cdef int i - doc = context._doc - try: - args = [] - for i from 0 <= i < nargs: - o = _unwrapXPathObject(xpath.valuePop(ctxt), doc) - python.PyList_Append(args, o) - python.PyList_Reverse(args) - - res = context._called_function(None, *args) - # wrap result for XPath consumption - obj = _wrapXPathObject(res) - # prevent Python from deallocating elements handed to libxml2 - context._hold(res) - xpath.valuePush(ctxt, obj) - except: - xpath.xmlXPathErr(ctxt, xpath.XPATH_EXPR_ERROR) - context._exc._store_raised() Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Fri Apr 28 21:36:09 2006 @@ -15,198 +15,6 @@ class XSLTExtensionError(XSLTError): 
pass -class XPathError(LxmlError): - pass - -class XPathContextError(XPathError): - pass - -class XPathFunctionError(XPathError): - pass - -class XPathResultError(XPathError): - pass - -class XPathSyntaxError(LxmlSyntaxError): - pass - -################################################################################ -# support for extension functions in XPath/XSLT - -cdef class _BaseContext: - cdef xpath.xmlXPathContext* _xpathCtxt - cdef xpath.xmlXPathFuncLookupFunc _ext_lookup_function - cdef _Document _doc - cdef object _extensions - cdef object _namespaces - cdef object _registered_namespaces - cdef object _utf_refs - cdef object _function_cache - cdef object _called_function - # for exception handling and temporary reference keeping: - cdef _TempStore _temp_refs - cdef _ExceptionContext _exc - - def __init__(self, namespaces, extensions): - self._xpathCtxt = NULL - self._utf_refs = {} - self._function_cache = {} - self._called_function = None - - # convert old format extensions to UTF-8 - if isinstance(extensions, (list, tuple)): - new_extensions = {} - for extension in extensions: - for (ns_uri, name), function in extension.items(): - ns_utf = self._to_utf(ns_uri) - name_utf = self._to_utf(name) - try: - new_extensions[ns_utf][name_utf] = function - except KeyError: - new_extensions[ns_utf] = {name_utf : function} - extensions = new_extensions or None - - self._doc = None - self._exc = _ExceptionContext() - self._extensions = extensions - self._namespaces = namespaces - self._registered_namespaces = [] - self._temp_refs = _TempStore() - - cdef object _to_utf(self, s): - "Convert to UTF-8 and keep a reference to the encoded string" - cdef python.PyObject* dict_result - if s is None: - return None - dict_result = python.PyDict_GetItem(self._utf_refs, s) - if dict_result is not NULL: - return dict_result - utf = _utf8(s) - python.PyDict_SetItem(self._utf_refs, s, utf) - return utf - - cdef void _set_xpath_context(self, xpath.xmlXPathContext* xpathCtxt): - 
self._xpathCtxt = xpathCtxt - xpathCtxt.userData = self - - cdef _register_context(self, _Document doc, int allow_none_namespace): - self._doc = doc - self._exc.clear() - python.PyDict_Clear(self._function_cache) - namespaces = self._namespaces - if namespaces is not None: - self.registerNamespaces(namespaces) - xpath.xmlXPathRegisterFuncLookup( - self._xpathCtxt, self._ext_lookup_function, self) - - cdef _unregister_context(self): - self._unregisterNamespaces() - self._free_context() - - cdef _free_context(self): - del self._registered_namespaces[:] - python.PyDict_Clear(self._utf_refs) - self._doc = None - if self._xpathCtxt is not NULL: - self._xpathCtxt.userData = NULL - self._xpathCtxt = NULL - - # namespaces (internal UTF-8 methods with leading '_') - - def addNamespace(self, prefix, uri): - if self._namespaces is None: - self._namespaces = {} - python.PyDict_SetItem(self._namespaces, prefix, uri) - - def registerNamespaces(self, namespaces): - for prefix, uri in namespaces.items(): - self.registerNamespace(prefix, uri) - - def registerNamespace(self, prefix, ns_uri): - prefix_utf = self._to_utf(prefix) - ns_uri_utf = self._to_utf(ns_uri) - xpath.xmlXPathRegisterNs(self._xpathCtxt, prefix_utf, ns_uri_utf) - python.PyList_Append(self._registered_namespaces, prefix_utf) - - cdef _unregisterNamespaces(self): - cdef xpath.xmlXPathContext* xpathCtxt - xpathCtxt = self._xpathCtxt - for prefix_utf in self._registered_namespaces: - xpath.xmlXPathRegisterNs(xpathCtxt, prefix_utf, NULL) - - # extension functions - - cdef int _prepare_function_call(self, ns_uri_utf, name_utf): - cdef python.PyObject* dict_result - key = (ns_uri_utf, name_utf) - dict_result = python.PyDict_GetItem(self._function_cache, key) - if dict_result is not NULL: - function = dict_result - self._called_function = function - return function is not None - - dict_result = python.PyDict_GetItem(self._extensions, ns_uri_utf) - if dict_result is not NULL: - dict_result = 
python.PyDict_GetItem(dict_result, name_utf) - if dict_result is not NULL: - function = dict_result - else: - function = _find_extension(ns_uri_utf, name_utf) - - python.PyDict_SetItem(self._function_cache, key, function) - self._called_function = function - return function is not None - - # Python reference keeping during XPath function evaluation - - cdef _release_temp_refs(self): - "Free temporarily referenced objects from this context." - self._temp_refs.clear() - - cdef _hold(self, obj): - """A way to temporarily hold references to nodes in the evaluator. - - This is needed because otherwise nodes created in XPath extension - functions would be reference counted too soon, during the XPath - evaluation. This is most important in the case of exceptions. - """ - cdef _NodeBase element - if isinstance(obj, _NodeBase): - obj = (obj,) - elif not python.PySequence_Check(obj): - return - for o in obj: - if isinstance(o, _NodeBase): - element = <_NodeBase>o - #print "Holding element:", element._c_node - self._temp_refs.add(element) - #print "Holding document:", element._doc._c_doc - self._temp_refs.add(element._doc) - -cdef xpath.xmlXPathFunction _function_check(void* ctxt, - char* c_name, char* c_ns_uri): - cdef _BaseContext context - if c_name is NULL: - return NULL - if c_ns_uri is NULL: - ns_uri = None - else: - ns_uri = c_ns_uri - context = <_BaseContext>ctxt - if context._prepare_function_call(ns_uri, c_name): - return _call_prepared_function - else: - return NULL - -cdef xpath.xmlXPathFunction _xslt_function_check(void* ctxt, - char* c_name, char* c_ns_uri): - cdef xpath.xmlXPathFunction result - result = _function_check(ctxt, c_name, c_ns_uri) - if result is NULL: - return xslt.xsltExtModuleFunctionLookup(c_name, c_ns_uri) - else: - return result - ################################################################################ # XSLT document loaders @@ -538,6 +346,16 @@ # enable EXSLT support for XSLT xslt.exsltRegisterAll() +cdef xpath.xmlXPathFunction 
_xslt_function_check(void* ctxt, + char* c_name, char* c_ns_uri): + "Find XSLT extension function from set of XPath and XSLT functions" + cdef xpath.xmlXPathFunction result + result = _function_check(ctxt, c_name, c_ns_uri) + if result is NULL: + return xslt.xsltExtModuleFunctionLookup(c_name, c_ns_uri) + else: + return result + ################################################################################ # EXSLT regexp implementation @@ -617,400 +435,3 @@ context._registerLocalExtensionFunction(ns, "test", self.test) context._registerLocalExtensionFunction(ns, "match", self.match) context._registerLocalExtensionFunction(ns, "replace", self.replace) - -################################################################################ -# XPath - -cdef class _XPathContext(_BaseContext): - cdef object _variables - cdef object _registered_variables - def __init__(self, namespaces, extensions, variables): - self._ext_lookup_function = _function_check - self._variables = variables - self._registered_variables = [] - _BaseContext.__init__(self, namespaces, extensions) - - cdef register_context(self, xpath.xmlXPathContext* xpathCtxt, _Document doc): - self._set_xpath_context(xpathCtxt) - ns_prefixes = _find_all_extension_prefixes() - if ns_prefixes: - self.registerNamespaces(ns_prefixes) - self._register_context(doc, 1) - if self._variables is not None: - self.registerVariables(self._variables) - - cdef unregister_context(self): - cdef xpath.xmlXPathContext* xpathCtxt - xpathCtxt = self._xpathCtxt - if xpathCtxt is NULL: - return - self._unregisterVariables() - del self._registered_variables[:] - self._unregister_context() - - cdef void _unregisterVariables(self): - cdef xpath.xmlXPathContext* xpathCtxt - cdef xpath.xmlXPathObject* xpathVarValue - cdef char* c_name - xpathCtxt = self._xpathCtxt - for name_utf in self._registered_variables: - c_name = _cstr(name_utf) - xpathVarValue = xpath.xmlXPathVariableLookup(xpathCtxt, c_name) - if xpathVarValue is not NULL: - 
xpath.xmlXPathRegisterVariable(xpathCtxt, c_name, NULL) - _freeXPathObject(xpathVarValue) - - def registerVariables(self, variable_dict): - for name, value in variable_dict.items(): - name_utf = self._to_utf(name) - self._registerVariable(name_utf, value) - python.PyList_Append(self._registered_variables, name_utf) - - def registerVariable(self, name, value): - name_utf = self._to_utf(name) - self._registerVariable(name_utf, value) - python.PyList_Append(self._registered_variables, name_utf) - - cdef void _registerVariable(self, name_utf, value): - xpath.xmlXPathRegisterVariable( - self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value)) - - -cdef class XPathEvaluatorBase: - cdef _XPathContext _context - - def __init__(self, namespaces, extensions, variables=None): - self._context = _XPathContext(namespaces, extensions, variables) - - cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, _Document doc): - if self._context._exc._has_raised(): - if xpathObj is not NULL: - _freeXPathObject(xpathObj) - xpathObj = NULL - self._context._release_temp_refs() - self._context._exc._raise_if_stored() - - if xpathObj is NULL: - self._context._release_temp_refs() - raise XPathSyntaxError, "Error in xpath expression." - - try: - result = _unwrapXPathObject(xpathObj, doc) - except XPathResultError: - _freeXPathObject(xpathObj) - self._context._release_temp_refs() - raise - - _freeXPathObject(xpathObj) - self._context._release_temp_refs() - return result - - -cdef class XPathElementEvaluator(XPathEvaluatorBase): - """Create an XPath evaluator for an element. - - XPath evaluators must not be shared between threads. 
- """ - cdef xpath.xmlXPathContext* _c_ctxt - cdef _Element _element - def __init__(self, _NodeBase element not None, namespaces=None, extensions=None): - cdef xpath.xmlXPathContext* xpathCtxt - cdef int ns_register_status - cdef _Document doc - doc = element._doc - xpathCtxt = xpath.xmlXPathNewContext(doc._c_doc) - if xpathCtxt is NULL: - raise XPathContextError, "Unable to create new XPath context" - self._element = element - self._c_ctxt = xpathCtxt - XPathEvaluatorBase.__init__(self, namespaces, extensions) - - def __dealloc__(self): - if self._c_ctxt is not NULL: - xpath.xmlXPathFreeContext(self._c_ctxt) - - def registerNamespace(self, prefix, uri): - """Register a namespace with the XPath context. - """ - self._context.addNamespace(prefix, uri) - - def registerNamespaces(self, namespaces): - """Register a prefix -> uri dict. - """ - add = self._context.addNamespace - for prefix, uri in namespaces.items(): - add(prefix, uri) - - def evaluate(self, _path, **_variables): - """Evaluate an XPath expression on the document. Variables may be - provided as keyword arguments. Note that namespaces are currently not - supported for variables.""" - cdef xpath.xmlXPathContext* xpathCtxt - cdef xpath.xmlXPathObject* xpathObj - cdef xmlNode* c_node - cdef _Document doc - xpathCtxt = self._c_ctxt - xpathCtxt.node = self._element._c_node - doc = self._element._doc - - self._context.register_context(xpathCtxt, doc) - self._context.registerVariables(_variables) - - path = _utf8(_path) - xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt) - self._context.unregister_context() - - return self._handle_result(xpathObj, doc) - - #def clone(self): - # # XXX pretty expensive so calling this from callback is probably - # # not desirable - # return XPathEvaluator(self._doc, self._namespaces, self._extensions) - -cdef class XPathDocumentEvaluator(XPathElementEvaluator): - """Create an XPath evaluator for an ElementTree. - - XPath evaluators must not be shared between threads. 
- """ - def __init__(self, _ElementTree etree not None, namespaces=None, extensions=None): - XPathElementEvaluator.__init__( - self, etree._context_node, namespaces, extensions) - -def XPathEvaluator(etree_or_element, namespaces=None, extensions=None): - """Creates and XPath evaluator for an ElementTree or an Element. - - XPath evaluators must not be shared between threads. - """ - if isinstance(etree_or_element, _ElementTree): - return XPathDocumentEvaluator(etree_or_element, namespaces, extensions) - else: - return XPathElementEvaluator(etree_or_element, namespaces, extensions) - -def Extension(module, function_mapping, ns_uri=None): - functions = [] - for function_name, xpath_name in function_mapping.items(): - functions[xpath_name] = getattr(module, function_name) - return {ns_uri : functions} - -cdef class XPath(XPathEvaluatorBase): - cdef xpath.xmlXPathContext* _xpathCtxt - cdef xpath.xmlXPathCompExpr* _xpath - cdef object _prefix_map - cdef readonly object path - - def __init__(self, path, namespaces=None, extensions=None): - XPathEvaluatorBase.__init__(self, namespaces, extensions, None) - self.path = path - path = _utf8(path) - self._xpath = xpath.xmlXPathCompile(_cstr(path)) - if self._xpath is NULL: - raise XPathSyntaxError, "Error in XPath expression" - self._xpathCtxt = xpath.xmlXPathNewContext(NULL) - - def __call__(self, _etree_or_element, **_variables): - cdef xpath.xmlXPathContext* xpathCtxt - cdef xpath.xmlXPathObject* xpathObj - cdef _Document document - cdef _NodeBase element - cdef _XPathContext context - - document = _documentOrRaise(_etree_or_element) - element = _rootNodeOf(_etree_or_element) - - xpathCtxt = self._xpathCtxt - xpathCtxt.doc = document._c_doc - xpathCtxt.node = element._c_node - - context = self._context - context._release_temp_refs() - context.register_context(xpathCtxt, document) - context.registerVariables(_variables) - - xpathObj = xpath.xmlXPathCompiledEval(self._xpath, xpathCtxt) - context.unregister_context() - return 
self._handle_result(xpathObj, document) - - def evaluate(self, _tree, **_variables): - return self(_tree, **_variables) - - def __dealloc__(self): - if self._xpathCtxt is not NULL: - xpath.xmlXPathFreeContext(self._xpathCtxt) - if self._xpath is not NULL: - xpath.xmlXPathFreeCompExpr(self._xpath) - -cdef object _replace_strings -cdef object _find_namespaces -_replace_strings = re.compile('("[^"]*")|(\'[^\']*\')').sub -_find_namespaces = re.compile('({[^}]+})').findall - -cdef class ETXPath(XPath): - """Special XPath class that supports the ElementTree {uri} notation for - namespaces.""" - def __init__(self, path, extensions=None): - path_utf, namespaces = self._nsextract_path(_utf8(path)) - XPath.__init__(self, funicode(path_utf), namespaces, extensions) - - cdef _nsextract_path(self, path_utf): - # replace {namespaces} by new prefixes - cdef int i - namespaces = {} - stripped_path = _replace_strings('', path_utf) # remove string literals - namespace_defs = [] - i = 1 - for namespace_def in _find_namespaces(stripped_path): - if namespace_def not in namespace_defs: - prefix = python.PyString_FromFormat("xpp%02d", i) - i = i+1 - python.PyList_Append(namespace_defs, namespace_def) - namespace = namespace_def[1:-1] # remove '{}' - python.PyDict_SetItem(namespaces, prefix, namespace) - prefix_str = prefix + ':' - # FIXME: this also replaces {namespaces} within strings! 
- path_utf = path_utf.replace(namespace_def, prefix_str) - return path_utf, namespaces - -################################################################################ -# helper functions - -cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL: - cdef xpath.xmlNodeSet* resultSet - cdef _NodeBase node - if python.PyUnicode_Check(obj): - obj = _utf8(obj) - if python.PyString_Check(obj): - return xpath.xmlXPathNewCString(_cstr(obj)) - if python.PyBool_Check(obj): - return xpath.xmlXPathNewBoolean(obj) - if python.PyNumber_Check(obj): - return xpath.xmlXPathNewFloat(obj) - if obj is None: - obj = () - elif isinstance(obj, _NodeBase): - obj = (obj,) - if python.PySequence_Check(obj): - resultSet = xpath.xmlXPathNodeSetCreate(NULL) - for element in obj: - if isinstance(element, _NodeBase): - node = <_NodeBase>element - xpath.xmlXPathNodeSetAdd(resultSet, node._c_node) - else: - xpath.xmlXPathFreeNodeSet(resultSet) - raise XPathResultError, "This is not a node: %s" % element - return xpath.xmlXPathWrapNodeSet(resultSet) - else: - raise XPathResultError, "Unknown return type: %s" % obj - return NULL - -cdef object _unwrapXPathObject(xpath.xmlXPathObject* xpathObj, - _Document doc): - if xpathObj.type == xpath.XPATH_UNDEFINED: - raise XPathResultError, "Undefined xpath result" - elif xpathObj.type == xpath.XPATH_NODESET: - return _createNodeSetResult(xpathObj, doc) - elif xpathObj.type == xpath.XPATH_BOOLEAN: - return bool(xpathObj.boolval) - elif xpathObj.type == xpath.XPATH_NUMBER: - return xpathObj.floatval - elif xpathObj.type == xpath.XPATH_STRING: - return funicode(xpathObj.stringval) - elif xpathObj.type == xpath.XPATH_POINT: - raise NotImplementedError - elif xpathObj.type == xpath.XPATH_RANGE: - raise NotImplementedError - elif xpathObj.type == xpath.XPATH_LOCATIONSET: - raise NotImplementedError - elif xpathObj.type == xpath.XPATH_USERS: - raise NotImplementedError - elif xpathObj.type == xpath.XPATH_XSLT_TREE: - raise NotImplementedError - else: 
- raise XPathResultError, "Unknown xpath result %s" % str(xpathObj.type) - -cdef object _createNodeSetResult(xpath.xmlXPathObject* xpathObj, _Document doc): - cdef xmlNode* c_node - cdef char* s - cdef _NodeBase element - cdef int i - result = [] - if xpathObj.nodesetval is NULL: - return result - for i from 0 <= i < xpathObj.nodesetval.nodeNr: - c_node = xpathObj.nodesetval.nodeTab[i] - if _isElement(c_node): - if c_node.doc != doc._c_doc: - # XXX: works, but maybe not always the right thing to do? - # XPath: only runs when extensions create or copy trees - # -> we store Python refs to these, so that is OK - # XSLT: can it leak when merging trees from multiple sources? - c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1) - element = _elementFactory(doc, c_node) - result.append(element) - elif c_node.type == tree.XML_TEXT_NODE: - result.append(funicode(c_node.content)) - elif c_node.type == tree.XML_ATTRIBUTE_NODE: - s = tree.xmlNodeGetContent(c_node) - attr_value = funicode(s) - tree.xmlFree(s) - result.append(attr_value) - else: - print "Not yet implemented result node type:", c_node.type - raise NotImplementedError - return result - -cdef void _freeXPathObject(xpath.xmlXPathObject* xpathObj): - """Free the XPath object, but *never* free the *content* of node sets. - Python dealloc will do that for us. 
- """ - if xpathObj.nodesetval is not NULL: - xpath.xmlXPathFreeNodeSet(xpathObj.nodesetval) - xpathObj.nodesetval = NULL - xpath.xmlXPathFreeObject(xpathObj) - -cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt, int nargs): - cdef xpath.xmlXPathContext* rctxt - cdef _BaseContext context - rctxt = ctxt.context - context = <_BaseContext>(rctxt.userData) - name = rctxt.function - if rctxt.functionURI is not NULL: - uri = rctxt.functionURI - else: - uri = None - if context._prepare_function_call(uri, name): - _extension_function_call(context, ctxt, nargs) - else: - xpath.xmlXPathErr(ctxt, xpath.XPATH_EXPR_ERROR) - exception = XPathFunctionError("XPath function {%s}%s not found" % (uri, name)) - context._exc._store_exception(exception) - -cdef void _call_prepared_function(xpath.xmlXPathParserContext* ctxt, int nargs): - cdef xpath.xmlXPathContext* rctxt - cdef _BaseContext context - rctxt = ctxt.context - context = <_BaseContext>(rctxt.userData) - _extension_function_call(context, ctxt, nargs) - -cdef void _extension_function_call(_BaseContext context, - xpath.xmlXPathParserContext* ctxt, int nargs): - cdef _NodeBase node - cdef _Document doc - cdef xpath.xmlXPathObject* obj - cdef int i - doc = context._doc - try: - args = [] - for i from 0 <= i < nargs: - o = _unwrapXPathObject(xpath.valuePop(ctxt), doc) - python.PyList_Append(args, o) - python.PyList_Reverse(args) - - res = context._called_function(None, *args) - # wrap result for XPath consumption - obj = _wrapXPathObject(res) - # prevent Python from deallocating elements handed to libxml2 - context._hold(res) - xpath.valuePush(ctxt, obj) - except: - xpath.xmlXPathErr(ctxt, xpath.XPATH_EXPR_ERROR) - context._exc._store_raised() From scoder at codespeak.net Fri Apr 28 21:41:18 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 21:41:19 2006 Subject: [Lxml-checkins] r26539 - lxml/trunk/src/lxml Message-ID: <20060428194118.D9EEB10094@code0.codespeak.net> Author: scoder Date: Fri 
Apr 28 21:41:17 2006 New Revision: 26539 Added: lxml/trunk/src/lxml/apihelpers.pxi - copied unchanged from r26538, lxml/trunk/src/lxml/etree.pyx Log: new source file apihelpers.pxi to move helper functions out of API source file etree.pyx From scoder at codespeak.net Fri Apr 28 21:56:37 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Fri Apr 28 21:56:39 2006 Subject: [Lxml-checkins] r26540 - lxml/trunk/src/lxml Message-ID: <20060428195637.BC25E10083@code0.codespeak.net> Author: scoder Date: Fri Apr 28 21:56:36 2006 New Revision: 26540 Modified: lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/extensions.pxi lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/xmlerror.pxi lxml/trunk/src/lxml/xpath.pxi lxml/trunk/src/lxml/xslt.pxi Log: moved helper functions from etree.pyx to apihelpers.pxi, some cleanup Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Fri Apr 28 21:56:36 2006 @@ -1,1403 +1,13 @@ -cimport tree, python -from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement -from python cimport isinstance, issubclass, hasattr, callable -from python cimport iter, str, _cstr -cimport xpath -cimport xslt -cimport xmlerror -cimport xinclude -cimport c14n -cimport cstd -import re - -import _elementpath -from StringIO import StringIO -import sys - -# the rules -# any libxml C argument/variable is prefixed with c_ -# any non-public function/class is prefixed with an underscore -# instance creation is always through factories - -ctypedef enum LXML_PROXY_TYPE: - PROXY_ELEMENT - PROXY_ATTRIB - -# what to do with libxml2/libxslt error messages? 
-# 0 : drop -# 1 : use log -cdef int __DEBUG -__DEBUG = 1 - -# maximum number of lines in the libxml2/xslt log if __DEBUG == 1 -cdef int __MAX_LOG_SIZE -__MAX_LOG_SIZE = 100 - -# make the compiled-in debug state publicly available -DEBUG = __DEBUG - -# Error superclass for ElementTree compatibility -class Error(Exception): - pass - -# module level superclass for all exceptions -class LxmlError(Error): - def __init__(self, *args): - Error.__init__(self, *args) - self.error_log = __copyGlobalErrorLog() - -# superclass for all syntax errors -class LxmlSyntaxError(LxmlError, SyntaxError): - pass - -class DocumentInvalid(LxmlError): - pass - -class XIncludeError(LxmlError): - pass - -class C14NError(LxmlError): - pass - - -# class for temporary storage of Python references -cdef class _TempStore: - cdef object _storage - def __init__(self): - self._storage = {} - - cdef void add(self, obj): - python.PyDict_SetItem(self._storage, id(obj), obj) - - cdef void clear(self): - python.PyDict_Clear(self._storage) - - cdef object dictcopy(self): - return self._storage.copy() - -# class for temporarily storing exceptions raised in extensions -cdef class _ExceptionContext: - cdef object _exc_info - def __init__(self): - self._exc_info = None - - cdef void clear(self): - self._exc_info = None - - cdef void _store_raised(self): - self._exc_info = sys.exc_info() - - cdef void _store_exception(self, exception): - self._exc_info = (exception, None, None) - - cdef _has_raised(self): - return self._exc_info is not None - - cdef _raise_if_stored(self): - _exc_info = self._exc_info - if _exc_info is not None: - self._exc_info = None - type, value, traceback = _exc_info - if traceback is None and value is None: - raise type - else: - raise type, value, traceback - - -cdef class BaseParser # forward declaration - -cdef class _Document: - """Internal base class to reference a libxml document. - - When instances of this class are garbage collected, the libxml - document is cleaned up. 
- """ - cdef int _ns_counter - cdef xmlDoc* _c_doc - cdef BaseParser _parser - - def __dealloc__(self): - # if there are no more references to the document, it is safe - # to clean the whole thing up, as all nodes have a reference to - # the document - #print "freeing document:", self._c_doc - #displayNode(self._c_doc, 0) - #print self._c_doc, self._c_doc.dict is __GLOBAL_PARSER_CONTEXT._c_dict - tree.xmlFreeDoc(self._c_doc) - - cdef getroot(self): - cdef xmlNode* c_node - c_node = tree.xmlDocGetRootElement(self._c_doc) - if c_node is NULL: - return None - return _elementFactory(self, c_node) - - cdef buildNewPrefix(self): - ns = python.PyString_FromFormat("ns%d", self._ns_counter) - self._ns_counter = self._ns_counter + 1 - return ns - - cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, char* href): - """Get or create namespace structure for a node. - """ - cdef xmlNs* c_ns - # look for existing ns - c_ns = tree.xmlSearchNsByHref(self._c_doc, c_node, href) - if c_ns is not NULL: - return c_ns - # create ns if existing ns cannot be found - # try to simulate ElementTree's namespace prefix creation - prefix = self.buildNewPrefix() - c_ns = tree.xmlNewNs(c_node, href, _cstr(prefix)) - return c_ns - - cdef void _setNodeNs(self, xmlNode* c_node, char* href): - "Lookup namespace structure and set it for the node." - cdef xmlNs* c_ns - c_ns = self._findOrBuildNodeNs(c_node, href) - tree.xmlSetNs(c_node, c_ns) - - cdef void _setNodeNamespaces(self, xmlNode* c_node, - object node_ns_utf, object nsmap): - """Lookup current namespace prefixes, then set namespace structure for - node and register new ns-prefix mappings. 
- """ - cdef xmlNs* c_ns - cdef xmlDoc* c_doc - cdef char* c_prefix - cdef char* c_href - if not nsmap: - if node_ns_utf is not None: - self._setNodeNs(c_node, node_ns_utf) - return - - c_doc = self._c_doc - for prefix, href in nsmap.items(): - href_utf = _utf8(href) - c_href = _cstr(href_utf) - if prefix is not None: - prefix_utf = _utf8(prefix) - c_prefix = _cstr(prefix_utf) - else: - c_prefix = NULL - # add namespace with prefix if ns is not already known - c_ns = tree.xmlSearchNsByHref(c_doc, c_node, c_href) - if c_ns is NULL: - c_ns = tree.xmlNewNs(c_node, c_href, c_prefix) - if href_utf == node_ns_utf: - tree.xmlSetNs(c_node, c_ns) - node_ns_utf = None - - if node_ns_utf is not None: - self._setNodeNs(c_node, node_ns_utf) - -cdef _Document _parseDocument(source, parser): - cdef xmlDoc* c_doc - filename = _getFilenameForFile(source) - # Support for unamed file-like object (StringIO, urlgrabber.urlopen, ...) - if not filename and hasattr(source, 'read'): - return _parseMemoryDocument(source.read(), parser) - - # Otherwise parse the file directly from the filesystem - if filename is None: - filename = source - # open filename - c_doc = _parseDocFromFile(_utf8(filename), parser) - return _documentFactory(c_doc, parser) - -cdef _Document _parseMemoryDocument(text, parser): - cdef xmlDoc* c_doc - if python.PyUnicode_Check(text): - text = _stripDeclaration(_utf8(text)) - c_doc = _parseDoc(text, parser) - return _documentFactory(c_doc, parser) - -cdef _Document _documentFactory(xmlDoc* c_doc, parser): - cdef _Document result - result = _Document() - result._c_doc = c_doc - result._ns_counter = 0 - if parser is None: - parser = __DEFAULT_PARSER - result._parser = parser.copy() - return result +# Private helper functions -# to help with debugging cdef void displayNode(xmlNode* c_node, indent): + # to help with debugging cdef xmlNode* c_child print indent * ' ', c_node c_child = c_node.children while c_child is not NULL: displayNode(c_child, indent + 1) c_child = 
c_child.next - -cdef class _NodeBase: - """Base class to reference a document object and a libxml node. - - By pointing to a Document instance, a reference is kept to - _Document as long as there is some pointer to a node in it. - """ - cdef _Document _doc - cdef xmlNode* _c_node - cdef int _proxy_type - - def __dealloc__(self): - #print "trying to free node:", self._c_node - #displayNode(self._c_node, 0) - if self._c_node is not NULL: - unregisterProxy(self) - attemptDeallocation(self._c_node) - - def _init(self): - """Called after object initialisation. Subclasses may override - this if they recursively call _init() in the superclasses. - """ - -cdef class _ElementTree: - cdef _Document _doc - cdef _NodeBase _context_node - - def parse(self, source, parser=None): - """Updates self with the content of source and returns its root - """ - self._doc = _parseDocument(source, parser) - self._context_node = self._doc.getroot() - return self._context_node - - def getroot(self): - return self._context_node - - def write(self, file, encoding='us-ascii'): - if not hasattr(file, 'write'): - # file is a filename, we want a file object - file = open(file, 'wb') - - m = tostring(self._context_node, encoding) - # XXX this is purely for ElementTree compatibility.. - if encoding == 'UTF-8' or encoding == 'us-ascii': - m = _stripDeclaration(m) - if m[-1:] == '\n': - m = m[:-1] - file.write(m) - - def getiterator(self, tag=None): - root = self.getroot() - if root is None: - return () - return root.getiterator(tag) - - def find(self, path): - root = self.getroot() - assert root is not None - if path[:1] == "/": - path = "." + path - return root.find(path) - - def findtext(self, path, default=None): - root = self.getroot() - assert root is not None - if path[:1] == "/": - path = "." + path - return root.findtext(path, default) - - def findall(self, path): - root = self.getroot() - assert root is not None - if path[:1] == "/": - path = "." 
+ path - return root.findall(path) - - # extensions to ElementTree API - def xpath(self, _path, namespaces=None, **_variables): - """XPath evaluate in context of document. - - namespaces is an optional dictionary with prefix to namespace URI - mappings, used by XPath. - - Returns a list (nodeset), or bool, float or string. - - In case of a list result, return Element for element nodes, - string for text and attribute values. - - Note: if you are going to apply multiple XPath expressions - against the same document, it is more efficient to use - XPathEvaluator directly. - """ - evaluator = XPathElementEvaluator(self._context_node, namespaces) - return evaluator.evaluate(_path, **_variables) - - def xslt(self, _xslt, extensions=None, **_kw): - """Transform this document using other document. - - xslt is a tree that should be XSLT - keyword parameters are XSLT transformation parameters. - - Returns the transformed tree. - - Note: if you are going to apply the same XSLT stylesheet against - multiple documents, it is more efficient to use the XSLT - class directly. - """ - style = XSLT(_xslt, extensions) - return style(self, **_kw) - - def relaxng(self, relaxng): - """Validate this document using other document. - - relaxng is a tree that should contain Relax NG XML - - Returns True or False, depending on whether validation - succeeded. - - Note: if you are going to apply the same Relax NG schema against - multiple documents, it is more efficient to use the RelaxNG - class directly. - """ - schema = RelaxNG(relaxng) - return schema.validate(self) - - def xmlschema(self, xmlschema): - """Validate this document using other doucment. - - xmlschema is a tree that should contain XML Schema XML. - - Returns True or False, depending on whether validation - succeeded. - - Note: If you are going to applyt he same XML Schema against - multiple documents, it is more efficient to use the XMLSchema - class directly. 
- """ - schema = XMLSchema(xmlschema) - return schema.validate(self) - - def xinclude(self): - """Process this document, including using XInclude. - """ - cdef int result - # XXX what happens memory-wise with the original XInclude nodes? - # they seem to be still accessible if a reference to them has - # been made previously, but I have no idea whether they get freed - # at all. The XInclude nodes appear to be still being in the same - # parent and same document, but they must not be connected to the - # tree.. - result = xinclude.xmlXIncludeProcessTree(self._context_node._c_node) - if result == -1: - raise XIncludeError, "XInclude processing failed" - - def write_c14n(self, file): - """C14N write of document. Always writes UTF-8. - """ - cdef xmlDoc* c_base_doc - cdef xmlDoc* c_doc - cdef char* data - cdef int bytes - c_base_doc = self._doc._c_doc - - c_doc = _fakeRootDoc(c_base_doc, self._context_node._c_node) - bytes = c14n.xmlC14NDocDumpMemory(c_doc, NULL, 0, NULL, 1, &data) - _destroyFakeDoc(c_base_doc, c_doc) - - if bytes < 0: - raise C14NError, "C14N failed" - if not hasattr(file, 'write'): - file = open(file, 'wb') - file.write(data) - tree.xmlFree(data) - -cdef _ElementTree _elementTreeFactory(_Document doc, - _NodeBase context_node): - return _newElementTree(doc, context_node, _ElementTree) - -cdef _ElementTree _newElementTree(_Document doc, _NodeBase context_node, - object baseclass): - cdef _ElementTree result - result = baseclass() - result._doc = doc - if context_node is None and doc is not None: - context_node = doc.getroot() - result._context_node = context_node - return result - -cdef class _Element(_NodeBase): - cdef object _tag - - # MANIPULATORS - - def __setitem__(self, index, _NodeBase element): - cdef xmlNode* c_node - cdef xmlNode* c_next - cdef int foreign - c_node = _findChild(self._c_node, index) - if c_node is NULL: - raise IndexError - foreign = self._doc is not element._doc - c_next = element._c_node.next - _removeText(c_node.next) - 
tree.xmlReplaceNode(c_node, element._c_node) - _moveTail(c_next, element._c_node) - changeDocumentBelow(element, self._doc, foreign) - - def __delitem__(self, index): - cdef xmlNode* c_node - c_node = _findChild(self._c_node, index) - if c_node is NULL: - raise IndexError - _removeText(c_node.next) - _removeNode(c_node) - - def __delslice__(self, start, stop): - cdef xmlNode* c_node - c_node = _findChild(self._c_node, start) - _deleteSlice(c_node, start, stop) - - def __setslice__(self, start, stop, value): - cdef xmlNode* c_node - cdef xmlNode* c_next - cdef _Element mynode - cdef int foreign - # first, find start of slice - c_node = _findChild(self._c_node, start) - # now delete the slice - if start != stop: - c_node = _deleteSlice(c_node, start, stop) - # if the insertion point is at the end, append there - if c_node is NULL: - append = self.append - for node in value: - append(node) - return - # if the next element is in the list, insert before it - for mynode in value: - if mynode is None: - raise TypeError, "Node must not be None." 
- foreign = self._doc is not mynode._doc - # store possible text tail - c_next = mynode._c_node.next - # now move node previous to insertion point - tree.xmlUnlinkNode(mynode._c_node) - tree.xmlAddPrevSibling(c_node, mynode._c_node) - # and move tail just behind his node - _moveTail(c_next, mynode._c_node) - # move it into a new document - changeDocumentBelow(mynode, self._doc, foreign) - - def __deepcopy__(self, memo): - return self.__copy__() - - def __copy__(self): - cdef xmlNode* c_node - cdef xmlDoc* c_doc - cdef xmlDoc* fake_c_doc - cdef _Document doc - doc = self._doc - fake_c_doc = _fakeRootDoc(doc._c_doc, self._c_node) - c_doc = tree.xmlCopyDoc(fake_c_doc, 1) # recursive copy - _destroyFakeDoc(doc._c_doc, fake_c_doc) - doc = _documentFactory(c_doc, doc._parser) - return doc.getroot() - - def set(self, key, value): - self.attrib[key] = value - - def append(self, _Element element not None): - cdef xmlNode* c_next - cdef xmlNode* c_node - cdef int foreign - foreign = self._doc is not element._doc - c_node = element._c_node - # store possible text node - c_next = c_node.next - # XXX what if element is coming from a different document? - tree.xmlUnlinkNode(c_node) - # move node itself - tree.xmlAddChild(self._c_node, c_node) - _moveTail(c_next, c_node) - # uh oh, elements may be pointing to different doc when - # parent element has moved; change them too.. 
- changeDocumentBelow(element, self._doc, foreign) - - def clear(self): - cdef xmlAttr* c_attr - cdef xmlAttr* c_attr_next - cdef xmlNode* c_node - cdef xmlNode* c_node_next - c_node = self._c_node - # remove self.text and self.tail - _removeText(c_node.children) - _removeText(c_node.next) - # remove all attributes - c_attr = c_node.properties - while c_attr is not NULL: - c_attr_next = c_attr.next - tree.xmlRemoveProp(c_attr) - c_attr = c_attr_next - # remove all subelements - c_node = c_node.children - while c_node is not NULL: - c_node_next = c_node.next - if _isElement(c_node): - _removeText(c_node_next) - c_node_next = c_node.next - _removeNode(c_node) - c_node = c_node_next - - def insert(self, index, _Element element not None): - cdef xmlNode* c_node - cdef xmlNode* c_next - cdef int foreign - c_node = _findChild(self._c_node, index) - if c_node is NULL: - self.append(element) - return - foreign = self._doc is not element._doc - c_next = element._c_node.next - tree.xmlAddPrevSibling(c_node, element._c_node) - _moveTail(c_next, element._c_node) - changeDocumentBelow(element, self._doc, foreign) - - def remove(self, _Element element not None): - cdef xmlNode* c_node - c_node = element._c_node - if c_node.parent is not self._c_node: - raise ValueError, "Element is not a child of this node." 
- _removeText(c_node.next) - tree.xmlUnlinkNode(c_node) - - # PROPERTIES - property tag: - def __get__(self): - if self._tag is not None: - return self._tag - self._tag = _namespacedName(self._c_node) - return self._tag - - def __set__(self, value): - cdef xmlNs* c_ns - ns, text = _getNsTag(value) - self._tag = value - tree.xmlNodeSetName(self._c_node, _cstr(text)) - if ns is None: - return - self._doc._setNodeNs(self._c_node, _cstr(ns)) - - # not in ElementTree, read-only - property prefix: - def __get__(self): - if self._c_node.ns is not NULL: - if self._c_node.ns.prefix is not NULL: - return funicode(self._c_node.ns.prefix) - return None - - property attrib: - def __get__(self): - return _attribFactory(self._doc, self._c_node) - - property text: - def __get__(self): - return _collectText(self._c_node.children) - - def __set__(self, value): - cdef xmlNode* c_text_node - # remove all text nodes at the start first - _removeText(self._c_node.children) - if value is None: - return - # now add new text node with value at start - text = _utf8(value) - c_text_node = tree.xmlNewDocText(self._doc._c_doc, - _cstr(text)) - if self._c_node.children is NULL: - tree.xmlAddChild(self._c_node, c_text_node) - else: - tree.xmlAddPrevSibling(self._c_node.children, - c_text_node) - - property tail: - def __get__(self): - return _collectText(self._c_node.next) - - def __set__(self, value): - cdef xmlNode* c_text_node - # remove all text nodes at the start first - _removeText(self._c_node.next) - if value is None: - return - text = _utf8(value) - c_text_node = tree.xmlNewDocText(self._doc._c_doc, _cstr(text)) - # XXX what if we're the top element? 
- tree.xmlAddNextSibling(self._c_node, c_text_node) - - # ACCESSORS - def __repr__(self): - return "" % (self.tag, id(self)) - - def __getitem__(self, index): - cdef xmlNode* c_node - c_node = _findChild(self._c_node, index) - if c_node is NULL: - raise IndexError, "list index out of range" - return _elementFactory(self._doc, c_node) - - def __getslice__(self, start, stop): - cdef xmlNode* c_node - cdef _Document doc - cdef int c, c_stop - # this does not work for negative start, stop, however, - # python seems to convert these to positive start, stop before - # calling, so this all works perfectly (at the cost of a len() call) - c_node = _findChild(self._c_node, start) - if c_node is NULL: - return [] - c = start - c_stop = stop - result = [] - doc = self._doc - while c_node is not NULL and c < c_stop: - if _isElement(c_node): - ret = python.PyList_Append(result, _elementFactory(doc, c_node)) - if ret: - raise - c = c + 1 - c_node = c_node.next - return result - - def __len__(self): - cdef int c - cdef xmlNode* c_node - c = 0 - c_node = self._c_node.children - while c_node is not NULL: - if _isElement(c_node): - c = c + 1 - c_node = c_node.next - return c - - def __nonzero__(self): - cdef xmlNode* c_node - c_node = _findChildBackwards(self._c_node, 0) - return c_node != NULL - - def __iter__(self): - return ElementChildIterator(self) - - def __reversed__(self): - return ElementChildIterator(self, reversed=True) - - def index(self, _Element x not None, start=None, stop=None): - cdef int k - cdef int l - cdef int c_stop - cdef int c_start - cdef xmlNode* c_child - cdef xmlNode* c_start_node - c_child = x._c_node - if c_child.parent is not self._c_node: - raise ValueError, "Element is not a child of this node." 
- - if start is None: - c_start = 0 - else: - c_start = start - if stop is None: - c_stop = 0 - else: - c_stop = stop - if c_stop == 0 or \ - c_start >= c_stop and (c_stop > 0 or c_start < 0): - raise ValueError, "list.index(x): x not in slice" - - # for negative slice indices, check slice before searching index - if c_start < 0 or c_stop < 0: - # start from right, at most up to leftmost(c_start, c_stop) - if c_start < c_stop: - k = -c_start - else: - k = -c_stop - c_start_node = self._c_node.last - l = 1 - while c_start_node != c_child and l < k: - if _isElement(c_start_node): - l = l + 1 - c_start_node = c_start_node.prev - if c_start_node == c_child: - # found! before slice end? - if c_stop < 0 and l <= -c_stop: - raise ValueError, "list.index(x): x not in slice" - elif c_start < 0: - raise ValueError, "list.index(x): x not in slice" - - # now determine the index backwards from child - c_child = c_child.prev - k = 0 - if c_stop > 0: - # we can optimize: stop after c_stop elements if not found - while c_child != NULL and k < c_stop: - if _isElement(c_child): - k = k + 1 - c_child = c_child.prev - if k < c_stop: - return k - else: - # traverse all - while c_child != NULL: - if _isElement(c_child): - k = k + 1 - c_child = c_child.prev - if c_start > 0: - if k >= c_start: - return k - else: - return k - if c_start or c_stop: - raise ValueError, "list.index(x): x not in slice" - else: - raise ValueError, "list.index(x): x not in list" - - def get(self, key, default=None): - # XXX more redundancy, but might be slightly faster than - # return self.attrib.get(key, default) - cdef char* cresult - cdef char* c_tag - ns, tag = _getNsTag(key) - c_tag = _cstr(tag) - if ns is None: - cresult = tree.xmlGetNoNsProp(self._c_node, c_tag) - else: - cresult = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns)) - if cresult is NULL: - result = default - else: - result = funicode(cresult) - tree.xmlFree(cresult) - return result - - def keys(self): - return self.attrib.keys() - - def 
items(self): - return self.attrib.items() - - def getchildren(self): - cdef xmlNode* c_node - cdef _Document doc - cdef int ret - result = [] - doc = self._doc - c_node = self._c_node.children - while c_node is not NULL: - if _isElement(c_node): - ret = python.PyList_Append(result, _elementFactory(doc, c_node)) - if ret: - raise - c_node = c_node.next - return result - - def getparent(self): - cdef xmlNode* c_node - c_node = self._c_node.parent - if c_node is not NULL and _isElement(c_node): - return _elementFactory(self._doc, c_node) - return None - - def getiterator(self, tag=None): - iterator = ElementDepthFirstIterator(self) - if tag is None or tag == '*': - return iterator - else: - return ElementTagFilter(iterator, tag) - - def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): - "Creates a new element associated with the same document." - # a little code duplication, but less overhead through doc reuse - cdef xmlNode* c_node - cdef xmlDoc* c_doc - cdef _Document doc - ns_utf, name_utf = _getNsTag(_tag) - doc = self._doc - c_doc = doc._c_doc - c_node = _createElement(c_doc, name_utf, attrib, _extra) - # add namespaces to node if necessary - doc._setNodeNamespaces(c_node, ns_utf, nsmap) - return _elementFactory(doc, c_node) - - def find(self, path): - return _elementpath.find(self, path) - - def findtext(self, path, default=None): - return _elementpath.findtext(self, path, default) - - def findall(self, path): - return _elementpath.findall(self, path) - - def xpath(self, _path, namespaces=None, **_variables): - evaluator = XPathElementEvaluator(self, namespaces) - return evaluator.evaluate(_path, **_variables) - -cdef _Element _elementFactory(_Document doc, xmlNode* c_node): - cdef _Element result - cdef char* c_ns_href - result = getProxy(c_node, PROXY_ELEMENT) - if result is not None: - return result - if c_node is NULL: - return None - if c_node.type == tree.XML_ELEMENT_NODE: - if c_node.ns == NULL: - c_ns_href = NULL - else: - c_ns_href = 
c_node.ns.href - element_class = _find_element_class(c_ns_href, c_node.name) - elif c_node.type == tree.XML_COMMENT_NODE: - element_class = _Comment - else: - assert 0, "Unknown node type: %s" % c_node.type - result = element_class() - result._tag = None - result._doc = doc - result._c_node = c_node - result._proxy_type = PROXY_ELEMENT - registerProxy(result, PROXY_ELEMENT) - result._init() - return result - -cdef class _Comment(_Element): - def set(self, key, value): - pass - - def append(self, _Element element): - pass - - property tag: - def __get__(self): - return None - - property attrib: - def __get__(self): - return {} - - property text: - def __get__(self): - return funicode(self._c_node.content) - - def __set__(self, value): - pass - - # ACCESSORS - def __repr__(self): - return "" % self.text - - def __getitem__(self, n): - raise IndexError - - def __len__(self): - return 0 - - def get(self, key, default=None): - return None - - def keys(self): - return [] - - def items(self): - return [] - -cdef _Comment _commentFactory(_Document doc, xmlNode* c_node): - cdef _Comment result - result = getProxy(c_node, PROXY_ELEMENT) - if result is not None: - return result - if c_node is NULL: - return None - result = _Comment() - result._doc = doc - result._c_node = c_node - result._proxy_type = PROXY_ELEMENT - registerProxy(result, PROXY_ELEMENT) - return result - -cdef class _Attrib(_NodeBase): - # MANIPULATORS - def __setitem__(self, key, value): - cdef xmlNs* c_ns - cdef char* c_value - cdef char* c_tag - ns, tag = _getNsTag(key) - c_tag = _cstr(tag) - value = _utf8(value) - c_value = _cstr(value) - if ns is None: - tree.xmlSetProp(self._c_node, c_tag, c_value) - else: - c_ns = self._doc._findOrBuildNodeNs(self._c_node, _cstr(ns)) - tree.xmlSetNsProp(self._c_node, c_ns, c_tag, c_value) - - def __delitem__(self, key): - cdef xmlNs* c_ns - cdef xmlAttr* c_attr - cdef char* c_tag - ns, tag = _getNsTag(key) - c_tag = _cstr(tag) - if ns is None: - c_attr = 
tree.xmlHasProp(self._c_node, c_tag) - else: - c_attr = tree.xmlHasNsProp(self._c_node, c_tag, _cstr(ns)) - if c_attr is NULL: - # XXX free namespace that is not in use..? - raise KeyError, key - tree.xmlRemoveProp(c_attr) - - # ACCESSORS - def __repr__(self): - result = {} - for key, value in self.items(): - result[key] = value - return repr(result) - - def __getitem__(self, key): - cdef xmlNs* c_ns - cdef char* cresult - cdef char* c_tag - ns, tag = _getNsTag(key) - c_tag = _cstr(tag) - if ns is None: - cresult = tree.xmlGetNoNsProp(self._c_node, c_tag) - else: - cresult = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns)) - if cresult is NULL: - # XXX free namespace that is not in use..? - raise KeyError, key - result = funicode(cresult) - tree.xmlFree(cresult) - return result - - def __len__(self): - cdef int c - cdef xmlNode* c_node - c = 0 - c_node = (self._c_node.properties) - while c_node is not NULL: - if c_node.type == tree.XML_ATTRIBUTE_NODE: - c = c + 1 - c_node = c_node.next - return c - - def get(self, key, default=None): - try: - return self.__getitem__(key) - except KeyError: - return default - - def keys(self): - result = [] - cdef xmlNode* c_node - c_node = (self._c_node.properties) - while c_node is not NULL: - if c_node.type == tree.XML_ATTRIBUTE_NODE: - python.PyList_Append(result, _namespacedName(c_node)) - c_node = c_node.next - return result - - def __iter__(self): - return iter(self.keys()) - - def iterkeys(self): - return iter(self.keys()) - - def values(self): - cdef xmlNode* c_node - result = [] - c_node = (self._c_node.properties) - while c_node is not NULL: - if c_node.type == tree.XML_ATTRIBUTE_NODE: - python.PyList_Append( - result, _attributeValue(self._c_node, c_node)) - c_node = c_node.next - return result - - def itervalues(self): - return iter(self.values()) - - def items(self): - result = [] - cdef xmlNode* c_node - c_node = (self._c_node.properties) - while c_node is not NULL: - if c_node.type == tree.XML_ATTRIBUTE_NODE: - 
python.PyList_Append(result, ( - _namespacedName(c_node), - _attributeValue(self._c_node, c_node) - )) - c_node = c_node.next - return result - - def iteritems(self): - return iter(self.items()) - - def has_key(self, key): - cdef xmlNs* c_ns - cdef char* result - cdef char* c_tag - ns, tag = _getNsTag(key) - c_tag = _cstr(tag) - if ns is None: - result = tree.xmlGetNoNsProp(self._c_node, c_tag) - else: - result = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns)) - if result is not NULL: - tree.xmlFree(result) - return True - else: - return False - - def __contains__(self, key): - cdef xmlNs* c_ns - cdef char* result - cdef char* c_tag - ns, tag = _getNsTag(key) - c_tag = _cstr(tag) - if ns is None: - result = tree.xmlGetNoNsProp(self._c_node, c_tag) - else: - result = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns)) - if result is not NULL: - tree.xmlFree(result) - return True - else: - return False - -cdef _Attrib _attribFactory(_Document doc, xmlNode* c_node): - cdef _Attrib result - result = getProxy(c_node, PROXY_ATTRIB) - if result is not None: - return result - result = _Attrib() - result._doc = doc - result._c_node = c_node - result._proxy_type = PROXY_ATTRIB - registerProxy(result, PROXY_ATTRIB) - return result - -ctypedef xmlNode* (*_node_to_node_function)(xmlNode*) - -cdef class ElementChildIterator: - # we keep Python references here to control GC - cdef _NodeBase _node - cdef _node_to_node_function _next_element - def __init__(self, _NodeBase node, reversed=False): # Python ref! 
- cdef xmlNode* c_node - if reversed: - c_node = _findChildBackwards(node._c_node, 0) - self._next_element = _previousElement - else: - c_node = _findChildForwards(node._c_node, 0) - self._next_element = _nextElement - if c_node is NULL: - self._node = None - else: - self._node = _elementFactory(node._doc, c_node) - def __iter__(self): - return self - def __next__(self): - cdef xmlNode* c_node - cdef _NodeBase current_node - # Python ref: - current_node = self._node - if current_node is None: - raise StopIteration - c_node = self._next_element(current_node._c_node) - if c_node is NULL: - self._node = None - else: - # Python ref: - self._node = _elementFactory(current_node._doc, c_node) - return current_node - -cdef class ElementDepthFirstIterator: - """Iterates over an element and its sub-elements in document order (depth - first pre-order).""" - # we keep Python references here to control GC - # keep next node to return and a stack of position state in the tree - cdef object _stack - cdef _NodeBase _next_node - def __init__(self, _NodeBase node not None): - cdef xmlNode* c_node - self._next_node = node - self._stack = [] - self._findAndPushNextNode(node) - def __iter__(self): - return self - def __next__(self): - cdef xmlNode* c_node - cdef _NodeBase next_node - current_node = self._next_node - if current_node is None: - raise StopIteration - stack = self._stack - if python.PyList_GET_SIZE(stack) == 0: - self._next_node = None - return current_node - next_node = stack[-1] - self._next_node = next_node - self._findAndPushNextNode(next_node) - return current_node - - cdef void _findAndPushNextNode(self, _NodeBase node): - cdef xmlNode* c_node - stack = self._stack - # try next child level until we hit a leaf - c_node = _findChildForwards(node._c_node, 0) - if c_node is NULL: - pop = stack.pop - while c_node is NULL and python.PyList_GET_SIZE(stack): - # walk up the stack until we find a sibling - node = pop() - c_node = _nextElement(node._c_node) - if c_node is not 
NULL: - python.PyList_Append( - stack, _elementFactory(node._doc, c_node)) - -cdef class ElementTagFilter: - cdef object _iterator - cdef object _pystrings - cdef char* _href - cdef char* _name - def __init__(self, element_iterator, tag): - self._iterator = iter(element_iterator) - ns_href, name = _getNsTag(tag) - self._pystrings = (ns_href, name) # keep Python references - self._name = _cstr(name) - if ns_href is None: - self._href = NULL - else: - self._href = _cstr(ns_href) - def __iter__(self): - return self - def __next__(self): - cdef _NodeBase node - while 1: - node = self._iterator.next() - if self._tagMatches(node._c_node): - return node - - cdef int _tagMatches(self, xmlNode* c_node): - if tree.strcmp(c_node.name, self._name) == 0: - if c_node.ns == NULL or c_node.ns.href == NULL: - return self._href == NULL - else: - return tree.strcmp(c_node.ns.href, self._href) == 0 - return 0 - -cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf, - object attrib, object extra) except NULL: - cdef xmlNode* c_node - if extra: - if attrib is None: - attrib = extra - else: - attrib.update(extra) - c_node = tree.xmlNewDocNode(c_doc, NULL, _cstr(name_utf), NULL) - if attrib: - for name, value in attrib.items(): - attr_name_utf = _utf8(name) - value_utf = _utf8(value) - tree.xmlNewProp(c_node, _cstr(attr_name_utf), _cstr(value_utf)) - return c_node - -cdef xmlNode* _createComment(xmlDoc* c_doc, char* text): - cdef xmlNode* c_node - c_node = tree.xmlNewDocComment(c_doc, text) - return c_node - - -# module-level API for ElementTree - -def Element(_tag, attrib=None, nsmap=None, **_extra): - cdef xmlNode* c_node - cdef xmlDoc* c_doc - cdef _Document doc - ns_utf, name_utf = _getNsTag(_tag) - c_doc = _newDoc() - c_node = _createElement(c_doc, name_utf, attrib, _extra) - tree.xmlDocSetRootElement(c_doc, c_node) - doc = _documentFactory(c_doc, None) - # add namespaces to node if necessary - doc._setNodeNamespaces(c_node, ns_utf, nsmap) - return _elementFactory(doc, c_node) 
- -def Comment(text=None): - cdef _Document doc - cdef xmlNode* c_node - cdef xmlDoc* c_doc - if text is None: - text = ' ' - else: - text = ' %s ' % _utf8(text) - c_doc = _newDoc() - doc = _documentFactory(c_doc, None) - c_node = _createComment(c_doc, text) - tree.xmlAddChild(c_doc, c_node) - return _commentFactory(doc, c_node) - -def SubElement(_Element _parent not None, _tag, - attrib=None, nsmap=None, **_extra): - cdef xmlNode* c_node - cdef _Document doc - ns_utf, name_utf = _getNsTag(_tag) - doc = _parent._doc - c_node = _createElement(doc._c_doc, name_utf, attrib, _extra) - tree.xmlAddChild(_parent._c_node, c_node) - # add namespaces to node if necessary - doc._setNodeNamespaces(c_node, ns_utf, nsmap) - return _elementFactory(doc, c_node) - -def ElementTree(_Element element=None, file=None, parser=None): - cdef xmlNode* c_next - cdef xmlNode* c_node - cdef xmlNode* c_node_copy - cdef xmlDoc* c_doc - cdef _ElementTree etree - cdef _Document doc - - if element is not None: - doc = element._doc - elif file is not None: - doc = _parseDocument(file, parser) - else: - c_doc = _newDoc() - doc = _documentFactory(c_doc, parser) - - etree = _elementTreeFactory(doc, element) - -## # XXX what if element and file are both not None? 
-## if element is not None: -## c_next = element._c_node.next -## tree.xmlDocSetRootElement(etree._c_doc, element._c_node) -## _moveTail(c_next, element._c_node) -## changeDocumentBelow(element, etree) - - return etree - -def HTML(text): - cdef _Document doc - doc = _parseMemoryDocument(text, __DEFAULT_HTML_PARSER) - return doc.getroot() - -def XML(text): - cdef _Document doc - doc = _parseMemoryDocument(text, __DEFAULT_XML_PARSER) - return doc.getroot() - -fromstring = XML - -cdef class QName: - cdef readonly object text - def __init__(self, text_or_uri, tag=None): - if tag is not None: - text_or_uri = "{%s}%s" % (text_or_uri, tag) - elif not python.PyString_Check(text_or_uri) and \ - not python.PyUnicode_Check(text_or_uri): - text_or_uri = str(text_or_uri) - self.text = text_or_uri - def __str__(self): - return self.text - def __hash__(self): - return self.text.__hash__() - -def iselement(element): - return isinstance(element, _Element) - -def dump(_NodeBase elem): - assert elem is not None, "Must supply element." 
- # better, but not ET compatible : "_NodeBase elem not None" - _dumpToFile(sys.stdout, elem._doc._c_doc, elem._c_node) - -def tostring(_NodeBase element, encoding='us-ascii'): - cdef _Document doc - cdef tree.xmlOutputBuffer* c_buffer - cdef tree.xmlCharEncodingHandler* enchandler - cdef char* enc - - assert element is not None - # better, but not ET compatible : "_NodeBase element not None" - - #if encoding is None: - # encoding = 'UTF-8' - if encoding in ('utf8', 'UTF8', 'utf-8'): - encoding = 'UTF-8' - doc = element._doc - enc = _cstr(encoding) - # it is necessary to *and* find the encoding handler *and* use - # encoding during output - enchandler = tree.xmlFindCharEncodingHandler(enc) - c_buffer = tree.xmlAllocOutputBuffer(enchandler) - tree.xmlNodeDumpOutput(c_buffer, doc._c_doc, element._c_node, 0, 0, - enc) - _dumpNextNode(c_buffer, doc._c_doc, element._c_node, enc) - tree.xmlOutputBufferFlush(c_buffer) - if c_buffer.conv is not NULL: - result = tree.xmlBufferContent(c_buffer.conv) - else: - result = tree.xmlBufferContent(c_buffer.buffer) - tree.xmlOutputBufferClose(c_buffer) - return result - -def parse(source, parser=None): - """Return an ElementTree object loaded with source elements. If no parser - is provided as second argument, the default parser is used. - """ - cdef _Document doc - doc = _parseDocument(source, parser) - return ElementTree(doc.getroot()) - - -# include submodules -include "proxy.pxi" # Proxy handling (element backpointers/memory/etc.) 
-include "xmlerror.pxi" # error and log handling -include "nsclasses.pxi" # Namespace implementation and registry -include "docloader.pxi" # Support for custom document loaders -include "parser.pxi" # XML Parser -include "xmlid.pxi" # XMLID and IDDict -include "extensions.pxi" # XPath/XSLT extension functions -include "xpath.pxi" # XPath evaluation -include "xslt.pxi" # XSL transformations - - -################################################################################ -# Validation - -cdef class _Validator: - "Base class for XML validators." - cdef _ErrorLog _error_log - def __init__(self): - self._error_log = _ErrorLog() - - def validate(self, etree): - """Validate the document using this schema. - - Returns true if document is valid, false if not.""" - return self(etree) - - def assertValid(self, etree): - "Raises DocumentInvalid if the document does not comply with the schema." - if not self(etree): - raise DocumentInvalid, "Document does not comply with schema" - - def assert_(self, etree): - "Raises AssertionError if the document does not comply with the schema." 
- if not self(etree): - raise AssertionError, "Document does not comply with schema" - - property error_log: - def __get__(self): - return self._error_log.copy() - -include "relaxng.pxi" # RelaxNG -include "xmlschema.pxi" # XMLSchema - - -################################################################################ -# Private helper functions cdef _Document _documentOrRaise(object input): cdef _Document doc Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Fri Apr 28 21:56:36 2006 @@ -2,9 +2,6 @@ from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement from python cimport isinstance, issubclass, hasattr, callable from python cimport iter, str, _cstr -cimport xpath -cimport xslt -cimport xmlerror cimport xinclude cimport c14n cimport cstd @@ -191,27 +188,6 @@ if node_ns_utf is not None: self._setNodeNs(c_node, node_ns_utf) -cdef _Document _parseDocument(source, parser): - cdef xmlDoc* c_doc - filename = _getFilenameForFile(source) - # Support for unamed file-like object (StringIO, urlgrabber.urlopen, ...) 
- if not filename and hasattr(source, 'read'): - return _parseMemoryDocument(source.read(), parser) - - # Otherwise parse the file directly from the filesystem - if filename is None: - filename = source - # open filename - c_doc = _parseDocFromFile(_utf8(filename), parser) - return _documentFactory(c_doc, parser) - -cdef _Document _parseMemoryDocument(text, parser): - cdef xmlDoc* c_doc - if python.PyUnicode_Check(text): - text = _stripDeclaration(_utf8(text)) - c_doc = _parseDoc(text, parser) - return _documentFactory(c_doc, parser) - cdef _Document _documentFactory(xmlDoc* c_doc, parser): cdef _Document result result = _Document() @@ -221,15 +197,6 @@ parser = __DEFAULT_PARSER result._parser = parser.copy() return result - -# to help with debugging -cdef void displayNode(xmlNode* c_node, indent): - cdef xmlNode* c_child - print indent * ' ', c_node - c_child = c_node.children - while c_child is not NULL: - displayNode(c_child, indent + 1) - c_child = c_child.next cdef class _NodeBase: """Base class to reference a document object and a libxml node. @@ -1353,7 +1320,8 @@ # include submodules include "proxy.pxi" # Proxy handling (element backpointers/memory/etc.) 
-include "xmlerror.pxi" # error and log handling +include "apihelpers.pxi" # Private helper functions +include "xmlerror.pxi" # Error and log handling include "nsclasses.pxi" # Namespace implementation and registry include "docloader.pxi" # Support for custom document loaders include "parser.pxi" # XML Parser @@ -1394,391 +1362,3 @@ include "relaxng.pxi" # RelaxNG include "xmlschema.pxi" # XMLSchema - - -################################################################################ -# Private helper functions - -cdef _Document _documentOrRaise(object input): - cdef _Document doc - doc = _documentOf(input) - if doc is None: - raise TypeError, "Invalid input object: %s" % type(input) - else: - return doc - -cdef _Document _documentOf(object input): - # call this to get the document of a - # _Document, _ElementTree or _NodeBase object - if isinstance(input, _ElementTree): - return (<_ElementTree>input)._doc - elif isinstance(input, _NodeBase): - return (<_NodeBase>input)._doc - elif isinstance(input, _Document): - return <_Document>input - else: - return None - -cdef _NodeBase _rootNodeOf(object input): - # call this to get the root node of a - # _Document, _ElementTree or _NodeBase object - if isinstance(input, _ElementTree): - return (<_ElementTree>input)._context_node - elif isinstance(input, _NodeBase): - return <_NodeBase>input - elif isinstance(input, _Document): - return (<_Document>input).getroot() - else: - return None - -cdef xmlDoc* _fakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node): - # build a temporary document that has the given node as root node - # note that copy and original must not be modified during its lifetime!! - # always call _destroyFakeDoc() after use! - cdef xmlNode* c_child - cdef xmlNode* c_root - cdef xmlDoc* c_doc - c_root = tree.xmlDocGetRootElement(c_base_doc) - if c_root == c_node: - # already the root node - return c_base_doc - - c_doc = tree.xmlCopyDoc(c_base_doc, 0) # non recursive! 
- c_root = tree.xmlDocCopyNode(c_node, c_doc, 2) # non recursive! - - c_root.children = c_node.children - c_root.last = c_node.last - c_root.next = c_root.prev = c_root.parent = NULL - - # store original node - c_root._private = c_node - - # divert parent pointers of children - c_child = c_root.children - while c_child is not NULL: - c_child.parent = c_root - c_child = c_child.next - - c_doc.children = c_root - return c_doc - -cdef void _destroyFakeDoc(xmlDoc* c_base_doc, xmlDoc* c_doc): - # delete a temporary document - cdef xmlNode* c_child - cdef xmlNode* c_parent - cdef xmlNode* c_root - if c_doc != c_base_doc: - c_root = tree.xmlDocGetRootElement(c_doc) - - # restore parent pointers of children - c_parent = c_root._private - c_child = c_root.children - while c_child is not NULL: - c_child.parent = c_parent - c_child = c_child.next - - # prevent recursive removal of children - c_root.children = c_root.last = c_root._private = NULL - tree.xmlFreeDoc(c_doc) - -cdef object _attributeValue(xmlNode* c_element, xmlNode* c_attrib_node): - cdef char* value - if c_attrib_node.ns is NULL or c_attrib_node.ns.href is NULL: - value = tree.xmlGetNoNsProp(c_element, c_attrib_node.name) - else: - value = tree.xmlGetNsProp(c_element, c_attrib_node.name, - c_attrib_node.ns.href) - return funicode(value) - -cdef _dumpToFile(f, xmlDoc* c_doc, xmlNode* c_node): - cdef python.PyObject* o - cdef tree.xmlOutputBuffer* c_buffer - - if not python.PyFile_Check(f): - raise ValueError, "Not a file" - o = f - c_buffer = tree.xmlOutputBufferCreateFile(python.PyFile_AsFile(o), NULL) - tree.xmlNodeDumpOutput(c_buffer, c_doc, c_node, 0, 0, NULL) - # dump next node if it's a text node - _dumpNextNode(c_buffer, c_doc, c_node, NULL) - tree.xmlOutputBufferWriteString(c_buffer, '\n') - tree.xmlOutputBufferFlush(c_buffer) - -cdef _dumpNextNode(tree.xmlOutputBuffer* c_buffer, xmlDoc* c_doc, - xmlNode* c_node, char* encoding): - cdef xmlNode* c_next - c_next = c_node.next - if c_next is not NULL and 
c_next.type == tree.XML_TEXT_NODE: - tree.xmlNodeDumpOutput(c_buffer, c_doc, c_next, 0, 0, encoding) - -cdef object _stripDeclaration(object xml_string): - xml_string = xml_string.strip() - if xml_string[:5] == '') - if i != -1: - if xml_string[i+2:i+3] == '\n': - i = i+1 - xml_string = xml_string[i + 2:] - return xml_string - -cdef _collectText(xmlNode* c_node): - """Collect all text nodes and return them as a unicode string. - - Start collecting at c_node. - - If there was no text to collect, return None - """ - cdef int scount - cdef char* text - cdef xmlNode* c_node_cur - # check for multiple text nodes - scount = 0 - text = NULL - c_node_cur = c_node - while c_node_cur is not NULL and c_node_cur.type == tree.XML_TEXT_NODE: - if c_node_cur.content[0] != c'\0': - text = c_node_cur.content - scount = scount + 1 - c_node_cur = c_node_cur.next - - # handle two most common cases first - if text is NULL: - return None - if scount == 1: - return funicode(text) - - # the rest is not performance critical anymore - result = '' - while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE: - result = result + c_node.content - c_node = c_node.next - return funicode(result) - -cdef _removeText(xmlNode* c_node): - """Remove all text nodes. - - Start removing at c_node. - """ - cdef xmlNode* c_next - while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE: - c_next = c_node.next - tree.xmlUnlinkNode(c_node) - # XXX cannot safely free in case of direct text node proxies.. - tree.xmlFreeNode(c_node) - c_node = c_next - -cdef xmlNode* _findChild(xmlNode* c_node, int index): - if index < 0: - return _findChildBackwards(c_node, -index - 1) - else: - return _findChildForwards(c_node, index) - -cdef xmlNode* _findChildForwards(xmlNode* c_node, int index): - """Return child element of c_node with index, or return NULL if not found. 
- """ - cdef xmlNode* c_child - cdef int c - c_child = c_node.children - c = 0 - while c_child is not NULL: - if _isElement(c_child): - if c == index: - return c_child - c = c + 1 - c_child = c_child.next - else: - return NULL - -cdef xmlNode* _findChildBackwards(xmlNode* c_node, int index): - """Return child element of c_node with index, or return NULL if not found. - Search from the end. - """ - cdef xmlNode* c_child - cdef int c - c_child = c_node.last - c = 0 - while c_child is not NULL: - if _isElement(c_child): - if c == index: - return c_child - c = c + 1 - c_child = c_child.prev - else: - return NULL - -cdef xmlNode* _nextElement(xmlNode* c_node): - """Given a node, find the next sibling that is an element. - """ - c_node = c_node.next - while c_node is not NULL: - if _isElement(c_node): - return c_node - c_node = c_node.next - return NULL - -cdef xmlNode* _previousElement(xmlNode* c_node): - """Given a node, find the next sibling that is an element. - """ - c_node = c_node.prev - while c_node is not NULL: - if _isElement(c_node): - return c_node - c_node = c_node.prev - return NULL - -cdef void _removeNode(xmlNode* c_node): - """Unlink and free a node and subnodes if possible. - """ - tree.xmlUnlinkNode(c_node) - attemptDeallocation(c_node) - -cdef void _moveTail(xmlNode* c_tail, xmlNode* c_target): - cdef xmlNode* c_next - # tail support: look for any text nodes trailing this node and - # move them too - while c_tail is not NULL and c_tail.type == tree.XML_TEXT_NODE: - c_next = c_tail.next - tree.xmlUnlinkNode(c_tail) - tree.xmlAddNextSibling(c_target, c_tail) - c_target = c_tail - c_tail = c_next - -### see etree.h: -## cdef int _isElement(xmlNode* c_node): -## return (c_node.type == tree.XML_ELEMENT_NODE or -## c_node.type == tree.XML_COMMENT_NODE) - -cdef xmlNode* _deleteSlice(xmlNode* c_node, int start, int stop): - """Delete slice, starting with c_node, start counting at start, end at stop. 
- """ - cdef xmlNode* c_next - cdef int c - if c_node is NULL: - return NULL - # now start deleting nodes - c = start - while c_node is not NULL and c < stop: - c_next = c_node.next - if _isElement(c_node): - _removeText(c_node.next) - c_next = c_node.next - _removeNode(c_node) - c = c + 1 - c_node = c_next - return c_node - -cdef int isutf8(char* s): - cdef char c - c = s[0] - while c != c'\0': - if c & 0x80: - return 1 - s = s + 1 - c = s[0] - return 0 - -cdef object funicode(char* s): - if isutf8(s): - return python.PyUnicode_DecodeUTF8(s, tree.strlen(s), NULL) - return python.PyString_FromString(s) - -cdef object _utf8(object s): - if python.PyString_Check(s): - assert not isutf8(_cstr(s)), "All strings must be Unicode or ASCII" - return s - elif python.PyUnicode_Check(s): - return python.PyUnicode_AsUTF8String(s) - else: - raise TypeError, "Argument must be string or unicode." - -cdef _getNsTag(tag): - """Given a tag, find namespace URI and tag name. - Return None for NS uri if no namespace URI available. - """ - cdef char* c_tag - cdef char* c_pos - cdef int nslen - if isinstance(tag, QName): - tag = (tag).text - tag = _utf8(tag) - c_tag = _cstr(tag) - if c_tag[0] == c'{': - c_pos = tree.xmlStrchr(c_tag+1, c'}') - if c_pos is NULL: - raise ValueError, "Invalid tag name" - nslen = c_pos - c_tag - 1 - ns = python.PyString_FromStringAndSize(c_tag+1, nslen) - tag = python.PyString_FromString(c_pos+1) - else: - ns = None - return ns, tag - -cdef object _namespacedName(xmlNode* c_node): - cdef char* href - cdef char* name - name = c_node.name - if c_node.ns is NULL or c_node.ns.href is NULL: - return funicode(name) - else: - href = c_node.ns.href - s = python.PyString_FromFormat("{%s}%s", href, name) - if isutf8(href) or isutf8(name): - return python.PyUnicode_FromEncodedObject(s, 'UTF-8', NULL) - else: - return s - -cdef _getFilenameForFile(source): - """Given a Python File or Gzip object, give filename back. - - Returns None if not a file object. 
- """ - # file instances have a name attribute - if hasattr(source, 'name'): - return source.name - # gzip file instances have a filename attribute - if hasattr(source, 'filename'): - return source.filename - return None - -cdef void changeDocumentBelow(_NodeBase node, _Document doc, int recursive): - """For a node and all nodes below, change document. - - A node can change document in certain operations as an XML - subtree can move. This updates all possible proxies in the - tree below (including the current node). It also reconciliates - namespaces so they're correct inside the new environment. - """ - if recursive: - changeDocumentBelowHelper(node._c_node, doc) - tree.xmlReconciliateNs(doc._c_doc, node._c_node) - -cdef void changeDocumentBelowHelper(xmlNode* c_node, _Document doc): - cdef ProxyRef* ref - cdef xmlNode* c_current - cdef xmlAttr* c_attr_current - cdef _NodeBase proxy - - if c_node is NULL: - return - # different _c_doc - c_node.doc = doc._c_doc - - if c_node._private is not NULL: - ref = c_node._private - while ref is not NULL: - proxy = <_NodeBase>ref.proxy - proxy._doc = doc - ref = ref.next - - # adjust all children - c_current = c_node.children - while c_current is not NULL: - changeDocumentBelowHelper(c_current, doc) - c_current = c_current.next - - # adjust all attributes - c_attr_current = c_node.properties - while c_attr_current is not NULL: - changeDocumentBelowHelper(c_current, doc) - c_attr_current = c_attr_current.next - Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Fri Apr 28 21:56:36 2006 @@ -1,5 +1,7 @@ # supports for extension functions in XPath and XSLT +cimport xpath + class XPathError(LxmlError): pass Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) 
+++ lxml/trunk/src/lxml/parser.pxi Fri Apr 28 21:56:36 2006 @@ -465,3 +465,29 @@ result = tree.xmlNewDoc("1.0") __GLOBAL_PARSER_CONTEXT._initDocDict(result) return result + +############################################################ +## API level helper functions for _Document creation +############################################################ + +cdef _Document _parseDocument(source, parser): + cdef xmlDoc* c_doc + filename = _getFilenameForFile(source) + # Support for unnamed file-like object (StringIO, urlgrabber.urlopen, ...) + if not filename and hasattr(source, 'read'): + return _parseMemoryDocument(source.read(), parser) + + # Otherwise parse the file directly from the filesystem + if filename is None: + filename = source + # open filename + c_doc = _parseDocFromFile(_utf8(filename), parser) + return _documentFactory(c_doc, parser) + +cdef _Document _parseMemoryDocument(text, parser): + cdef xmlDoc* c_doc + if python.PyUnicode_Check(text): + text = _stripDeclaration(_utf8(text)) + c_doc = _parseDoc(text, parser) + return _documentFactory(c_doc, parser) + Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Fri Apr 28 21:56:36 2006 @@ -1,5 +1,6 @@ -################################################################################ -# DEBUG setup +# DEBUG and error logging + +cimport xmlerror # module level API functions Modified: lxml/trunk/src/lxml/xpath.pxi ============================================================================== --- lxml/trunk/src/lxml/xpath.pxi (original) +++ lxml/trunk/src/lxml/xpath.pxi Fri Apr 28 21:56:36 2006 @@ -1,4 +1,4 @@ -# XSLT and XPath classes, supports for extension functions +# XPath evaluation class XPathContextError(XPathError): pass Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- 
lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Fri Apr 28 21:56:36 2006 @@ -1,5 +1,7 @@ # XSLT and XPath classes, supports for extension functions +cimport xslt + class XSLTError(LxmlError): pass From scoder at codespeak.net Sun Apr 30 09:17:20 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Apr 30 09:17:24 2006 Subject: [Lxml-checkins] r26590 - lxml/trunk/src/lxml Message-ID: <20060430071720.1CC3E10081@code0.codespeak.net> Author: scoder Date: Sun Apr 30 09:17:17 2006 New Revision: 26590 Modified: lxml/trunk/src/lxml/xmlerror.pxi lxml/trunk/src/lxml/xslt.pxi Log: make etree.pyx compilable without libxslt by commenting out 'include xslt.pxi' line, will still have ElementTree.xslt() function that raises NameError for 'XSLT', but otherwise imports cleanly and works Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Sun Apr 30 09:17:17 2006 @@ -12,7 +12,11 @@ def initThreadLogging(): "Setup logging for the current thread." 
_logLibxmlErrors() - _logLibxsltErrors() + try: + _logLibxsltErrors() + except NameError: + # compiled without libxslt + pass # Logging classes @@ -299,9 +303,6 @@ xmlerror.xmlSetGenericErrorFunc(NULL, _nullGenericErrorFunc) xmlerror.xmlSetStructuredErrorFunc(NULL, _receiveError) -cdef void _logLibxsltErrors(): - xslt.xsltSetGenericErrorFunc(NULL, _receiveGenericError) - # init global logging initThreadLogging() Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Sun Apr 30 09:17:17 2006 @@ -18,6 +18,10 @@ pass +cdef void _logLibxsltErrors(): + xslt.xsltSetGenericErrorFunc(NULL, _receiveGenericError) + + ################################################################################ # XSLT document loaders From scoder at codespeak.net Sun Apr 30 09:24:06 2006 From: scoder at codespeak.net (scoder@codespeak.net) Date: Sun Apr 30 09:24:07 2006 Subject: [Lxml-checkins] r26591 - in lxml/trunk: . 
src/lxml Message-ID: <20060430072406.CFB8E10081@code0.codespeak.net> Author: scoder Date: Sun Apr 30 09:24:04 2006 New Revision: 26591 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/etree.pyx Log: CHANGES.txt: mention possibility of compiling without libxslt Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Apr 30 09:24:04 2006 @@ -7,6 +7,9 @@ Features added -------------- +* etree module can be compiled without libxslt by commenting out the line + 'include "xslt.pxi"' at the end of the etree.pyx source file + * Error reporting now also works in XSLT * Support for custom document loaders (URI resolvers) in parsers and XSLT, Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Apr 30 09:24:04 2006 @@ -1328,7 +1328,10 @@ include "xmlid.pxi" # XMLID and IDDict include "extensions.pxi" # XPath/XSLT extension functions include "xpath.pxi" # XPath evaluation -include "xslt.pxi" # XSL transformations + +# XSL transformations +# comment out to compile without libxslt +include "xslt.pxi" ################################################################################ From scoder at codespeak.net Tue Apr 25 15:43:33 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 25 Apr 2006 13:43:33 -0000 Subject: [Lxml-checkins] r26334 - in lxml/pyrex: . 
Demos Demos/callback Doc Pyrex Pyrex/Compiler Pyrex/Distutils Pyrex/Mac Message-ID: <20060425134333.B42A610091@code0.codespeak.net> Author: scoder Date: Tue Apr 25 15:43:21 2006 New Revision: 26334 Modified: lxml/pyrex/CHANGES.txt lxml/pyrex/Demos/callback/cheese.pyx lxml/pyrex/Demos/primes.pyx lxml/pyrex/Doc/FAQ.html lxml/pyrex/Doc/extension_types.html lxml/pyrex/Doc/overview.html lxml/pyrex/Doc/sharing.html lxml/pyrex/Doc/special_methods.html lxml/pyrex/Pyrex/Compiler/CmdLine.py lxml/pyrex/Pyrex/Compiler/Code.py lxml/pyrex/Pyrex/Compiler/ExprNodes.py lxml/pyrex/Pyrex/Compiler/Lexicon.py lxml/pyrex/Pyrex/Compiler/Main.py lxml/pyrex/Pyrex/Compiler/Naming.py lxml/pyrex/Pyrex/Compiler/Nodes.py lxml/pyrex/Pyrex/Compiler/Parsing.py lxml/pyrex/Pyrex/Compiler/PyrexTypes.py lxml/pyrex/Pyrex/Compiler/Scanning.py lxml/pyrex/Pyrex/Compiler/Symtab.py lxml/pyrex/Pyrex/Compiler/TypeSlots.py lxml/pyrex/Pyrex/Compiler/Version.py lxml/pyrex/Pyrex/Distutils/build_ext.py lxml/pyrex/Pyrex/Mac/DarwinSystem.py lxml/pyrex/Pyrex/Utils.py lxml/pyrex/ToDo.txt Log: updated to Pyrex 0.9.4.1, added support for Python 2.5 Modified: lxml/pyrex/CHANGES.txt ============================================================================== --- lxml/pyrex/CHANGES.txt (original) +++ lxml/pyrex/CHANGES.txt Tue Apr 25 15:43:21 2006 @@ -1,293 +1,403 @@ -0.9.3.1 +0.9.4.1 ------- Bug fixes: - - Fix generated code to compile with GCC 4.0 (remove usage of the - cast-as-lvalue extension). - ["Jeremy Katz" ] + - Fixed indentation problem in Pyrex.Distutils.build_ext. + [Oliver Grisel ] - - Fix interoperability with Python 2.4. - ["Bob Ippolito" ] -0.9.3 +0.9.4 ----- -Enhancements: +Improvements: - - Types defined with a ctypedef in a 'cdef extern from' block - are now referred to by the typedef name in generated C code, - so it is no longer necessary to match the type in the C - header file exactly. + - All use of lvalue casts has been eliminated, for + compatibility with gcc4. 
+ + - PyMODINIT_FUNC now used to declare the module init function. + + - Generated code should be compilable as either C or C++. + When compiling as C++, "extern C" is used where appropriate + to preserve linkage semantics. C++ functions still cannot + be called yet. + + - An extension type can be made weak-referenceable by + giving it a C attribute of type object called __weakref__. + + - Source files opened in universal newlines mode. + + - Support for public extension type C attributes of type + long long and unsigned long long added (but not tested). + [Sam Rushing ] + + - Distutils include directories now passed to Pyrex compiler. + [Konrad Hinsen ] + + - Integer constants with an "L" suffix are now allowed + and are converted to Python long integers. + [Rainer Deyke ] + + - A broken .c file is no longer left behind if there are + compilation errors. + + - Using the result of a Python indexing or attribute access + operation as a char * is no longer considered an error in + most cases, as the former behaviour proved to be more + annoying than helpful. + +Bug fixes: + + - Fixed problems with conversion from Python integers to + C unsigned longs. Now use PyInt_AsUnsignedLongMask and + PyInt_AsUnsignedLongLongMask instead of the PyLong_* + functions (which only work on Python longs). + [Wim Vree ] + + - C unsigned ints now converted to/from Python longs instead + of Python ints to avoid overflow problems. + [Heiko Wundram ] + + - Correct PyArg_ParseTuple format characters now used for + unsigned types. [Jeff Bowden ] + + - Nonzero return value from a base class tp_traverse call + is handled. + + - Taking sizeof an incomplete type caused a crash while + producing an error message. [Drew Perttula ] + + - If a module cimported itself, definitions of global variables + were generated twice. [Parzival Herzog ] + + - Distutils extension updated to handle changed signature of + swig_sources(). [David M. 
Cooke ] + + - Incorrect C code generated for a raw string containing a double + quote preceded by a backslash. [Thomas Drake ] + + - Declaration of public C function with an exception value written + to generated .pxi file without the except clause. + [Robby Dermody ] + + - __delitem__ method of an extension type with no __setitem__ + did not get called. [Richard Boulton ] + + - A spurious Py_INCREF was generated when a return statement + required a type test. [Jonathan Doda ] + + - Casting a value to a function pointer and then immediately + calling it generated a cast to a function instead of a cast + to a function pointer. [Simon Burton ] + + - Py_TPFLAGS_HAVE_GC was not being set on an extension type that + inherited from an external extension type that used GC but did + not itself have any PyObject* attributes. + [Michael Hordijk ] + + - A return statement inside a for statement leaked a reference + to the loop's iterator. + [Jürgen Kartnaller ] + + - Full module name now appears in __module__ attribute of classes + and extension types, provided a correct dotted name is used + for the .pyx file. [Giovanni Bajo ] + + - Public extension type with no C attributes produced an + invalid .pxi file. [Simon Burton ] + + - Using a dict constructor as the second operand of a boolean + expression crashed the Pyrex compiler. + [Stefan Behnel ] + + - A C declaration list ending with a comma resulted in invalid + C code being generated. [Alex Coventry ] + + - A raw string containing two consecutive backslashes produced + incorrect C code. [Helmut Jarausch ] + + - An error is reported if you attempt to declare a special + method of an extension type using 'cdef' instead of 'def'. + [Sam Rushing ] - - Conversion to/from unsigned long now done with - PyLong_AsUnsignedLong and PyLong_FromUnsignedLong. - [Dug Song ] +0.9.3 +----- - - A struct, union or enum definition in a 'cdef extern from' - block may now be left empty (using 'pass'). 
This can be useful - if you need to declare a variable of that type, but don't need - to refer to any of its members. +Enhancements: - - More flexible about ordering of qualifiers such as 'long' and - 'unsigned'. - ["John (J5) Palmieri" ] + - Types defined with a ctypedef in a 'cdef extern from' block + are now referred to by the typedef name in generated C code, + so it is no longer necessary to match the type in the C + header file exactly. + + - Conversion to/from unsigned long now done with + PyLong_AsUnsignedLong and PyLong_FromUnsignedLong. + [Dug Song ] + + - A struct, union or enum definition in a 'cdef extern from' + block may now be left empty (using 'pass'). This can be useful + if you need to declare a variable of that type, but don't need + to refer to any of its members. + + - More flexible about ordering of qualifiers such as 'long' and + 'unsigned'. + ["John (J5) Palmieri" ] Bug fixes: - - Non-interned string literals used in a Python class - definition did not work. - [Atsuo Ishimoto ] - [Andreas Kostyrka ] - - - Return types of the buffer interface functions for extension - types have been corrected. - [Dug Song ] - - - Added 'static' to declarations of string literals. - [Phil Frost ] - - - Float literals are now copied directly to the C code as written, - to avoid problems with loss of precision. - [Mario Pernici ] - - - Inheriting from an extension type with C methods defined in - another Pyrex module did not work. - [Itamar Shtull-Trauring ] + - Non-interned string literals used in a Python class + definition did not work. + [Atsuo Ishimoto ] + [Andreas Kostyrka ] + + - Return types of the buffer interface functions for extension + types have been corrected. + [Dug Song ] + + - Added 'static' to declarations of string literals. + [Phil Frost ] + + - Float literals are now copied directly to the C code as written, + to avoid problems with loss of precision. 
+ [Mario Pernici ] + + - Inheriting from an extension type with C methods defined in + another Pyrex module did not work. + [Itamar Shtull-Trauring ] 0.9.2.1 ------- Bug fixes: - - Corrected an import statement setup.py, and made it - check for a unix platform in a more reliable way. + - Corrected an import statement setup.py, and made it + check for a unix platform in a more reliable way. 0.9.2 ----- Enhancements: - - Names of Python global variables and attributes are now - interned, and PyObject_GetAttr/SetAttr are used instead - of PyObject_GetAttrString/SetAttrString. String literals - which resemble Python identifiers are also interned. - - - String literals are now converted to Python objects only - once instead of every time they are used. - - - NUL characters are now allowed in Python string literals. - - - Added some missing error checking code to the beginning - of module init functions. It's unlikely the operations - involved would ever fail, but you never know. + - Names of Python global variables and attributes are now + interned, and PyObject_GetAttr/SetAttr are used instead + of PyObject_GetAttrString/SetAttrString. String literals + which resemble Python identifiers are also interned. + + - String literals are now converted to Python objects only + once instead of every time they are used. + + - NUL characters are now allowed in Python string literals. + + - Added some missing error checking code to the beginning + of module init functions. It's unlikely the operations + involved would ever fail, but you never know. Bug fixes: - - Corrected some problems introduced by moving the Plex - package. + - Corrected some problems introduced by moving the Plex + package. 0.9.1.1 ------- Bug fixes: - - Corrected a problem in the setup.py (pyrexc script incorrectly - named). - - - Updated the distutils extension to match changes in the - Pyrex compiler calling interface. 
- - - Doing 'make clean' in Demos/callback was removing a little too - much (that's why cheesefinder.c kept disappearing). + - Corrected a problem in the setup.py (pyrexc script incorrectly + named). + + - Updated the distutils extension to match changes in the + Pyrex compiler calling interface. + + - Doing 'make clean' in Demos/callback was removing a little too + much (that's why cheesefinder.c kept disappearing). 0.9.1 ----- Enhancements: - - A C method can now call an inherited C method by the usual - Python technique. - [Jiba ] - - - The __modname__ of a Python class is now set correctly. - [Paul Prescod ] - - - A MANIFEST.in file has been added to the distribution to - facilitate building rpms. - [contributed by Konrad Hinsen ] + - A C method can now call an inherited C method by the usual + Python technique. + [Jiba ] + + - The __modname__ of a Python class is now set correctly. + [Paul Prescod ] + + - A MANIFEST.in file has been added to the distribution to + facilitate building rpms. + [contributed by Konrad Hinsen ] Bug fixes: - Conditional code now generated to allow for the renaming of LONG_LONG to PY_LONG_LONG that occurred between Python 2.2 and 2.3. - - Header files referenced in cimported modules were not being included. - [Tom Popovich ] - - - References to C functions and variables in a cimported module were - not being recognised if made from within a local scope. - [Tom Popovich ] - - - Spurious declarations in code generated for a "finally" block. - [Brandon Long ] - - - Attempting to return a value from a __contains__ method didn't work. - [Andreas Kostyrka ] - - - Incorrect code generated for an extension type with C methods - inheriting from a base type with no C methods. - [Robin Becker ] - - - Failure to report an error if a C method was defined in the - implementation part of an extension type that was not declared - in the corresponding definition part. Documentation also updated - to explain that this is necessary. 
- [Jiba ] - - - Made it an error to forward-declare an extension type with - a different base class specification from its subsequent - definition. - [Jiba ] - - - C attributes of an extension type were not being propagated - through more than one level of inheritance. - [Jiba ] - - - If a garbage collection occurred early enough in the __new__ - method of an extension type with Python-valued C attributes, - a crash could occur in its tp_traverse function. - [reported by Jiba ] - [fix suggested by Paul Prescod ] - - - An empty vtable struct is no longer generated for extension - types with no C methods. - [Robin Becker ] - - - Memory was leaked in the sq_item function of an extension - type with a __getitem__ method. - [Atsuo Ishimoto ] - - - Code generated to work around a bug in some versions of Python - 2.2 which fails to initialise the tp_free slot correctly in - some circumstances. - [Matthias Baas ] - - - Compiler crash when defining an extension type with a base - class specified by a dotted name. - [Alain Pointdexter ] - - - Referencing an extension type defined in a cimported module - at run time did not work correctly. - [Alain Pointdexter ] - - - Incorrect object struct code generated for an extension type - whose base class was defined in a .pxd file. - [Alain Pointdexter ] - - - Redeclaring a type that wasn't previously an extension type - as an extension type caused a compiler crash. - [Scott Robinson ] - - - Incorrect code was generated for return statements in a - special method with no return value. - [Gary Bishop ] - - - Single-line def statement did not work. - [Francois Pinard ] - + - Header files referenced in cimported modules were not being included. + [Tom Popovich ] + + - References to C functions and variables in a cimported module were + not being recognised if made from within a local scope. + [Tom Popovich ] + + - Spurious declarations in code generated for a "finally" block. 
+ [Brandon Long ] + + - Attempting to return a value from a __contains__ method didn't work. + [Andreas Kostyrka ] + + - Incorrect code generated for an extension type with C methods + inheriting from a base type with no C methods. + [Robin Becker ] + + - Failure to report an error if a C method was defined in the + implementation part of an extension type that was not declared + in the corresponding definition part. Documentation also updated + to explain that this is necessary. + [Jiba ] + + - Made it an error to forward-declare an extension type with + a different base class specification from its subsequent + definition. + [Jiba ] + + - C attributes of an extension type were not being propagated + through more than one level of inheritance. + [Jiba ] + + - If a garbage collection occurred early enough in the __new__ + method of an extension type with Python-valued C attributes, + a crash could occur in its tp_traverse function. + [reported by Jiba ] + [fix suggested by Paul Prescod ] + + - An empty vtable struct is no longer generated for extension + types with no C methods. + [Robin Becker ] + + - Memory was leaked in the sq_item function of an extension + type with a __getitem__ method. + [Atsuo Ishimoto ] + + - Code generated to work around a bug in some versions of Python + 2.2 which fails to initialise the tp_free slot correctly in + some circumstances. + [Matthias Baas ] + + - Compiler crash when defining an extension type with a base + class specified by a dotted name. + [Alain Pointdexter ] + + - Referencing an extension type defined in a cimported module + at run time did not work correctly. + [Alain Pointdexter ] + + - Incorrect object struct code generated for an extension type + whose base class was defined in a .pxd file. + [Alain Pointdexter ] + + - Redeclaring a type that wasn't previously an extension type + as an extension type caused a compiler crash. 
+ [Scott Robinson ] + + - Incorrect code was generated for return statements in a + special method with no return value. + [Gary Bishop ] + + - Single-line def statement did not work. + [Francois Pinard ] + Modifications: - - - Only the last pathname component of the .pyx file is reported in - backtraces now. - [Bryan Weingarten ] - - - Documentation corrected to remove the erroneous statement that - extension classes can have a __del__ method. - [Bryan Weingarten ] - - - Note added to documentation explaining that it is not possible - for an extension type's __new__ method to explicitly call the - inherited __new__ method. - - - The version of Plex included with Pyrex is now installed - as a subpackage of the Pyrex package, rather than as a - top-level package, so as not to interfere with any other - version of Plex the user may have installed. + + - Only the last pathname component of the .pyx file is reported in + backtraces now. + [Bryan Weingarten ] + + - Documentation corrected to remove the erroneous statement that + extension classes can have a __del__ method. + [Bryan Weingarten ] + + - Note added to documentation explaining that it is not possible + for an extension type's __new__ method to explicitly call the + inherited __new__ method. + + - The version of Plex included with Pyrex is now installed + as a subpackage of the Pyrex package, rather than as a + top-level package, so as not to interfere with any other + version of Plex the user may have installed. 0.9 --- New features: - - Extension types can have properties. See the new "Properties" - section in the "Extension Types" page. - - - An extension type can inherit from a builtin type or another - extension type. See "Subclassing" in the "Extension Types" page. - - - Extension types can have C methods, which can be overridden - in derived extension types. See "C Methods" in the "Extension Types" - page. + - Extension types can have properties. 
See the new "Properties" + section in the "Extension Types" page. + + - An extension type can inherit from a builtin type or another + extension type. See "Subclassing" in the "Extension Types" page. + + - Extension types can have C methods, which can be overridden + in derived extension types. See "C Methods" in the "Extension Types" + page. Enhancements: - - - Conversion is now performed between C long longs and Python - long integers without chopping to the size of a C long. - Also the Python PY_LONG_LONG type is now used for long longs - for greater portability. + + - Conversion is now performed between C long longs and Python + long integers without chopping to the size of a C long. + Also the Python PY_LONG_LONG type is now used for long longs + for greater portability. Bug fixes: - - Names were sometimes being generated that were insufficiently - unique in the presence of cimported declarations. - - - Changed the way the included filename table is declared from - char *[] to char **, to stop MSVC from complaining about it - having an unknown size. - [Alexander A Naanou ] - - - Second argument of assert statement was not being coerced - to a Python value. - [Francois Pinard http://www.iro.umontreal.ca/~pinard] - - - Return statement without value wasn't accepted in some - extension type special methods when it should have been. - [Francois Pinard http://www.iro.umontreal.ca/~pinard] - - - Attempting to call a non-function C value crashed the - compiler. - [John J Lee ] - - - Functions declared as "except *" were not returning exceptions. - [John J Lee ] - - - A syntax warning from Plex about assignment to None has - been eliminated. - [Gordon Williams ] - - - Public function declaration with empty argument list was - producing (void) in .pxi file. - [Michael P. Dubner ] - - - Incorrect error signalling code was being generated in the - __hash__ special method of an extension type. 
- + - Names were sometimes being generated that were insufficiently + unique in the presence of cimported declarations. + + - Changed the way the included filename table is declared from + char *[] to char **, to stop MSVC from complaining about it + having an unknown size. + [Alexander A Naanou ] + + - Second argument of assert statement was not being coerced + to a Python value. + [Francois Pinard http://www.iro.umontreal.ca/~pinard] + + - Return statement without value wasn't accepted in some + extension type special methods when it should have been. + [Francois Pinard http://www.iro.umontreal.ca/~pinard] + + - Attempting to call a non-function C value crashed the + compiler. + [John J Lee ] + + - Functions declared as "except *" were not returning exceptions. + [John J Lee ] + + - A syntax warning from Plex about assignment to None has + been eliminated. + [Gordon Williams ] + + - Public function declaration with empty argument list was + producing (void) in .pxi file. + [Michael P. Dubner ] + + - Incorrect error signalling code was being generated in the + __hash__ special method of an extension type. + 0.8.1 ----- Bug fixes: - - Names of structs, unions and enums in external header - files were getting mangled when they shouldn't have been. - [Norman Shelley ] - - - Modified distutils extension so that it will stop before - compiling the C file if the Pyrex compiler reports errors. - [John J Lee ] - + - Names of structs, unions and enums in external header + files were getting mangled when they shouldn't have been. + [Norman Shelley ] + + - Modified distutils extension so that it will stop before + compiling the C file if the Pyrex compiler reports errors. + [John J Lee ] + 0.8 --- @@ -300,54 +410,54 @@ declaration must DECLARE THE MODULE from which the extension type originates. See the new version of the "Extension Types" documentation for details. 
- - This change was made to eliminate the need for Pyrex to be - told the C name of the type object, or for the Pyrex module + + This change was made to eliminate the need for Pyrex to be + told the C name of the type object, or for the Pyrex module to be linked against the object code providing the type object. - + You will have to update any existing external extension type declarations that you are using. I'm sorry about that, but it was too hard to support both the old and new ways. - - - Compile-time importing: A Pyrex module can now import declarations - from another Pyrex module using the new 'cimport' statement. See - the new section on "Sharing Declarations Between Pyrex Modules" in + + - Compile-time importing: A Pyrex module can now import declarations + from another Pyrex module using the new 'cimport' statement. See + the new section on "Sharing Declarations Between Pyrex Modules" in the documentation. Minor improvements: - - An error is reported if you declare a struct, union or - extension type using 'cdef' in one place and 'ctypedef' - in another. - - - Struct, union and extension types can only be forward- - declared using 'cdef', not 'ctypedef' (otherwise invalid - C code would be generated). - - - The 'global' statement can be used at the module level to - declare that a name is a module-level name rather than a - builtin. This can be used to access module attributes such - as __name__ that would otherwise be assumed to be builtins. - [Pat Maupin ] - - - The 'assert' statement now accepts a second argument. - [Francois Pinard ] + - An error is reported if you declare a struct, union or + extension type using 'cdef' in one place and 'ctypedef' + in another. + + - Struct, union and extension types can only be forward- + declared using 'cdef', not 'ctypedef' (otherwise invalid + C code would be generated). + + - The 'global' statement can be used at the module level to + declare that a name is a module-level name rather than a + builtin. 
This can be used to access module attributes such + as __name__ that would otherwise be assumed to be builtins. + [Pat Maupin ] + + - The 'assert' statement now accepts a second argument. + [Francois Pinard ] Bug fixes: - - When using Python 2.3, "True" or "False" could sometimes - turn up in generated code instead of "1" or "0". - [Adam Hixson ] - - - Function return value not always converted to or from a - Python object when it should have been. - - - Certain kinds of error in a function call expression - could crash the compiler. - ["Edward C. Jones" ] - - - Fixed memory leak in functions with * or ** args. - [Alexander A Naanou ] + - When using Python 2.3, "True" or "False" could sometimes + turn up in generated code instead of "1" or "0". + [Adam Hixson ] + + - Function return value not always converted to or from a + Python object when it should have been. + + - Certain kinds of error in a function call expression + could crash the compiler. + ["Edward C. Jones" ] + + - Fixed memory leak in functions with * or ** args. + [Alexander A Naanou ] 0.7.1 @@ -355,86 +465,86 @@ Bug fixes: - - Calling a function declared as returning an extension - type could crash the compiler. - - - A function call with type errors in the argument list - could crash the compiler. - - - An 'else' clause on a for-from statement could crash - the compiler. - - - Incorrect casting code was generated when a generic - object argument of a special method was declared as - being of an extension type. - [Phillip J. Eby ] - - - A blank line that couldn't be interpreted wholly as - a valid indentation sequence caused a syntax error. - In particular, a formfeed character on an otherwise - blank line wasn't accepted. - [Francois Pinard ] - - - Parallel assignments were incorrectly optimised. + - Calling a function declared as returning an extension + type could crash the compiler. - - A bare tuple constructor with an extra comma at the - end of a line caused a syntax error. 
+ - A function call with type errors in the argument list + could crash the compiler. + + - An 'else' clause on a for-from statement could crash + the compiler. + + - Incorrect casting code was generated when a generic + object argument of a special method was declared as + being of an extension type. + [Phillip J. Eby ] + + - A blank line that couldn't be interpreted wholly as + a valid indentation sequence caused a syntax error. + In particular, a formfeed character on an otherwise + blank line wasn't accepted. + [Francois Pinard ] + + - Parallel assignments were incorrectly optimised. + + - A bare tuple constructor with an extra comma at the + end of a line caused a syntax error. 0.7 --- New features: - - Attributes of extension types can be exposed to Python - code, either read/write or read-only. + - Attributes of extension types can be exposed to Python + code, either read/write or read-only. - - Different internal and external names can be specified - for C entities. - - - None is a compile-time constant, and more efficient code - is generated to reference it. - - - Command line options for specifying directories to - search for include files. + - Different internal and external names can be specified + for C entities. + + - None is a compile-time constant, and more efficient code + is generated to reference it. + + - Command line options for specifying directories to + search for include files. Enhancements: - - More efficient code is generated for access to Python - valued C attributes of extension types. - - - Cosmetic code improvement: Less casting back and forth - between extension types and PyObject * when referencing - C members of the object struct. - - - C arguments and variables declared as an extension type - can take the value None. - - - Form feed characters are accepted as whitespace. - - - Function names in tracebacks are qualified with - module name and class name. 
+ - More efficient code is generated for access to Python + valued C attributes of extension types. + + - Cosmetic code improvement: Less casting back and forth + between extension types and PyObject * when referencing + C members of the object struct. + + - C arguments and variables declared as an extension type + can take the value None. + + - Form feed characters are accepted as whitespace. + + - Function names in tracebacks are qualified with + module name and class name. Bug fixes: - - A sufficiently complex expression in a boolean context - could cause code to be generated twice for the same - subexpression. - - - Incorrect casting code was generated when passing an - extension type to a function expecting a generic Python - object. - - - Executable statements are now disallowed inside a - cdef class block (previously they silently caused - crazy C code to be generated). - - - Tracebacks should now report the correct filename for - functions defined in files included with the 'include' - statement. - - - The documentation incorrectly claimed that an extension - type can't have a __del__ method. In fact, it can, and - it behaves as expected. + - A sufficiently complex expression in a boolean context + could cause code to be generated twice for the same + subexpression. + + - Incorrect casting code was generated when passing an + extension type to a function expecting a generic Python + object. + + - Executable statements are now disallowed inside a + cdef class block (previously they silently caused + crazy C code to be generated). + + - Tracebacks should now report the correct filename for + functions defined in files included with the 'include' + statement. + + - The documentation incorrectly claimed that an extension + type can't have a __del__ method. In fact, it can, and + it behaves as expected. 0.6.1 @@ -442,7 +552,7 @@ Bug fixes: - - Fixed broken distutils extension. + - Fixed broken distutils extension. 
@@ -451,102 +561,102 @@ New features: - - Command line options for reporting version number, - requesting a listing file and specifying the name of - the generated C file. - - - An 'include' statement allows inclusion of declarations - from other Pyrex source files. - - - If there are any public declarations, a Pyrex include - file is generated (as well as a .h file) containing - declarations for them. - - - Extension types can be declared public, so their C - attributes are visible to other Pyrex and C code. - - - Try-except statements can now have an 'else' clause. - [Francois Pinard ] - - - Multiple simple statements can be placed on one line - separated by semicolons. - - - A suite consisting of a simple statement list can now - be placed on the same line after the colon in most - cases. - [Francois Pinard ] - - - The automatic coercion of a C string to a C char has - been removed (it proved to be too error-prone). - Instead, there is a new form of literal for C - character constants: c'X' - - - The __get__ special method (used by descriptor objects) - now allows for the possibility of the 2nd or 3rd - arguments being NULL. Also the __set__ method has been - split into two methods, __set__ and __delete__. - [Phillip J. Eby ] - -Bug fixes: - - - Values unpacked into a non-Python destination variable - were not being converted before assignment. - [Gareth Watts ] - - - Hex constants greater than 0x7fffffff caused compiler - to crash. [Gareth Watts ] - - - Type slots are no longer statically initialised with - extern function pointers, to avoid problems with - some compilers. The hack in the distutils extension - to work around this by compiling as C++ has been - disabled. [Phillip J. Eby ] - - - Fixed several more instances of the error-reporting - routine being called with arguments in the wrong - order. Hoping I've *finally* got all of them now... - - - Nested for-from loops used the same control variable. 
- [Sebastien de Menten ] - - - Fixed some other error message related bugs. - [Francois Pinard ] - - - Assigning to slice didn't work. - [Francois Pinard ] - - - Temp variables were being declared as extension - types and then being assigned PyObject *'s. All - Python temp vars are now declared as PyObject *. - [Francois Pinard ] + - Command line options for reporting version number, + requesting a listing file and specifying the name of + the generated C file. + + - An 'include' statement allows inclusion of declarations + from other Pyrex source files. + + - If there are any public declarations, a Pyrex include + file is generated (as well as a .h file) containing + declarations for them. + + - Extension types can be declared public, so their C + attributes are visible to other Pyrex and C code. + + - Try-except statements can now have an 'else' clause. + [Francois Pinard ] + + - Multiple simple statements can be placed on one line + separated by semicolons. + + - A suite consisting of a simple statement list can now + be placed on the same line after the colon in most + cases. + [Francois Pinard ] + + - The automatic coercion of a C string to a C char has + been removed (it proved to be too error-prone). + Instead, there is a new form of literal for C + character constants: c'X' + + - The __get__ special method (used by descriptor objects) + now allows for the possibility of the 2nd or 3rd + arguments being NULL. Also the __set__ method has been + split into two methods, __set__ and __delete__. + [Phillip J. Eby ] + +Bug fixes: + + - Values unpacked into a non-Python destination variable + were not being converted before assignment. + [Gareth Watts ] + + - Hex constants greater than 0x7fffffff caused compiler + to crash. [Gareth Watts ] + + - Type slots are no longer statically initialised with + extern function pointers, to avoid problems with + some compilers. The hack in the distutils extension + to work around this by compiling as C++ has been + disabled. 
[Phillip J. Eby ] + + - Fixed several more instances of the error-reporting + routine being called with arguments in the wrong + order. Hoping I've *finally* got all of them now... + + - Nested for-from loops used the same control variable. + [Sebastien de Menten ] + + - Fixed some other error message related bugs. + [Francois Pinard ] + + - Assigning to slice didn't work. + [Francois Pinard ] + + - Temp variables were being declared as extension + types and then being assigned PyObject *'s. All + Python temp vars are now declared as PyObject *. + [Francois Pinard ] 0.5 --- Bug fixes: - - Algorithm for allocating temp variables redesigned - to fix various errors concerning temp - variable re-use. - [Mark Rowe ] - - - Memory leak occurred sometimes when an implicit - type test was applied to the result of an - expression. - [christoph.wiedemann at daimlerchrysler.com] - - - __set__ method of extension types had wrong - signature. - [Josh Littlefield ] - + - Algorithm for allocating temp variables redesigned + to fix various errors concerning temp + variable re-use. + [Mark Rowe ] + + - Memory leak occurred sometimes when an implicit + type test was applied to the result of an + expression. + [christoph.wiedemann at daimlerchrysler.com] + + - __set__ method of extension types had wrong + signature. + [Josh Littlefield ] + 0.4.6 ----- Bug fixes: - - Indexing multi-dimensional C arrays didn't - work. - [Gary Dietachmayer ] + - Indexing multi-dimensional C arrays didn't + work. + [Gary Dietachmayer ] 0.4.5 @@ -554,48 +664,48 @@ New features: - - There is now a 'public' declaration for - making Pyrex-defined variables and functions - available to external C code. A .h file is - also generated if there are any public - declarations. + - There is now a 'public' declaration for + making Pyrex-defined variables and functions + available to external C code. A .h file is + also generated if there are any public + declarations.
Enhancements: - - Defining __len__/__getitem__ methods in an - extension class fills sq_length/sq_item slots - as well as mp_length/mp_subscript. - [Matthias Baas ] - - - The Distutils extension now allows .c files - to be incorporated along with .pyx files. - [Modification to Distutils extension contributed - by Darrell Gallion .] + - Defining __len__/__getitem__ methods in an + extension class fills sq_length/sq_item slots + as well as mp_length/mp_subscript. + [Matthias Baas ] + + - The Distutils extension now allows .c files + to be incorporated along with .pyx files. + [Modification to Distutils extension contributed + by Darrell Gallion .] Bug fixes: - - Float literals without a decimal point - work again now. - [Mike Rovner ] - [Peter Lepage ] - - - Compiler crashed if exception value didn't - match function return type. - [Michael JasonSmith ] - - - The setup.py file should now install the - Lexicon.pickle file in the right place. - [Patch supplied by David M. Cooke - ] - - - Compiler crashed when compiling a C function that - returned an extension type. - [David M. Cooke - ] - - - Anonymous enum types did not have C code - suppressed inside an extern-from block. - [Matthew Mueller ] + - Float literals without a decimal point + work again now. + [Mike Rovner ] + [Peter Lepage ] + + - Compiler crashed if exception value didn't + match function return type. + [Michael JasonSmith ] + + - The setup.py file should now install the + Lexicon.pickle file in the right place. + [Patch supplied by David M. Cooke + ] + + - Compiler crashed when compiling a C function that + returned an extension type. + [David M. Cooke + ] + + - Anonymous enum types did not have C code + suppressed inside an extern-from block. + [Matthew Mueller ] 0.4.4 @@ -603,46 +713,46 @@ Enhancements: - - Tracebacks now extend into Pyrex function - calls and show line numbers in the Pyrex - source file. 
- - - Syntax for float literals made more lenient - (no longer requires digits both before and - after the point). - [Peter Lepage ] - - - Method calls can be made on string literals - (e.g. ",".join(x)). - [pedro_rodriguez at club-internet.fr] + - Tracebacks now extend into Pyrex function + calls and show line numbers in the Pyrex + source file. + + - Syntax for float literals made more lenient + (no longer requires digits both before and + after the point). + [Peter Lepage ] + + - Method calls can be made on string literals + (e.g. ",".join(x)). + [pedro_rodriguez at club-internet.fr] Bug fixes: - - Incorrect refcount code generated when a - Python function needing argument type tests - had local Python variables. - [Matthias Baas ] - - - 'self' parameter of __getitem__ method of - extension type had wrong implicit type. - [Peter Lepage ] - - - Repaired breakage introduced by trying to - allow an empty parameter list to be written - as (void). No longer attempting to allow - this (too hard to parse correctly). - [Peter Lepage ] - - - Found bug in Plex 1.1.2 which was the *real* - cause of the two-newlines-in-a-row problem. - Removed the Opt(Eol)+Str("\n") hacks in - the scanner which were working around this - before. - [pedro_rodriguez at club-internet.fr] - - - __call__ special method of extension types - had wrong signature. - [Peter Lepage ] + - Incorrect refcount code generated when a + Python function needing argument type tests + had local Python variables. + [Matthias Baas ] + + - 'self' parameter of __getitem__ method of + extension type had wrong implicit type. + [Peter Lepage ] + + - Repaired breakage introduced by trying to + allow an empty parameter list to be written + as (void). No longer attempting to allow + this (too hard to parse correctly). + [Peter Lepage ] + + - Found bug in Plex 1.1.2 which was the *real* + cause of the two-newlines-in-a-row problem. 
+ Removed the Opt(Eol)+Str("\n") hacks in + the scanner which were working around this + before. + [pedro_rodriguez at club-internet.fr] + + - __call__ special method of extension types + had wrong signature. + [Peter Lepage ] 0.4.3 @@ -650,26 +760,26 @@ New language features: - - For-from loop for iterating over integer - ranges, using pure C loop where possible. + - For-from loop for iterating over integer + ranges, using pure C loop where possible. Enhancements: - - sizeof() can now be applied to types as - well as variables. - - - Improved handling of forward-declared - extension types. + - sizeof() can now be applied to types as + well as variables. + + - Improved handling of forward-declared + extension types. Bug fixes: - - Two newlines in a row in a triple quoted - string caused a parse error on some - platforms. - [Matthias Baas ] - - - Fixed problem with break and continue in - the else-clause of a loop. + - Two newlines in a row in a triple quoted + string caused a parse error on some + platforms. + [Matthias Baas ] + + - Fixed problem with break and continue in + the else-clause of a loop. 0.4.2 @@ -677,118 +787,118 @@ New language features: - - C functions can be declared as having an - exception return value, which is checked - whenever the function is called. If an - exception is detected inside a C function - for which no exception value is declared, - a warning message is printed and the - exception is cleared. - - - Cascaded assignments (i.e. a = b = c - are now supported. - - - Anonymous enum declarations are allowed, - for when you just want to declare constants. - - - The C types "long long" and "long double" - are now understood. Also, "int" is optional - after "short" or "long". - + - C functions can be declared as having an + exception return value, which is checked + whenever the function is called. 
If an + exception is detected inside a C function + for which no exception value is declared, + a warning message is printed and the + exception is cleared. + + - Cascaded assignments (i.e. a = b = c + are now supported. + + - Anonymous enum declarations are allowed, + for when you just want to declare constants. + + - The C types "long long" and "long double" + are now understood. Also, "int" is optional + after "short" or "long". + Enhancements: - - A * argument in a function call can now be - any sequence, not just a tuple. - - - A C char* or char[] will be turned into - a char by taking its first character if - used in a context where a char is required, - thus allowing a string literal to be used as - a char literal. - - - C string * C int or vice versa is now - interpreted as Python string replication. + - A * argument in a function call can now be + any sequence, not just a tuple. - - Function arguments are checked for void or - incomplete type. + - A C char* or char[] will be turned into + a char by taking its first character if + used in a context where a char is required, + thus allowing a string literal to be used as + a char literal. + + - C string * C int or vice versa is now + interpreted as Python string replication. + + - Function arguments are checked for void or + incomplete type. Bug fixes: - - Non-external extension types show up in the - module dict once more (this got broken in - 0.4.1). - - - A spurious decref has been removed from the - runtime support code for the "import" statement. - Hopefully this will prevent the crashes some - people have been experiencing when importing - builtin modules. - [Mathew Yeates ] + - Non-external extension types show up in the + module dict once more (this got broken in + 0.4.1). + + - A spurious decref has been removed from the + runtime support code for the "import" statement. + Hopefully this will prevent the crashes some + people have been experiencing when importing + builtin modules. 
+ [Mathew Yeates ] 0.4.1 ----- New language features: - "ctypedef struct/union/enum/class" statements - added, for use in extern-from blocks when a - header file uses a ctypedef to declare a - tagless struct, union or enum type. - - - "pass" allowed in an extern-from block. - - - "cdef extern from *" for when you don't want - to specify an include file name. - - - Argument names may be omitted in function - signatures when they're not needed. - - - New reserved word NULL for the null C pointer. + - "ctypedef struct/union/enum/class" statements + added, for use in extern-from blocks when a + header file uses a ctypedef to declare a + tagless struct, union or enum type. + + - "pass" allowed in an extern-from block. + + - "cdef extern from *" for when you don't want + to specify an include file name. + + - Argument names may be omitted in function + signatures when they're not needed. + + - New reserved word NULL for the null C pointer. Compiler enhancements: - - - Lexicon is now pickled in binary format, so - startup should be much faster on slower - machines. - - - If Pyrex decides to rebuild the lexicon and - then finds that it can't write a pickle file, - it now prints a warning and carries on - instead of crashing. - - - Chat about hash codes and lexicon pickling - now turned off by default except when creating - a new lexicon (which ought never happen now - unless you change the scanner). + + - Lexicon is now pickled in binary format, so + startup should be much faster on slower + machines. + + - If Pyrex decides to rebuild the lexicon and + then finds that it can't write a pickle file, + it now prints a warning and carries on + instead of crashing. + + - Chat about hash codes and lexicon pickling + now turned off by default except when creating + a new lexicon (which ought never happen now + unless you change the scanner).
Bug fixes: - - Modified the runtime support code for "import" - statements, hopefully fixing problem with using - a Pyrex module in conjunction with py2exe. - - - DL_EXPORT now used in both the prototype and - definition of the module init function. - - - Exception state is now saved and restored around - calls to an extension type __dealloc__ method, - to avoid screwing up if the object is deallocated - while an exception is being propagated. - - - Making an attribute reference to a method of - an extension type caused a compiler crash. - - - Doc string in new-style class definition - caused a run-time error. - - - Insufficient parentheses were put around C type - casts. - - - Constructors for extension types are now read-only - C global variables instead of entries in the - module dict. This change was needed to prevent - Numeric from blowing up due to touching its - typeobject before import_numeric() could be called. + - Modified the runtime support code for "import" + statements, hopefully fixing problem with using + a Pyrex module in conjunction with py2exe. + + - DL_EXPORT now used in both the prototype and + definition of the module init function. + + - Exception state is now saved and restored around + calls to an extension type __dealloc__ method, + to avoid screwing up if the object is deallocated + while an exception is being propagated. + + - Making an attribute reference to a method of + an extension type caused a compiler crash. + + - Doc string in new-style class definition + caused a run-time error. + + - Insufficient parentheses were put around C type + casts. + + - Constructors for extension types are now read-only + C global variables instead of entries in the + module dict. This change was needed to prevent + Numeric from blowing up due to touching its + typeobject before import_numeric() could be called. 
0.4 --- @@ -805,7 +915,7 @@ - Helping deal with types such as "size_t" - Helping deal with functions defined as macros - + - Access to internals of pre-existing extension types is now possible by placing an extension type declaration inside a "cdef extern from" @@ -817,14 +927,14 @@ wrong number of args to certain special methods of extension types. [Mitch Chapman ] - + - Compile-time crash when defining an extension type with a __hash__ method. Minor enhancements: - Hashing of the scanner source file made more - platform-independent, making spurious regeneration + platform-independent, making spurious regeneration of the pickle less likely. @@ -835,11 +945,11 @@ - Runtime crash when using * or ** args in a method of an extension type fixed. - [Matthew Mueller ] - - - Compiler crash when using default argument - values in a method of a Python class. - [Mike Rovner ] + [Matthew Mueller ] + + - Compiler crash when using default argument + values in a method of a Python class. + [Mike Rovner ] Enhancements: @@ -849,12 +959,12 @@ "initialiser is not constant" problems experienced on Windows. [Marek Baczek ] - + - On Windows, __declspec(dllexport) is now used for the module init func declaration (or should be -- I haven't tested this). [Marek Baczek ] - + - The compiler shouldn't attempt to rewrite the Lexicon.pickle file unless the source has been changed (hashing is used now instead of comparing @@ -981,7 +1091,7 @@ * Extension types can now be forward-declared. -* All permutations of (non-Unicode) string literals +* All permutations of (non-Unicode) string literals and escape codes should work now. * Hex and octal integer literals. @@ -1032,13 +1142,13 @@ - Class definitions are only allowed at the top level of a module, not inside a control structure or function or another class definition. 
- + - Assigning a Pyrex-defined Python function to a class attribute outside of the class definition - will not create a method (because it's not an + will not create a method (because it's not an interpreted Python function and therefore won't trigger the bound-method creation magic). - + - The __metaclass__ mechanism and the creation of new-style classes is not (yet) supported. Modified: lxml/pyrex/Demos/callback/cheese.pyx ============================================================================== --- lxml/pyrex/Demos/callback/cheese.pyx (original) +++ lxml/pyrex/Demos/callback/cheese.pyx Tue Apr 25 15:43:21 2006 @@ -3,11 +3,11 @@ # cdef extern from "cheesefinder.h": - ctypedef void (*cheesefunc)(char *name, void *user_data) - void find_cheeses(cheesefunc user_func, void *user_data) + ctypedef void (*cheesefunc)(char *name, void *user_data) + void find_cheeses(cheesefunc user_func, void *user_data) def find(f): - find_cheeses(callback, f) - + find_cheeses(callback, f) + cdef void callback(char *name, void *f): - (f)(name) + (f)(name) Modified: lxml/pyrex/Demos/primes.pyx ============================================================================== --- lxml/pyrex/Demos/primes.pyx (original) +++ lxml/pyrex/Demos/primes.pyx Tue Apr 25 15:43:21 2006 @@ -1,18 +1,18 @@ def primes(int kmax): - cdef int n, k, i - cdef int p[1000] - result = [] - if kmax > 1000: - kmax = 1000 - k = 0 - n = 2 - while k < kmax: - i = 0 - while i < k and n % p[i] <> 0: - i = i + 1 - if i == k: - p[k] = n - k = k + 1 - result.append(n) - n = n + 1 - return result + cdef int n, k, i + cdef int p[1000] + result = [] + if kmax > 1000: + kmax = 1000 + k = 0 + n = 2 + while k < kmax: + i = 0 + while i < k and n % p[i] <> 0: + i = i + 1 + if i == k: + p[k] = n + k = k + 1 + result.append(n) + n = n + 1 + return result Modified: lxml/pyrex/Doc/FAQ.html ============================================================================== --- lxml/pyrex/Doc/FAQ.html (original) +++ 
lxml/pyrex/Doc/FAQ.html Tue Apr 25 15:43:21 2006 @@ -1 +1,77 @@ - FAQ.html


Pyrex FAQ

Contents


How do I call Python/C API routines?

Declare them as C functions inside a cdef extern from block. Use the type name object for any parameters and return types which are Python object references. Don't use the word const anywhere. Here is an example which defines and uses the PyString_FromStringAndSize routine:
cdef extern from "Python.h":
    object PyString_FromStringAndSize(char *, int)

cdef char buf[42]
my_string = PyString_FromStringAndSize(buf, 42)

How do I convert a C string containing null bytes to a Python string?

Put in a declaration for the PyString_FromStringAndSize API routine and use that. See How do I call Python/C API routines?

How do I access the data inside a Numeric array object?

Use a cdef extern from block to include the Numeric header file and declare the array object as an external extension type. The following code illustrates how to do this:
cdef extern from "Numeric/arrayobject.h":

    struct PyArray_Descr:
        int type_num, elsize
        char type

    ctypedef class Numeric.ArrayType [object PyArrayObject]:
        cdef char *data
        cdef int nd
        cdef int *dimensions, *strides
        cdef object base
        cdef PyArray_Descr *descr
        cdef int flags

For more information about external extension types, see the "External Extension Types" section of the "Extension Types" documentation page.

Pyrex says my extension type object has no attribute 'rhubarb', but I know it does. What gives?

You're probably trying to access it through a reference which Pyrex thinks is a generic Python object. You need to tell Pyrex that it's a reference to your extension type by means of a declaration,
for example,
cdef class Vegetables:
    cdef int rhubarb

...
cdef Vegetables veg
veg.rhubarb = 42
Also see the "Attributes" section of the "Extension Types" documentation page.
--- \ No newline at end of file + + + + FAQ.html + + +


Pyrex FAQ +

+
+

Contents

+ +

How do I call Python/C API routines?

+ Declare them as C functions inside a cdef extern from block. +Use the type name object for any parameters and return types which +are Python object references. Don't use the word const anywhere. +Here is an example which defines and uses the PyString_FromStringAndSize routine: +
cdef extern from "Python.h":
+     object PyString_FromStringAndSize(char *, int)

cdef char buf[42]
+ my_string = PyString_FromStringAndSize(buf, 42)

+
+

How do I convert a C string containing null +bytes to a Python string?

+ Put in a declaration for the PyString_FromStringAndSize API routine + and use that. See How do I call Python/C API + routines?

How do I access the data inside a Numeric + array object?

+ Use a cdef extern from block to include the Numeric header file + and declare the array object as an external extension type. The following + code illustrates how to do this: +
cdef extern from "Numeric/arrayobject.h":

    struct PyArray_Descr:
+         int type_num, elsize
+         char type

+

    ctypedef class Numeric.ArrayType [object PyArrayObject]:
+         cdef char *data
+         cdef int nd
+         cdef int *dimensions, +*strides
+         cdef object base +
+         cdef PyArray_Descr *descr
+         cdef int flags
+

+
+

For more information about external extension types, see the "External Extension Types" +section of the "Extension Types" documentation +page.
+

+

Pyrex says my extension type object has no attribute +'rhubarb', but I know it does. What gives?

+You're probably trying to access it through a reference which Pyrex thinks +is a generic Python object. You need to tell Pyrex that it's a reference +to your extension type by means of a declaration,
+for example,
+
cdef class Vegetables:
+     cdef int rhubarb
+
+ ...
+ cdef Vegetables veg
+ veg.rhubarb = 42
+
+Also see the "Attributes" +section of the "Extension +Types" documentation page.
+

Python says my extension type has no method called 'quack', but I know it does. What gives?

+You may have declared the method using cdef instead of def. Only functions and methods declared with def are callable from Python code.
+--- + \ No newline at end of file Modified: lxml/pyrex/Doc/extension_types.html ============================================================================== --- lxml/pyrex/Doc/extension_types.html (original) +++ lxml/pyrex/Doc/extension_types.html Tue Apr 25 15:43:21 2006 @@ -1 +1,444 @@ - Extension Types


Extension Types

Contents

Introduction

As well as creating normal user-defined classes with the Python class statement, Pyrex also lets you create new built-in Python types, known as extension types. You define an extension type using the cdef class statement. Here's an example:
cdef class Shrubbery:

    cdef int width, height

    def __init__(self, w, h):
        self.width = w
        self.height = h

    def describe(self):
        print "This shrubbery is", self.width, \
            "by", self.height, "cubits."

As you can see, a Pyrex extension type definition looks a lot like a Python class definition. Within it, you use the def statement to define methods that can be called from Python code. You can even define many of the special methods such as __init__ as you would in Python.

The main difference is that you can use the cdef statement to define attributes. The attributes may be Python objects (either generic or of a particular extension type), or they may be of any C data type. So you can use extension types to wrap arbitrary C data structures and provide a Python-like interface to them.

Attributes

Attributes of an extension type are stored directly in the object's C struct. The set of attributes is fixed at compile time; you can't add attributes to an extension type instance at run time simply by assigning to them, as you could with a Python class instance. (You can subclass the extension type in Python and add attributes to instances of the subclass, however.)

There are two ways that attributes of an extension type can be accessed: by Python attribute lookup, or by direct access to the C struct from Pyrex code. Python code is only able to access attributes of an extension type by the first method, but Pyrex code can use either method.

By default, extension type attributes are only accessible by direct access, not Python access, which means that they are not accessible from Python code. To make them accessible from Python code, you need to declare them as public or readonly. For example,

cdef class Shrubbery:
    cdef public int width, height
    cdef readonly float depth
makes the width and height attributes readable and writable from Python code, and the depth attribute readable but not writable.

Note that you can only expose simple C types, such as ints, floats and strings, for Python access. You can also expose Python-valued attributes, although read-write exposure is only possible for generic Python attributes (of type object). If the attribute is declared to be of an extension type, it must be exposed readonly.

Note also that the public and readonly options apply only to Python access, not direct access. All the attributes of an extension type are always readable and writable by direct access.

However, for direct access to be possible, the Pyrex compiler must know that you have an instance of that type, and not just a generic Python object. It knows this already in the case of the "self" parameter of the methods of that type, but in other cases you will have to tell it by means of a declaration. For example,

cdef widen_shrubbery(Shrubbery sh, extra_width):
    sh.width = sh.width + extra_width
If you attempt to access an extension type attribute through a generic object reference, Pyrex will use a Python attribute lookup. If the attribute is exposed for Python access (using public or readonly) then this will work, but it will be much slower than direct access.

Extension types and None

When you declare a parameter or C variable as being of an extension type, Pyrex will allow it to take on the value None as well as values of its declared type. This is analogous to the way a C pointer can take on the value NULL, and you need to exercise the same caution because of it. There is no problem as long as you are performing Python operations on it, because full dynamic type checking will be applied. However, when you access C attributes of an extension type (as in the widen_shrubbery function above), it's up to you to make sure the reference you're using is not None -- in the interests of efficiency, Pyrex does not check this.

You need to be particularly careful when exposing Python functions which take extension types as arguments. If we wanted to make widen_shrubbery a Python function, for example, if we simply wrote

def widen_shrubbery(Shrubbery sh, extra_width): # This is
    sh.width = sh.width + extra_width           # dangerous!
then users of our module could crash it by passing None for the sh parameter.

One way to fix this would be

def widen_shrubbery(Shrubbery sh, extra_width):
    if sh is None:
        raise TypeError
    sh.width = sh.width + extra_width
but since this is anticipated to be such a frequent requirement, Pyrex provides a more convenient way. Parameters of a Python function declared as an extension type can have a not None clause:
def widen_shrubbery(Shrubbery sh not None, extra_width):
    sh.width = sh.width + extra_width
Now the function will automatically check that sh is not None along with checking that it has the right type.

Note, however that the not None clause can only be used in Python functions (defined with def) and not C functions (defined with cdef). If you need to check whether a parameter to a C function is None, you will need to do it yourself.

Some more things to note:

  • The self parameter of a method of an extension type is guaranteed never to be None.
  • When comparing a value with None, keep in mind that, if x is a Python object, x is None and x is not None are very efficient because they translate directly to C pointer comparisons, whereas x == None and x != None, or simply using x as a boolean value (as in if x: ...) will invoke Python operations and therefore be much slower.

Special methods

Although the principles are similar, there are substantial differences between many of the __xxx___ special methods of extension types and their Python counterparts. There is a separate page devoted to this subject, and you should read it carefully before attempting to use any special methods in your extension types.

Properties

There is a special syntax for defining properties in an extension class:
cdef class Spam:

    property cheese:

        "A doc string can go here."

        def __get__(self):
            # This is called when the property is read.
            ...

        def __set__(self, value):
            # This is called when the property is written.
            ...

        def __del__(self):
            # This is called when the property is deleted.
 

The __get__, __set__ and __del__ methods are all optional; if they are omitted, an exception will be raised when the corresponding operation is attempted.

Here's a complete example. It defines a property which adds to a list each time it is written to, returns the list when it is read, and empties the list when it is deleted.
 

cheesy.pyx Test input
cdef class CheeseShop:

  cdef object cheeses

  def __new__(self):
    self.cheeses = []

  property cheese:

    def __get__(self):
      return "We don't have: %s" % self.cheeses

    def __set__(self, value):
      self.cheeses.append(value)

    def __del__(self):
      del self.cheeses[:]

from cheesy import CheeseShop

shop = CheeseShop()
print shop.cheese

shop.cheese = "camembert"
print shop.cheese

shop.cheese = "cheddar"
print shop.cheese

del shop.cheese
print shop.cheese

Test output
We don't have: []
We don't have: ['camembert']
We don't have: ['camembert', 'cheddar']
We don't have: []

Subclassing

An extension type may inherit from a built-in type or another extension type:
cdef class Parrot:
    ...

cdef class Norwegian(Parrot):
    ...


A complete definition of the base type must be available to Pyrex, so if the base type is a built-in type, it must have been previously declared as an extern extension type. If the base type is defined in another Pyrex module, it must either be declared as an extern extension type or imported using the cimport statement.

An extension type can only have one base class (no multiple inheritance).

Pyrex extension types can also be subclassed in Python. A Python class can inherit from multiple extension types provided that the usual Python rules for multiple inheritance are followed (i.e. the C layouts of all the base classes must be compatible).

C methods

Extension types can have C methods as well as Python methods. Like C functions, C methods are declared using cdef instead of def. C methods are "virtual", and may be overridden in derived extension types.

pets.pyx
Output
cdef class Parrot:

  cdef void describe(self):
    print "This parrot is resting."

cdef class Norwegian(Parrot):

  cdef void describe(self):
    Parrot.describe(self)
    print "Lovely plumage!"


cdef Parrot p1, p2
p1 = Parrot()
p2 = Norwegian()
print "p1:"
p1.describe()
print "p2:"
p2.describe()

p1:
This parrot is resting.
p2:
This parrot is resting.
Lovely plumage!

The above example also illustrates that a C method can call an inherited C method using the usual Python technique, i.e.
Parrot.describe(self)

Forward-declaring extension types

Extension types can be forward-declared, like struct and union types. This will be necessary if you have two extension types that need to refer to each other, e.g.
cdef class Shrubbery # forward declaration

cdef class Shrubber:
    cdef Shrubbery work_in_progress

cdef class Shrubbery:
    cdef Shrubber creator

If you are forward-declaring an extension type that has a base class, you must specify the base class in both the forward declaration and its subsequent definition, for example,
cdef class A(B)

...

cdef class A(B):
    # attributes and methods

Public and external extension types

Extension types can be declared extern or public. An extern extension type declaration makes an extension type defined in external C code available to a Pyrex module. A public extension type declaration makes an extension type defined in a Pyrex module available to external C code.

External extension types

An extern extension type allows you to gain access to the internals of Python objects defined in the Python core or in a non-Pyrex extension module.
NOTE: In Pyrex versions before 0.8, extern extension types were also used to reference extension types defined in another Pyrex module. While you can still do that, Pyrex 0.8 and later provides a better mechanism for this. See Sharing C Declarations Between Pyrex Modules.
Here is an example which will let you get at the C-level members of the built-in complex object.
cdef extern from "complexobject.h":

    struct Py_complex:
        double real
        double imag

    ctypedef class __builtin__.complex [object PyComplexObject]:
        cdef Py_complex cval

# A function which uses the above type
def spam(complex c):
    print "Real:", c.cval.real
    print "Imag:", c.cval.imag

Some important things to note are:
  1. In this example, ctypedef class has been used. This is because, in the Python header files, the PyComplexObject struct is declared with
ctypedef struct {
    ...
} PyComplexObject;
  2. As well as the name of the extension type, the module in which its type object can be found is also specified. See the implicit importing section below.

  3. The part in square brackets tells Pyrex the name to use for the object's C struct, so it can generate code that matches what is declared in the header file. See the name specification clause section below.

  4. When declaring an external extension type, you don't declare any methods. Declaration of methods is not required in order to call them, because the calls are Python method calls. Also, as with structs and unions, if your extension class declaration is inside a cdef extern from block, you only need to declare those C members which you wish to access.

Implicit importing

Backwards Incompatibility Note: You will have to update any pre-0.8 Pyrex modules you have which use extern extension types. I apologise for this, but for complicated reasons it proved to be too difficult to continue supporting the old way of doing these while introducing the new features that I wanted.
Pyrex 0.8 and later requires you to include a module name in an extern extension class declaration, for example,
cdef extern class MyModule.Spam:
    ...
The type object will be implicitly imported from the specified module and bound to the corresponding name in this module. In other words, in this example an implicit
    from MyModule import Spam
statement will be executed at module load time.

The module name can be a dotted name to refer to a module inside a package hierarchy, for example,

cdef extern class My.Nested.Package.Spam:
    ...
You can also specify an alternative name under which to import the type using an as clause, for example,
    cdef extern class My.Nested.Package.Spam as Yummy:
       ...
which corresponds to the implicit import statement
    from My.Nested.Package import Spam as Yummy

Type names vs. constructor names

Inside a Pyrex module, the name of an extension type serves two distinct purposes. When used in an expression, it refers to a module-level global variable holding the type's constructor (i.e. its type-object). However, it can also be used as a C type name to declare variables, arguments and return values of that type.

When you declare

cdef extern class MyModule.Spam:
    ...
the name Spam serves both these roles. There may be other names by which you can refer to the constructor, but only Spam can be used as a type name. For example, if you were to explicitly import MyModule, you could use MyModule.Spam() to create a Spam instance, but you wouldn't be able to use MyModule.Spam as a type name.

When an as clause is used, the name specified in the as clause also takes over both roles. So if you declare

cdef extern class MyModule.Spam as Yummy:
    ...
then Yummy becomes both the type name and a name for the constructor. Again, there are other ways that you could get hold of the constructor, but only Yummy is usable as a type name.

Public extension types

An extension type can be declared public, in which case a .h file is generated containing declarations for its object struct and type object. By including the .h file in external C code that you write, that code can access the attributes of the extension type.

Name specification clause

The part of the class declaration in square brackets is a special feature only available for extern or public extension types. The full form of this clause is
[object object_struct_name, type type_object_name]
where object_struct_name is the name to assume for the type's C struct, and type_object_name is the name to assume for the type's statically declared type object. (The object and type clauses can be written in either order.)

If the extension type declaration is inside a cdef extern from block, the object clause is required, because Pyrex must be able to generate code that is compatible with the declarations in the header file. Otherwise, for extern extension types, the object clause is optional.

For public extension types, the object and type clauses are both required, because Pyrex must be able to generate code that is compatible with external C code.



Back to the Language Overview
 

\ No newline at end of file + + + + Extension Types + +


Extension Types +

+

Contents

+ +

Introduction

+ As well as creating normal user-defined classes with the Python class +statement, Pyrex also lets you create new built-in Python types, known as +extension types. You define an extension type using the cdef class statement. Here's an example: +
cdef class Shrubbery:

    cdef int width, height

+

    def __init__(self, w, h):
+         self.width = w
+         self.height = h

+

    def describe(self):
+         print "This shrubbery is", +self.width, \
+             +"by", self.height, "cubits."

+
+ As you can see, a Pyrex extension type definition looks a lot like a Python + class definition. Within it, you use the def statement to define +methods that can be called from Python code. You can even define many of +the special methods such as __init__ as you would in Python. +

The main difference is that you can use the cdef statement to define +attributes. The attributes may be Python objects (either generic or of a particular +extension type), or they may be of any C data type. So you can use extension +types to wrap arbitrary C data structures and provide a Python-like interface +to them.

+

Attributes

+ Attributes of an extension type are stored directly in the object's C struct. + The set of attributes is fixed at compile time; you can't add attributes +to an extension type instance at run time simply by assigning to them, as +you could with a Python class instance. (You can subclass the extension type +in Python and add attributes to instances of the subclass, however.) +

There are two ways that attributes of an extension type can be accessed: + by Python attribute lookup, or by direct access to the C struct from Pyrex + code. Python code is only able to access attributes of an extension type +by the first method, but Pyrex code can use either method.

+

By default, extension type attributes are only accessible by direct access, +not Python access, which means that they are not accessible from Python code. +To make them accessible from Python code, you need to declare them as public or readonly. For example,

+
cdef class Shrubbery:
+     cdef public int width, height
+     cdef readonly float depth
+ makes the width and height attributes readable and writable + from Python code, and the depth attribute readable but not writable. + +

Note that you can only expose simple C types, such as ints, floats and + strings, for Python access. You can also expose Python-valued attributes, + although read-write exposure is only possible for generic Python attributes + (of type object). If the attribute is declared to be of an extension + type, it must be exposed readonly.

+

Note also that the public and readonly options apply + only to Python access, not direct access. All the attributes of an +extension type are always readable and writable by direct access.

+

However, for direct access to be possible, the Pyrex compiler must know +that you have an instance of that type, and not just a generic Python object. +It knows this already in the case of the "self" parameter of the methods of +that type, but in other cases you will have to tell it by means of a declaration. +For example,

+
cdef widen_shrubbery(Shrubbery sh, extra_width):
+     sh.width = sh.width + extra_width
+ If you attempt to access an extension type attribute through a generic +object reference, Pyrex will use a Python attribute lookup. If the attribute +is exposed for Python access (using public or readonly) +then this will work, but it will be much slower than direct access. +

Extension types and None

+ When you declare a parameter or C variable as being of an extension type, + Pyrex will allow it to take on the value None as well as values of its declared +type. This is analogous to the way a C pointer can take on the value NULL, +and you need to exercise the same caution because of it. There is no problem +as long as you are performing Python operations on it, because full dynamic +type checking will be applied. However, when you access C attributes of an +extension type (as in the widen_shrubbery function above), it's up +to you to make sure the reference you're using is not None -- in the interests +of efficiency, Pyrex does not check this. +

You need to be particularly careful when exposing Python functions which + take extension types as arguments. If we wanted to make widen_shrubbery +a Python function, for example, if we simply wrote

+
def widen_shrubbery(Shrubbery sh, extra_width): # This is
+     sh.width = sh.width + extra_width           +# dangerous!
+ then users of our module could crash it by passing None for the sh +parameter. +

One way to fix this would be

+
def widen_shrubbery(Shrubbery sh, extra_width):
+     if sh is None:
+         raise TypeError
+     sh.width = sh.width + extra_width
+ but since this is anticipated to be such a frequent requirement, Pyrex +provides a more convenient way. Parameters of a Python function declared +as an extension type can have a not None clause: +
def widen_shrubbery(Shrubbery sh not None, extra_width): +
+     sh.width = sh.width + extra_width
+ Now the function will automatically check that sh is not None +along with checking that it has the right type. +

Note, however that the not None clause can only be used + in Python functions (defined with def) and not C functions (defined + with cdef). If you need to check whether a parameter to a C function + is None, you will need to do it yourself.

+

Some more things to note:

+
    +
  • The self parameter of a method of an extension type is guaranteed + never to be None.
  • +
+
    +
  • When comparing a value with None, keep in mind that, if x is a Python object, x is None and x is not None are very +efficient because they translate directly to C pointer comparisons, whereas + x == None and x != None, or simply using x as a boolean value (as in if x: ...) will invoke Python operations +and therefore be much slower.
  • +
+

Special methods

+ Although the principles are similar, there are substantial differences +between many of the __xxx__ special methods of extension types and their +Python counterparts. There is a separate page devoted to this subject, and you should read it carefully before attempting +to use any special methods in your extension types. +

Properties

+ There is a special syntax for defining properties in an extension + class: +
cdef class Spam:

    property cheese:

+

        "A doc string can go +here."

+

        def __get__(self): +
+             +# This is called when the property is read.
+             +...

+

        def __set__(self, value): +
+             +# This is called when the property is written.
+             +...

+

        def __del__(self): +
+             +# This is called when the property is deleted.
+  

+
+ The __get__, __set__ and __del__ methods are +all optional; if they are omitted, an exception will be raised when the corresponding +operation is attempted. +

Here's a complete example. It defines a property which adds to a list +each time it is written to, returns the list when it is read, and empties +the list when it is deleted.
+  

+
+ + + + + + + + + + + + + + + +
cheesy.pyxTest input
cdef class CheeseShop: +

  cdef object cheeses

+

  def __new__(self):
+     self.cheeses = []

+

  property cheese:

+

    def __get__(self):
+       return "We don't have: %s" % self.cheeses +

+

    def __set__(self, value):
+       self.cheeses.append(value) +

+

    def __del__(self):
+       del self.cheeses[:]

+
from cheesy import CheeseShop +

shop = CheeseShop()
+ print shop.cheese

+

shop.cheese = "camembert"
+ print shop.cheese

+

shop.cheese = "cheddar"
+ print shop.cheese

+

del shop.cheese
+ print shop.cheese

+
Test output
We don't have: []
+ We don't have: ['camembert']
+ We don't have: ['camembert', 'cheddar']
+ We don't have: []
+
+

Subclassing

+ An extension type may inherit from a built-in type or another extension +type: +
cdef class Parrot:
+     ...

cdef class Norwegian(Parrot):
+     ...

+
+


+ A complete definition of the base type must be available to Pyrex, so if +the base type is a built-in type, it must have been previously declared as +an extern extension type. If the base type is defined in another Pyrex +module, it must either be declared as an extern extension type or imported +using the cimport statement.

+

An extension type can only have one base class (no multiple inheritance). +

+

Pyrex extension types can also be subclassed in Python. A Python class + can inherit from multiple extension types provided that the usual Python +rules for multiple inheritance are followed (i.e. the C layouts of all the +base classes must be compatible).
+

+

C methods

+ Extension types can have C methods as well as Python methods. Like C functions, +C methods are declared using cdef instead of def. C methods +are "virtual", and may be overridden in derived extension types.
+
+ + + + + + + + + + +
pets.pyx
+
Output
+
cdef class Parrot:
+
+   cdef void describe(self):
+     print "This parrot is resting."
+
+ cdef class Norwegian(Parrot):
+
+   cdef void describe(self):
+    Parrot.describe(self)
+     print "Lovely plumage!"
+
+
+ cdef Parrot p1, p2
+ p1 = Parrot()
+ p2 = Norwegian()
+print "p1:"
+ p1.describe()
+print "p2:"
+ p2.describe()

+
p1:
+This parrot is resting.
+p2:
+
This parrot is resting.
+
Lovely plumage!
+
+
+ The above example also illustrates that a C method can call an inherited +C method using the usual Python technique, i.e.
+
Parrot.describe(self)
+
+

Forward-declaring extension types

+ Extension types can be forward-declared, like struct and union types. This + will be necessary if you have two extension types that need to refer to +each other, e.g. +
cdef class Shrubbery # forward declaration

cdef class Shrubber:
+     cdef Shrubbery work_in_progress

+

cdef class Shrubbery:
+     cdef Shrubber creator

+
+ If you are forward-declaring an extension type that has a base class, you +must specify the base class in both the forward declaration and its subsequent +definition, for example,
+
cdef class A(B)
+
+...
+
+cdef class A(B):
+    # attributes and methods

+
+

Making extension types weak-referenceable

By +default, extension types do not support having weak references made to +them. You can enable weak referencing by declaring a C attribute of +type object called __weakref__. For example,
+
+
cdef class ExplodingAnimal:
+    """This animal will self-destruct when it is
+       no longer strongly referenced."""
+   
+    cdef object __weakref__
+
+
+

Public and external extension types

+ + Extension types can be declared extern or public. An extern extension type declaration makes +an extension type defined in external C code available to a Pyrex module. +A public extension type declaration makes an extension type defined in a Pyrex module available to external C +code. +

External extension types

+ An extern extension type allows you to gain access to the internals + of Python objects defined in the Python core or in a non-Pyrex extension +module. +
NOTE: In Pyrex versions before 0.8, extern extension + types were also used to reference extension types defined in another Pyrex + module. While you can still do that, Pyrex 0.8 and later provides a better + mechanism for this. See Sharing C Declarations Between + Pyrex Modules.
+ Here is an example which will let you get at the C-level members of the +built-in complex object. +
cdef extern from "complexobject.h":

    struct Py_complex:
+         double real
+         double imag

+

    ctypedef class __builtin__.complex [object PyComplexObject]: +
+         cdef Py_complex cval +

+

# A function which uses the above type
+ def spam(complex c):
+     print "Real:", c.cval.real
+     print "Imag:", c.cval.imag

+
+ Some important things to note are: +
    +
  1. In this example, ctypedef class has been used. This is because, + in the Python header files, the PyComplexObject struct is declared + with
    +
    +
    ctypedef struct {
    +     ...
    + } PyComplexObject;
    +
    +
    +
  2. As well as the name of the extension type, the module in which +its type object can be found is also specified. See the implicit importing section below. 
    +
    +
  3. +
  4. When declaring an external extension type, you don't declare +any methods. Declaration of methods is not required in order to call them, +because the calls are Python method calls. Also, as with structs and unions, +if your extension class declaration is inside a cdef extern from block, + you only need to declare those C members which you wish to access.
  5. +
+

Implicit importing

+
Backwards Incompatibility Note: +You will have to update any pre-0.8 Pyrex modules you have which use extern +extension types. I apologise for this, but for complicated reasons it proved + to be too difficult to continue supporting the old way of doing these while + introducing the new features that I wanted.
+ Pyrex 0.8 and later requires you to include a module name in an extern +extension class declaration, for example, +
cdef extern class MyModule.Spam:
+     ...
+ The type object will be implicitly imported from the specified module and + bound to the corresponding name in this module. In other words, in this +example an implicit +
    +
    from MyModule import Spam
    +
+ statement will be executed at module load time. +

The module name can be a dotted name to refer to a module inside a package + hierarchy, for example,

+
cdef extern class My.Nested.Package.Spam:
+     ...
+ You can also specify an alternative name under which to import the type +using an as clause, for example, +
    + cdef extern class My.Nested.Package.Spam as Yummy:
    +    ...
+ which corresponds to the implicit import statement +
    +
    from My.Nested.Package import Spam as Yummy
    +
+

Type names vs. constructor names

+ Inside a Pyrex module, the name of an extension type serves two distinct + purposes. When used in an expression, it refers to a module-level global +variable holding the type's constructor (i.e. its type-object). However, +it can also be used as a C type name to declare variables, arguments and +return values of that type. +

When you declare

+
cdef extern class MyModule.Spam:
+     ...
+ the name Spam serves both these roles. There may be other names + by which you can refer to the constructor, but only Spam can be +used as a type name. For example, if you were to explicitly import MyModule, + you could use MyModule.Spam() to create a Spam instance, but you + wouldn't be able to use MyModule.Spam as a type name. +

When an as clause is used, the name specified in the as +clause also takes over both roles. So if you declare

+
cdef extern class MyModule.Spam as Yummy:
+     ...
+ then Yummy becomes both the type name and a name for the constructor. + Again, there are other ways that you could get hold of the constructor, +but only Yummy is usable as a type name. +

Public extension types

+ An extension type can be declared public, in which case a .h +file is generated containing declarations for its object struct and type +object. By including the .h file in external C code that you write, +that code can access the attributes of the extension type. +

Name specification clause

+ The part of the class declaration in square brackets is a special feature + only available for extern or public extension types. The full +form of this clause is +
[object object_struct_name, type type_object_name ]
+ where object_struct_name is the name to assume for the type's C +struct, and type_object_name is the name to assume for the type's +statically declared type object. (The object and type clauses can be written +in either order.) +

If the extension type declaration is inside a cdef extern from +block, the object clause is required, because Pyrex must be able to +generate code that is compatible with the declarations in the header file. +Otherwise, for extern extension types, the object clause is +optional.

+

For public extension types, the object and type clauses +are both required, because Pyrex must be able to generate code that is compatible +with external C code.

+

+

+ Back to the Language Overview
+  
+
+ \ No newline at end of file Modified: lxml/pyrex/Doc/overview.html ============================================================================== --- lxml/pyrex/Doc/overview.html (original) +++ lxml/pyrex/Doc/overview.html Tue Apr 25 15:43:21 2006 @@ -1 +1,960 @@ - Pyrex Language Overview


Overview of the Pyrex Language 

This document informally describes the extensions to the Python language made by Pyrex. Some day there will be a reference manual covering everything in more detail.
 

Contents


Basics

This section describes the basic features of the Pyrex language. The facilities covered in this section allow you to create Python-callable functions that manipulate C data structures and convert between Python and C data types. Later sections will cover facilities for wrapping external C code, creating new Python types and cooperation between Pyrex modules.

Python functions vs. C functions

There are two kinds of function definition in Pyrex:

Python functions are defined using the def statement, as in Python. They take Python objects as parameters and return Python objects.

C functions are defined using the new cdef statement. They take either Python objects or C values as parameters, and can return either Python objects or C values.

Within a Pyrex module, Python functions and C functions can call each other freely, but only Python functions can be called from outside the module by interpreted Python code. So, any functions that you want to "export" from your Pyrex module must be declared as Python functions.

Parameters of either type of function can be declared to have C data types, using normal C declaration syntax. For example,

def spam(int i, char *s):
    ...
cdef int eggs(unsigned long l, float f):
    ...
When a parameter of a Python function is declared to have a C data type, it is passed in as a Python object and automatically converted to a C value, if possible. Automatic conversion is currently only possible for numeric types and string types; attempting to use any other type for the parameter of a Python function will result in a compile-time error.

C functions, on the other hand, can have parameters of any type, since they're passed in directly using a normal C function call.

Python objects as parameters and return values

If no type is specified for a parameter or return value, it is assumed to be a Python object. (Note that this is different from the C convention, where it would default to int.) For example, the following defines a C function that takes two Python objects as parameters and returns a Python object:
cdef spamobjs(x, y):
    ...
Reference counting for these objects is performed automatically according to the standard Python/C API rules (i.e. borrowed references are taken as parameters and a new reference is returned).

The name object can also be used to explicitly declare something as a Python object. This can be useful if the name being declared would otherwise be taken as the name of a type, for example,

cdef ftang(object int):
    ...
declares a parameter called int which is a Python object. You can also use object as the explicit return type of a function, e.g.
cdef object ftang(object int):
    ...
In the interests of clarity, it is probably a good idea to always be explicit about object parameters in C functions.

C variable and type definitions

The cdef statement is also used to declare C variables, either local or module-level:
cdef int i, j, k
cdef float f, g[42], *h
and C struct, union or enum types:
cdef struct Grail:
    int age
    float volume
cdef union Food:
    char *spam
    float *eggs
cdef enum CheeseType:
    cheddar, edam, 
    camembert
cdef enum CheeseState:
    hard = 1
    soft = 2
    runny = 3
There is currently no special syntax for defining a constant, but you can use an anonymous enum declaration for this purpose, for example,
cdef enum:
    tons_of_spam = 3
Note that the words struct, union and enum are used only when defining a type, not when referring to it. For example, to declare a variable pointing to a Grail you would write
cdef Grail *gp
and not
cdef struct Grail *gp # WRONG
There is also a ctypedef statement for giving names to types, e.g.
ctypedef unsigned long ULong
ctypedef int *IntPtr

Scope rules

Pyrex determines whether a variable belongs to a local scope, the module scope, or the built-in scope completely statically. As with Python, assigning to a variable which is not otherwise declared implicitly declares it to be a Python variable residing in the scope where it is assigned. Unlike Python, however, a name which is referred to but not declared or assigned is assumed to reside in the builtin scope, not the module scope. Names added to the module dictionary at run time will not shadow such names.

You can use a global statement at the module level to explicitly declare a name to be a module-level name when there would otherwise not be any indication of this, for example,

global __name__
print __name__
Without the global statement, the above would print the name of the builtins module.

Note: A consequence of these rules is that the module-level scope behaves the same way as a Python local scope if you refer to a variable before assigning to it. In particular, tricks such as the following will not work in Pyrex:
try:
  x = True
except NameError:
  True = 1
because, due to the assignment, the True will always be looked up in the module-level scope. You would have to do something like this instead:
import __builtin__
try:
  True = __builtin__.True
except AttributeError:
  True = 1

Statements and expressions

Control structures and expressions follow Python syntax for the most part. When applied to Python objects, they have the same semantics as in Python (unless otherwise noted). Most of the Python operators can also be applied to C values, with the obvious semantics.

If Python objects and C values are mixed in an expression, conversions are performed automatically between Python objects and C numeric or string types.

Reference counts are maintained automatically for all Python objects, and all Python operations are automatically checked for errors, with appropriate action taken.

Differences between C and Pyrex expression syntax

Pyrex also includes some C operations which have no direct Python equivalent. Some of them are expressed differently in Pyrex than in C.
  • There is no -> operator in Pyrex. Instead of p->x, use p.x

  •  
  • There is no * operator in Pyrex. Instead of *p, use p[0]

  •  
  • There is an & operator, with the same semantics as in C

  •  
  • The null C pointer is called NULL, not 0 (and NULL is a reserved word).

  •  
  • Character literals are written with a c prefix, for example:
    • c'X'
  • Type casts are written <type>value , for example:
    • cdef char *p, float *q
      p = <char*>q
    Warning: Don't attempt to use a typecast to convert between Python and C data types -- it won't do the right thing. Leave Pyrex to perform the conversion automatically.

Integer for-loops

You should be aware that a for-loop such as
for i in range(n):
    ...
won't be very fast, even if i and n are declared as C integers, because range is a Python function. For iterating over ranges of integers, Pyrex has another form of for-loop:
for i from 0 <= i < n:
    ...
If the loop variable and the lower and upper bounds are all C integers, this form of loop will be much faster, because Pyrex will translate it into pure C code.

Some things to note about the for-from loop:

  • The target expression must be a variable name.
  • The name between the lower and upper bounds must be the same as the target name.
  • The direction of iteration is determined by the relations. If they are both from the set {<, <=} then it is upwards; if they are both from the set {>, >=} then it is downwards. (Any other combination is disallowed.)
Like other Python looping statements, break and continue may be used in the body, and the loop may have an else clause.


Error return values

If you don't do anything special, a function declared with cdef that does not return a Python object has no way of reporting Python exceptions to its caller. If an exception is detected in such a function, a warning message is printed and the exception is ignored.

If you want a C function that does not return a Python object to be able to propagate exceptions to its caller, you need to declare an exception value for it. Here is an example:

cdef int spam() except -1:
    ...
With this declaration, whenever an exception occurs inside spam, it will immediately return with the value -1. Furthermore, whenever a call to spam returns -1, an exception will be assumed to have occurred and will be propagated.

When you declare an exception value for a function, you should never explicitly return that value. If all possible return values are legal and you can't reserve one entirely for signalling errors, you can use an alternative form of exception value declaration:

cdef int spam() except? -1:
    ...
The "?" indicates that the value -1 only indicates a possible error. In this case, Pyrex generates a call to PyErr_Occurred if the exception value is returned, to make sure it really is an error.

There is also a third form of exception value declaration:

cdef int spam() except *:
    ...
This form causes Pyrex to generate a call to PyErr_Occurred after every call to spam, regardless of what value it returns. If you have a function returning void that needs to propagate errors, you will have to use this form, since there isn't any return value to test.

Some things to note:

  • Currently, exception values can only be declared for functions returning an integer, float or pointer type, and the value must be a literal, not an expression (although it can be negative). The only possible pointer exception value is NULL. Void functions can only use the except * form.

  •  
  • The exception value specification is part of the signature of the function. If you're passing a pointer to a function as a parameter or assigning it to a variable, the declared type of the parameter or variable must have the same exception value specification (or lack thereof). Here is an example of a pointer-to-function declaration with an exception value:
    • int (*grail)(int, char *) except -1
  • You don't need to (and shouldn't) declare exception values for functions which return Python objects. Remember that a function with no declared return type implicitly returns a Python object.

Checking return values of non-Pyrex functions

It's important to understand that the except clause does not cause an error to be raised when the specified value is returned. For example, you can't write something like
cdef extern FILE *fopen(char *filename, char *mode) except NULL # WRONG!
and expect an exception to be automatically raised if a call to fopen returns NULL. The except clause doesn't work that way; its only purpose is for propagating exceptions that have already been raised, either by a Pyrex function or a C function that calls Python/C API routines. To get an exception from a non-Python-aware function such as fopen, you will have to check the return value and raise it yourself, for example,
cdef FILE *p
p = fopen("spam.txt", "r")
if p == NULL:
    raise SpamError("Couldn't open the spam file")


The include statement

For convenience, a large Pyrex module can be split up into a number of files which are put together using the include statement, for example
include "spamstuff.pxi"
The contents of the named file are textually included at that point. The included file can contain any complete top-level Pyrex statements, including other include statements. The include statement itself can only appear at the top level of a file.

The include statement can also be used in conjunction with public declarations to make C functions and variables defined in one Pyrex module accessible to another. However, note that some of these uses have been superseded by the facilities described in Sharing Declarations Between Pyrex Modules, and it is expected that use of the include statement for this purpose will be phased out altogether in future versions.


Interfacing with External C Code

One of the main uses of Pyrex is wrapping existing libraries of C code. This is achieved by using external declarations to declare the C functions and variables from the library that you want to use.

You can also use public declarations to make C functions and variables defined in a Pyrex module available to external C code. The need for this is expected to be less frequent, but you might want to do it, for example, if you are embedding Python in another application as a scripting language. Just as a Pyrex module can be used as a bridge to allow Python code to call C code, it can also be used to allow C code to call Python code.

External declarations

By default, C functions and variables declared at the module level are local to the module (i.e. they have the C static storage class). They can also be declared extern to specify that they are defined elsewhere, for example:
cdef extern int spam_counter
cdef extern void order_spam(int tons)

Referencing C header files

When you use an extern definition on its own as in the examples above, Pyrex includes a declaration for it in the generated C file. This can cause problems if the declaration doesn't exactly match the declaration that will be seen by other C code. If you're wrapping an existing C library, for example, it's important that the generated C code is compiled with exactly the same declarations as the rest of the library.

To achieve this, you can tell Pyrex that the declarations are to be found in a C header file, like this:

cdef extern from "spam.h":
    int spam_counter
    void order_spam(int tons)
The cdef extern from clause does three things:
  1. It directs Pyrex to place a #include statement for the named header file in the generated C code.
  2.  
  3. It prevents Pyrex from generating any C code for the declarations found in the associated block.
  4.  
  5. It treats all declarations within the block as though they started with cdef extern.
It's important to understand that Pyrex does not itself read the C header file, so you still need to provide Pyrex versions of any declarations from it that you use. However, the Pyrex declarations don't always have to exactly match the C ones, and in some cases they shouldn't or can't. In particular:
  1. Don't use const. Pyrex doesn't know anything about const, so just leave it out. Most of the time this shouldn't cause any problem, although on rare occasions you might have to use a cast (see footnote 1).
  2.  
  3. Leave out any platform-specific extensions to C declarations such as __declspec().
  4.  
  5. If the header file declares a big struct and you only want to use a few members, you only need to declare the members you're interested in. Leaving the rest out doesn't do any harm, because the C compiler will use the full definition from the header file.

    In some cases, you might not need any of the struct's members, in which case you can just put pass in the body of the struct declaration, e.g.

        cdef extern from "foo.h":
            struct spam:
                pass


    Note that you can only do this inside a cdef extern from block; struct declarations anywhere else must be non-empty.

  6. If the header file uses typedef names such as size_t to refer to platform-dependent flavours of numeric types, you will need a corresponding ctypedef statement, but you don't need to match the type exactly, just use something of the right general kind (int, float, etc). For example,
    1. ctypedef int size_t
    will work okay whatever the actual size of a size_t is (provided the header file defines it correctly).
     
  7. If the header file uses macros to define constants, translate them into a dummy enum declaration.
  8.  
  9. If the header file defines a function using a macro, declare it as though it were an ordinary function, with appropriate argument and result types.
A few more tricks and tips:
  • If you want to include a C header because it's needed by another header, but don't want to use any declarations from it, put pass in the extern-from block:
      cdef extern from "spam.h":
          pass
  • If you want to include some external declarations, but don't want to specify a header file (because it's included by some other header that you've already included) you can put * in place of the header file name:
cdef extern from *:
    ...

Styles of struct, union and enum declaration

There are two main ways that structs, unions and enums can be declared in C header files: using a tag name, or using a typedef. There are also some variations based on various combinations of these.

It's important to make the Pyrex declarations match the style used in the header file, so that Pyrex can emit the right sort of references to the type in the code it generates. To make this possible, Pyrex provides two different syntaxes for declaring a struct, union or enum type. The style introduced above corresponds to the use of a tag name. To get the other style, you prefix the declaration with ctypedef, as illustrated below.

The following table shows the various possible styles that can be found in a header file, and the corresponding Pyrex declaration that you should put in the cdef extern from block. Struct declarations are used as an example; the same applies equally to union and enum declarations.

Note that in all the cases below, you refer to the type in Pyrex code simply as Foo, not struct Foo.
 
  C code Possibilities for corresponding Pyrex code Comments
1 struct Foo {
  ...
};
cdef struct Foo:
  ...
Pyrex will refer to the type as struct Foo in the generated C code.
2 typedef struct {
  ...
} Foo;
ctypedef struct Foo:
  ...
Pyrex will refer to the type simply as Foo in the generated C code.
3 typedef struct foo {
  ...
} Foo;
cdef struct foo:
  ...
ctypedef foo Foo #optional
If the C header uses both a tag and a typedef with different names, you can use either form of declaration in Pyrex (although if you need to forward reference the type, you'll have to use the first form).
ctypedef struct Foo:
  ...
4 typedef struct Foo {
  ...
} Foo;
cdef struct Foo:
  ...
If the header uses the same name for the tag and the typedef, you won't be able to include a ctypedef for it -- but then, it's not necessary.

Accessing Python/C API routines

One particular use of the cdef extern from statement is for gaining access to routines in the Python/C API. For example,
cdef extern from "Python.h":
    object PyString_FromStringAndSize(char *s, int len)
will allow you to create Python strings containing null bytes.


Resolving naming conflicts - C name specifications

Each Pyrex module has a single module-level namespace for both Python and C names. This can be inconvenient if you want to wrap some external C functions and provide the Python user with Python functions of the same names.

Pyrex 0.8 provides a couple of different ways of solving this problem. The best way, especially if you have many C functions to wrap, is probably to put the extern C function declarations into a different namespace using the facilities described in the section on sharing declarations between Pyrex modules.

The other way is to use a c name specification to give different Pyrex and C names to the C function. Suppose, for example, that you want to wrap an external function called eject_tomato. If you declare it as

cdef extern void c_eject_tomato "eject_tomato" (float speed)
then its name inside the Pyrex module will be c_eject_tomato, whereas its name in C will be eject_tomato. You can then wrap it with
def eject_tomato(speed):
  c_eject_tomato(speed)
so that users of your module can refer to it as eject_tomato.

Another use for this feature is referring to external names that happen to be Pyrex keywords. For example, if you want to call an external function called print, you can rename it to something else in your Pyrex module.

As well as functions, C names can be specified for variables, structs, unions, enums, struct and union members, and enum values. For example,

cdef extern int one "ein", two "zwei"
cdef extern float three "drei"

cdef struct spam "SPAM":
  int i "eye"
cdef enum surprise "inquisition":
  first "alpha"
  second "beta" = 3

Public Declarations

You can make C variables and functions defined in a Pyrex module accessible to external C code (or another Pyrex module) using the public keyword, as follows:
cdef public int spam # public variable declaration

cdef public void grail(int num_nuns): # public function declaration
    ...

If there are any public declarations in a Pyrex module, a .h file is generated containing equivalent C declarations for inclusion in other C code.

Pyrex also generates a .pxi file containing Pyrex versions of the declarations for inclusion in another Pyrex module using the include statement. If you use this, you will need to arrange for the module using the declarations to be linked against the module defining them, and for both modules to be available to the dynamic linker at run time. I haven't tested this, so I can't say how well it will work on the various platforms.

NOTE: If all you want to export is an extension type, there is now a better way -- see Sharing Declarations Between Pyrex Modules.


Extension Types

One of the most powerful features of Pyrex is the ability to easily create new built-in Python types, called extension types. This is a major topic in itself, so there is a  separate page devoted to it.


Sharing Declarations Between Pyrex Modules

Pyrex 0.8 introduces a substantial new set of facilities allowing a Pyrex module to easily import and use C declarations and extension types from another Pyrex module. You can now create a set of co-operating Pyrex modules just as easily as you can create a set of co-operating Python modules. There is a separate page devoted to this topic.


Limitations

Unsupported Python features

Pyrex is not quite a full superset of Python. The following restrictions apply:
  • Function definitions (whether using def or cdef) cannot be nested within other function definitions.
  •  
  • Class definitions can only appear at the top level of a module, not inside a function.
  •  
  • The import * form of import is not allowed anywhere (other forms of the import statement are fine, though).
  •  
  • Generators cannot be defined in Pyrex.

  • The globals() and locals() functions cannot be used.
  • The above restrictions will most likely remain, since removing them would be difficult and they're not really needed for Pyrex's intended applications.

    There are also some temporary limitations which may eventually be lifted:

  • Class and function definitions cannot be placed inside control structures.
  •  
  • In-place arithmetic operators (+=, etc) are not yet supported.
  •  
  • List comprehensions are not yet supported.
  •  
  • There is no support for Unicode.
  •  
  • Special methods of extension types cannot have functioning docstrings.

  • The use of string literals as comments is not recommended at present, because Pyrex doesn't optimize them away, and won't even accept them in places where executable statements are not allowed.
  • There are probably also some other gaps which I can't think of at the moment.

    Semantic differences between Python and Pyrex

    Behaviour of class scopes

    In Python, referring to a method of a class inside the class definition, i.e. while the class is being defined, yields a plain function object, but in Pyrex it yields an unbound method (see footnote 2). A consequence of this is that the usual idiom for using the classmethod and staticmethod functions, e.g.
    class Spam:
      def method(cls):
        ...
      method = classmethod(method)
    will not work in Pyrex. This can be worked around by defining the function outside the class, and then assigning the result of classmethod or staticmethod inside the class, i.e.
    def Spam_method(cls):
      ...
    class Spam:
      method = classmethod(Spam_method)


    Footnotes

    1. A problem with const could arise if you have something like
    cdef extern from "grail.h":
      char *nun
    where grail.h actually contains
    extern const char *nun;
    and you do
    cdef void languissement(char *s):
      #something that doesn't change s
    ...
    languissement(nun)
    which will cause the C compiler to complain. You can work around it by casting away the constness:
    languissement(<char *>nun)

    2. The reason for the different behaviour of class scopes is that Pyrex-defined Python functions are PyCFunction objects, not PyFunction objects, and are not recognised by the machinery that creates a bound or unbound method when a function is extracted from a class. To get around this, Pyrex wraps each method in an unbound method object itself before storing it in the class's dictionary.
     

    \ No newline at end of file + + + + + + + + Pyrex Language Overview + + + +


    Overview of the Pyrex Language 

    + + This document informally describes the extensions to the Python language + made by Pyrex. Some day there will be a reference manual covering everything + in more detail.
    + +   +

    Contents

    + + + + + +


    Basics +

    + + This section describes the basic features of the Pyrex language. The facilities + covered in this section allow you to create Python-callable functions that + manipulate C data structures and convert between Python and C data types. + Later sections will cover facilities for wrapping external C code, creating new Python types and cooperation between Pyrex modules. +

    Python functions vs. C functions

    + + There are two kinds of function definition in Pyrex: +

    Python functions are defined using the def statement, as + in Python. They take Python objects as parameters and return Python objects. +

    + + +

    C functions are defined using the new cdef statement. They + take either Python objects or C values as parameters, and can return either + Python objects or C values.

    + + +

    Within a Pyrex module, Python functions and C functions can call each other +freely, but only Python functions can be called from outside the module by +interpreted Python code. So, any functions that you want to "export" from + your Pyrex module must be declared as Python functions using def.

    + + +

    Parameters of either type of function can be declared to have C data types, + using normal C declaration syntax. For example,

    + + +
    def spam(int i, char *s):
        ...
    +
    cdef int eggs(unsigned long l, float f):
        ...
    +
    + + When a parameter of a Python function is declared to have a C data type, + it is passed in as a Python object and automatically converted to a C value, + if possible. Automatic conversion is currently only possible for numeric +types and string types; attempting to use any other type for the parameter +of a Python function will result in a compile-time error. +

    C functions, on the other hand, can have parameters of any type, since + they're passed in directly using a normal C function call.

    + + +

    Python objects as parameters and return values

    + + If no type is specified for a parameter or return value, it is assumed + to be a Python object. (Note that this is different from the C convention, + where it would default to int.) For example, the following defines + a C function that takes two Python objects as parameters and returns a Python + object: +
    cdef spamobjs(x, y):
        ...
    +
    + + Reference counting for these objects is performed automatically according + to the standard Python/C API rules (i.e. borrowed references are taken as + parameters and a new reference is returned). +

    The name object can also be used to explicitly declare something + as a Python object. This can be useful if the name being declared would otherwise +be taken as the name of a type, for example,

    + + +
    cdef ftang(object int):
        ...
    +
    + + declares a parameter called int which is a Python object. You +can also use object as the explicit return type of a function, e.g. + +
    cdef object ftang(object int):
        ...
    +
    + + In the interests of clarity, it is probably a good idea to always be explicit + about object parameters in C functions. +

    C variable and type definitions

    + + The cdef statement is also used to declare C variables, either +local or module-level: +
    cdef int i, j, k
    cdef float f, g[42], *h
    +
    + + and C struct, union or enum types: +
    cdef struct Grail:
        int age
        float volume
    +
    cdef union Food:
        char *spam
        float *eggs
    +
    cdef enum CheeseType:
        cheddar, edam, 
        camembert
    +
    cdef enum CheeseState:
        hard = 1
        soft = 2
        runny = 3
    +
    + + There is currently no special syntax for defining a constant, but you +can use an anonymous enum declaration for this purpose, for example, +
    cdef enum:
    +     tons_of_spam = 3
    + + Note that the words struct, union and enum are used only when defining a type, not when referring to it. For example, to declare a variable pointing + to a Grail you would write +
    cdef Grail *gp
    +
    + + and not +
    cdef struct Grail *gp # WRONG
    +
    + + There is also a ctypedef statement for giving names to types, e.g. + +
    ctypedef unsigned long ULong
    +
    ctypedef int *IntPtr
    + +

    Automatic type conversions

    + +In most situations, automatic conversions will be performed for the +basic numeric and string types when a Python object is used in a +context requiring a C value, or vice versa. The following table +summarises the conversion possibilities.
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    C types
    +
    From Python types
    +
    To Python types
    +
    [unsigned] char
    +[unsigned] short
    + int, long
    int, long
    +
    int
    +
    unsigned int
    +unsigned long
    + [unsigned] long long
    + +
    int, long
    +
    + +
    long
    +
    + +
    float, double, long double
    +
    int, long, float
    +
    float
    +
    char *
    +
    str
    +
    str
    +
    + +
    + +

    Caveats when using a Python string in a C context

    + +You need to be careful when using a Python string in a context expecting a char *. +In this situation, a pointer to the contents of the Python string is +used, which is only valid as long as the Python string exists. So you +need to make sure that a reference to the original Python string is +held for as long as the C string is needed. If you can't guarantee that +the Python string will live long enough, you will need to copy the C +string.
    + +
    + +Pyrex detects and prevents some mistakes of this kind. For instance, if you attempt something like
    + +
    cdef char *s
    s = pystring1 + pystring2
    + +then Pyrex will produce the error message "Obtaining char * from temporary Python value". +The reason is that concatenating the two Python strings produces a new +Python string object that is referenced only by a temporary internal +variable that Pyrex generates. As soon as the statement has finished, +the temporary variable will be decrefed and the Python string +deallocated, leaving s dangling. Since this code could not possibly work, Pyrex refuses to compile it.
    + +
    + +The solution is to assign the result of the concatenation to a Python variable, and then obtain the char * from that, i.e.
    + +
    cdef char *s
    p = pystring1 + pystring2
    s = p
    + +It is then your responsibility to hold the reference p for as long as necessary.
    + +
    + +Keep in mind that the rules used to detect such errors are only +heuristics. Sometimes Pyrex will complain unnecessarily, and sometimes +it will fail to detect a problem that exists. Ultimately, you need to +understand the issue and be careful what you do.
    + +
    +
    + + + +

    Scope rules

    + + Pyrex determines whether a variable belongs to a local scope, the module + scope, or the built-in scope completely statically. As with Python, + assigning to a variable which is not otherwise declared implicitly declares + it to be a Python variable residing in the scope where it is assigned. Unlike + Python, however, a name which is referred to but not declared or assigned + is assumed to reside in the builtin scope, not the module scope. +Names added to the module dictionary at run time will not shadow such names. + +

    You can use a global statement at the module level to explicitly + declare a name to be a module-level name when there would otherwise not be +any indication of this, for example,

    + + +
    global __name__
    + print __name__
    + + Without the global statement, the above would print the name of +the builtins module.
    + +
    + + Note: A consequence of these rules is that the module-level scope behaves + the same way as a Python local scope if you refer to a variable before assigning + to it. In particular, tricks such as the following will not work +in Pyrex:
    + + +
    try:
      x = True
    except NameError:
      True = 1
    +
    + + because, due to the assignment, the True will always be looked up in the + module-level scope. You would have to do something like this instead:
    + + +
    import __builtin__
    try:
    True = __builtin__.True
    except AttributeError:
    True = 1
    +
    + + +
    +

    Statements and expressions

    + + Control structures and expressions follow Python syntax for the most part. + When applied to Python objects, they have the same semantics as in Python + (unless otherwise noted). Most of the Python operators can also be applied + to C values, with the obvious semantics. +

    If Python objects and C values are mixed in an expression, conversions + are performed automatically between Python objects and C numeric or string + types.

    + + +

    Reference counts are maintained automatically for all Python objects, and +all Python operations are automatically checked for errors, with appropriate + action taken.

    + + +

    Differences between C and Pyrex +expressions

    +There +are some differences in syntax and semantics between C expressions and +Pyrex expressions, particularly in the area of C constructs which have +no direct equivalent in Python.
    + +
      +
    • An integer literal without an L suffix is treated as a C constant, and will be truncated to whatever size your C compiler thinks appropriate. With an L suffix, it will be converted to a Python long integer (even if it would be small enough to fit into a C int).
      +
      +
    • +
    • There is no -> operator in Pyrex. Instead of p->x, + use p.x
    • + +  
    • There is no * operator in Pyrex. Instead of + *p, use p[0]
    • + +  
    • There is an & operator, with the same semantics + as in C.
    • + +  
    • The null C pointer is called NULL, not 0 (and + NULL is a reserved word).
    • + +  
    • Character literals are written with a c prefix, for +example:
    • +
        +
        c'X'
        +
      +
    • Type casts are written <type>value , for example:
    • +
        +
        cdef char *p, float *q
        p = <char*>q
        +
      + Warning: Don't attempt to use a typecast to convert between +Python and C data types -- it won't do the right thing. Leave Pyrex to perform +the conversion automatically. +
    + + +

    Integer for-loops

    + + You should be aware that a for-loop such as +
    for i in range(n):
    +     ...
    + + won't be very fast, even if i and n are declared as +C integers, because range is a Python function. For iterating over +ranges of integers, Pyrex has another form of for-loop: +
    for i from 0 <= i < n:
    +     ...
    + + If the loop variable and the lower and upper bounds are all C integers, +this form of loop will be much faster, because Pyrex will translate it into +pure C code. +

    Some things to note about the for-from loop:

    + + +
      + +
    • The target expression must be a variable name.
    • +
    • The name between the lower and upper bounds must be the same as +the target name.
    • +
    • The direction of iteration is determined by the relations. If they + are both from the set {<, <=} then it is upwards; + if they are both from the set {>, >=} then it is +downwards. (Any other combination is disallowed.)
    • + +
    + + Like other Python looping statements, break and continue may be used in the body, and the loop may have an else clause. + +


    + + +

    Error return values

    + + If you don't do anything special, a function declared with cdef that does not return a Python object has no way of reporting Python exceptions + to its caller. If an exception is detected in such a function, a warning +message is printed and the exception is ignored. +

    If you want a C function that does not return a Python object to be able + to propagate exceptions to its caller, you need to declare an exception + value for it. Here is an example:

    + + +
    cdef int spam() except -1:
    +     ...
    + + With this declaration, whenever an exception occurs inside spam, + it will immediately return with the value -1. Furthermore, whenever + a call to spam returns -1, an exception will be assumed + to have occurred and will be propagated. +

    When you declare an exception value for a function, you should never explicitly + return that value. If all possible return values are legal and you can't +reserve one entirely for signalling errors, you can use an alternative form +of exception value declaration:

    + + +
    cdef int spam() except? -1:
    +     ...
    + + The "?" indicates that the value -1 only indicates a possible error. In this case, Pyrex generates a call to PyErr_Occurred if the +exception value is returned, to make sure it really is an error. +

    There is also a third form of exception value declaration:

    + + +
    cdef int spam() except *:
    +     ...
    + + This form causes Pyrex to generate a call to PyErr_Occurred after + every call to spam, regardless of what value it returns. If you have + a function returning void that needs to propagate errors, you will + have to use this form, since there isn't any return value to test. +

    Some things to note:

    + + +
      + +
    • Currently, exception values can only be declared for functions returning + an integer, float or pointer type, and the value must be a literal, + not an expression (although it can be negative). The only possible pointer + exception value is NULL. Void functions can only use the except + * form.
    • +
      +  
    • The exception value specification is part of the signature +of the function. If you're passing a pointer to a function as a parameter +or assigning it to a variable, the declared type of the parameter or variable + must have the same exception value specification (or lack thereof). Here +is an example of a pointer-to-function declaration with an exception value:
    • +
        +
        int (*grail)(int, char *) except -1
        +
      +
    • You don't need to (and shouldn't) declare exception values for functions + which return Python objects. Remember that a function with no declared return + type implicitly returns a Python object.
    • + +
    + + +

    Checking return values of non-Pyrex + functions

    + + It's important to understand that the except clause does not cause an error to be raised when the specified value is returned. For +example, you can't write something like +
    cdef extern FILE *fopen(char *filename, char *mode) except NULL # WRONG!
    +
    + + and expect an exception to be automatically raised if a call to fopen +returns NULL. The except clause doesn't work that way; its only purpose +is for propagating exceptions that have already been raised, either +by a Pyrex function or a C function that calls Python/C API routines. To +get an exception from a non-Python-aware function such as fopen, you will +have to check the return value and raise it yourself, for example, +
    cdef FILE *p
    p = fopen("spam.txt", "r")
    if p == NULL:
        raise SpamError("Couldn't open the spam file")
    +
    + + +


    + + +

    The include statement

    + + For convenience, a large Pyrex module can be split up into a number of +files which are put together using the include statement, for example + +
    include "spamstuff.pxi"
    +
    + + The contents of the named file are textually included at that point. The + included file can contain any complete top-level Pyrex statements, including + other include statements. The include statement itself can +only appear at the top level of a file. +

    The include statement can also be used in conjunction with public declarations to make C functions and + variables defined in one Pyrex module accessible to another. However, note + that some of these uses have been superseded by the facilities described +in Sharing Declarations Between Pyrex Modules, +and it is expected that use of the include statement for this purpose +will be phased out altogether in future versions.

    + + +


    Interfacing with External + C Code +

    + + One of the main uses of Pyrex is wrapping existing libraries of C code. +This is achieved by using external declarations to declare the C functions and variables from the library that you want to + use. +

    You can also use public declarations to make + C functions and variables defined in a Pyrex module available to external + C code. The need for this is expected to be less frequent, but you might +want to do it, for example, if you are embedding Python in another application + as a scripting language. Just as a Pyrex module can be used as a bridge to +allow Python code to call C code, it can also be used to allow C code to +call Python code.

    + + +

    External declarations

    + + By default, C functions and variables declared at the module level are +local to the module (i.e. they have the C static storage class). They +can also be declared extern to specify that they are defined elsewhere, + for example: +
    cdef extern int spam_counter
    +
    cdef extern void order_spam(int tons)
    +
    + + +
    + + +

    Referencing C header files

    + + When you use an extern definition on its own as in the examples above, +Pyrex includes a declaration for it in the generated C file. This can cause +problems if the declaration doesn't exactly match the declaration that will +be seen by other C code. If you're wrapping an existing C library, for example, +it's important that the generated C code is compiled with exactly the same +declarations as the rest of the library. +

    To achieve this, you can tell Pyrex that the declarations are to be found + in a C header file, like this:

    + + +
    cdef extern from "spam.h":
    +
        int spam_counter
    +
        void order_spam(int tons)
    +
    + + The cdef extern from clause does three things: +
      + +
    1. It directs Pyrex to place a #include statement for the named + header file in the generated C code.
      +
    2. +  
    3. It prevents Pyrex from generating any C code for the declarations + found in the associated block.
      +
    4. +  
    5. It treats all declarations within the block as though they +started with cdef extern.
    6. + +
    + + It's important to understand that Pyrex does not itself read the +C header file, so you still need to provide Pyrex versions of any declarations + from it that you use. However, the Pyrex declarations don't always have to +exactly match the C ones, and in some cases they shouldn't or can't. In particular: + +
      + +
    1. Don't use const. Pyrex doesn't know anything about const, +so just leave it out. Most of the time this shouldn't cause any problem, +although on rare occasions you might have to use a cast. 1
      +
    2. +  
    3. Leave out any platform-specific extensions to C declarations + such as __declspec().
      +
    4. +  
    5. If the header file declares a big struct and you only want +to use a few members, you only need to declare the members you're interested +in. Leaving the rest out doesn't do any harm, because the C compiler will +use the full definition from the header file.
      +
      + In some cases, you might not need any of the struct's members, in +which case you can just put pass in the body of the struct declaration, +e.g.
      +
      +     cdef extern from "foo.h":
      +         struct spam:
      +             pass

      +
      +Note that you can only do this inside a cdef extern from block; struct +declarations anywhere else must be non-empty.
      +
      +
    6. +
    7. If the header file uses typedef names such as size_t to refer +to platform-dependent flavours of numeric types, you will need a corresponding + ctypedef statement, but you don't need to match the type exactly, + just use something of the right general kind (int, float, etc). For example,
    8. +
        +
        ctypedef int size_t
        +
      + will work okay whatever the actual size of a size_t is (provided the header + file defines it correctly).
      +  
    9. If the header file uses macros to define constants, translate + them into a dummy enum declaration.
      +
    10. +  
    11. If the header file defines a function using a macro, declare + it as though it were an ordinary function, with appropriate argument and +result types.
    12. + +
    + + A few more tricks and tips: +
      + +
    • If you want to include a C header because it's needed by another +header, but don't want to use any declarations from it, put pass in the extern-from block:
    • + +
    + + +
      + +
        + cdef extern from "spam.h":
        +     pass
      + +
    + + +
      + +
    • If you want to include some external declarations, but don't want +to specify a header file (because it's included by some other header that +you've already included) you can put * in place of the header file +name:
    • + +
    + + +
    cdef extern from *:
    +     ...
    +
    + + +

    Styles of struct, union and enum declaration

    + + There are two main ways that structs, unions and enums can be declared +in C header files: using a tag name, or using a typedef. There are also some + variations based on various combinations of these. +

    It's important to make the Pyrex declarations match the style used in the +header file, so that Pyrex can emit the right sort of references to the type +in the code it generates. To make this possible, Pyrex provides two different +syntaxes for declaring a struct, union or enum type. The style introduced +above corresponds to the use of a tag name. To get the other style, you prefix +the declaration with ctypedef, as illustrated below.

    + + +

    The following table shows the various possible styles that can be found + in a header file, and the corresponding Pyrex declaration that you should + put in the cdef extern from block. Struct declarations are used as +an example; the same applies equally to union and enum declarations.

    + + +

    Note that in all the cases below, you refer to the type in Pyrex code simply +as Foo, not struct Foo. +
    +   + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
     C codePossibilities for corresponding +Pyrex codeComments
    1struct Foo {
    +   ...
    + };
    cdef struct Foo:
    +   ...
    Pyrex will refer to the type as struct Foo in the generated + C code.
    2typedef struct {
    +   ...
    + } Foo;
    ctypedef struct Foo:
    +   ...
    Pyrex will refer to the type simply as Foo +in the generated C code.
    3typedef struct +foo {
    +   ...
    + } Foo;
    cdef struct foo:
    +   ...
    + ctypedef foo Foo #optional
    If the C header uses both a tag and a typedef + with different names, you can use either form of declaration in Pyrex + (although if you need to forward reference the type, you'll have to use +the first form).
    ctypedef struct Foo:
    +   ...
    4typedef struct Foo {
    +   ...
    + } Foo;
    cdef struct Foo:
    +   ...
    If the header uses the same name for the tag and the typedef, + you won't be able to include a ctypedef for it -- but then, it's not +necessary.
    +

    + + +

    Accessing Python/C API routines

    + + One particular use of the cdef extern from statement is for gaining + access to routines in the Python/C API. For example, +
    cdef extern from "Python.h":
    +
        object PyString_FromStringAndSize(char *s, int len)
    +
    + + will allow you to create Python strings containing null bytes. +

    + + +
    +

    Resolving naming conflicts - C name specifications

    + + Each Pyrex module has a single module-level namespace for both Python +and C names. This can be inconvenient if you want to wrap some external +C functions and provide the Python user with Python functions of the same +names. +

    Pyrex 0.8 provides a couple of different ways of solving this problem. + The best way, especially if you have many C functions to wrap, is probably + to put the extern C function declarations into a different namespace using + the facilities described in the section on sharing + declarations between Pyrex modules.

    + + +

    The other way is to use a c name specification to give different + Pyrex and C names to the C function. Suppose, for example, that you want +to wrap an external function called eject_tomato. If you declare +it as

    + + +
    cdef extern void c_eject_tomato "eject_tomato" (float speed)
    +
    + + then its name inside the Pyrex module will be c_eject_tomato, +whereas its name in C will be eject_tomato. You can then wrap it +with +
    def eject_tomato(speed):
      c_eject_tomato(speed)
    +
    + + so that users of your module can refer to it as eject_tomato. + +

    Another use for this feature is referring to external names that happen + to be Pyrex keywords. For example, if you want to call an external function + called print, you can rename it to something else in your Pyrex +module.

    + + +

    As well as functions, C names can be specified for variables, structs, + unions, enums, struct and union members, and enum values. For example,

    + + +
    cdef extern int one "ein", two "zwei"
    cdef extern float three "drei"

    cdef struct spam "SPAM":
      int i "eye"
    + cdef enum surprise "inquisition":
    +   first "alpha"
    +   second "beta" = 3
    + + +
    +

    Public Declarations

    + + You can make C variables and functions defined in a Pyrex module accessible + to external C code (or another Pyrex module) using the public keyword, as follows: +
    cdef public int spam # public variable declaration

    cdef public void grail(int num_nuns): # public function declaration
    +     ...

    +
    + + If there are any public declarations in a Pyrex module, a .h file is generated containing equivalent C declarations for inclusion in other + C code. +

    Pyrex also generates a .pxi file containing Pyrex versions of the + declarations for inclusion in another Pyrex module using the include statement. If you use this, you + will need to arrange for the module using the declarations to be linked +against the module defining them, and for both modules to be available to +the dynamic linker at run time. I haven't tested this, so I can't say how +well it will work on the various platforms.

    + + +
    NOTE: If all you want to export is an extension type, there is + now a better way -- see Sharing Declarations Between + Pyrex Modules.
    + + +


    Extension Types +

    + + One of the most powerful features of Pyrex is the ability to easily create + new built-in Python types, called extension types. This is a major + topic in itself, so there is a  separate + page devoted to it. +


    Sharing Declarations Between Pyrex Modules +

    + + Pyrex 0.8 introduces a substantial new set of facilities allowing a Pyrex + module to easily import and use C declarations and extension types from another +Pyrex module. You can now create a set of co-operating Pyrex modules just +as easily as you can create a set of co-operating Python modules. There is +a separate page devoted to this topic. +


    Limitations +

    + + +

    Unsupported Python features

    + + Pyrex is not quite a full superset of Python. The following restrictions + apply: +
  • Function definitions (whether using def or cdef) + cannot be nested within other function definitions.
    +
  • +  
  • Class definitions can only appear at the top level of a module, + not inside a function.
    +
  • +  
  • The import * form of import is not allowed anywhere + (other forms of the import statement are fine, though).
    +
  • +  
  • Generators cannot be defined in Pyrex.
    +
    +
  • +
  • The globals() and locals() functions cannot be +used.
  • +
    + + The above restrictions will most likely remain, since removing them would + be difficult and they're not really needed for Pyrex's intended applications. + +

    There are also some temporary limitations, which may eventually be lifted, including: +

    + + +
  • Class and function definitions cannot be placed inside +control structures.
    +
  • +  
  • In-place arithmetic operators (+=, etc) are not yet supported.
    +
  • +  
  • List comprehensions are not yet supported.
    +
  • +  
  • There is no support for Unicode.
    +
  • +  
  • Special methods of extension types cannot have functioning +docstrings.
    +
    +
  • +
  • The use of string literals as comments is not recommended at present, + because Pyrex doesn't optimize them away, and won't even accept them in +places where executable statements are not allowed.
  • +
    + +

    Semantic differences between Python + and Pyrex

    + + +

    Behaviour of class scopes

    + + In Python, referring to a method of a class inside the class definition, + i.e. while the class is being defined, yields a plain function object, but + in Pyrex it yields an unbound method [2]. A consequence of this is that the +usual idiom for using the classmethod and staticmethod functions, e.g. +
    class Spam:
    +
      def method(cls):
        ...
    +
      method = classmethod(method)
    +
    + + will not work in Pyrex. This can be worked around by defining the function + outside the class, and then assigning the result of classmethod or + staticmethod inside the class, i.e. +
    def Spam_method(cls):
      ...
    +
    class Spam:
    +
      method = classmethod(Spam_method)
    +
    + + +


    Footnotes

    + + 1. A problem with const could arise if you have +something like +
    cdef extern from "grail.h":
      char *nun
    +
    + + where grail.h actually contains +
    extern const char *nun;
    +
    + + and you do +
    cdef void languissement(char *s):
      #something that doesn't change s
    +
    ...
    +
    languissement(nun)
    +
    + + which will cause the C compiler to complain. You can work around it by +casting away the constness: +
    languissement(<char *>nun)
    +
    + + +
    2. The reason for the different behaviour +of class scopes is that Pyrex-defined Python functions are PyCFunction objects, +not PyFunction objects, and are not recognised by the machinery that creates +a bound or unbound method when a function is extracted from a class. To get +around this, Pyrex wraps each method in an unbound method object itself before +storing it in the class's dictionary.
    + +  
    + +
    + + \ No newline at end of file Modified: lxml/pyrex/Doc/sharing.html ============================================================================== --- lxml/pyrex/Doc/sharing.html (original) +++ lxml/pyrex/Doc/sharing.html Tue Apr 25 15:43:21 2006 @@ -1 +1,201 @@ - Sharing Declarations Between Pyrex Modules


    Sharing Declarations Between Pyrex Modules

    This section describes a new set of facilities introduced in Pyrex 0.8 for making C declarations and extension types in one Pyrex module available for use in another Pyrex module. These facilities are closely modelled on the Python import mechanism, and can be thought of as a compile-time version of it.

    Contents

    Definition and Implementation files

    A Pyrex module can be split into two parts: a definition file with a .pxd suffix, containing C declarations that are to be available to other Pyrex modules, and an implementation file with a .pyx suffix, containing everything else. When a module wants to use something declared in another module's definition file, it imports it using the cimport statement.

    What a Definition File contains

    A definition file can contain:
    • Any kind of C type declaration.
    • extern C function or variable declarations.
    • The definition part of an extension type (see below).
    It cannot currently contain any non-extern C function or variable declarations (although this may be possible in a future version).

    It cannot contain the implementations of any C or Python functions, or any Python class definitions, or any executable statements.

    NOTE: You don't need to (and shouldn't) declare anything in a definition file public in order to make it available to other Pyrex modules; its mere presence in a definition file does that. You only need a public declaration if you want to make something available to external C code.

    What an Implementation File contains

    An implementation file can contain any kind of Pyrex statement, although there are some restrictions on the implementation part of an extension type if the corresponding definition file also defines that type (see below).

    The cimport statement

    The cimport statement is used in a definition or implementation file to gain access to names declared in another definition file. Its syntax exactly parallels that of the normal Python import statement:
    cimport module [, module...]
    from module cimport name [as name] [, name [as name] ...]
    Here is an example. The file on the left is a definition file which exports a C data type. The file on the right is an implementation file which imports and uses it.
     
    dishes.pxd restaurant.pyx
    cdef enum otherstuff:
        sausage, eggs, lettuce

    cdef struct spamdish:
        int oz_of_spam
        otherstuff filler

    cimport dishes
    from dishes cimport spamdish

    cdef void prepare(spamdish *d):
        d.oz_of_spam = 42
        d.filler = dishes.sausage

    def serve():
        cdef spamdish d
        prepare(&d)
        print "%d oz spam, filler no. %d" % \
             (d.oz_of_spam, d.filler)

    It is important to understand that the cimport statement can only be used to import C data types, external C functions and variables, and extension types. It cannot be used to import any Python objects, and (with one exception) it doesn't imply any Python import at run time. If you want to refer to any Python names from a module that you have cimported, you will have to include a regular import statement for it as well.

    The exception is that when you use cimport to import an extension type, its type object is imported at run time and made available by the name under which you imported it. Using cimport to import extension types is covered in more detail below.

    Search paths for definition files

    When you cimport a module called modulename, the Pyrex compiler searches for a file called modulename.pxd along the search path for include files, as specified by -I command line options.

    Also, whenever you compile a file modulename.pyx, the corresponding definition file modulename.pxd is first searched for along the same path, and if found, it is processed before processing the .pyx file.

    Using cimport to resolve naming conflicts

    The cimport mechanism provides a clean and simple way to solve the problem of wrapping external C functions with Python functions of the same name. All you need to do is put the extern C declarations into a .pxd file for an imaginary module, and cimport that module. You can then refer to the C functions by qualifying them with the name of the module. Here's an example:
     
    c_lunch.pxd lunch.pyx
    cdef extern from "lunch.h":
        void eject_tomato(float)
    cimport c_lunch

    def eject_tomato(float speed):
        c_lunch.eject_tomato(speed)

    You don't need any c_lunch.pyx file, because the only things defined in c_lunch.pxd are extern C entities. There won't be any actual c_lunch module at run time, but that doesn't matter -- c_lunch has done its job of providing an additional namespace at compile time.

    Sharing Extension Types

    An extension type declaration can also be split into two parts, one in a definition file and the other in the corresponding implementation file.

    The definition part of the extension type can only declare C attributes and C methods, not Python methods, and it must declare all of that type's C attributes and C methods.

    The implementation part must implement all of the C methods declared in the definition part, and may not add any further C attributes. It may also define Python methods.

    Here is an example of a module which defines and exports an extension type, and another module which uses it.
     
    Shrubbing.pxd Shrubbing.pyx
    cdef class Shrubbery:
        cdef int width
        cdef int length
    cdef class Shrubbery:
        def __new__(self, int w, int l):
            self.width = w
            self.length = l

    def standard_shrubbery():
        return Shrubbery(3, 7)

    Landscaping.pyx
    cimport Shrubbing
    import Shrubbing

    cdef Shrubbing.Shrubbery sh
    sh = Shrubbing.standard_shrubbery()
    print "Shrubbery size is %d x %d" % (sh.width, sh.length)
     

    Some things to note about this example:

    • There is a cdef class Shrubbery declaration in both Shrubbing.pxd and Shrubbing.pyx. When the Shrubbing module is compiled, these two declarations are combined into one.

    •  
    • In Landscaping.pyx, the cimport Shrubbing declaration allows us to refer to the Shrubbery type as Shrubbing.Shrubbery. But it doesn't bind the name Shrubbing in Landscaping's module namespace at run time, so to access Shrubbing.standard_shrubbery we also need to import Shrubbing.

    Back to the Language Overview

    \ No newline at end of file + + + + Sharing Declarations Between Pyrex Modules + + +


    Sharing Declarations Between Pyrex Modules +

    + This section describes a new set of facilities introduced in Pyrex 0.8 +for making C declarations and extension types in one Pyrex module available +for use in another Pyrex module. These facilities are closely modelled on +the Python import mechanism, and can be thought of as a compile-time version +of it. +

    Contents

    + +

    Definition and Implementation files

    + A Pyrex module can be split into two parts: a definition file with + a .pxd suffix, containing C declarations that are to be available + to other Pyrex modules, and an implementation file with a .pyx +suffix, containing everything else. When a module wants to use something +declared in another module's definition file, it imports it using the cimport statement. +

    What a Definition File contains

    + A definition file can contain: +
      +
    • Any kind of C type declaration.
    • +
    • extern C function or variable declarations.
    • +
    • The definition part of an extension type (see below).
    • +
    + It cannot currently contain any non-extern C function or variable declarations + (although this may be possible in a future version). +

    It cannot contain the implementations of any C or Python functions, or +any Python class definitions, or any executable statements.

    +
    NOTE: You don't need to (and shouldn't) declare anything in a +definition file public in order to make it available to other Pyrex +modules; its mere presence in a definition file does that. You only need a +public declaration if you want to make something available to external C code.
    +

    What an Implementation File contains

    + An implementation file can contain any kind of Pyrex statement, although + there are some restrictions on the implementation part of an extension type +if the corresponding definition file also defines that type (see below). + +

    The cimport statement

    + The cimport statement is used in a definition or implementation +file to gain access to names declared in another definition file. Its syntax +exactly parallels that of the normal Python import statement: +
    cimport module [, module...]
    +
    from module cimport name +[as name] [, name [as name] + ...]
    + Here is an example. The file on the left is a definition file which exports + a C data type. The file on the right is an implementation file which imports + and uses it.
    +   + + + + + + + + + +
    dishes.pxdrestaurant.pyx
    cdef enum otherstuff:
    +     sausage, eggs, lettuce

    cdef struct spamdish:
    +     int oz_of_spam
    +     otherstuff filler

    +
    cimport dishes
    + from dishes cimport spamdish

    cdef void prepare(spamdish *d):
    +     d.oz_of_spam = 42
    +     d.filler = dishes.sausage

    +

    def serve():
    +     spamdish d
    +     prepare(&d)
    +     print "%d oz spam, filler no. %d" % \ +
    +          (d->oz_of_spam, + d->otherstuff)

    +
    +

    It is important to understand that the cimport statement can only +be used to import C data types, external C functions and variables, and extension +types. It cannot be used to import any Python objects, and (with one exception) +it doesn't imply any Python import at run time. If you want to refer to any +Python names from a module that you have cimported, you will have to include +a regular import statement for it as well.

    +

    The exception is that when you use cimport to import an extension + type, its type object is imported at run time and made available by the +name under which you imported it. Using cimport to import extension +types is covered in more detail below. +

    +

    Search paths for definition files

    + When you cimport a module called modulename, the Pyrex +compiler searches for a file called modulename.pxd along the search +path for include files, as specified by -I command line options. +

    Also, whenever you compile a file modulename.pyx, the corresponding + definition file modulename.pxd is first searched for along the +same path, and if found, it is processed before processing the .pyx +file.

    +

    Using cimport to resolve naming + conflicts

    + The cimport mechanism provides a clean and simple way to solve the problem + of wrapping external C functions with Python functions of the same name. +All you need to do is put the extern C declarations into a .pxd file for +an imaginary module, and cimport that module. You can then refer to the C +functions by qualifying them with the name of the module. Here's an example: +
    +   + + + + + + + + + +
    c_lunch.pxdlunch.pyx
    cdef extern from "lunch.h": +
    +     void eject_tomato(float)
    cimport c_lunch

    def eject_tomato(float speed):
    +     c_lunch.eject_tomato(speed)

    +
    +

    You don't need any c_lunch.pyx file, because the only things +defined in c_lunch.pxd are extern C entities. There won't be any +actual c_lunch module at run time, but that doesn't matter -- c_lunch +has done its job of providing an additional namespace at compile time.

    +

    Sharing Extension Types

    + An extension type declaration can also be split into two parts, one in +a definition file and the other in the corresponding implementation file. +
    +
    + The definition part of the extension type can only declare C attributes +and C methods, not Python methods, and it must declare all of that +type's C attributes and C methods.
    +
    + The implementation part must implement all of the C methods declared in +the definition part, and may not add any further C attributes. It may also +define Python methods. +

    Here is an example of a module which defines and exports an extension +type, and another module which uses it.
    +   + + + + + + + + + + + + + + + +
    Shrubbing.pxdShrubbing.pyx
    cdef class Shrubbery:
    +     cdef int width
    +     cdef int length
    cdef class Shrubbery:
    +     def __new__(self, int w, int l):
    +         self.width = w +
    +         self.length = l +

    def standard_shrubbery():
    +     return Shrubbery(3, 7)

    +
    Landscaping.pyx
    cimport Shrubbing +
    + import Shrubbing

    cdef Shrubbing.Shrubbery sh
    + sh = Shrubbing.standard_shrubbery()
    + print "Shrubbery size is %d x %d" % (sh.width, sh.length) +
    +  

    +
    +

    +

    Some things to note about this example:

    +
      +
    • There is a cdef class Shrubbery declaration in both Shrubbing.pxd + and Shrubbing.pyx. When the Shrubbing module is compiled, these two declarations + are combined into one.
    • + +  
    • In Landscaping.pyx, the cimport Shrubbing declaration +allows us to refer to the Shrubbery type as Shrubbing.Shrubbery. +But it doesn't bind the name Shrubbing in Landscaping's module namespace + at run time, so to access Shrubbing.standard_shrubbery we also +need to import Shrubbing.
    • +
    +
    Back to the Language Overview +
    +
    + \ No newline at end of file Modified: lxml/pyrex/Doc/special_methods.html ============================================================================== --- lxml/pyrex/Doc/special_methods.html (original) +++ lxml/pyrex/Doc/special_methods.html Tue Apr 25 15:43:21 2006 @@ -1 +1,598 @@ - Special Methods of Extenstion Types


    Special Methods of Extension Types

    This page describes the special methods currently supported by Pyrex extension types. A complete list of all the special methods appears in the table at the bottom. Some of these methods behave differently from their Python counterparts or have no direct Python counterparts, and require special mention.

    Note: Everything said on this page applies only to extension types, defined with the cdef class statement. It doesn't apply  to classes defined with the Python class statement, where the normal Python rules apply.

    Docstrings

    Currently, docstrings are not fully supported in special methods of extension types. You can place a docstring in the source to serve as a comment, but it won't show up in the corresponding __doc__ attribute at run time. (This is a Python limitation -- there's nowhere in the PyTypeObject data structure to put such docstrings.)

    Initialisation methods: __new__ and __init__

    There are two methods concerned with initialising the object.

    The __new__ method is where you should perform basic C-level initialisation of the object, including allocation of any C data structures that your object will own. You need to be careful what you do in the __new__ method, because the object may not yet be a valid Python object when it is called. Therefore, you must not invoke any Python operations which might touch the object; in particular, do not try to call any of its methods.

    Unlike the corresponding method in Python, your __new__ method is not responsible for creating the object. By the time your __new__ method is called, memory has been allocated for the object and any C attributes it has have been initialised to 0 or null. (Any Python attributes have also been initialised to None, but you probably shouldn't rely on that.) Your __new__ method is guaranteed to be called exactly once.

    If your extension type has a base type, the __new__ method of the base type is automatically called before your __new__ method is called; you cannot explicitly call the inherited __new__ method. If you need to pass a modified argument list to the base type, you will have to do the relevant part of the initialisation in the __init__ method instead (where the normal rules for calling inherited methods apply).

    Note that the first parameter of the __new__ method is the object to be initialised, not the class of the object as it is in Python.

    Any initialisation which cannot safely be done in the __new__ method should be done in the __init__ method. By the time __init__ is called, the object is a fully valid Python object and all operations are safe. Under some circumstances it is possible for __init__ to be called more than once or not to be called at all, so your other methods should be designed to be robust in such situations.

    Keep in mind that any arguments passed to the constructor will be passed to the __new__ method as well as the __init__ method. If you anticipate subclassing your extension type in Python, you may find it useful to give the __new__ method * and ** arguments so that it can accept and ignore extra arguments. Otherwise, any Python subclass which has an __init__ with a different signature will have to override __new__ as well as __init__, which the writer of a Python class wouldn't expect to have to do.

    Finalization method: __dealloc__

    The counterpart to the __new__ method is the __dealloc__ method, which should perform the inverse of the __new__ method. Any C data structures that you allocated in your __new__ method should be freed in your __dealloc__ method.

    You need to be careful what you do in a __dealloc__ method. By the time your __dealloc__ method is called, the object may already have been partially destroyed and may not be in a valid state as far as Python is concerned, so you should avoid invoking any Python operations which might touch the object. In particular, don't call any other methods of the object or do anything which might cause the object to be resurrected. It's best if you stick to just deallocating C data.

    You don't need to worry about deallocating Python attributes of your object, because that will be done for you by Pyrex after your __dealloc__ method returns.

    Note: There is no __del__ method for extension types. (Earlier versions of the Pyrex documentation stated that there was, but this turned out to be incorrect.)

    Arithmetic methods

    Arithmetic operator methods, such as __add__, behave differently from their Python counterparts. There are no separate "reversed" versions of these methods (__radd__, etc.) Instead, if the first operand cannot perform the operation, the same method of the second operand is called, with the operands in the same order.

    This means that you can't rely on the first parameter of these methods being "self", and you should test the types of both operands before deciding what to do. If you can't handle the combination of types you've been given, you should return NotImplemented.

    This also applies to the in-place arithmetic method __ipow__. It doesn't apply to any of the other in-place methods (__iadd__, etc.) which always take self as the first argument.

    Rich comparisons

    There are no separate methods for the individual rich comparison operations (__eq__, __le__, etc.) Instead there is a single method __richcmp__ which takes an integer indicating which operation is to be performed, as follows:
         
        <
        0
        ==
        2
        >
        4
        <=
        1
        !=
        3
        >=
        5

    The __next__ method

    Extension types wishing to implement the iterator interface should define a method called __next__, not next. The Python system will automatically supply a next method which calls your __next__. Do NOT explicitly give your type a next method, or bad things could happen (see note 3).

    Special Method Table

    This table lists all of the special methods together with their parameter and return types. A parameter named self is of the type the method belongs to. Other untyped parameters are generic Python objects.

    You don't have to declare your method as taking these parameter types. If you declare different types, conversions will be performed as necessary.
     
    Name Parameters Return type Description
    General
    __new__ self, ...   Basic initialisation (no direct Python equivalent)
    __init__ self, ...   Further initialisation
    __dealloc__ self   Basic deallocation (no direct Python equivalent)
    __cmp__ x, y int 3-way comparison
    __richcmp__ x, y, int op object Rich comparison (no direct Python equivalent)
    __str__ self object str(self)
    __repr__ self object repr(self)
    __hash__ self int Hash function
    __call__ self, ... object self(...)
    __iter__ self object Return iterator for sequence
    __getattr__ self, name object Get attribute
    __setattr__ self, name, val   Set attribute
    __delattr__ self, name   Delete attribute
    Arithmetic operators
    __add__ x, y object binary + operator
    __sub__ x, y object binary - operator
    __mul__ x, y object * operator
    __div__ x, y object /  operator for old-style division
    __floordiv__ x, y object //  operator
    __truediv__ x, y object /  operator for new-style division
    __mod__ x, y object % operator
    __divmod__ x, y object combined div and mod
    __pow__ x, y, z object ** operator or pow(x, y, z)
    __neg__ self object unary - operator
    __pos__ self object unary + operator
    __abs__ self object absolute value
    __nonzero__ self int convert to boolean
    __invert__ self object ~ operator
    __lshift__ x, y object << operator
    __rshift__ x, y object >> operator
    __and__ x, y object & operator
    __or__ x, y object | operator
    __xor__ x, y object ^ operator
    Numeric conversions
    __int__ self object Convert to integer
    __long__ self object Convert to long integer
    __float__ self object Convert to float
    __oct__ self object Convert to octal
    __hex__ self object Convert to hexadecimal
    In-place arithmetic operators
    __iadd__ self, x object += operator
    __isub__ self, x object -= operator
    __imul__ self, x object *= operator
    __idiv__ self, x object /= operator for old-style division
    __ifloordiv__ self, x object //= operator
    __itruediv__ self, x object /= operator for new-style division
    __imod__ self, x object %= operator
    __ipow__ x, y, z object **= operator
    __ilshift__ self, x object <<= operator
    __irshift__ self, x object >>= operator
    __iand__ self, x object &= operator
    __ior__ self, x object |= operator
    __ixor__ self, x object ^= operator
    Sequences and mappings
    __len__ self int len(self)
    __getitem__ self, x object self[x]
    __setitem__ self, x, y   self[x] = y
    __delitem__ self, x   del self[x]
    __getslice__ self, int i, int j object self[i:j]
    __setslice__ self, int i, int j, x   self[i:j] = x
    __delslice__ self, int i, int j   del self[i:j]
    __contains__ self, x int x in self
    Iterators
    __next__ self object Get next item (called next in Python)
    Buffer interface  (no Python equivalents - see note 1)
    __getreadbuffer__ self, int i, void **p    
    __getwritebuffer__ self, int i, void **p    
    __getsegcount__ self, int *p    
    __getcharbuffer__ self, int i, char **p    
    Descriptor objects  (no Python equivalents - see note 2)
    __get__ self, instance, class object Get value of attribute
    __set__ self, instance, value   Set value of attribute
    __delete__ self, instance   Delete attribute

    Note 1: The buffer interface is intended for use by C code and is not directly accessible from Python. It is described in the Python/C API Reference Manual under sections 6.6 and 10.6.

    Note 2: Descriptor objects are part of the support mechanism for new-style Python classes. See the discussion of descriptors in the Python documentation. See also PEP 252, "Making Types Look More Like Classes", and PEP 253, "Subtyping Built-In Types".

    Note 3: If your type defines a __next__ method, any method called next that you define will be overwritten with the system-supplied next at module import time.



    \ No newline at end of file + + + + Special Methods of Extension Types + +


    Special Methods of Extension Types +

    + This page describes the special methods currently supported by Pyrex extension + types. A complete list of all the special methods appears in the table at +the bottom. Some of these methods behave differently from their Python counterparts +or have no direct Python counterparts, and require special mention. +

    Note: Everything said on this page applies only to extension +types, defined with the cdef class statement. It doesn't apply  +to classes defined with the Python class statement, where the normal + Python rules apply.

    +

    Declaration

    Special methods of extension types must be declared with def, not cdef.
    +

    Docstrings

    + + Currently, docstrings are not fully supported in special methods of extension + types. You can place a docstring in the source to serve as a comment, but + it won't show up in the corresponding __doc__ attribute at run time. (This + is a Python limitation -- there's nowhere in the PyTypeObject data structure + to put such docstrings.) +

    Initialisation methods: __new__ and __init__

    + There are two methods concerned with initialising the object. +

    The __new__ method is where you should perform basic C-level +initialisation of the object, including allocation of any C data structures +that your object will own. You need to be careful what you do in the __new__ +method, because the object may not yet be a valid Python object when it is +called. Therefore, you must not invoke any Python operations which might touch +the object; in particular, do not try to call any of its methods.

    +

    Unlike the corresponding method in Python, your __new__ method + is not responsible for creating the object. By the time your + __new__ method is called, memory has been allocated for the object +and any C attributes it has have been initialised to 0 or null. (Any Python +attributes have also been initialised to None, but you probably shouldn't +rely on that.) Your __new__ method is guaranteed to be called exactly +once.
    +
    +If your extension type has a base type, the __new__ method of the +base type is automatically called before your __new__ method +is called; you cannot explicitly call the inherited __new__ method. +If you need to pass a modified argument list to the base type, you will have +to do the relevant part of the initialisation in the __init__ method +instead (where the normal rules for calling inherited methods apply).
    +

    +

    Note that the first parameter of the __new__ method is the object +to be initialised, not the class of the object as it is in Python.

    +

    Any initialisation which cannot safely be done in the __new__ +method should be done in the __init__ method. By the time + __init__ is called, the object is a fully valid Python object and +all operations are safe. Under some circumstances it is possible for __init__ +to be called more than once or not to be called at all, so your other methods + should be designed to be robust in such situations.

    +

    Keep in mind that any arguments passed to the constructor will be passed + to the __new__ method as well as the __init__ method. +If you anticipate subclassing your extension type in Python, you may find +it useful to give the __new__ method * and ** arguments so that +it can accept and ignore extra arguments. Otherwise, any Python subclass +which has an __init__ with a different signature will have to override +__new__ as well as __init__, which the writer of a Python +class wouldn't expect to have to do.

    +

    Finalization method: __dealloc__

    + The counterpart to the __new__ method is the __dealloc__ +method, which should perform the inverse of the __new__ method. +Any C data structures that you allocated in your __new__ method +should be freed in your __dealloc__ method. +

    You need to be careful what you do in a __dealloc__ method. By +the time your __dealloc__ method is called, the object may already +have been partially destroyed and may not be in a valid state as far as Python +is concerned, so you should avoid invoking any Python operations which might +touch the object. In particular, don't call any other methods of the object +or do anything which might cause the object to be resurrected. It's best if +you stick to just deallocating C data.

    +

    You don't need to worry about deallocating Python attributes of your object, +because that will be done for you by Pyrex after your __dealloc__ +method returns.
    +
    + Note: There is no __del__ method for extension types. (Earlier +versions of the Pyrex documentation stated that there was, but this turned +out to be incorrect.)
    +

    +

    Arithmetic methods

    + Arithmetic operator methods, such as __add__, behave differently + from their Python counterparts. There are no separate "reversed" versions + of these methods (__radd__, etc.) Instead, if the first operand +cannot perform the operation, the same method of the second operand +is called, with the operands in the same order. +

    This means that you can't rely on the first parameter of these methods + being "self", and you should test the types of both operands before deciding + what to do. If you can't handle the combination of types you've been given, + you should return NotImplemented.

    +

    This also applies to the in-place arithmetic method __ipow__. + It doesn't apply to any of the other in-place methods (__iadd__, + etc.) which always take self as the first argument.

    +

    Rich comparisons

    + There are no separate methods for the individual rich comparison operations + (__eq__, __le__, etc.) Instead there is a single method + __richcmp__ which takes an integer indicating which operation is +to be performed, as follows: +
      +
        +   + + + + + + + + + + + + + + + + + + + + + +
        <
        +
        0
        +
        ==
        +
        2
        +
        >
        +
        4
        <=
        +
        1
        +
        !=
        +
        3
        +
        >=
        +
        5
        +
      +
    +

    The __next__ method

    + Extension types wishing to implement the iterator interface should define + a method called __next__, not next. The Python + system will automatically supply a next method which calls your +__next__. Do NOT explicitly give your type a next method, +or bad things could happen (see note 3). +

    Special Method Table

    + This table lists all of the special methods together with their parameter + and return types. A parameter named self is of the type the method + belongs to. Other untyped parameters are generic Python objects. +

    You don't have to declare your method as taking these parameter types. + If you declare different types, conversions will be performed as necessary. +
    +   + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    NameParametersReturn typeDescription
    General
    __new__self, ... Basic initialisation (no direct Python equivalent)
    __init__self, ... Further initialisation
    __dealloc__self Basic deallocation (no direct Python equivalent)
    __cmp__x, yint3-way comparison
    __richcmp__x, y, int opobjectRich comparison (no direct Python equivalent)
    __str__selfobjectstr(self)
    __repr__selfobjectrepr(self)
    __hash__selfintHash function
    __call__self, ...objectself(...)
    __iter__selfobjectReturn iterator for sequence
    __getattr__self, nameobjectGet attribute
    __setattr__self, name, val Set attribute
    __delattr__self, name Delete attribute
    Arithmetic operators
    __add__x, yobjectbinary + operator
    __sub__x, yobjectbinary - operator
    __mul__x, yobject* operator
    __div__x, yobject/  operator for old-style division
    __floordiv__x, yobject//  operator
    __truediv__x, yobject/  operator for new-style division
    __mod__x, yobject% operator
    __divmod__x, yobjectcombined div and mod
    __pow__x, y, zobject** operator or pow(x, y, z)
    __neg__selfobjectunary - operator
    __pos__selfobjectunary + operator
    __abs__selfobjectabsolute value
    __nonzero__selfintconvert to boolean
    __invert__selfobject~ operator
    __lshift__x, yobject<< operator
    __rshift__x, yobject>> operator
    __and__x, yobject& operator
    __or__x, yobject| operator
    __xor__x, yobject^ operator
    Numeric conversions
    __int__selfobjectConvert to integer
    __long__selfobjectConvert to long integer
    __float__selfobjectConvert to float
    __oct__selfobjectConvert to octal
    __hex__selfobjectConvert to hexadecimal
    In-place arithmetic operators
    __iadd__self, xobject+= operator
    __isub__self, xobject-= operator
    __imul__self, xobject*= operator
    __idiv__self, xobject/= operator for old-style division
    __ifloordiv__self, xobject//= operator
    __itruediv__self, xobject/= operator for new-style division
    __imod__self, xobject%= operator
    __ipow__x, y, zobject**= operator
    __ilshift__self, xobject<<= operator
    __irshift__self, xobject>>= operator
    __iand__self, xobject&= operator
    __ior__self, xobject|= operator
    __ixor__self, xobject^= operator
    Sequences and mappings
    __len__selfintlen(self)
    __getitem__self, xobjectself[x]
    __setitem__self, x, y self[x] = y
    __delitem__self, x del self[x]
    __getslice__self, int i, int jobjectself[i:j]
    __setslice__self, int i, int j, x self[i:j] = x
    __delslice__self, int i, int j del self[i:j]
    __contains__self, xintx in self
    Iterators
    __next__selfobjectGet next item (called next in Python)
    Buffer interface  (no Python equivalents + - see note 1)
    __getreadbuffer__self, int i, void **p  
    __getwritebuffer__self, int i, void **p  
    __getsegcount__self, int *p  
    __getcharbuffer__self, int i, char **p  
    Descriptor objects  (no Python equivalents + - see note 2)
    __get__self, instance, classobjectGet value of attribute
    __set__self, instance, value Set value of attribute
    __delete__self, instance Delete attribute
    +

    +

    Note 1: The buffer interface is intended for use by C code and is not +directly accessible from Python. It is described in the Python/C API Reference +Manual under sections 6.6 +and 10.6. +

    +

    Note 2: Descriptor objects are part of the support mechanism for new-style + Python classes. See the discussion + of descriptors in the Python documentation. See also PEP 252, "Making Types Look +More Like Classes", and PEP 253, "Subtyping Built-In +Types".

    +

    Note 3: If your type defines a __next__ method, any method called + next that you define will be overwritten with the system-supplied + next at module import time.

    +
    +
    + \ No newline at end of file Modified: lxml/pyrex/Pyrex/Compiler/CmdLine.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/CmdLine.py (original) +++ lxml/pyrex/Pyrex/Compiler/CmdLine.py Tue Apr 25 15:43:21 2006 @@ -10,7 +10,12 @@ -v, --version Display version number of pyrex compiler -l, --create-listing Write error messages to a listing file -I, --include-dir Search for include files in named directory - -o, --output-file Specify name of generated C file""" + -o, --output-file Specify name of generated C file +The following experimental options are supported only on MacOSX: + -C, --compile Compile generated .c file to .o file + -X, --link Link .o file to produce extension module (implies -C) + -+, --cplus Use C++ compiler for compiling and linking + Additional .o files to link may be supplied when using -X.""" def bad_usage(): print >>sys.stderr, usage @@ -47,6 +52,8 @@ elif option in ("-X", "--link"): options.c_only = 0 options.obj_only = 0 + elif option in ("-+", "--cplus"): + options.cplus = 1 elif option.startswith("-I"): options.include_path.append(get_param(option)) elif option == "--include-dir": @@ -56,7 +63,17 @@ else: bad_usage() else: - sources.append(pop_arg()) + arg = pop_arg() + if arg.endswith(".pyx"): + sources.append(arg) + elif arg.endswith(".o"): + options.objects.append(arg) + else: + print >>sys.stderr, \ + "pyrexc: %s: Unknown filename suffix" % arg + if options.objects and len(sources) > 1: + print >>sys.stderr, \ + "pyrexc: Only one source file allowed together with .o files" if options.use_listing_file and len(sources) > 1: print >>sys.stderr, \ "pyrexc: Only one source file allowed when using -o" Modified: lxml/pyrex/Pyrex/Compiler/Code.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Code.py (original) +++ lxml/pyrex/Pyrex/Compiler/Code.py Tue Apr 25 15:43:21 2006 @@ -4,6 +4,7 @@ import Naming from Pyrex.Utils 
import open_new_file +from PyrexTypes import py_object_type, typecast class CCodeWriter: # f file output file @@ -18,9 +19,9 @@ # in_try_finally boolean inside try of try...finally # filename_table {string : int} for finding filename table indexes # filename_list [string] filenames in filename table order - + in_try_finally = 0 - + def __init__(self, outfile_name): self.f = open_new_file(outfile_name) self.level = 0 @@ -30,7 +31,7 @@ self.error_label = None self.filename_table = {} self.filename_list = [] - + def putln(self, code = ""): if self.marker and self.bol: self.emit_marker() @@ -38,7 +39,7 @@ self.put(code) self.f.write("\n"); self.bol = 1 - + def emit_marker(self): self.f.write("\n"); self.indent() @@ -55,74 +56,61 @@ self.bol = 0 if dl > 0: self.level += dl - - def begin_require_python(self, hex_version): - self.putln("#if PY_VERSION_HEX >= %#010X" % hex_version) - - def else_require_python(self, hex_version=None): - if hex_version is not None: - self.putln("#elif PY_VERSION_HEX >= %#010X" % hex_version) - else: - self.putln("#else") - - def end_require_python(self): - self.putln("#endif") - + def increase_indent(self): self.level = self.level + 1 - + def decrease_indent(self): self.level = self.level - 1 - + def begin_block(self): self.putln("{") self.increase_indent() - + def end_block(self): self.decrease_indent() self.putln("}") - + def indent(self): self.f.write(" " * self.level) - + def mark_pos(self, pos): file, line, col = pos self.marker = '"%s":%s' % (file, line) def init_labels(self): self.label_counter = 0 - self.used_labels = {} self.return_label = self.new_label() self.new_error_label() self.continue_label = None self.break_label = None - + def new_label(self): n = self.label_counter self.label_counter = n + 1 return "%s%d" % (Naming.label_prefix, n) - + def new_error_label(self): old_err_lbl = self.error_label self.error_label = self.new_label() return old_err_lbl - + def get_loop_labels(self): return ( self.continue_label, 
self.break_label) - + def set_loop_labels(self, labels): (self.continue_label, self.break_label) = labels - + def new_loop_labels(self): old_labels = self.get_loop_labels() self.set_loop_labels( - (self.new_label(), + (self.new_label(), self.new_label())) return old_labels - + def get_all_labels(self): return ( self.continue_label, @@ -147,35 +135,47 @@ self.set_all_labels(new_labels) return old_labels - def mark_label_used(self, lbl): - self.used_labels[lbl] = 1 - def put_label(self, lbl): - if lbl in self.used_labels: - self.putln("%s:;" % lbl) - - def put_goto(self, lbl): - self.putln("goto %s;" % lbl) - self.mark_label_used(lbl) - - def put_var_declarations(self, entries, static = 0, dll_linkage = None): + self.putln("%s:;" % lbl) + + def put_var_declarations(self, entries, static = 0, dll_linkage = None, + definition = True): for entry in entries: if not entry.in_cinclude: - self.put_var_declaration(entry, static, dll_linkage) - - def put_var_declaration(self, entry, static = 0, dll_linkage = None): - if entry.visibility == 'extern': - self.put("extern ") - elif static and entry.visibility <> 'public': - self.put("static ") - if entry.visibility <> 'public': + self.put_var_declaration(entry, static, dll_linkage, definition) + + def put_var_declaration(self, entry, static = 0, dll_linkage = None, + definition = True): + #print "Code.put_var_declaration:", entry.name, "definition =", definition + visibility = entry.visibility + if visibility == 'private' and not definition: + return + if visibility == 'extern': + storage_class = Naming.extern_c_macro + elif visibility == 'public': + if definition: + storage_class = "" + else: + storage_class = Naming.extern_c_macro + elif visibility == 'private': + if static: + storage_class = "static" + else: + storage_class = "" + if storage_class: + self.put("%s " % storage_class) + #if visibility == 'extern' or visibility == 'public' and not definition: + # self.put("%s " % Naming.extern_c_macro) + #elif static and 
visibility <> 'public': + # self.put("static ") + if visibility <> 'public': dll_linkage = None self.put(entry.type.declaration_code(entry.cname, dll_linkage = dll_linkage)) if entry.init is not None: self.put(" = %s" % entry.type.literal_code(entry.init)) self.putln(";") - + def entry_as_pyobject(self, entry): type = entry.type if (not entry.is_self_arg and not entry.type.is_complete()) \ @@ -183,30 +183,32 @@ return "(PyObject *)" + entry.cname else: return entry.cname - + def as_pyobject(self, cname, type): - if type.is_extension_type and type.base_type: - return "(PyObject *)" + cname - else: - return cname - + return typecast(py_object_type, type, cname) + #if type.is_extension_type and type.base_type: + # return "(PyObject *)" + cname + #else: + # return cname + def put_incref(self, cname, type): self.putln("Py_INCREF(%s);" % self.as_pyobject(cname, type)) - + def put_decref(self, cname, type): self.putln("Py_DECREF(%s);" % self.as_pyobject(cname, type)) - + def put_var_incref(self, entry): if entry.type.is_pyobject: self.putln("Py_INCREF(%s);" % self.entry_as_pyobject(entry)) - + def put_decref_clear(self, cname, type): self.putln("Py_DECREF(%s); %s = 0;" % ( - self.as_pyobject(cname, type), cname)) - + typecast(py_object_type, type, cname), cname)) + #self.as_pyobject(cname, type), cname)) + def put_xdecref(self, cname, type): self.putln("Py_XDECREF(%s);" % self.as_pyobject(cname, type)) - + def put_xdecref_clear(self, cname, type): self.putln("Py_XDECREF(%s); %s = 0;" % ( self.as_pyobject(cname, type), cname)) @@ -214,44 +216,45 @@ def put_var_decref(self, entry): if entry.type.is_pyobject: self.putln("Py_DECREF(%s);" % self.entry_as_pyobject(entry)) - + + def put_var_decref_clear(self, entry): + if entry.type.is_pyobject: + self.putln("Py_DECREF(%s); %s = 0;" % ( + self.entry_as_pyobject(entry), entry.cname)) + def put_var_xdecref(self, entry): if entry.type.is_pyobject: self.putln("Py_XDECREF(%s);" % self.entry_as_pyobject(entry)) - + def 
put_var_xdecref_clear(self, entry): if entry.type.is_pyobject: self.putln("Py_XDECREF(%s); %s = 0;" % ( self.entry_as_pyobject(entry), entry.cname)) - + def put_var_decrefs(self, entries): for entry in entries: if entry.xdecref_cleanup: self.put_var_xdecref(entry) else: self.put_var_decref(entry) - + def put_var_xdecrefs(self, entries): for entry in entries: self.put_var_xdecref(entry) - + def put_var_xdecrefs_clear(self, entries): for entry in entries: self.put_var_xdecref_clear(entry) - - def put_init_to_py_none(self, cast, cname): - if cast: - self.putln("%s = (void *)Py_None; Py_INCREF(%s %s);" % (cname, cast, cname)) - else: - self.putln("%s = Py_None; Py_INCREF(%s);" % (cname, cname)) - + + def put_init_to_py_none(self, cname, type): + py_none = typecast(type, py_object_type, "Py_None") + self.putln("%s = %s; Py_INCREF(Py_None);" % (cname, py_none)) + def put_init_var_to_py_none(self, entry, template = "%s"): code = template % entry.cname - if entry.type.is_extension_type: - cast = "(PyObject *)" - else: - cast = None - self.put_init_to_py_none(cast, code) + #if entry.type.is_extension_type: + # code = "((PyObject*)%s)" % code + self.put_init_to_py_none(code, entry.type) def put_pymethoddef(self, entry, term): if entry.doc: @@ -260,13 +263,12 @@ doc_code = 0 self.putln( '{"%s", (PyCFunction)%s, METH_VARARGS|METH_KEYWORDS, %s}%s' % ( - entry.name, - entry.func_cname, + entry.name, + entry.func_cname, doc_code, term)) - + def error_goto(self, pos): - self.mark_label_used(self.error_label) return "{%s = %s[%s]; %s = %s; goto %s;}" % ( Naming.filename_cname, Naming.filetable_cname, @@ -274,7 +276,7 @@ Naming.lineno_cname, pos[1], self.error_label) - + def lookup_filename(self, filename): try: index = self.filename_table[filename] @@ -292,13 +294,13 @@ def __init__(self, outfile_name): self.f = open_new_file(outfile_name) self.level = 0 - + def putln(self, code): self.f.write("%s%s\n" % (" " * self.level, code)) - + def indent(self): self.level += 1 - + def 
dedent(self): self.level -= 1 Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/ExprNodes.py (original) +++ lxml/pyrex/Pyrex/Compiler/ExprNodes.py Tue Apr 25 15:43:21 2006 @@ -8,7 +8,7 @@ import Naming from Nodes import Node import PyrexTypes -from PyrexTypes import py_object_type +from PyrexTypes import py_object_type, typecast import Symtab import Options @@ -17,15 +17,18 @@ debug_coercion class ExprNode(Node): - # subexprs [string] Class var holding names of subexpr node attrs - # type PyrexType Type of the result - # result string C code fragment - # is_temp boolean Result is in a temporary variable + # subexprs [string] Class var holding names of subexpr node attrs + # type PyrexType Type of the result + # result_code string Code fragment + # result_ctype string C type of result_code if different from type + # is_temp boolean Result is in a temporary variable # is_sequence_constructor - # boolean Is a list or tuple constructor expression + # boolean Is a list or tuple constructor expression # saved_subexpr_nodes - # [ExprNode or [ExprNode or None] or None] - # Cached result of subexpr_nodes() + # [ExprNode or [ExprNode or None] or None] + # Cached result of subexpr_nodes() + + result_ctype = None # The Analyse Expressions phase for expressions is split # into two sub-phases: @@ -39,7 +42,7 @@ # # Allocate Temps # Allocates temporary variables where needed, and fills - # in the result field of each node. + # in the result_code field of each node. # # ExprNode provides some convenience routines which # perform both of the above phases. These should only @@ -89,7 +92,7 @@ # A default implementation of allocate_temps is # provided which uses the following abstract method: # - # result_code + # calculate_result_code # - Return a C code fragment evaluating to # the result. This is only called when the # result is not a temporary. 
@@ -142,16 +145,16 @@ # - Generate code to perform the deletion. # - Call generate_disposal_code on all sub-expressions. # - # result_as_extension_type - # Normally, the results of all nodes whose type - # is a Python object, either generic or an extension - # type, are returned as a generic Python object, so - # that they can be passed directly to Python/C API - # routines. This method is called to obtain the - # result as the actual type of the node. It is only - # called when the type is known to actually be an - # extension type, and nodes whose result can never - # be an extension type need not implement it. + # #result_as_extension_type ### OBSOLETE ### + # # Normally, the results of all nodes whose type + # # is a Python object, either generic or an extension + # # type, are returned as a generic Python object, so + # # that they can be passed directly to Python/C API + # # routines. This method is called to obtain the + # # result as the actual type of the node. It is only + # # called when the type is known to actually be an + # # extension type, and nodes whose result can never + # # be an extension type need not implement it. # is_sequence_constructor = 0 @@ -192,6 +195,19 @@ self.saved_subexpr_nodes = nodes return self.saved_subexpr_nodes + def result_as(self, type = None): + # Return the result code cast to the specified C type. + return typecast(type, self.ctype(), self.result_code) + + def py_result(self): + # Return the result code cast to PyObject *. + return self.result_as(py_object_type) + + def ctype(self): + # Return the native C type of the result (i.e. the + # C type of the result_code expression). 
+ return self.result_ctype or self.type + # ------------- Declaration Analysis ---------------- def analyse_target_declaration(self, env): @@ -287,7 +303,7 @@ if debug_temp_alloc: print self, "Allocating target temps" self.allocate_subexpr_temps(env) - self.result = self.target_code() + self.result_code = self.target_code() def allocate_temps(self, env, result = None): # Allocate temporary variables for this node and @@ -325,27 +341,26 @@ if result: if not self.is_temp: raise InternalError("Result forced on non-temp node") - self.result = result + self.result_code = result elif self.is_temp: type = self.type if not type.is_void: if type.is_pyobject: type = PyrexTypes.py_object_type - self.result = env.allocate_temp(type) + self.result_code = env.allocate_temp(type) else: - self.result = None + self.result_code = None if debug_temp_alloc: - print self, "Allocated result", self.result - #print_call_chain(self, "allocated temp", self.result) + print self, "Allocated result", self.result_code else: - self.result = self.result_code() + self.result_code = self.calculate_result_code() def target_code(self): # Return code fragment for use as LHS of a C assignment. - return self.result_code() + return self.calculate_result_code() - def result_code(self): - self.not_implemented("result_code") + def calculate_result_code(self): + self.not_implemented("calculate_result_code") def release_target_temp(self, env): # Release temporaries used by LHS of an assignment. @@ -356,8 +371,8 @@ # otherwise release results of its sub-expressions. if self.is_temp: if debug_temp_alloc: - print self, "Releasing result", self.result - env.release_temp(self.result) + print self, "Releasing result", self.result_code + env.release_temp(self.result_code) else: self.release_subexpr_temps(env) @@ -373,10 +388,8 @@ def make_owned_reference(self, code): # If result is a pyobject, make sure we own # a reference to it. 
- #if self.type.is_pyobject and not self.is_temp: if self.type.is_pyobject and not self.result_in_temp(): - #code.put_incref(self.result, self.type) - code.put_incref(self.result, py_object_type) + code.put_incref(self.result_code, self.ctype()) def generate_evaluation_code(self, code): # Generate code to evaluate this node and @@ -399,7 +412,7 @@ # temporary Python reference. if self.is_temp: if self.type.is_pyobject: - code.put_decref_clear(self.result, self.type) + code.put_decref_clear(self.result_code, self.ctype()) else: self.generate_subexpr_disposal_code(code) @@ -415,7 +428,7 @@ # the result if it is a Python object. if self.is_temp: if self.type.is_pyobject: - code.putln("%s = 0;" % self.result) + code.putln("%s = 0;" % self.result_code) else: self.generate_subexpr_disposal_code(code) @@ -517,9 +530,9 @@ return 1 def analyse_types(self, env): - self.type = PyrexTypes.py_object_type + self.type = py_object_type - def result_code(self): + def calculate_result_code(self): return self.value def generate_result_code(self, code): @@ -554,7 +567,7 @@ def check_const(self): pass - def result_code(self): + def calculate_result_code(self): return str(self.value) def generate_result_code(self, code): @@ -569,7 +582,7 @@ class CharNode(ConstNode): type = PyrexTypes.c_char_type - def result_code(self): + def calculate_result_code(self): return "'%s'" % self.value @@ -608,28 +621,46 @@ env.add_py_string(entry) return StringNode(self.pos, entry = entry, type = py_object_type) - def result_code(self): + def calculate_result_code(self): if self.type.is_pyobject: return self.entry.pystring_cname else: return self.entry.cname +class LongNode(AtomicExprNode): + # Python long integer literal + # + # value string + + def analyse_types(self, env): + self.type = py_object_type + self.is_temp = 1 + + def generate_evaluation_code(self, code): + code.putln( + '%s = PyLong_FromString("%s", 0, 0); if (!%s) %s' % ( + self.result_code, + self.value, + self.result_code, + 
code.error_goto(self.pos))) + + class ImagNode(AtomicExprNode): # Imaginary number literal # # value float imaginary part def analyse_types(self, env): - self.type = PyrexTypes.py_object_type + self.type = py_object_type self.is_temp = 1 def generate_evaluation_code(self, code): code.putln( "%s = PyComplex_FromDoubles(0.0, %s); if (!%s) %s" % ( - self.result, + self.result_code, self.value, - self.result, + self.result_code, code.error_goto(self.pos))) @@ -660,8 +691,9 @@ def analyse_target_declaration(self, env): self.entry = env.lookup_here(self.name) if not self.entry: - self.entry = env.declare_var(self.name, - PyrexTypes.py_object_type, self.pos) + #print "NameNode.analyse_target_declaration:", self.name ### + #print "...declaring as py_object_type" ### + self.entry = env.declare_var(self.name, py_object_type, self.pos) def analyse_types(self, env): self.entry = env.lookup(self.name) @@ -672,8 +704,10 @@ def analyse_entry(self, env): self.check_identifier_kind() self.type = self.entry.type + if self.entry.is_declared_generic: + self.result_ctype = py_object_type # Reference to C array turns into pointer to first element. - if self.type.is_array: + while self.type.is_array: self.type = self.type.element_ptr_type() if self.entry.is_pyglobal or self.entry.is_builtin: assert self.type.is_pyobject, "Python global or builtin not a Python object" @@ -729,25 +763,10 @@ # result is in a temporary. 
return 0 - def result_code(self): - if self.entry is None: - return "" # There was an error earlier - result = self.entry.cname - if self.type.is_extension_type and \ - not self.entry.is_declared_generic: - result = "((PyObject *)%s)" % result - return result - - def result_as_extension_type(self): + def calculate_result_code(self): if self.entry is None: return "" # There was an error earlier - #if not self.entry.is_self_arg: - if not self.entry.is_declared_generic: - return self.entry.cname - else: - return "((%s)%s)" % ( - self.type.declaration_code(""), - self.entry.cname) + return self.entry.cname def generate_result_code(self, code): if not hasattr(self, 'entry'): @@ -764,18 +783,18 @@ #assert entry.interned_cname is not None code.putln( '%s = __Pyx_GetName(%s, %s); if (!%s) %s' % ( - self.result, + self.result_code, namespace, entry.interned_cname, - self.result, + self.result_code, code.error_goto(self.pos))) else: code.putln( '%s = __Pyx_GetName(%s, "%s"); if (!%s) %s' % ( - self.result, + self.result_code, namespace, self.entry.name, - self.result, + self.result_code, code.error_goto(self.pos))) def generate_assignment_code(self, rhs, code): @@ -789,14 +808,14 @@ 'if (PyObject_SetAttr(%s, %s, %s) < 0) %s' % ( namespace, entry.interned_cname, - rhs.result, + rhs.py_result(), code.error_goto(self.pos))) else: code.putln( 'if (PyObject_SetAttrString(%s, "%s", %s) < 0) %s' % ( namespace, entry.name, - rhs.result, + rhs.py_result(), code.error_goto(self.pos))) if debug_disposal_code: print "NameNode.generate_assignment_code:" @@ -804,9 +823,13 @@ rhs.generate_disposal_code(code) else: if self.type.is_pyobject: + #print "NameNode.generate_assignment_code: to", self.name ### + #print "...from", rhs ### + #print "...LHS type", self.type, "ctype", self.ctype() ### + #print "...RHS type", rhs.type, "ctype", rhs.ctype() ### rhs.make_owned_reference(code) - code.put_decref(self.result, self.type) - code.putln('%s = %s;' % (self.entry.cname, rhs.result)) + 
code.put_decref(self.result_code, self.ctype()) + code.putln('%s = %s;' % (self.result_code, rhs.result_as(self.ctype()))) if debug_disposal_code: print "NameNode.generate_assignment_code:" print "...generating post-assignment code for", rhs @@ -835,15 +858,15 @@ def analyse_types(self, env): self.arg.analyse_types(env) self.arg = self.arg.coerce_to_pyobject(env) - self.type = PyrexTypes.py_object_type + self.type = py_object_type self.is_temp = 1 def generate_result_code(self, code): code.putln( "%s = PyObject_Repr(%s); if (!%s) %s" % ( - self.result, - self.arg.result, - self.result, + self.result_code, + self.arg.py_result(), + self.result_code, code.error_goto(self.pos))) @@ -862,21 +885,21 @@ self.module_name = self.module_name.coerce_to_pyobject(env) if self.name_list: self.name_list.analyse_types(env) - self.type = PyrexTypes.py_object_type + self.type = py_object_type self.is_temp = 1 env.use_utility_code(import_utility_code) def generate_result_code(self, code): if self.name_list: - name_list_code = self.name_list.result + name_list_code = self.name_list.py_result() else: name_list_code = "0" code.putln( "%s = __Pyx_Import(%s, %s); if (!%s) %s" % ( - self.result, - self.module_name.result, + self.result_code, + self.module_name.py_result(), name_list_code, - self.result, + self.result_code, code.error_goto(self.pos))) @@ -891,15 +914,15 @@ def analyse_types(self, env): self.sequence.analyse_types(env) self.sequence = self.sequence.coerce_to_pyobject(env) - self.type = PyrexTypes.py_object_type + self.type = py_object_type self.is_temp = 1 def generate_result_code(self, code): code.putln( "%s = PyObject_GetIter(%s); if (!%s) %s" % ( - self.result, - self.sequence.result, - self.result, + self.result_code, + self.sequence.py_result(), + self.result_code, code.error_goto(self.pos))) @@ -914,17 +937,17 @@ def __init__(self, iterator, env): self.pos = iterator.pos self.iterator = iterator - self.type = PyrexTypes.py_object_type + self.type = py_object_type 
self.is_temp = 1 def generate_result_code(self, code): code.putln( "%s = PyIter_Next(%s);" % ( - self.result, - self.iterator.result)) + self.result_code, + self.iterator.py_result())) code.putln( "if (!%s) {" % - self.result) + self.result_code) code.putln( "if (PyErr_Occurred()) %s" % code.error_goto(self.pos)) @@ -941,15 +964,15 @@ def __init__(self, pos, env): ExprNode.__init__(self, pos) - self.type = PyrexTypes.py_object_type + self.type = py_object_type self.is_temp = 1 env.use_utility_code(get_exception_utility_code) def generate_result_code(self, code): code.putln( "%s = __Pyx_GetExcValue(); if (!%s) %s" % ( - self.result, - self.result, + self.result_code, + self.result_code, code.error_goto(self.pos))) @@ -960,6 +983,8 @@ def __init__(self, pos, type, env): ExprNode.__init__(self, pos) self.type = type + if type.is_pyobject: + self.result_ctype = py_object_type self.is_temp = 1 def generate_result_code(self, code): @@ -987,6 +1012,9 @@ subexprs = ['base', 'index'] + def is_ephemeral(self): + return self.base.is_ephemeral() + def analyse_target_declaration(self, env): pass @@ -995,7 +1023,7 @@ self.index.analyse_types(env) if self.base.type.is_pyobject: self.index = self.index.coerce_to_pyobject(env) - self.type = PyrexTypes.py_object_type + self.type = py_object_type self.is_temp = 1 else: if self.base.type.is_ptr or self.base.type.is_array: @@ -1007,7 +1035,7 @@ self.type = PyrexTypes.error_type if self.index.type.is_pyobject: self.index = self.index.coerce_to( - PyrexTypes.c_int_type, env) + PyrexTypes.c_py_ssize_t_type, env) if not self.index.type.is_int: error(self.pos, "Invalid index type '%s'" % @@ -1020,18 +1048,18 @@ def is_lvalue(self): return 1 - def result_code(self): + def calculate_result_code(self): return "(%s[%s])" % ( - self.base.result, self.index.result) + self.base.result_code, self.index.result_code) def generate_result_code(self, code): if self.type.is_pyobject: code.putln( "%s = PyObject_GetItem(%s, %s); if (!%s) %s" % ( - 
self.result, - self.base.result, - self.index.result, - self.result, + self.result_code, + self.base.py_result(), + self.index.py_result(), + self.result_code, code.error_goto(self.pos))) def generate_assignment_code(self, rhs, code): @@ -1039,23 +1067,23 @@ if self.type.is_pyobject: code.putln( "if (PyObject_SetItem(%s, %s, %s) < 0) %s" % ( - self.base.result, - self.index.result, - rhs.result, + self.base.py_result(), + self.index.py_result(), + rhs.py_result(), code.error_goto(self.pos))) self.generate_subexpr_disposal_code(code) else: code.putln( "%s = %s;" % ( - self.result, rhs.result)) + self.result_code, rhs.result_code)) rhs.generate_disposal_code(code) def generate_deletion_code(self, code): self.generate_subexpr_evaluation_code(code) code.putln( "if (PyObject_DelItem(%s, %s) < 0) %s" % ( - self.base.result, - self.index.result, + self.base.py_result(), + self.index.py_result(), code.error_goto(self.pos))) self.generate_subexpr_disposal_code(code) @@ -1079,32 +1107,32 @@ if self.stop: self.stop.analyse_types(env) self.base = self.base.coerce_to_pyobject(env) - c_int = PyrexTypes.c_int_type + c_int = PyrexTypes.c_py_ssize_t_type if self.start: self.start = self.start.coerce_to(c_int, env) if self.stop: self.stop = self.stop.coerce_to(c_int, env) - self.type = PyrexTypes.py_object_type + self.type = py_object_type self.is_temp = 1 def generate_result_code(self, code): code.putln( "%s = PySequence_GetSlice(%s, %s, %s); if (!%s) %s" % ( - self.result, - self.base.result, + self.result_code, + self.base.py_result(), self.start_code(), self.stop_code(), - self.result, + self.result_code, code.error_goto(self.pos))) def generate_assignment_code(self, rhs, code): self.generate_subexpr_evaluation_code(code) code.putln( "if (PySequence_SetSlice(%s, %s, %s, %s) < 0) %s" % ( - self.base.result, + self.base.py_result(), self.start_code(), self.stop_code(), - rhs.result, + rhs.result_code, code.error_goto(self.pos))) self.generate_subexpr_disposal_code(code) 
rhs.generate_disposal_code(code) @@ -1113,7 +1141,7 @@ self.generate_subexpr_evaluation_code(code) code.putln( "if (PySequence_DelSlice(%s, %s, %s) < 0) %s" % ( - self.base.result, + self.base.py_result(), self.start_code(), self.stop_code(), code.error_goto(self.pos))) @@ -1121,18 +1149,18 @@ def start_code(self): if self.start: - return self.start.result + return self.start.result_code else: return "0" def stop_code(self): if self.stop: - return self.stop.result + return self.stop.result_code else: return "0x7fffffff" - def result_code(self): - # self.result is not used, but this method must exist + def calculate_result_code(self): + # self.result_code is not used, but this method must exist return "" @@ -1152,17 +1180,17 @@ self.start = self.start.coerce_to_pyobject(env) self.stop = self.stop.coerce_to_pyobject(env) self.step = self.step.coerce_to_pyobject(env) - self.type = PyrexTypes.py_object_type + self.type = py_object_type self.is_temp = 1 def generate_result_code(self, code): code.putln( "%s = PySlice_New(%s, %s, %s); if (!%s) %s" % ( - self.result, - self.start.result, - self.stop.result, - self.step.result, - self.result, + self.result_code, + self.start.py_result(), + self.stop.py_result(), + self.step.py_result(), + self.result_code, code.error_goto(self.pos))) @@ -1190,37 +1218,42 @@ # was obtained, because we need to pass it as 'self'. self.self = function.obj function.obj = CloneNode(self.self) - if self.function.type.is_pyobject: + func_type = self.function_type() + if func_type.is_pyobject: self.arg_tuple = TupleNode(self.pos, args = self.args) self.args = None self.arg_tuple.analyse_types(env) - self.type = PyrexTypes.py_object_type + self.type = py_object_type self.is_temp = 1 else: for arg in self.args: arg.analyse_types(env) - if self.self and self.function.type.args: + if self.self and func_type.args: # Coerce 'self' to the type expected by the method. 
- expected_type = self.function.type.args[0].type + expected_type = func_type.args[0].type self.coerced_self = CloneNode(self.self).coerce_to( expected_type, env) # Insert coerced 'self' argument into argument list. self.args.insert(0, self.coerced_self) self.analyse_c_function_call(env) - def analyse_c_function_call(self, env): + def function_type(self): + # Return the type of the function being called, coercing a function + # pointer to a function if necessary. func_type = self.function.type - # Coerce function pointer to function if func_type.is_ptr: func_type = func_type.base_type - self.function.type = func_type + return func_type + + def analyse_c_function_call(self, env): + func_type = self.function_type() # Check function type if not func_type.is_cfunction: if not func_type.is_error: error(self.pos, "Calling non-function type '%s'" % func_type) self.type = PyrexTypes.error_type - self.result = "" + self.result_code = "" return # Check no. of args expected_nargs = len(func_type.args) @@ -1235,7 +1268,7 @@ % (expected_str, actual_nargs)) self.args = None self.type = PyrexTypes.error_type - self.result = "" + self.result_code = "" return # Coerce arguments for i in range(expected_nargs): @@ -1251,67 +1284,64 @@ or func_type.exception_value is not None \ or func_type.exception_check: self.is_temp = 1 + if self.type.is_pyobject: + self.result_ctype = py_object_type - def result_code(self): - return self.c_call_code(as_extension_type = 0) + def calculate_result_code(self): + return self.c_call_code() - def result_as_extension_type(self): - return self.c_call_code(as_extension_type = 1) - - def c_call_code(self, as_extension_type): - if self.args is None or not self.function.type.is_cfunction: + def c_call_code(self): + func_type = self.function_type() + if self.args is None or not func_type.is_cfunction: return "" - formal_args = self.function.type.args + formal_args = func_type.args arg_list_code = [] for (formal_arg, actual_arg) in \ zip(formal_args, 
self.args): - if formal_arg.type.is_extension_type: - arg_code = actual_arg.result_as_extension_type() - if not formal_arg.type.same_as(actual_arg.type): - arg_code = "((%s)%s)" % ( - formal_arg.type.declaration_code(""), - arg_code) - else: - arg_code = actual_arg.result + arg_code = actual_arg.result_as(formal_arg.type) arg_list_code.append(arg_code) for actual_arg in self.args[len(formal_args):]: - arg_list_code.append(actual_arg.result) - result = "%s(%s)" % (self.function.result, + arg_list_code.append(actual_arg.result_code) + result = "%s(%s)" % (self.function.result_code, join(arg_list_code, ",")) - if self.type.is_extension_type and not as_extension_type: - result = "((PyObject *)%s)" % result return result def generate_result_code(self, code): - #print_call_chain("SimpleCallNode.generate_result_code") ### - if self.function.type.is_pyobject: + func_type = self.function_type() + if func_type.is_pyobject: code.putln( "%s = PyObject_Call(%s, %s, 0); if (!%s) %s" % ( - self.result, - self.function.result, - self.arg_tuple.result, - self.result, + self.result_code, + self.function.py_result(), + self.arg_tuple.py_result(), + self.result_code, code.error_goto(self.pos))) - elif self.function.type.is_cfunction: + elif func_type.is_cfunction: exc_checks = [] if self.type.is_pyobject: - exc_checks.append("!%s" % self.result) + exc_checks.append("!%s" % self.result_code) else: - exc_val = self.function.type.exception_value - exc_check = self.function.type.exception_check + exc_val = func_type.exception_value + exc_check = func_type.exception_check if exc_val is not None: - exc_checks.append("%s == %s" % (self.result, exc_val)) + exc_checks.append("%s == %s" % (self.result_code, exc_val)) if exc_check: exc_checks.append("PyErr_Occurred()") if self.is_temp or exc_checks: - if self.result: - lhs = "%s = " % self.result + rhs = self.c_call_code() + if self.result_code: + lhs = "%s = " % self.result_code + if self.is_temp and self.type.is_pyobject: + #return_type = 
self.type # func_type.return_type + #print "SimpleCallNode.generate_result_code: casting", rhs, \ + # "from", return_type, "to pyobject" ### + rhs = typecast(py_object_type, self.type, rhs) else: lhs = "" code.putln( "%s%s; if (%s) %s" % ( lhs, - self.c_call_code(as_extension_type = 0), + rhs, " && ".join(exc_checks), code.error_goto(self.pos))) @@ -1340,37 +1370,37 @@ if self.starstar_arg: self.starstar_arg = \ self.starstar_arg.coerce_to_pyobject(env) - self.type = PyrexTypes.py_object_type + self.type = py_object_type self.is_temp = 1 def generate_result_code(self, code): if self.keyword_args and self.starstar_arg: code.putln( "if (PyDict_Update(%s, %s) < 0) %s" % ( - self.keyword_args.result, - self.starstar_arg.result, + self.keyword_args.py_result(), + self.starstar_arg.py_result(), code.error_goto(self.pos))) - keyword_code = self.keyword_args.result + keyword_code = self.keyword_args.py_result() elif self.keyword_args: - keyword_code = self.keyword_args.result + keyword_code = self.keyword_args.py_result() elif self.starstar_arg: - keyword_code = self.starstar_arg.result + keyword_code = self.starstar_arg.py_result() else: keyword_code = None if not keyword_code: call_code = "PyObject_CallObject(%s, %s)" % ( - self.function.result, - self.positional_args.result) + self.function.py_result(), + self.positional_args.py_result()) else: call_code = "PyEval_CallObjectWithKeywords(%s, %s, %s)" % ( - self.function.result, - self.positional_args.result, + self.function.py_result(), + self.positional_args.py_result(), keyword_code) code.putln( "%s = %s; if (!%s) %s" % ( - self.result, + self.result_code, call_code, - self.result, + self.result_code, code.error_goto(self.pos))) @@ -1385,15 +1415,15 @@ def analyse_types(self, env): self.arg.analyse_types(env) self.arg = self.arg.coerce_to_pyobject(env) - self.type = PyrexTypes.py_object_type + self.type = py_object_type self.is_temp = 1 def generate_result_code(self, code): code.putln( "%s = PySequence_Tuple(%s); if 
(!%s) %s" % ( - self.result, - self.arg.result, - self.result, + self.result_code, + self.arg.py_result(), + self.result_code, code.error_goto(self.pos))) @@ -1506,11 +1536,12 @@ if self.entry and self.entry.is_cmethod and not self.is_called: error(self.pos, "C method can only be called") # Reference to C array turns into pointer to first element. - if self.type.is_array: + while self.type.is_array: self.type = self.type.element_ptr_type() if self.is_py_attr: if not target: self.is_temp = 1 + self.result_ctype = py_object_type def analyse_attribute(self, env): # Look up attribute and set self.type and self.member. @@ -1551,7 +1582,7 @@ # declared or is declared as a Python method. Treat it as a Python # attribute reference. if obj_type.is_pyobject: - self.type = PyrexTypes.py_object_type + self.type = py_object_type self.is_py_attr = 1 if Options.intern_names: self.interned_attr_cname = env.intern(self.attribute) @@ -1575,34 +1606,21 @@ def is_ephemeral(self): if self.obj: - return ExprNode.is_ephemeral(self) + return self.obj.is_ephemeral() else: return NameNode.is_ephemeral(self) - def result_code(self): - return self.select_code()[0] - - def result_as_extension_type(self): - return self.uncast_select_code() - - def select_code(self): - orig_code = self.uncast_select_code() - if self.type.is_extension_type: - code = "((PyObject *)%s)" % orig_code - else: - code = orig_code - return code, orig_code - - def uncast_select_code(self): - obj_type = self.obj.type - if obj_type.is_extension_type: - obj_code = self.obj.result_as_extension_type() - else: - obj_code = self.obj.result + def calculate_result_code(self): + #print "AttributeNode.calculate_result_code:", self.member ### + #print "...obj node =", self.obj, "code", self.obj.result_code ### + #print "...obj type", self.obj.type, "ctype", self.obj.ctype() ### + obj = self.obj + obj_code = obj.result_as(obj.type) + #print "...obj_code =", obj_code ### if self.entry and self.entry.is_cmethod: return "((struct %s 
*)%s%s%s)->%s" % ( - obj_type.vtabstruct_cname, obj_code, self.op, - obj_type.vtabslot_cname, self.member) + obj.type.vtabstruct_cname, obj_code, self.op, + obj.type.vtabslot_cname, self.member) else: return "%s%s%s" % (obj_code, self.op, self.member) @@ -1611,18 +1629,18 @@ if Options.intern_names: code.putln( '%s = PyObject_GetAttr(%s, %s); if (!%s) %s' % ( - self.result, - self.obj.result, + self.result_code, + self.obj.py_result(), self.interned_attr_cname, - self.result, + self.result_code, code.error_goto(self.pos))) else: code.putln( '%s = PyObject_GetAttrString(%s, "%s"); if (!%s) %s' % ( - self.result, - self.obj.result, + self.result_code, + self.objpy_result(), self.attribute, - self.result, + self.result_code, code.error_goto(self.pos))) def generate_assignment_code(self, rhs, code): @@ -1631,38 +1649,46 @@ if Options.intern_names: code.putln( 'if (PyObject_SetAttr(%s, %s, %s) < 0) %s' % ( - self.obj.result, + self.obj.py_result(), self.interned_attr_cname, - rhs.result, + rhs.py_result(), code.error_goto(self.pos))) else: code.putln( 'if (PyObject_SetAttrString(%s, "%s", %s) < 0) %s' % ( - self.obj.result, + self.obj.py_result(), self.attribute, - rhs.result, + rhs.py_result(), code.error_goto(self.pos))) rhs.generate_disposal_code(code) else: - select_code, orig_code = self.select_code() + #select_code = self.select_code() + select_code = self.result_code if self.type.is_pyobject: rhs.make_owned_reference(code) - code.put_decref(select_code, self.type) + code.put_decref(select_code, self.ctype()) code.putln( "%s = %s;" % ( - orig_code, - rhs.result)) + select_code, + rhs.result_code)) rhs.generate_post_assignment_code(code) self.obj.generate_disposal_code(code) def generate_deletion_code(self, code): self.obj.generate_evaluation_code(code) if self.is_py_attr: - code.putln( - 'if (PyObject_DelAttrString(%s, "%s") < 0) %s' % ( - self.obj.result, - self.attribute, - code.error_goto(self.pos))) + if Options.intern_names: + code.putln( + 'if 
(PyObject_DelAttr(%s, %s) < 0) %s' % ( + self.obj.py_result(), + self.interned_attr_cname, + code.error_goto(self.pos))) + else: + code.putln( + 'if (PyObject_DelAttrString(%s, "%s") < 0) %s' % ( + self.obj.py_result(), + self.attribute, + code.error_goto(self.pos))) else: error(self.pos, "Cannot delete C attribute of extension type") self.obj.generate_disposal_code(code) @@ -1695,7 +1721,7 @@ arg = self.args[i] arg.analyse_types(env) self.args[i] = arg.coerce_to_pyobject(env) - self.type = PyrexTypes.py_object_type + self.type = py_object_type self.is_temp = 1 def analyse_target_types(self, env): @@ -1708,7 +1734,7 @@ coerced_unpacked_item = unpacked_item.coerce_to(arg.type, env) self.unpacked_items.append(unpacked_item) self.coerced_unpacked_items.append(coerced_unpacked_item) - self.type = PyrexTypes.py_object_type + self.type = py_object_type env.use_utility_code(unpacking_utility_code) def allocate_target_temps(self, env): @@ -1728,20 +1754,22 @@ def generate_assignment_code(self, rhs, code): for i in range(len(self.args)): - unpack_result = self.unpacked_items[i].result + item = self.unpacked_items[i] + unpack_code = "__Pyx_UnpackItem(%s, %s)" % ( + rhs.py_result(), + i) code.putln( - "%s = __Pyx_UnpackItem(%s, %s); if (!%s) %s" % ( - unpack_result, - rhs.result, - i, - unpack_result, + "%s = %s; if (!%s) %s" % ( + item.result_code, + typecast(item.ctype(), py_object_type, unpack_code), + item.result_code, code.error_goto(self.pos))) value_node = self.coerced_unpacked_items[i] value_node.generate_evaluation_code(code) self.args[i].generate_assignment_code(value_node, code) code.putln( "if (__Pyx_EndUnpack(%s, %s) < 0) %s" % ( - rhs.result, + rhs.py_result(), len(self.args), code.error_goto(self.pos))) if debug_disposal_code: @@ -1756,20 +1784,19 @@ def generate_operation_code(self, code): code.putln( "%s = PyTuple_New(%s); if (!%s) %s" % ( - self.result, + self.result_code, len(self.args), - self.result, + self.result_code, code.error_goto(self.pos))) for i 
in range(len(self.args)): arg = self.args[i] - result = arg.result if not arg.result_in_temp(): - code.put_incref(result, arg.type) + code.put_incref(arg.result_code, arg.ctype()) code.putln( "PyTuple_SET_ITEM(%s, %s, %s);" % ( - self.result, + self.result_code, i, - result)) + arg.py_result())) def generate_subexpr_disposal_code(self, code): # We call generate_post_assignment_code here instead @@ -1784,19 +1811,19 @@ def generate_operation_code(self, code): code.putln("%s = PyList_New(%s); if (!%s) %s" % - (self.result, + (self.result_code, len(self.args), - self.result, + self.result_code, code.error_goto(self.pos))) for i in range(len(self.args)): arg = self.args[i] #if not arg.is_temp: if not arg.result_in_temp(): - code.put_incref(arg.result, arg.type) + code.put_incref(arg.result_code, arg.ctype()) code.putln("PyList_SET_ITEM(%s, %s, %s);" % - (self.result, + (self.result_code, i, - arg.result)) + arg.py_result())) def generate_subexpr_disposal_code(self, code): # We call generate_post_assignment_code here instead @@ -1820,13 +1847,13 @@ value = value.coerce_to_pyobject(env) new_pairs.append((key, value)) self.key_value_pairs = new_pairs - self.type = PyrexTypes.py_object_type + self.type = py_object_type self.is_temp = 1 - def allocate_temps(self, env): + def allocate_temps(self, env, result = None): # Custom method used here because key-value # pairs are evaluated and used one at a time. - self.allocate_temp(env) + self.allocate_temp(env, result) for key, value in self.key_value_pairs: key.allocate_temps(env) value.allocate_temps(env) @@ -1838,17 +1865,17 @@ # pairs are evaluated and used one at a time. 
code.putln( "%s = PyDict_New(); if (!%s) %s" % ( - self.result, - self.result, + self.result_code, + self.result_code, code.error_goto(self.pos))) for key, value in self.key_value_pairs: key.generate_evaluation_code(code) value.generate_evaluation_code(code) code.putln( "if (PyDict_SetItem(%s, %s, %s) < 0) %s" % ( - self.result, - key.result, - value.result, + self.result_code, + key.py_result(), + value.py_result(), code.error_goto(self.pos))) key.generate_disposal_code(code) value.generate_disposal_code(code) @@ -1874,8 +1901,8 @@ if self.doc: self.doc.analyse_types(env) self.doc = self.doc.coerce_to_pyobject(env) - self.module_name = env.global_scope().module_name - self.type = PyrexTypes.py_object_type + self.module_name = env.global_scope().qualified_name + self.type = py_object_type self.is_temp = 1 env.use_utility_code(create_class_utility_code); @@ -1883,25 +1910,17 @@ if self.doc: code.putln( 'if (PyDict_SetItemString(%s, "__doc__", %s) < 0) %s' % ( - self.dict.result, - self.doc.result, + self.dict.py_result(), + self.doc.py_result(), code.error_goto(self.pos))) -## code.putln( -## '%s = PyClass_New(%s, %s, %s); if (!%s) %s' % ( -## self.result, -## self.bases.result, -## self.dict.result, -## self.name.result, -## self.result, -## code.error_goto(self.pos))) code.putln( '%s = __Pyx_CreateClass(%s, %s, %s, "%s"); if (!%s) %s' % ( - self.result, - self.bases.result, - self.dict.result, - self.name.result, + self.result_code, + self.bases.py_result(), + self.dict.py_result(), + self.name.py_result(), self.module_name, - self.result, + self.result_code, code.error_goto(self.pos))) @@ -1917,16 +1936,16 @@ def analyse_types(self, env): self.function.analyse_types(env) - self.type = PyrexTypes.py_object_type + self.type = py_object_type self.is_temp = 1 def generate_result_code(self, code): code.putln( "%s = PyMethod_New(%s, 0, %s); if (!%s) %s" % ( - self.result, - self.function.result, + self.result_code, + self.function.py_result(), self.class_cname, - 
self.result, + self.result_code, code.error_goto(self.pos))) @@ -1938,15 +1957,15 @@ # pymethdef_cname string PyMethodDef structure def analyse_types(self, env): - self.type = PyrexTypes.py_object_type + self.type = py_object_type self.is_temp = 1 def generate_result_code(self, code): code.putln( "%s = PyCFunction_New(&%s, 0); if (!%s) %s" % ( - self.result, + self.result_code, self.pymethdef_cname, - self.result, + self.result_code, code.error_goto(self.pos))) #------------------------------------------------------------------- @@ -1973,7 +1992,7 @@ self.operand.analyse_types(env) if self.is_py_operation(): self.coerce_operand_to_pyobject(env) - self.type = PyrexTypes.py_object_type + self.type = py_object_type self.is_temp = 1 else: self.analyse_c_operation(env) @@ -1998,10 +2017,10 @@ function = self.py_operation_function() code.putln( "%s = %s(%s); if (!%s) %s" % ( - self.result, + self.result_code, function, - self.operand.result, - self.result, + self.operand.py_result(), + self.result_code, code.error_goto(self.pos))) def type_error(self): @@ -2023,8 +2042,8 @@ self.operand = self.operand.coerce_to_boolean(env) self.type = PyrexTypes.c_int_type - def result_code(self): - return "(!%s)" % self.operand.result + def calculate_result_code(self): + return "(!%s)" % self.operand.result_code def generate_result_code(self, code): pass @@ -2041,8 +2060,8 @@ def py_operation_function(self): return "PyNumber_Positive" - def result_code(self): - return self.operand.result + def calculate_result_code(self): + return self.operand.result_code class UnaryMinusNode(UnopNode): @@ -2059,8 +2078,8 @@ def py_operation_function(self): return "PyNumber_Negative" - def result_code(self): - return "(-%s)" % self.operand.result + def calculate_result_code(self): + return "(-%s)" % self.operand.result_code class TildeNode(UnopNode): @@ -2075,8 +2094,8 @@ def py_operation_function(self): return "PyNumber_Invert" - def result_code(self): - return "(~%s)" % self.operand.result + def 
calculate_result_code(self): + return "(~%s)" % self.operand.result_code class AmpersandNode(ExprNode): @@ -2103,10 +2122,10 @@ def error(self, mess): error(self.pos, mess) self.type = PyrexTypes.error_type - self.result = "" + self.result_code = "" - def result_code(self): - return "(&%s)" % self.operand.result + def calculate_result_code(self): + return "(&%s)" % self.operand.result_code def generate_result_code(self, code): pass @@ -2143,33 +2162,32 @@ from_py = self.operand.type.is_pyobject if from_py and not to_py and self.operand.is_ephemeral(): error(self.pos, "Casting temporary Python object to non-Python type") - #if to_py: if to_py and not from_py: + self.result_ctype = py_object_type self.is_temp = 1 def check_const(self): self.operand.check_const() - def result_code(self): - if self.type.is_pyobject: - cast = "PyObject *" + def calculate_result_code(self): + opnd = self.operand + result_code = self.type.cast_code(opnd.result_code) + return result_code + + def result_as(self, type): + if self.type.is_pyobject and not self.is_temp: + # Optimise away some unnecessary casting + return self.operand.result_as(type) else: - cast = self.type.declaration_code("") - return "((%s)%s)" % (cast, self.operand.result) - - def result_as_extension_type(self): - return "((%s)%s)" % ( - self.type.declaration_code(""), - self.operand.result) + return ExprNode.result_as(self, type) def generate_result_code(self, code): if self.is_temp: code.putln( "%s = (PyObject *)%s;" % ( - self.result, - #self.type.declaration_code(""), - self.operand.result)) - code.put_incref(self.result, self.type) + self.result_code, + self.operand.result_code)) + code.put_incref(self.result_code, self.ctype()) class SizeofNode(ExprNode): @@ -2199,10 +2217,10 @@ elif arg_type.is_void: error(self.pos, "Cannot take sizeof void") elif not arg_type.is_complete(): - error(self.pos, "Cannot take sizeof incomplete type '%s'" % arg_code) + error(self.pos, "Cannot take sizeof incomplete type '%s'" % arg_type) 
self.type = PyrexTypes.c_int_type - def result_code(self): + def calculate_result_code(self): arg_code = self.arg_type.declaration_code("") return "(sizeof(%s))" % arg_code @@ -2218,8 +2236,8 @@ self.operand.analyse_types(env) self.type = PyrexTypes.c_int_type - def result_code(self): - return "(sizeof(%s))" % self.operand.result + def calculate_result_code(self): + return "(sizeof(%s))" % self.operand.result_code def generate_result_code(self, code): pass @@ -2251,7 +2269,7 @@ self.operand2.analyse_types(env) if self.is_py_operation(): self.coerce_operands_to_pyobjects(env) - self.type = PyrexTypes.py_object_type + self.type = py_object_type self.is_temp = 1 else: self.analyse_c_operation(env) @@ -2269,6 +2287,7 @@ self.operand2.check_const() def generate_result_code(self, code): + #print "BinopNode.generate_result_code:", self.operand1, self.operand2 ### if self.operand1.type.is_pyobject: function = self.py_operation_function() if function == "PyNumber_Power": @@ -2277,12 +2296,12 @@ extra_args = "" code.putln( "%s = %s(%s, %s%s); if (!%s) %s" % ( - self.result, + self.result_code, function, - self.operand1.result, - self.operand2.result, + self.operand1.py_result(), + self.operand2.py_result(), extra_args, - self.result, + self.result_code, code.error_goto(self.pos))) else: if self.is_temp: @@ -2316,11 +2335,11 @@ def c_types_okay(self, type1, type2): return type1.is_numeric and type2.is_numeric - def result_code(self): + def calculate_result_code(self): return "(%s %s %s)" % ( - self.operand1.result, + self.operand1.result_code, self.operator, - self.operand2.result) + self.operand2.result_code) def py_operation_function(self): return self.py_functions[self.operator] @@ -2415,9 +2434,9 @@ else: return None - def result_code(self): + def calculate_result_code(self): return "pow(%s, %s)" % ( - self.operand1.result, self.operand2.result) + self.operand1.result_code, self.operand2.result_code) class BoolBinopNode(ExprNode): @@ -2441,7 +2460,7 @@ self.operand2 = 
self.operand2.coerce_to_pyobject(env) self.temp_bool = TempNode(self.pos, PyrexTypes.c_int_type, env) - self.type = PyrexTypes.py_object_type + self.type = py_object_type else: self.operand1 = self.operand1.coerce_to_boolean(env) self.operand2 = self.operand2.coerce_to_boolean(env) @@ -2455,7 +2474,7 @@ #self.operand2 = self.operand2.coerce_to_simple(env) self.is_temp = 1 - def allocate_temps(self, env, result = None): + def allocate_temps(self, env, result_code = None): # We don't need both operands at the same time, and # one of the operands will also be our result. So we # use an allocation strategy here which results in @@ -2463,12 +2482,12 @@ # result variable. This allows us to avoid some # assignments and increfs/decrefs that would otherwise # be necessary. - self.allocate_temp(env, result) - self.operand1.allocate_temps(env, self.result) + self.allocate_temp(env, result_code) + self.operand1.allocate_temps(env, self.result_code) if self.temp_bool: self.temp_bool.allocate_temp(env) self.temp_bool.release_temp(env) - self.operand2.allocate_temps(env, self.result) + self.operand2.allocate_temps(env, self.result_code) # We haven't called release_temp on either operand, # because although they are temp nodes, they don't own # their result variable. And because they are temp @@ -2481,11 +2500,11 @@ self.operand1.check_const() self.operand2.check_const() - def result_code(self): + def calculate_result_code(self): return "(%s %s %s)" % ( - self.operand1.result, + self.operand1.result_code, self.py_to_c_op[self.operator], - self.operand2.result) + self.operand2.result_code) py_to_c_op = {'and': "&&", 'or': "||"} @@ -2508,15 +2527,15 @@ def generate_operand1_test(self, code): # Generate code to test the truth of the first operand. 
if self.type.is_pyobject: - test_result = self.temp_bool.result + test_result = self.temp_bool.result_code code.putln( "%s = PyObject_IsTrue(%s); if (%s < 0) %s" % ( test_result, - self.operand1.result, + self.operand1.py_result(), test_result, code.error_goto(self.pos))) else: - test_result = self.operand1.result + test_result = self.operand1.result_code return test_result @@ -2550,37 +2569,37 @@ else: return 0 - def generate_operation_code(self, code, result, + def generate_operation_code(self, code, result_code, operand1, op , operand2): if op == 'in' or op == 'not_in': code.putln( "%s = PySequence_Contains(%s, %s); if (%s < 0) %s" % ( - result, - operand2.result, - operand1.result, - result, + result_code, + operand2.py_result(), + operand1.py_result(), + result_code, code.error_goto(self.pos))) if op == 'not_in': code.putln( "%s = !%s;" % ( - result, result)) + result_code, result_code)) elif (operand1.type.is_pyobject and op not in ('is', 'is_not')): code.putln( "if (PyObject_Cmp(%s, %s, &%s) < 0) %s" % ( - operand1.result, - operand2.result, - result, + operand1.py_result(), + operand2.py_result(), + result_code, code.error_goto(self.pos))) code.putln( "%s = %s %s 0;" % ( - result, result, op)) + result_code, result_code, op)) else: code.putln("%s = %s %s %s;" % ( - result, - operand1.result, + result_code, + operand1.result_code, self.c_operator(op), - operand2.result)) + operand2.result_code)) def c_operator(self, op): if op == 'is': @@ -2658,21 +2677,21 @@ if self.cascade: self.not_const() - def result_code(self): + def calculate_result_code(self): return "(%s %s %s)" % ( - self.operand1.result, + self.operand1.result_code, self.c_operator(self.operator), - self.operand2.result) + self.operand2.result_code) def generate_evaluation_code(self, code): self.operand1.generate_evaluation_code(code) self.operand2.generate_evaluation_code(code) if self.is_temp: - self.generate_operation_code(code, self.result, + self.generate_operation_code(code, 
self.result_code, self.operand1, self.operator, self.operand2) if self.cascade: self.cascade.generate_evaluation_code(code, - self.result, self.operand2) + self.result_code, self.operand2) self.operand1.generate_disposal_code(code) self.operand2.generate_disposal_code(code) @@ -2799,13 +2818,11 @@ CoercionNode.__init__(self, arg) self.type = new_type - def result_code(self): - return "((%s)%s)" % ( - self.type.declaration_code(""), - self.arg.result) - - def result_as_extension_type(self): - return self.result + def calculate_result_code(self): + #return "((%s)%s)" % ( + # self.type.declaration_code(""), + # self.arg.result) + return self.arg.result_as(self.type) def generate_result_code(self, code): self.arg.generate_result_code(code) @@ -2822,6 +2839,7 @@ assert dst_type.is_extension_type, "PyTypeTest on non extension type" CoercionNode.__init__(self, arg) self.type = dst_type + self.result_ctype = arg.ctype() env.use_utility_code(type_test_utility_code) def result_in_temp(self): @@ -2830,19 +2848,14 @@ def is_ephemeral(self): return self.arg.is_ephemeral() - def result_code(self): - return self.arg.result - - def result_as_extension_type(self): - return "((%s)%s)" % ( - self.type.declaration_code(""), - self.arg.result) + def calculate_result_code(self): + return self.arg.result_code def generate_result_code(self, code): if self.type.typeobj_is_available(): code.putln( "if (!__Pyx_TypeTest(%s, %s)) %s" % ( - self.result, + self.arg.py_result(), self.type.typeptr_cname, code.error_goto(self.pos))) else: @@ -2859,19 +2872,19 @@ def __init__(self, arg, env): CoercionNode.__init__(self, arg) - self.type = PyrexTypes.py_object_type + self.type = py_object_type + self.is_temp = 1 if not arg.type.to_py_function: error(arg.pos, "Cannot convert '%s' to Python object" % arg.type) - self.is_temp = 1 def generate_result_code(self, code): function = self.arg.type.to_py_function code.putln('%s = %s(%s); if (!%s) %s' % ( - self.result, + self.result_code, function, - 
self.arg.result, - self.result, + self.arg.result_code, + self.result_code, code.error_goto(self.pos))) @@ -2882,21 +2895,21 @@ def __init__(self, result_type, arg, env): CoercionNode.__init__(self, arg) self.type = result_type + self.is_temp = 1 if not result_type.from_py_function: error(arg.pos, "Cannot convert Python object to '%s'" % result_type) if self.type.is_string and self.arg.is_ephemeral(): error(arg.pos, "Obtaining char * from temporary Python value") - self.is_temp = 1 def generate_result_code(self, code): - opnd = self.arg.result + #opnd = self.arg.py_result() function = self.type.from_py_function code.putln('%s = %s(%s); if (PyErr_Occurred()) %s' % ( - self.result, + self.result_code, function, - self.arg.result, + self.arg.py_result(), code.error_goto(self.pos))) @@ -2915,16 +2928,16 @@ self.not_const() self.arg.check_const() - def result_code(self): - return "(%s != 0)" % self.arg.result + def calculate_result_code(self): + return "(%s != 0)" % self.arg.result_code def generate_result_code(self, code): if self.arg.type.is_pyobject: code.putln( "%s = PyObject_IsTrue(%s); if (%s < 0) %s" % ( - self.result, - self.arg.result, - self.result, + self.result_code, + self.arg.py_result(), + self.result_code, code.error_goto(self.pos))) @@ -2937,14 +2950,16 @@ CoercionNode.__init__(self, arg) self.type = self.arg.type self.is_temp = 1 + if self.type.is_pyobject: + self.result_ctype = py_object_type def generate_result_code(self, code): #self.arg.generate_evaluation_code(code) # Already done # by generic generate_subexpr_evaluation_code! 
code.putln("%s = %s;" % ( - self.result, self.arg.result)) + self.result_code, self.arg.result_as(self.ctype()))) if self.type.is_pyobject: - code.put_incref(self.result, self.type) + code.put_incref(self.result_code, self.ctype()) class CloneNode(CoercionNode): @@ -2960,12 +2975,13 @@ def __init__(self, arg): CoercionNode.__init__(self, arg) self.type = arg.type + self.result_ctype = arg.result_ctype - def result_code(self): - return self.arg.result + def calculate_result_code(self): + return self.arg.result_code - def result_as_extension_type(self): - return self.arg.result_as_extension_type() + #def result_as_extension_type(self): + # return self.arg.result_as_extension_type() def generate_evaluation_code(self, code): pass @@ -3085,24 +3101,24 @@ PyErr_SetString(PyExc_ValueError, "unpack sequence of wrong size"); } -static PyObject *__Pyx_UnpackItem(PyObject *seq, int i) { - PyObject *item = PySequence_GetItem(seq, i); - if (!item) { +static PyObject *__Pyx_UnpackItem(PyObject *seq, Py_ssize_t i) { + PyObject *item; + if (!(item = PySequence_GetItem(seq, i))) { if (PyErr_ExceptionMatches(PyExc_IndexError)) - __Pyx_UnpackError(); + __Pyx_UnpackError(); } return item; } -static int __Pyx_EndUnpack(PyObject *seq, int i) { - PyObject *item = PySequence_GetItem(seq, i); - if (item) { +static int __Pyx_EndUnpack(PyObject *seq, Py_ssize_t i) { + PyObject *item; + if (item = PySequence_GetItem(seq, i)) { Py_DECREF(item); __Pyx_UnpackError(); return -1; } PyErr_Clear(); - return 0; + return 0; } """ Modified: lxml/pyrex/Pyrex/Compiler/Lexicon.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Lexicon.py (original) +++ lxml/pyrex/Pyrex/Compiler/Lexicon.py Tue Apr 25 15:43:21 2006 @@ -19,7 +19,6 @@ hexdigit = Any("0123456789ABCDEFabcdef") indentation = Bol + Rep(Any(" \t")) - #resword = apply(Str, reserved_words) decimal = Rep1(digit) dot = Str(".") exponent = Any("Ee") + Opt(Any("+-")) + decimal @@ -27,6 +26,7 
@@ name = letter + Rep(letter | digit) intconst = decimal | (Str("0x") + Rep1(hexdigit)) + longconst = intconst + Str("L") fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent) imagconst = (intconst | fltconst) + Any("jJ") @@ -73,9 +73,9 @@ lineterm = Eol + Opt(Str("\n")) return Lexicon([ - #(resword, TEXT), (name, 'IDENT'), (intconst, 'INT'), + (longconst, 'LONG'), (fltconst, 'FLOAT'), (imagconst, 'IMAG'), (punct | diphthong, TEXT), Modified: lxml/pyrex/Pyrex/Compiler/Main.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Main.py (original) +++ lxml/pyrex/Pyrex/Compiler/Main.py Tue Apr 25 15:43:21 2006 @@ -2,7 +2,7 @@ # Pyrex Top Level # -import sys +import os, sys if sys.version_info[:2] < (2, 2): print >>sys.stderr, "Sorry, Pyrex requires Python 2.2 or later" sys.exit(1) @@ -132,7 +132,7 @@ def parse(self, source_filename, type_names, pxd): # Parse the given source file and return a parse tree. - f = open(source_filename, "r") + f = open(source_filename, "rU") s = PyrexScanner(f, source_filename, type_names = type_names, context = self) try: @@ -166,23 +166,39 @@ if options.output_file: result.c_file = os.path.join(cwd, options.output_file) else: - result.c_file = replace_suffix(source, ".c") + if options.cplus: + c_suffix = ".cpp" + else: + c_suffix = ".c" + result.c_file = replace_suffix(source, c_suffix) module_name = self.extract_module_name(source) initial_pos = (source, 1, 0) scope = self.find_module(module_name, pos = initial_pos, need_pxd = 0) + errors_occurred = False try: tree = self.parse(source, scope.type_names, pxd = 0) tree.process_implementation(scope, result) except CompileError: - result.c_file = None + errors_occurred = True Errors.close_listing_file() result.num_errors = Errors.num_errors if result.num_errors > 0: + errors_occurred = True + if errors_occurred: + try: + os.unlink(result.c_file) + except EnvironmentError: + pass result.c_file = None if result.c_file and 
not options.c_only and c_compile: - result.object_file = c_compile(result.c_file) + result.object_file = c_compile(result.c_file, + verbose_flag = options.show_version, + cplus = options.cplus) if not options.obj_only and c_link: - result.extension_file = c_link(result.object_file) + result.extension_file = c_link(result.object_file, + extra_objects = options.objects, + verbose_flag = options.show_version, + cplus = options.cplus) return result #------------------------------------------------------------------------ @@ -200,10 +216,18 @@ errors_to_stderr boolean Echo errors to stderr when using .lis include_path [string] Directories to search for include files output_file string Name of generated .c file + + Following options are experimental and only used on MacOSX: + + c_only boolean Stop after generating C file (default) + obj_only boolean Stop after compiling to .o file + objects [string] Extra .o files to link with + cplus boolean Compile as c++ code """ def __init__(self, defaults = None, **kw): self.include_path = [] + self.objects = [] if defaults: self.__dict__.update(defaults.__dict__) self.__dict__.update(kw) @@ -230,6 +254,7 @@ self.object_file = None self.extension_file = None + def compile(source, options = None, c_compile = 0, c_link = 0): """ compile(source, options = default_options) @@ -288,6 +313,7 @@ errors_to_stderr = 1, c_only = 1, obj_only = 1, + cplus = 0, output_file = None) if sys.platform == "mac": @@ -295,6 +321,8 @@ default_options.use_listing_file = 1 elif sys.platform == "darwin": from Pyrex.Mac.DarwinSystem import c_compile, c_link, CCompilerError +elif 'linux' in sys.platform: + from Pyrex.PC.LinuxSystem import c_compile, c_link, CCompilerError else: c_compile = None c_link = None Modified: lxml/pyrex/Pyrex/Compiler/Naming.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Naming.py (original) +++ lxml/pyrex/Pyrex/Compiler/Naming.py Tue Apr 25 15:43:21 2006 @@ -39,6 +39,7 
@@ filename_cname = pyrex_prefix + "filename" filetable_cname = pyrex_prefix + "f" filenames_cname = pyrex_prefix + "filenames" +fileinit_cname = pyrex_prefix + "init_filenames" intern_tab_cname = pyrex_prefix + "intern_tab" kwds_cname = pyrex_prefix + "kwds" lineno_cname = pyrex_prefix + "lineno" @@ -49,3 +50,5 @@ self_cname = pyrex_prefix + "self" stringtab_cname = pyrex_prefix + "string_tab" vtabslot_cname = pyrex_prefix + "vtab" + +extern_c_macro = pyrex_prefix.upper() + "EXTERN_C" Modified: lxml/pyrex/Pyrex/Compiler/Nodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Nodes.py (original) +++ lxml/pyrex/Pyrex/Compiler/Nodes.py Tue Apr 25 15:43:21 2006 @@ -44,7 +44,7 @@ # 'type' attribute of each ExprNode. Insert coercion nodes into the # tree where needed to convert to and from Python objects. # Allocate temporary locals for intermediate results. Fill - # in the 'result' attribute of each ExprNode with a C code + # in the 'result_code' attribute of each ExprNode with a C code # fragment. 
# # (3) generate_code @@ -71,7 +71,6 @@ def generate_const_definitions(self, env, code): if env.const_entries: code.putln("") - #code.put_var_declarations(env.const_entries, static = 1) for entry in env.const_entries: if not entry.is_interned: code.put_var_declaration(entry, static = 1) @@ -127,38 +126,41 @@ if entry.visibility == 'public': public_extension_types.append(entry) if public_vars_and_funcs or public_extension_types: - #import os - #outname_base, _ = os.path.splitext(result.c_file) - #result.h_file = outname_base + ".h" - #result.i_file = outname_base + ".pxi" result.h_file = replace_suffix(result.c_file, ".h") result.i_file = replace_suffix(result.c_file, ".pxi") h_code = Code.CCodeWriter(result.h_file) i_code = Code.PyrexCodeWriter(result.i_file) + self.generate_extern_c_macro_definition(h_code) for entry in public_vars_and_funcs: - h_code.putln("extern %s;" % + h_code.putln("%s %s;" % ( + Naming.extern_c_macro, entry.type.declaration_code( - entry.cname, dll_linkage = "DL_IMPORT")) + entry.cname, dll_linkage = "DL_IMPORT"))) i_code.putln("cdef extern %s" % entry.type.declaration_code(entry.cname, pyrex = 1)) for entry in public_extension_types: self.generate_cclass_header_code(entry.type, h_code) self.generate_cclass_include_code(entry.type, i_code) - h_code.putln("extern DL_IMPORT(void) init%s(void);" % env.module_name) - #result.h_file_generated = 1 - #result.i_file_generated = 1 + h_code.putln("PyMODINIT_FUNC init%s(void);" % env.module_name) def generate_cclass_header_code(self, type, h_code): - h_code.putln("extern DL_IMPORT(PyTypeObject) %s;" % type.typeobj_cname) + #h_code.putln("extern DL_IMPORT(PyTypeObject) %s;" % type.typeobj_cname) + h_code.putln("%s DL_IMPORT(PyTypeObject) %s;" % ( + Naming.extern_c_macro, + type.typeobj_cname)) self.generate_obj_struct_definition(type, h_code) def generate_cclass_include_code(self, type, i_code): i_code.putln("cdef extern class %s.%s:" % ( type.module_name, type.name)) i_code.indent() - for entry in 
type.scope.var_entries: - i_code.putln("cdef %s" % - entry.type.declaration_code(entry.cname, pyrex = 1)) + var_entries = type.scope.var_entries + if var_entries: + for entry in var_entries: + i_code.putln("cdef %s" % + entry.type.declaration_code(entry.cname, pyrex = 1)) + else: + i_code.putln("pass") i_code.dedent() def generate_c_code(self, env, result): @@ -168,7 +170,8 @@ code.init_labels() self.generate_module_preamble(env, modules, code) for module in modules: - self.generate_declarations_for_module(module, code) + self.generate_declarations_for_module(module, code, + definition = module is env) code.putln("") code.putln("/* Implementation of %s */" % env.qualified_name) self.generate_const_definitions(env, code) @@ -179,27 +182,38 @@ self.generate_py_string_table(env, code) self.generate_typeobj_definitions(env, code) self.generate_method_table(env, code) + self.generate_filename_init_prototype(code) self.generate_module_init_func(modules[:-1], env, code) self.generate_filename_table(code) self.generate_utility_functions(env, code) result.c_file_generated = 1 def find_referenced_modules(self, env, module_list, modules_seen): - for imported_module in env.cimported_modules: - if imported_module not in modules_seen: - modules_seen[imported_module] = 1 + if env not in modules_seen: + modules_seen[env] = 1 + for imported_module in env.cimported_modules: self.find_referenced_modules(imported_module, module_list, modules_seen) - module_list.append(env) + module_list.append(env) def generate_module_preamble(self, env, cimported_modules, code): code.putln('/* Generated by Pyrex %s on %s */' % ( Version.version, time.asctime())) code.putln('') + code.putln('#define PY_SSIZE_T_CLEAN') for filename in env.python_include_files: code.putln('#include "%s"' % filename) code.putln("#ifndef PY_LONG_LONG") code.putln(" #define PY_LONG_LONG LONG_LONG") code.putln("#endif") + code.putln("#if PY_VERSION_HEX < 0x02050000") + code.putln(" typedef int Py_ssize_t;") + code.putln(" 
#define PY_SSIZE_T_MAX INT_MAX") + code.putln(" #define PY_SSIZE_T_MIN INT_MIN") + code.putln(" #define PyInt_FromSsize_t(z) PyInt_FromLong(z)") + code.putln(" #define PyInt_AsSsize_t(o) PyInt_AsLong(o)") + code.putln("#endif") + self.generate_extern_c_macro_definition(code) + code.putln("%s double pow(double, double);" % Naming.extern_c_macro) self.generate_includes(env, cimported_modules, code) #for filename in env.include_files: # code.putln('#include "%s"' % filename) @@ -214,11 +228,19 @@ code.putln('static PyObject *%s;' % Naming.builtins_cname) code.putln('static int %s;' % Naming.lineno_cname) code.putln('static char *%s;' % Naming.filename_cname) - code.putln('staticforward char **%s;' % Naming.filetable_cname) + code.putln('static char **%s;' % Naming.filetable_cname) if env.doc: code.putln('') code.putln('static char %s[] = "%s";' % (env.doc_cname, env.doc)) + def generate_extern_c_macro_definition(self, code): + name = Naming.extern_c_macro + code.putln("#ifdef __cplusplus") + code.putln('#define %s extern "C"' % name) + code.putln("#else") + code.putln("#define %s extern" % name) + code.putln("#endif") + def generate_includes(self, env, cimported_modules, code): includes = env.include_files[:] for module in cimported_modules: @@ -241,15 +263,13 @@ # Some C compilers don't like an empty array code.putln("0") code.putln("};") - code.putln("statichere char **%s = %s;" % - (Naming.filetable_cname, Naming.filenames_cname)) - def generate_declarations_for_module(self, env, code): + def generate_declarations_for_module(self, env, code, definition): code.putln("") code.putln("/* Declarations from %s */" % env.qualified_name) self.generate_type_predeclarations(env, code) self.generate_type_definitions(env, code) - self.generate_global_declarations(env, code) + self.generate_global_declarations(env, code, definition) self.generate_cfunction_predeclarations(env, code) def generate_type_predeclarations(self, env, code): @@ -329,11 +349,17 @@ name = 
entry.type.typeobj_cname if name: if entry.visibility == 'extern' and not entry.in_cinclude: - code.putln("extern DL_IMPORT(PyTypeObject) %s;" % name) + code.putln("%s DL_IMPORT(PyTypeObject) %s;" % ( + Naming.extern_c_macro, + name)) elif entry.visibility == 'public': - code.putln("DL_EXPORT(PyTypeObject) %s;" % name) - else: - code.putln("staticforward PyTypeObject %s;" % name) + #code.putln("DL_EXPORT(PyTypeObject) %s;" % name) + code.putln("%s DL_EXPORT(PyTypeObject) %s;" % ( + Naming.extern_c_macro, + name)) + # ??? Do we really need the rest of this? ??? + #else: + # code.putln("staticforward PyTypeObject %s;" % name) def generate_exttype_vtable_struct(self, entry, code): # Generate struct declaration for an extension type's vtable. @@ -393,13 +419,13 @@ attr.type.declaration_code(attr.cname)) code.putln(footer) - def generate_global_declarations(self, env, code): + def generate_global_declarations(self, env, code, definition): code.putln("") for entry in env.c_class_entries: code.putln("static PyTypeObject *%s = 0;" % entry.type.typeptr_cname) code.put_var_declarations(env.var_entries, static = 1, - dll_linkage = "DL_EXPORT") + dll_linkage = "DL_EXPORT", definition = definition) code.put_var_declarations(env.default_entries, static = 1) def generate_cfunction_predeclarations(self, env, code): @@ -412,7 +438,7 @@ header = entry.type.declaration_code(entry.cname, dll_linkage = dll_linkage) if entry.visibility <> 'private': - storage_class = "" + storage_class = "%s " % Naming.extern_c_macro else: storage_class = "static " code.putln("%s%s; /*proto*/" % ( @@ -420,7 +446,7 @@ header)) def generate_typeobj_definitions(self, env, code): - modname = env.module_name + full_module_name = env.qualified_name for entry in env.c_class_entries: #print "generate_typeobj_definitions:", entry.name #print "...visibility =", entry.visibility @@ -451,7 +477,7 @@ self.generate_method_table(scope, code) self.generate_member_table(scope, code) self.generate_getset_table(scope, 
code) - self.generate_typeobj_definition(modname, entry, code) + self.generate_typeobj_definition(full_module_name, entry, code) def generate_exttype_vtable(self, scope, code): # Generate the definition of an extension type's vtable. @@ -484,9 +510,9 @@ self.generate_self_cast(scope, code) type = scope.parent_type if type.vtabslot_cname: - code.putln("p->%s = (struct %s *)%s;" % ( - type.vtabslot_cname, + code.putln("*(struct %s **)&p->%s = %s;" % ( type.vtabstruct_cname, + type.vtabslot_cname, type.vtabptr_cname)) for entry in scope.var_entries: if entry.type.is_pyobject: @@ -510,12 +536,7 @@ code.putln( "static void %s(PyObject *o) {" % scope.mangle_internal("tp_dealloc")) - # only need the object cast to the type if we need to decref - # some instance attributes - for entry in scope.var_entries: - if entry.type.is_pyobject: - self.generate_self_cast(scope, code) - break + self.generate_self_cast(scope, code) self.generate_usr_dealloc_call(scope, code) for entry in scope.var_entries: if entry.type.is_pyobject: @@ -559,16 +580,12 @@ code.putln( "static int %s(PyObject *o, visitproc v, void *a) {" % scope.mangle_internal("tp_traverse")) - # only need e, p if we have object attributes - for entry in scope.var_entries: - if entry.type.is_pyobject: - code.putln( - "int e;") - self.generate_self_cast(scope, code) - break + code.putln( + "int e;") + self.generate_self_cast(scope, code) if base_type: code.putln( - "%s->tp_traverse(o, v, a);" % + "e = %s->tp_traverse(o, v, a); if (e) return e;" % base_type.typeptr_cname) for entry in scope.var_entries: if entry.type.is_pyobject: @@ -594,11 +611,7 @@ code.putln( "static int %s(PyObject *o) {" % scope.mangle_internal("tp_clear")) - # only need cast to self type if have object attributes to dereference - for entry in scope.var_entries: - if entry.type.is_pyobject: - self.generate_self_cast(scope, code) - break + self.generate_self_cast(scope, code) if base_type: code.putln( "%s->tp_clear(o);" % @@ -619,12 +632,12 @@ # a 
__getitem__ method is present. It converts its # argument to a Python integer and calls mp_subscript. code.putln( - "static PyObject *%s(PyObject *o, int i) {" % + "static PyObject *%s(PyObject *o, Py_ssize_t i) {" % scope.mangle_internal("sq_item")) code.putln( "PyObject *r;") code.putln( - "PyObject *x = PyInt_FromLong(i); if(!x) return 0;") + "PyObject *x = PyInt_FromSsize_t(i); if(!x) return 0;") code.putln( "r = o->ob_type->tp_as_mapping->mp_subscript(o, x);") code.putln( @@ -710,7 +723,7 @@ del_entry = scope.lookup_here("__delslice__") code.putln("") code.putln( - "static int %s(PyObject *o, int i, int j, PyObject *v) {" % + "static int %s(PyObject *o, Py_ssize_t i, Py_ssize_t j, PyObject *v) {" % scope.mangle_internal("sq_ass_slice")) code.putln( "if (v) {") @@ -957,7 +970,8 @@ if entry.visibility == 'public': header = "DL_EXPORT(PyTypeObject) %s = {" else: - header = "statichere PyTypeObject %s = {" + #header = "statichere PyTypeObject %s = {" + header = "PyTypeObject %s = {" #code.putln(header % scope.parent_type.typeobj_cname) code.putln(header % type.typeobj_cname) code.putln( @@ -1076,28 +1090,44 @@ code.putln( "};") + def generate_filename_init_prototype(self, code): + code.putln(""); + code.putln("static void %s(void); /*proto*/" % Naming.fileinit_cname) + def generate_module_init_func(self, imported_modules, env, code): code.putln("") - header = "DL_EXPORT(void) init%s(void)" % env.module_name + header = "PyMODINIT_FUNC init%s(void)" % env.module_name code.putln("%s; /*proto*/" % header) code.putln("%s {" % header) code.put_var_declarations(env.temp_entries) + #code.putln("/*--- Libary function declarations ---*/") env.generate_library_function_declarations(code) + self.generate_filename_init_call(code) + #code.putln("/*--- Module creation code ---*/") self.generate_module_creation_code(env, code) + #code.putln("/*--- Intern code ---*/") self.generate_intern_code(env, code) + #code.putln("/*--- String init code ---*/") 
self.generate_string_init_code(env, code) + #code.putln("/*--- Global init code ---*/") self.generate_global_init_code(env, code) + #code.putln("/*--- Type import code ---*/") for module in imported_modules: self.generate_type_import_code_for_module(module, env, code) + #code.putln("/*--- Type init code ---*/") self.generate_type_init_code(env, code) + #code.putln("/*--- Execution code ---*/") self.body.generate_execution_code(code) code.putln("return;") code.put_label(code.error_label) code.put_var_xdecrefs(env.temp_entries) - code.putln('__Pyx_AddTraceback("%s");' % (env.module_name)) + code.putln('__Pyx_AddTraceback("%s");' % (env.qualified_name)) env.use_utility_code(traceback_utility_code) code.putln('}') + def generate_filename_init_call(self, code): + code.putln("%s();" % Naming.fileinit_cname) + def generate_module_creation_code(self, env, code): # Generate code to create the module object and # install the builtins. @@ -1227,6 +1257,17 @@ scope.class_name, typeobj_cname, code.error_goto(entry.pos))) + weakref_entry = scope.lookup_here("__weakref__") + if weakref_entry: + if weakref_entry.type is py_object_type: + tp_weaklistoffset = "%s.tp_weaklistoffset" % typeobj_cname + code.putln("if (%s == 0) %s = offsetof(struct %s, %s);" % ( + tp_weaklistoffset, + tp_weaklistoffset, + type.objstruct_cname, + weakref_entry.cname)) + else: + error(weakref_entry.pos, "__weakref__ slot must be of type 'object'") def generate_exttype_vtable_init_code(self, entry, code): # Generate code to initialise the C method table of an @@ -1244,10 +1285,9 @@ Naming.obj_base_cname, type.base_type.vtabptr_cname)) for meth_entry in type.scope.cfunc_entries: - #if not meth_entry.is_inherited: if meth_entry.func_cname: code.putln( - "%s.%s = (void *)%s;" % ( + "*(void **)&%s.%s = (void *)%s;" % ( type.vtable_cname, meth_entry.cname, meth_entry.func_cname)) @@ -1264,6 +1304,11 @@ def generate_utility_functions(self, env, code): code.putln("") code.putln("/* Runtime support code */") + 
code.putln("") + code.putln("static void %s(void) {" % Naming.fileinit_cname) + code.putln("%s = %s;" % + (Naming.filetable_cname, Naming.filenames_cname)) + code.putln("}") for utility_code in env.utility_code_used: code.put(utility_code) @@ -1375,7 +1420,7 @@ if not self.dimension.type.is_int: error(self.dimension.pos, "Array dimension not integer") #size = self.dimension.value - size = self.dimension.result + size = self.dimension.result_code else: size = None if not base_type.is_complete(): @@ -1422,7 +1467,7 @@ else: if self.exception_value: self.exception_value.analyse_const_expression(env) - exc_val = self.exception_value.result + exc_val = self.exception_value.result_code if not return_type.assignable_from(self.exception_value.type): error(self.exception_value.pos, "Exception value incompatible with function return type") @@ -1606,7 +1651,7 @@ def analyse_declarations(self, env, enum_entry): if self.value: self.value.analyse_const_expression(env) - value = self.value.result + value = self.value.result_code else: value = self.name entry = env.declare_const(self.name, enum_entry.type, @@ -1697,17 +1742,16 @@ # ----- Default return value code.putln("") if self.return_type.is_pyobject: - if self.return_type.is_extension_type: - cast = "(PyObject *)" - else: - cast = None + #if self.return_type.is_extension_type: + # lhs = "(PyObject *)%s" % Naming.retval_cname + #else: lhs = Naming.retval_cname - code.put_init_to_py_none(cast, lhs) + code.put_init_to_py_none(lhs, self.return_type) else: val = self.return_type.default_value if val: code.putln("%s = %s;" % (Naming.retval_cname, val)) - code.put_goto(code.return_label) + code.putln("goto %s;" % code.return_label) # ----- Error cleanup code.put_label(code.error_label) code.put_var_xdecrefs(lenv.temp_entries) @@ -1734,10 +1778,10 @@ self.put_stararg_decrefs(code) if not self.return_type.is_void: retval_code = Naming.retval_cname - if self.return_type.is_extension_type: - retval_code = "((%s)%s) " % ( - 
self.return_type.declaration_code(""), - retval_code) + #if self.return_type.is_extension_type: + # retval_code = "((%s)%s) " % ( + # self.return_type.declaration_code(""), + # retval_code) code.putln("return %s;" % retval_code) code.putln("}") @@ -1819,7 +1863,7 @@ header = self.return_type.declaration_code(entity, dll_linkage = dll_linkage) if self.visibility <> 'private': - storage_class = "" + storage_class = "%s " % Naming.extern_c_macro else: storage_class = "static " code.putln("%s%s {" % ( @@ -2243,11 +2287,11 @@ code.putln( "%s = %s;" % ( arg.default_entry.cname, - default.result)) + default.result_as(arg.default_entry.type))) if default.is_temp and default.type.is_pyobject: code.putln( "%s = 0;" % - default.result) + default.result_code) # For Python class methods, create and store function object if self.assmt: self.assmt.generate_execution_code(code) @@ -2298,8 +2342,8 @@ self.classobj.analyse_expressions(env) genv = env.global_scope() cenv = PyClassScope(name = self.name, outer_scope = genv) - cenv.class_dict_cname = self.dict.result - cenv.class_obj_cname = self.classobj.result + cenv.class_dict_cname = self.dict.result_code + cenv.class_obj_cname = self.classobj.result_code self.scope = cenv self.body.analyse_declarations(cenv) self.body.analyse_expressions(cenv) @@ -2307,7 +2351,7 @@ self.dict.release_temp(env) self.classobj.release_temp(env) self.target.release_target_temp(env) - env.recycle_pending_temps() + #env.recycle_pending_temps() def generate_function_definitions(self, env, code): self.generate_py_string_decls(self.scope, code) @@ -2452,12 +2496,12 @@ def analyse_expressions(self, env): self.expr.analyse_expressions(env) self.expr.release_temp(env) - env.recycle_pending_temps() # TEMPORARY + #env.recycle_pending_temps() # TEMPORARY def generate_execution_code(self, code): self.expr.generate_evaluation_code(code) - if not self.expr.is_temp and self.expr.result: - code.putln("%s;" % self.expr.result) + if not self.expr.is_temp and 
self.expr.result_code: + code.putln("%s;" % self.expr.result_code) self.expr.generate_disposal_code(code) @@ -2502,15 +2546,6 @@ self.lhs.allocate_target_temps(env) self.lhs.release_target_temp(env) self.rhs.release_temp(env) - -# def analyse_assignment(self, env, lhs, rhs): -# # Returns coerced RHS. -# rhs.analyse_types(env) -# lhs.analyse_target_types(env) -# rhs = rhs.coerce_to(lhs.type, env) -# rhs.allocate_temps(env) -# lhs.allocate_target_temps(env) -# return rhs def generate_rhs_evaluation_code(self, code): self.rhs.generate_evaluation_code(code) @@ -2640,7 +2675,7 @@ arg.allocate_temps(env) arg.release_temp(env) self.args[i] = arg - env.recycle_pending_temps() # TEMPORARY + #env.recycle_pending_temps() # TEMPORARY env.use_utility_code(printing_utility_code) def generate_execution_code(self, code): @@ -2648,7 +2683,7 @@ arg.generate_evaluation_code(code) code.putln( "if (__Pyx_PrintItem(%s) < 0) %s" % ( - arg.result, + arg.py_result(), code.error_goto(self.pos))) arg.generate_disposal_code(code) if not self.ends_with_comma: @@ -2671,7 +2706,7 @@ arg.analyse_target_expression(env) if not arg.type.is_pyobject: error(arg.pos, "Deletion of non-Python object") - env.recycle_pending_temps() # TEMPORARY + #env.recycle_pending_temps() # TEMPORARY def generate_execution_code(self, code): for arg in self.args: @@ -2699,7 +2734,9 @@ if not code.break_label: error(self.pos, "break statement not inside loop") else: - code.put_goto(code.break_label) + code.putln( + "goto %s;" % + code.break_label) class ContinueStatNode(StatNode): @@ -2713,7 +2750,9 @@ elif not code.continue_label: error(self.pos, "continue statement not inside loop") else: - code.put_goto(code.continue_label) + code.putln( + "goto %s;" % + code.continue_label) class ReturnStatNode(StatNode): @@ -2721,10 +2760,12 @@ # # value ExprNode or None # return_type PyrexType + # temps_in_use [Entry] Temps in use at time of return def analyse_expressions(self, env): return_type = env.return_type self.return_type = 
return_type + self.temps_in_use = env.temps_in_use() if not return_type: error(self.pos, "Return not inside a function body") return @@ -2747,32 +2788,27 @@ if not self.return_type: # error reported earlier return + for entry in self.temps_in_use: + code.put_var_decref_clear(entry) if self.value: self.value.generate_evaluation_code(code) - if self.value.type.is_pyobject and not self.value.is_temp: - code.put_incref(self.value.result, self.value.type) - if self.return_type.is_extension_type: - cast = "(%s)" % self.return_type.declaration_code("") - else: - cast = "" + self.value.make_owned_reference(code) code.putln( - "%s = %s%s;" % ( + "%s = %s;" % ( Naming.retval_cname, - cast, - self.value.result)) + self.value.result_as(self.return_type))) self.value.generate_post_assignment_code(code) else: if self.return_type.is_pyobject: - code.putln( - "%s = Py_None; Py_INCREF(%s);" % ( - Naming.retval_cname, - Naming.retval_cname)) + code.put_init_to_py_none(Naming.retval_cname, self.return_type) elif self.return_type.is_returncode: code.putln( "%s = %s;" % ( Naming.retval_cname, self.return_type.default_value)) - code.put_goto(code.return_label) + code.putln( + "goto %s;" % + code.return_label) class RaiseStatNode(StatNode): @@ -2801,7 +2837,7 @@ self.exc_value.release_temp(env) if self.exc_tb: self.exc_tb.release_temp(env) - env.recycle_pending_temps() # TEMPORARY + #env.recycle_pending_temps() # TEMPORARY if not (self.exc_type or self.exc_value or self.exc_tb): env.use_utility_code(reraise_utility_code) else: @@ -2810,17 +2846,17 @@ def generate_execution_code(self, code): if self.exc_type: self.exc_type.generate_evaluation_code(code) - type_code = self.exc_type.result + type_code = self.exc_type.py_result() else: type_code = 0 if self.exc_value: self.exc_value.generate_evaluation_code(code) - value_code = self.exc_value.result + value_code = self.exc_value.py_result() else: value_code = "0" if self.exc_tb: self.exc_tb.generate_evaluation_code(code) - tb_code = 
self.exc_tb.result + tb_code = self.exc_tb.py_result() else: tb_code = "0" if self.exc_type or self.exc_value or self.exc_tb: @@ -2857,7 +2893,7 @@ self.cond.release_temp(env) if self.value: self.value.release_temp(env) - env.recycle_pending_temps() # TEMPORARY + #env.recycle_pending_temps() # TEMPORARY def generate_execution_code(self, code): self.cond.generate_evaluation_code(code) @@ -2865,11 +2901,11 @@ self.value.generate_evaluation_code(code) code.putln( "if (!%s) {" % - self.cond.result) + self.cond.result_code) if self.value: code.putln( "PyErr_SetObject(PyExc_AssertionError, %s);" % - self.value.result) + self.value.py_result()) else: code.putln( "PyErr_SetNone(PyExc_AssertionError);") @@ -2925,16 +2961,18 @@ self.condition = \ self.condition.analyse_temp_boolean_expression(env) self.condition.release_temp(env) - env.recycle_pending_temps() # TEMPORARY + #env.recycle_pending_temps() # TEMPORARY self.body.analyse_expressions(env) def generate_execution_code(self, code, end_label): self.condition.generate_evaluation_code(code) code.putln( "if (%s) {" % - self.condition.result) + self.condition.result_code) self.body.generate_execution_code(code) - code.put_goto(end_label) + code.putln( + "goto %s;" % + end_label) code.putln("}") @@ -2954,7 +2992,7 @@ self.condition = \ self.condition.analyse_temp_boolean_expression(env) self.condition.release_temp(env) - env.recycle_pending_temps() # TEMPORARY + #env.recycle_pending_temps() # TEMPORARY self.body.analyse_expressions(env) if self.else_clause: self.else_clause.analyse_expressions(env) @@ -2963,12 +3001,12 @@ old_loop_labels = code.new_loop_labels() code.putln( "while (1) {") + code.put_label(code.continue_label) self.condition.generate_evaluation_code(code) code.putln( "if (!%s) break;" % - self.condition.result) + self.condition.result_code) self.body.generate_execution_code(code) - code.put_label(code.continue_label) code.putln("}") break_label = code.break_label code.set_loop_labels(old_loop_labels) @@ 
-3004,22 +3042,22 @@ self.target.allocate_target_temps(env) self.item.release_temp(env) self.target.release_target_temp(env) - env.recycle_pending_temps() # TEMPORARY + #env.recycle_pending_temps() # TEMPORARY self.body.analyse_expressions(env) - self.iterator.release_temp(env) - env.recycle_pending_temps() # TEMPORARY + #env.recycle_pending_temps() # TEMPORARY if self.else_clause: self.else_clause.analyse_expressions(env) - + self.iterator.release_temp(env) + def generate_execution_code(self, code): old_loop_labels = code.new_loop_labels() self.iterator.generate_evaluation_code(code) code.putln( "for (;;) {") + code.put_label(code.continue_label) self.item.generate_evaluation_code(code) self.target.generate_assignment_code(self.item, code) self.body.generate_execution_code(code) - code.put_label(code.continue_label) code.putln( "}") break_label = code.break_label @@ -3075,7 +3113,7 @@ c_loopvar_node = ExprNodes.TempNode(self.pos, PyrexTypes.c_long_type, env) c_loopvar_node.allocate_temps(env) - self.loopvar_name = c_loopvar_node.result + self.loopvar_name = c_loopvar_node.result_code self.py_loopvar_node = \ ExprNodes.CloneNode(c_loopvar_node).coerce_to_pyobject(env) self.bound1.allocate_temps(env) @@ -3093,7 +3131,7 @@ self.else_clause.analyse_expressions(env) self.bound1.release_temp(env) self.bound2.release_temp(env) - env.recycle_pending_temps() # TEMPORARY + #env.recycle_pending_temps() # TEMPORARY def generate_execution_code(self, code): old_loop_labels = code.new_loop_labels() @@ -3103,8 +3141,8 @@ code.putln( "for (%s = %s%s; %s %s %s; %s%s) {" % ( self.loopvar_name, - self.bound1.result, offset, - self.loopvar_name, self.relation2, self.bound2.result, + self.bound1.result_code, offset, + self.loopvar_name, self.relation2, self.bound2.result_code, incop, self.loopvar_name)) if self.py_loopvar_node: self.py_loopvar_node.generate_evaluation_code(code) @@ -3170,7 +3208,9 @@ self.else_clause.generate_execution_code(code) code.putln( "}") - 
code.put_goto(end_label) + code.putln( + "goto %s;" % + end_label) code.put_label(our_error_label) code.put_var_xdecrefs_clear(self.cleanup_list) default_clause_seen = 0 @@ -3182,7 +3222,9 @@ error(except_clause.pos, "Default except clause not last") except_clause.generate_handling_code(code, end_label) if not default_clause_seen: - code.put_goto(code.error_label) + code.putln( + "goto %s;" % + code.error_label) code.put_label(end_label) @@ -3218,7 +3260,7 @@ self.exc_value.release_temp(env) if self.target: self.target.release_target_temp(env) - env.recycle_pending_temps() # TEMPORARY + #env.recycle_pending_temps() # TEMPORARY self.body.analyse_expressions(env) def generate_handling_code(self, code, end_label): @@ -3228,7 +3270,7 @@ code.putln( "%s = PyErr_ExceptionMatches(%s);" % ( self.match_flag, - self.pattern.result)) + self.pattern.py_result())) self.pattern.generate_disposal_code(code) code.putln( "if (%s) {" % @@ -3247,7 +3289,9 @@ else: self.exc_value.generate_disposal_code(code) self.body.generate_execution_code(code) - code.put_goto(end_label) + code.putln( + "goto %s;" + % end_label) code.putln( "}") @@ -3320,7 +3364,6 @@ code.putln( "__pyx_why = 0; goto %s;" % catch_label) - code.mark_label_used(catch_label) for i in range(len(new_labels)): if new_labels[i] and new_labels[i] <> "": if new_labels[i] == new_error_label: @@ -3332,7 +3375,6 @@ new_labels[i], i+1, catch_label)) - code.mark_label_used(catch_label) code.put_label(catch_label) code.set_all_labels(old_labels) self.finally_clause.generate_execution_code(code) @@ -3347,7 +3389,6 @@ "case %s: goto %s;" % ( i+1, old_labels[i])) - code.mark_label_used(old_labels[i]) code.putln( "}") code.putln( @@ -3367,7 +3408,9 @@ code.putln( "%s = %s;" % ( self.lineno_var, Naming.lineno_cname)) - code.put_goto(catch_label) + code.putln( + "goto %s;" % + catch_label) code.putln( "}") @@ -3385,7 +3428,9 @@ code.putln( "%s = 0;" % var) - code.put_goto(error_label) + code.putln( + "goto %s;" % + error_label) 
code.putln( "}") @@ -3469,7 +3514,7 @@ target.release_temp(env) self.module.release_temp(env) self.item.release_temp(env) - env.recycle_pending_temps() # TEMPORARY + #env.recycle_pending_temps() # TEMPORARY def generate_execution_code(self, code): self.module.generate_evaluation_code(code) @@ -3477,20 +3522,20 @@ for cname, target in self.interned_items: code.putln( '%s = PyObject_GetAttr(%s, %s); if (!%s) %s' % ( - self.item.result, - self.module.result, + self.item.result_code, + self.module.py_result(), cname, - self.item.result, + self.item.result_code, code.error_goto(self.pos))) target.generate_assignment_code(self.item, code) else: for name, target in self.items: code.putln( '%s = PyObject_GetAttrString(%s, "%s"); if (!%s) %s' % ( - self.item.result, - self.module.result, + self.item.result_code, + self.module.py_result(), name, - self.item.result, + self.item.result_code, code.error_goto(self.pos))) target.generate_assignment_code(self.item, code) self.module.generate_disposal_code(code) @@ -3505,8 +3550,8 @@ """ typedef struct {PyObject **p; char *s;} __Pyx_InternTabEntry; /*proto*/ typedef struct {PyObject **p; char *s; long n;} __Pyx_StringTabEntry; /*proto*/ -static PyObject *__Pyx_UnpackItem(PyObject *, int); /*proto*/ -static int __Pyx_EndUnpack(PyObject *, int); /*proto*/ +static PyObject *__Pyx_UnpackItem(PyObject *, Py_ssize_t); /*proto*/ +static int __Pyx_EndUnpack(PyObject *, Py_ssize_t); /*proto*/ static int __Pyx_PrintItem(PyObject *); /*proto*/ static int __Pyx_PrintNewline(void); /*proto*/ static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb); /*proto*/ @@ -3516,7 +3561,7 @@ static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, char *name); /*proto*/ static int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); /*proto*/ static int __Pyx_GetStarArgs(PyObject **args, PyObject **kwds,\ - char *kwd_list[], int nargs, PyObject **args2, PyObject **kwds2); /*proto*/ + char *kwd_list[], Py_ssize_t nargs, 
PyObject **args2, PyObject **kwds2); /*proto*/ static void __Pyx_WriteUnraisable(char *name); /*proto*/ static void __Pyx_AddTraceback(char *funcname); /*proto*/ static PyTypeObject *__Pyx_ImportType(char *module_name, char *class_name, long size); /*proto*/ @@ -3558,7 +3603,7 @@ return -1; if (PyString_Check(v)) { char *s = PyString_AsString(v); - int len = PyString_Size(v); + Py_ssize_t len = PyString_Size(v); if (len > 0 && isspace(Py_CHARMASK(s[len-1])) && s[len-1] != ' ') @@ -3613,7 +3658,7 @@ } if (PyString_Check(type)) ; - else if (PyClass_Check(type)) + else if (PyType_Check(type) || PyClass_Check(type)) ; /*PyErr_NormalizeException(&type, &value, &tb);*/ else if (PyInstance_Check(type)) { /* Raising an instance. The value should be a dummy. */ @@ -3702,7 +3747,7 @@ PyObject **args, PyObject **kwds, char *kwd_list[], - int nargs, + Py_ssize_t nargs, PyObject **args2, PyObject **kwds2) { Modified: lxml/pyrex/Pyrex/Compiler/Parsing.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Parsing.py (original) +++ lxml/pyrex/Pyrex/Compiler/Parsing.py Tue Apr 25 15:43:21 2006 @@ -422,13 +422,15 @@ value = int(s.systring) s.next() return ExprNodes.IntNode(pos, value = value) + elif sy == 'LONG': + value = s.systring + s.next() + return ExprNodes.LongNode(pos, value = value) elif sy == 'FLOAT': - #value = float(s.systring) value = s.systring s.next() return ExprNodes.FloatNode(pos, value = value) elif sy == 'IMAG': - #value = float(s.systring[:-1]) value = s.systring[:-1] s.next() return ExprNodes.ImagNode(pos, value = value) @@ -500,6 +502,10 @@ if kind == 'r': if systr == '\\\n': chars.append(r'\\\n') + elif systr == r'\"': + chars.append(r'\\\"') + elif systr == r'\\': + chars.append(r'\\\\') else: chars.append('\\' + systr) else: @@ -1041,7 +1047,7 @@ s.expect_newline("Syntax error in include statement") include_file_path = s.context.find_include_file(include_file_name, pos) if include_file_path: - f = 
open(include_file_path, "r") + f = open(include_file_path, "rU") s2 = PyrexScanner(f, include_file_path, s) try: tree = p_statement_list(s2, level) @@ -1251,7 +1257,7 @@ # "void", "signed", "unsigned" #) -basic_c_type_names = ("void", "char", "int", "float", "double") +basic_c_type_names = ("void", "char", "int", "float", "double", "Py_ssize_t") sign_and_longness_words = ("short", "long", "signed", "unsigned") @@ -1377,7 +1383,7 @@ if s.sy == "-": sign = "-" s.next() - if s.sy in ('INT', 'FLOAT', 'NULL'): + if s.sy in ('INT', 'LONG', 'FLOAT', 'NULL'): s.systring = sign + s.systring return p_atom(s) else: @@ -1575,6 +1581,8 @@ declarators = [declarator] while s.sy == ',': s.next() + if s.sy == 'NEWLINE': + break declarator = p_c_declarator(s, cmethod_flag = cmethod_flag) declarators.append(declarator) s.expect_newline("Syntax error in C variable declaration") Modified: lxml/pyrex/Pyrex/Compiler/PyrexTypes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/PyrexTypes.py (original) +++ lxml/pyrex/Pyrex/Compiler/PyrexTypes.py Tue Apr 25 15:43:21 2006 @@ -35,8 +35,8 @@ # of this type, given a code fragment for the entity. # * If for_display, this is for reading by a human in an error # message; otherwise it must be valid C code. - # * If dll_linkage is not None, it must be 'DL_IMPORT' or - # 'DL_EXPORT', and will be added to the base type part of + # * If dll_linkage is not None, it must be 'DL_EXPORT' or + # 'DL_IMPORT', and will be added to the base type part of # the declaration. # * If pyrex = 1, this is for use in a 'cdef extern' # statement of a Pyrex include file. @@ -111,6 +111,9 @@ # A type is incomplete if it is an unsized array, # a struct whose attributes are not defined, etc. return 1 + + def cast_code(self, expr_code): + return "((%s)%s)" % (self.declaration_code(""), expr_code) class CTypedefType: @@ -289,11 +292,15 @@ is_numeric = 1 default_value = "0" - parsetuple_formats = "chilLfd?" 
# rank -> format + parsetuple_formats = ( # rank -> format + "?HIkK????", # unsigned + "chilL?fd?", # signed + ) - def __init__(self, rank, pymemberdef_typecode = None): + def __init__(self, rank, signed = 1, pymemberdef_typecode = None): self.rank = rank - ptf = self.parsetuple_formats[rank] + self.signed = signed + ptf = self.parsetuple_formats[signed][rank] if ptf == '?': ptf = None self.parsetuple_format = ptf @@ -329,36 +336,49 @@ from_py_function = "PyInt_AsLong" def __init__(self, rank, signed, pymemberdef_typecode = None, is_returncode = 0): - CNumericType.__init__(self, rank, pymemberdef_typecode) - self.signed = signed + CNumericType.__init__(self, rank, signed, pymemberdef_typecode) self.is_returncode = is_returncode +class CPySSizeTType(CIntType): + + to_py_function = "PyInt_FromSsize_t" + from_py_function = "PyInt_AsSsize_t" + + +class CUIntType(CIntType): + + to_py_function = "PyLong_FromUnsignedLong" + from_py_function = "PyInt_AsUnsignedLongMask" + + class CULongType(CIntType): to_py_function = "PyLong_FromUnsignedLong" - from_py_function = "PyLong_AsUnsignedLong" + from_py_function = "PyInt_AsUnsignedLongMask" class CLongLongType(CIntType): to_py_function = "PyLong_FromLongLong" - from_py_function = "PyLong_AsLongLong" + from_py_function = "PyInt_AsUnsignedLongLongMask" class CULongLongType(CIntType): to_py_function = "PyLong_FromUnsignedLongLong" - from_py_function = "PyLong_AsUnsignedLongLong" + from_py_function = "PyInt_AsUnsignedLongLongMask" class CFloatType(CNumericType): is_float = 1 - signed = 1 to_py_function = "PyFloat_FromDouble" from_py_function = "PyFloat_AsDouble" - + + def __init__(self, rank, pymemberdef_typecode = None): + CNumericType.__init__(self, rank, 1, pymemberdef_typecode) + class CArrayType(CType): # base_type CType Element type @@ -423,6 +443,7 @@ def declaration_code(self, entity_code, for_display = 0, dll_linkage = None, pyrex = 0): + #print "CPtrType.declaration_code: pointer to", self.base_type ### return 
self.base_type.declaration_code( "(*%s)" % entity_code, for_display, dll_linkage, pyrex) @@ -521,7 +542,7 @@ if not arg_decl_code and not pyrex: arg_decl_code = "void" exc_clause = "" - if for_display: + if pyrex or for_display: if self.exception_value and self.exception_check: exc_clause = " except? %s" % self.exception_value elif self.exception_value: @@ -683,17 +704,18 @@ c_short_type = CIntType(1, 1, "T_SHORT") c_int_type = CIntType(2, 1, "T_INT") c_long_type = CIntType(3, 1, "T_LONG") -c_longlong_type = CLongLongType(4, 1) +c_longlong_type = CLongLongType(4, 1, "T_LONGLONG") +c_py_ssize_t_type = CPySSizeTType(5, 1) c_uchar_type = CIntType(0, 0, "T_UBYTE") c_ushort_type = CIntType(1, 0, "T_USHORT") -c_uint_type = CIntType(2, 0, "T_UINT") +c_uint_type = CUIntType(2, 0, "T_UINT") c_ulong_type = CULongType(3, 0, "T_ULONG") -c_ulonglong_type = CULongLongType(4, 0) +c_ulonglong_type = CULongLongType(4, 0, "T_ULONGLONG") -c_float_type = CFloatType(5, "T_FLOAT") -c_double_type = CFloatType(6, "T_DOUBLE") -c_longdouble_type = CFloatType(7) +c_float_type = CFloatType(6, "T_FLOAT") +c_double_type = CFloatType(7, "T_DOUBLE") +c_longdouble_type = CFloatType(8) c_null_ptr_type = CNullPtrType(c_void_type) c_char_array_type = CCharArrayType(None) @@ -705,7 +727,7 @@ error_type = ErrorType() -lowest_float_rank = 5 +lowest_float_rank = 6 rank_to_type_name = ( "char", # 0 @@ -713,9 +735,10 @@ "int", # 2 "long", # 3 "PY_LONG_LONG", # 4 - "float", # 5 - "double", # 6 - "long double", # 7 + "Py_ssize_t", # 5 + "float", # 6 + "double", # 7 + "long double", # 8 ) sign_and_rank_to_type = { @@ -730,9 +753,10 @@ (1, 2): c_int_type, (1, 3): c_long_type, (1, 4): c_longlong_type, - (1, 5): c_float_type, - (1, 6): c_double_type, - (1, 7): c_longdouble_type, + (1, 5): c_py_ssize_t_type, + (1, 6): c_float_type, + (1, 7): c_double_type, + (1, 8): c_longdouble_type, } modifiers_and_name_to_type = { @@ -748,6 +772,7 @@ (1, 0, "int"): c_int_type, (1, 1, "int"): c_long_type, (1, 2, "int"): 
c_longlong_type, + (1, 0, "Py_ssize_t"): c_py_ssize_t_type, (1, 0, "float"): c_float_type, (1, 0, "double"): c_double_type, (1, 1, "double"): c_longdouble_type, @@ -788,3 +813,19 @@ else: return base +def same_type(type1, type2): + return type1.same_as(type2) + +def assignable_from(type1, type2): + return type1.assignable_from(type2) + +def typecast(to_type, from_type, expr_code): + # Return expr_code cast to a C type which can be + # assigned to to_type, assuming its existing C type + # is from_type. + if to_type is from_type or \ + (not to_type.is_pyobject and assignable_from(to_type, from_type)): + return expr_code + else: + #print "typecast: to", to_type, "from", from_type ### + return to_type.cast_code(expr_code) Modified: lxml/pyrex/Pyrex/Compiler/Scanning.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Scanning.py (original) +++ lxml/pyrex/Pyrex/Compiler/Scanning.py Tue Apr 25 15:43:21 2006 @@ -21,7 +21,6 @@ debug_scanner = 0 trace_scanner = 0 -#scanner_dump_file = open("Lexicon_dump.txt", "w") scanner_debug_flags = 0 scanner_dump_file = None binary_lexicon_pickle = 1 @@ -39,7 +38,7 @@ import md5 try: try: - f = open(path) + f = open(path, "rU") text = f.read() except IOError, e: print "Unable to hash scanner source file (%s)" % e @@ -150,11 +149,6 @@ def __call__(self, stream, text): return getattr(stream, self.name)(text) - - -#def make_lexicon(): -# import Lexicon -# return Lexicon.lexicon #------------------------------------------------------------------ Modified: lxml/pyrex/Pyrex/Compiler/Symtab.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Symtab.py (original) +++ lxml/pyrex/Pyrex/Compiler/Symtab.py Tue Apr 25 15:43:21 2006 @@ -22,7 +22,6 @@ # cname string C name of entity # type PyrexType Type of entity # doc string Doc string - # #borrowed bool Is a borrowed reference # init string Initial value # visibility 'private' or 
'public' or 'extern' # is_builtin boolean Is a Python builtin name @@ -171,7 +170,6 @@ return self.global_scope().intern(name) def qualifying_scope(self): - #return self.outer_scope return self.parent_scope def mangle(self, prefix, name = None): @@ -208,9 +206,6 @@ def qualify_name(self, name): return "%s.%s" % (self.qualified_name, name) - #def undeclare(self, name): - # del self.entries[name] - def declare_const(self, name, type, value, pos, cname = None): # Add an entry for a named constant. if not cname: @@ -420,17 +415,23 @@ % cname) self.free_temp_entries.append(entry) - def recycle_pending_temps(self): - # Obsolete - pass + def temps_in_use(self): + # Return a new list of temp entries currently in use. + return [entry for entry in self.temp_entries + if entry not in self.free_temp_entries] + + #def recycle_pending_temps(self): + # # Obsolete + # pass def use_utility_code(self, new_code): self.global_scope().use_utility_code(new_code) def generate_library_function_declarations(self, code): # Generate extern decls for C library funcs used. 
- if self.pow_function_used: - code.putln("extern double pow(double, double);") + #if self.pow_function_used: + # code.putln("%s double pow(double, double);" % Naming.extern_c_macro) + pass def defines_any(self, names): # Test whether any of the given names are @@ -885,8 +886,8 @@ def release_temp(self, cname): self.outer_scope.release_temp(cname) - def recycle_pending_temps(self): - self.outer_scope.recycle_pending_temps() + #def recycle_pending_temps(self): + # self.outer_scope.recycle_pending_temps() def add_default_value(self, type): return self.outer_scope.add_default_value(type) @@ -977,6 +978,8 @@ def declare_cfunction(self, name, type, pos, cname = None, visibility = 'private', defining = 0): + if get_special_method_signature(name): + error(pos, "Special methods must be declared with 'def', not 'cdef'") args = type.args if not args: error(pos, "C method has no self argument") Modified: lxml/pyrex/Pyrex/Compiler/TypeSlots.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/TypeSlots.py (original) +++ lxml/pyrex/Pyrex/Compiler/TypeSlots.py Tue Apr 25 15:43:21 2006 @@ -26,6 +26,7 @@ # 'i' int # 'I' int * # 'l' long + # 'Z' Py_ssize_t # 's' char * # 'S' char ** # 'r' int used only to signal exception @@ -42,6 +43,7 @@ 'i': PyrexTypes.c_int_type, 'I': PyrexTypes.c_int_ptr_type, 'l': PyrexTypes.c_long_type, + 'Z': PyrexTypes.c_py_ssize_t_type, 's': PyrexTypes.c_char_ptr_type, 'S': PyrexTypes.c_char_ptr_ptr_type, 'r': PyrexTypes.c_returncode_type, @@ -212,9 +214,10 @@ # Descriptor for the type flags slot. 
def slot_code(self, scope): - value = "Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_BASETYPE" - if scope.has_pyobject_attrs: - value += "|Py_TPFLAGS_HAVE_GC" + # Always add Py_TPFLAGS_HAVE_GC -- PyType_Ready doesn't seem to inherit it + value = "Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC" + #if scope.has_pyobject_attrs: + # value += "|Py_TPFLAGS_HAVE_GC" return value @@ -353,18 +356,30 @@ iternaryfunc = Signature("TOO", "O") # typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *); callfunc = Signature("T*", "O") # typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *); inquiry = Signature("T", "i") # typedef int (*inquiry)(PyObject *); +lenfunc = Signature("T", "Z") # typedef Py_ssize_t (*lenfunc)(PyObject *); + # typedef int (*coercion)(PyObject **, PyObject **); intargfunc = Signature("Ti", "O") # typedef PyObject *(*intargfunc)(PyObject *, int); +ssizeargfunc = Signature("TZ", "O") # typedef PyObject *(*ssizeargfunc)(PyObject *, Py_ssize_t); intintargfunc = Signature("Tii", "O") # typedef PyObject *(*intintargfunc)(PyObject *, int, int); +ssizessizeargfunc = Signature("TZZ", "O") # typedef PyObject *(*ssizessizeargfunc)(PyObject *, Py_ssize_t, Py_ssize_t); intobjargproc = Signature("TiO", 'r') # typedef int(*intobjargproc)(PyObject *, int, PyObject *); +ssizeobjargproc = Signature("TZO", 'r') # typedef int(*ssizeobjargproc)(PyObject *, Py_ssize_t, PyObject *); intintobjargproc = Signature("TiiO", 'r') # typedef int(*intintobjargproc)(PyObject *, int, int, PyObject *); +ssizessizeobjargproc = Signature("TZZO", 'r') # typedef int(*ssizessizeobjargproc)(PyObject *, Py_ssize_t, Py_ssize_t, PyObject *); + intintargproc = Signature("Tii", 'r') +ssizessizeargproc = Signature("TZZ", 'r') objargfunc = Signature("TO", "O") objobjargproc = Signature("TOO", 'r') # typedef int (*objobjargproc)(PyObject *, PyObject *, PyObject *); getreadbufferproc = Signature("TiP", 'i') # typedef int 
(*getreadbufferproc)(PyObject *, int, void **); getwritebufferproc = Signature("TiP", 'i') # typedef int (*getwritebufferproc)(PyObject *, int, void **); getsegcountproc = Signature("TI", 'i') # typedef int (*getsegcountproc)(PyObject *, int *); getcharbufferproc = Signature("TiS", 'i') # typedef int (*getcharbufferproc)(PyObject *, int, const char **); +readbufferproc = Signature("TZP", "Z") # typedef Py_ssize_t (*readbufferproc)(PyObject *, Py_ssize_t, void **); +writebufferproc = Signature("TZP", "Z") # typedef Py_ssize_t (*writebufferproc)(PyObject *, Py_ssize_t, void **); +segcountproc = Signature("TZ", "Z") # typedef Py_ssize_t (*segcountproc)(PyObject *, Py_ssize_t *); +writebufferproc = Signature("TZS", "Z") # typedef Py_ssize_t (*charbufferproc)(PyObject *, Py_ssize_t, char **); objargproc = Signature("TO", 'r') # typedef int (*objobjproc)(PyObject *, PyObject *); # typedef int (*visitproc)(PyObject *, void *); # typedef int (*traverseproc)(PyObject *, visitproc, void *); @@ -453,14 +468,17 @@ MethodSlot(binaryfunc, "nb_true_divide", "__truediv__"), MethodSlot(ibinaryfunc, "nb_inplace_floor_divide", "__ifloordiv__"), MethodSlot(ibinaryfunc, "nb_inplace_true_divide", "__itruediv__"), + + # Added in release 2.5 +# MethodSlot(lenfunc, "nb_index", "??"), ) PySequenceMethods = ( - MethodSlot(inquiry, "sq_length", "__len__"), # EmptySlot("sq_length"), # mp_length used instead + MethodSlot(lenfunc, "sq_length", "__len__"), # EmptySlot("sq_length"), # mp_length used instead EmptySlot("sq_concat"), # nb_add used instead EmptySlot("sq_repeat"), # nb_multiply used instead SyntheticSlot("sq_item", ["__getitem__"], "0"), #EmptySlot("sq_item"), # mp_subscript used instead - MethodSlot(intintargfunc, "sq_slice", "__getslice__"), + MethodSlot(ssizessizeargfunc, "sq_slice", "__getslice__"), EmptySlot("sq_ass_item"), # mp_ass_subscript used instead SyntheticSlot("sq_ass_slice", ["__setslice__", "__delslice__"], "0"), MethodSlot(cmpfunc, "sq_contains", "__contains__"), @@ 
-469,9 +487,9 @@ ) PyMappingMethods = ( - MethodSlot(inquiry, "mp_length", "__len__"), + MethodSlot(lenfunc, "mp_length", "__len__"), MethodSlot(objargfunc, "mp_subscript", "__getitem__"), - SyntheticSlot("mp_ass_subscript", ["__setitem__"], "0"), + SyntheticSlot("mp_ass_subscript", ["__setitem__", "__delitem__"], "0"), ) PyBufferProcs = ( @@ -564,8 +582,8 @@ MethodSlot(destructor, "", "__dealloc__") MethodSlot(objobjargproc, "", "__setitem__") MethodSlot(objargproc, "", "__delitem__") -MethodSlot(intintobjargproc, "", "__setslice__") -MethodSlot(intintargproc, "", "__delslice__") +MethodSlot(ssizessizeobjargproc, "", "__setslice__") +MethodSlot(ssizessizeargproc, "", "__delslice__") MethodSlot(getattrofunc, "", "__getattr__") MethodSlot(setattrofunc, "", "__setattr__") MethodSlot(delattrofunc, "", "__delattr__") Modified: lxml/pyrex/Pyrex/Compiler/Version.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Version.py (original) +++ lxml/pyrex/Pyrex/Compiler/Version.py Tue Apr 25 15:43:21 2006 @@ -1 +1 @@ -version = '0.9.3.1_lxml' +version = '0.9.4.1' Modified: lxml/pyrex/Pyrex/Distutils/build_ext.py ============================================================================== --- lxml/pyrex/Pyrex/Distutils/build_ext.py (original) +++ lxml/pyrex/Pyrex/Distutils/build_ext.py Tue Apr 25 15:43:21 2006 @@ -7,7 +7,8 @@ # Pyrex is (c) Greg Ewing. 
import distutils.command.build_ext -import Pyrex.Compiler.Main +#import Pyrex.Compiler.Main +from Pyrex.Compiler.Main import CompilationOptions, default_options, compile from Pyrex.Compiler.Errors import PyrexError from distutils.dep_util import newer import os @@ -18,62 +19,45 @@ class build_ext (distutils.command.build_ext.build_ext): - description = "compile Pyrex scripts, then build C/C++ extensions (compile/link to build directory)" + description = "compile Pyrex scripts, then build C/C++ extensions (compile/link to build directory)" - def finalize_options (self): - distutils.command.build_ext.build_ext.finalize_options(self) + def finalize_options (self): + distutils.command.build_ext.build_ext.finalize_options(self) + + # The following hack should no longer be needed. + if 0: + # compiling with mingw32 gets an "initializer not a constant" error + # doesn't appear to happen with MSVC! + # so if we are compiling with mingw32, + # switch to C++ mode, to avoid the problem + if self.compiler == 'mingw32': + self.swig_cpp = 1 + + def swig_sources (self, sources, extension = None): + if not self.extensions: + return + + # collect the names of the source (.pyx) files + pyx_sources = [] + pyx_sources = [source for source in sources if source.endswith('.pyx')] + other_sources = [source for source in sources if not source.endswith('.pyx')] + + #suffix = self.swig_cpp and '.cpp' or '.c' + suffix = '.c' + for pyx in pyx_sources: + # should I raise an exception if it doesn't exist? + if os.path.exists(pyx): + source = pyx + target = replace_suffix(source, suffix) + if newer(source, target) or self.force: + self.pyrex_compile(source) + + return [replace_suffix(src, suffix) for src in pyx_sources] + other_sources + + def pyrex_compile(self, source): + options = CompilationOptions(default_options, + include_path = self.include_dirs) + result = compile(source, options) + if result.num_errors <> 0: + sys.exit(1) - # The following hack should no longer be needed. 
- if 0: - # compiling with mingw32 gets an "initializer not a constant" error - # doesn't appear to happen with MSVC! - # so if we are compiling with mingw32, - # switch to C++ mode, to avoid the problem - if self.compiler == 'mingw32': - self.swig_cpp = 1 - - def swig_sources (self, sources, *otherargs): - if not self.extensions: - return - - # collect the names of the source (.pyx) files - pyx_sources = [] - pyx_sources = [source for source in sources if source.endswith('.pyx')] - other_sources = [source for source in sources if not source.endswith('.pyx')] - - extension = self.swig_cpp and '.cpp' or '.c' - for pyx in pyx_sources: - # should I raise an exception if it doesn't exist? - if os.path.exists(pyx): - source = pyx - #target = source.replace('.pyx', extension) - target = replace_suffix(source, extension) - if newer(source, target) or self.force: - self.pyrex_compile(source) - - if self.swig_cpp: - # rename .c to .cpp (Pyrex always builds .c ...) - if os.path.exists(target): - os.unlink(target) - #os.rename(source.replace('.pyx', '.c'), target) - os.rename(replace_suffix(source, '.c'), target) - # massage the cpp file - self.c_to_cpp(target) - - return [replace_suffix(src, extension) for src in pyx_sources] + other_sources - - def pyrex_compile(self, source): - result = Pyrex.Compiler.Main.compile(source) - if result.num_errors <> 0: - sys.exit(1) - - def c_to_cpp(self, filename): - """touch up the Pyrex generated c/cpp files to meet mingw32/distutils requirements.""" - f = open(filename, 'r') - lines = [line for line in f.readlines() if not line.startswith('staticforward PyTypeObject __pyx_type_')] - f.close() - f = open(filename, 'w') - lines.insert(1, 'extern "C" {\n') - lines.append('}\n') - f.write(''.join(lines)) - f.close() Modified: lxml/pyrex/Pyrex/Mac/DarwinSystem.py ============================================================================== --- lxml/pyrex/Pyrex/Mac/DarwinSystem.py (original) +++ lxml/pyrex/Pyrex/Mac/DarwinSystem.py Tue Apr 25 
15:43:21 2006 @@ -3,6 +3,8 @@ # verbose = 0 +gcc_pendantic = True +gcc_warnings_are_errors = False import os from Pyrex.Utils import replace_suffix @@ -12,13 +14,17 @@ "/Library/Frameworks/Python.framework/Headers" ] -compiler = "gcc" +compilers = ["gcc", "g++"] compiler_options = \ "-g -c -fno-strict-aliasing -Wno-long-double -no-cpp-precomp " \ - "-mno-fused-madd -fno-common -dynamic" \ + "-mno-fused-madd -fno-common -dynamic " \ .split() +if gcc_pendantic: + compiler_options.extend(["-pedantic", "-Wno-long-long"]) +if gcc_warnings_are_errors: + compiler_options.append("-Werror") -linker = "gcc" +linkers = ["gcc", "g++"] linker_options = \ "-Wl,-F.,-w -bundle -framework Python" \ .split() @@ -26,15 +32,16 @@ class CCompilerError(PyrexError): pass -def c_compile(c_file, verbose_flag = 0): +def c_compile(c_file, verbose_flag = 0, cplus = 0, obj_suffix = ".o"): # Compile the given C source file to produce # an object file. Returns the pathname of the # resulting file. c_file = os.path.join(os.getcwd(), c_file) - o_file = replace_suffix(c_file, ".o") + o_file = replace_suffix(c_file, obj_suffix) include_options = [] for dir in py_include_dirs: include_options.append("-I%s" % dir) + compiler = compilers[bool(cplus)] args = [compiler] + compiler_options + include_options + [c_file, "-o", o_file] if verbose_flag or verbose: print " ".join(args) @@ -43,14 +50,15 @@ raise CCompilerError("C compiler returned status %s" % status) return o_file -def c_link(obj_file, verbose_flag = 0): - return c_link_list([obj_file], verbose_flag) +def c_link(obj_file, verbose_flag = 0, extra_objects = [], cplus = 0): + return c_link_list([obj_file] + extra_objects, verbose_flag, cplus) -def c_link_list(obj_files, verbose_flag = 0): +def c_link_list(obj_files, verbose_flag = 0, cplus = 0): # Link the given object files into a dynamically # loadable extension file. Returns the pathname # of the resulting file. 
out_file = replace_suffix(obj_files[0], ".so") + linker = linkers[bool(cplus)] args = [linker] + linker_options + obj_files + ["-o", out_file] if verbose_flag or verbose: print " ".join(args) Modified: lxml/pyrex/Pyrex/Utils.py ============================================================================== --- lxml/pyrex/Pyrex/Utils.py (original) +++ lxml/pyrex/Pyrex/Utils.py Tue Apr 25 15:43:21 2006 @@ -8,12 +8,9 @@ def replace_suffix(path, newsuf): base, _ = os.path.splitext(path) return base + newsuf - -def default_open_new_file(path): - return open(path, "w") -if sys.platform == "mac": - from Pyrex.Mac.MacUtils import open_new_file -else: - open_new_file = default_open_new_file +def open_new_file(path): + # Open and truncate existing file to + # preserve metadata on the Mac. + return open(path, "w+") Modified: lxml/pyrex/ToDo.txt ============================================================================== --- lxml/pyrex/ToDo.txt (original) +++ lxml/pyrex/ToDo.txt Tue Apr 25 15:43:21 2006 @@ -269,3 +269,16 @@ Initial values when declaring variables? Do something about __stdcall. + +Support class methods in extension types using METH_CLASS flag. + +Disallow defaulting types to 'object' in C declarations? + +C globals with static initialisers. + +Find a way of providing C-only initialisers for extension types. + +Metaclasses for extension types? + +Make extension types use Py_TPFLAGS_HEAPTYPE so their __module__ +will get set dynamically?