From scoder at codespeak.net Fri Dec 1 09:48:13 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Dec 2006 09:48:13 +0100 (CET) Subject: [Lxml-checkins] r35185 - in lxml/trunk: . src/lxml Message-ID: <20061201084813.0B1AF1006C@code0.codespeak.net> Author: scoder Date: Fri Dec 1 09:47:54 2006 New Revision: 35185 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/etree.pyx Log: accept QName objects in Element.find*() Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Dec 1 09:47:54 2006 @@ -15,6 +15,12 @@ possible to pass an --rpath directly to distutils; previously this was being shadowed. +Bugs fixed +---------- + +* Element.find*() did not accept QName objects as path + + 1.1.2 (2006-10-30) ================== Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Fri Dec 1 09:47:54 2006 @@ -202,6 +202,22 @@ raise type, value, traceback +cdef class QName: + """QName wrapper. + """ + cdef readonly object text + def __init__(self, text_or_uri, tag=None): + if tag is not None: + text_or_uri = "{%s}%s" % (text_or_uri, tag) + elif not _isString(text_or_uri): + text_or_uri = str(text_or_uri) + self.text = text_or_uri + def __str__(self): + return self.text + def __hash__(self): + return self.text.__hash__() + + # forward declaration of _BaseParser, see parser.pxi cdef class _BaseParser @@ -1176,16 +1192,22 @@ def find(self, path): """Finds the first matching subelement, by tag name or path. """ + if isinstance(path, QName): + path = (path).text return _elementpath.find(self, path) def findtext(self, path, default=None): """Finds text for the first matching subelement, by tag name or path. """ + if isinstance(path, QName): + path = (path).text return _elementpath.findtext(self, path, default) def findall(self, path): """Finds all matching subelements, by tag name or path. """ + if isinstance(path, QName): + path = (path).text return _elementpath.findall(self, path) def xpath(self, _path, namespaces=None, extensions=None, **_variables): @@ -1699,21 +1721,6 @@ fromstring = XML -cdef class QName: - """QName wrapper. - """ - cdef readonly object text - def __init__(self, text_or_uri, tag=None): - if tag is not None: - text_or_uri = "{%s}%s" % (text_or_uri, tag) - elif not _isString(text_or_uri): - text_or_uri = str(text_or_uri) - self.text = text_or_uri - def __str__(self): - return self.text - def __hash__(self): - return self.text.__hash__() - def iselement(element): """Checks if an object appears to be a valid element object. """ From scoder at codespeak.net Fri Dec 1 09:53:57 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Dec 2006 09:53:57 +0100 (CET) Subject: [Lxml-checkins] r35186 - lxml/branch/nscleanup Message-ID: <20061201085357.30D091006C@code0.codespeak.net> Author: scoder Date: Fri Dec 1 09:53:51 2006 New Revision: 35186 Added: lxml/branch/nscleanup/ - copied from r35185, lxml/trunk/ Log: new branch to play around with a custom implementation of xmlReconciliateNs() From scoder at codespeak.net Fri Dec 1 09:55:25 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Dec 2006 09:55:25 +0100 (CET) Subject: [Lxml-checkins] r35187 - in lxml/branch/nscleanup/src/lxml: . tests Message-ID: <20061201085525.D33051006C@code0.codespeak.net> Author: scoder Date: Fri Dec 1 09:55:17 2006 New Revision: 35187 Modified: lxml/branch/nscleanup/src/lxml/apihelpers.pxi lxml/branch/nscleanup/src/lxml/etree.pyx lxml/branch/nscleanup/src/lxml/etree_defs.h lxml/branch/nscleanup/src/lxml/proxy.pxi lxml/branch/nscleanup/src/lxml/public-api.pxi lxml/branch/nscleanup/src/lxml/tests/test_etree.py lxml/branch/nscleanup/src/lxml/tree.pxd Log: preliminary implementation with 3 failing test cases Modified: lxml/branch/nscleanup/src/lxml/apihelpers.pxi ============================================================================== --- lxml/branch/nscleanup/src/lxml/apihelpers.pxi (original) +++ lxml/branch/nscleanup/src/lxml/apihelpers.pxi Fri Dec 1 09:55:17 2006 @@ -147,7 +147,7 @@ if attr_ns_utf is None: tree.xmlNewProp(c_node, _cstr(attr_name_utf), _cstr(value_utf)) else: - c_ns = doc._findOrBuildNodeNs(c_node, _cstr(attr_ns_utf)) + c_ns = doc._findOrBuildNodeNs(c_node, _cstr(attr_ns_utf), NULL) tree.xmlNewNsProp(c_node, c_ns, _cstr(attr_name_utf), _cstr(value_utf)) @@ -203,7 +203,8 @@ if ns is None: tree.xmlSetProp(element._c_node, c_tag, c_value) else: - c_ns = element._doc._findOrBuildNodeNs(element._c_node, _cstr(ns)) + c_ns = element._doc._findOrBuildNodeNs(element._c_node, + _cstr(ns), NULL) tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value) return 0 Modified: lxml/branch/nscleanup/src/lxml/etree.pyx ============================================================================== --- lxml/branch/nscleanup/src/lxml/etree.pyx (original) +++ lxml/branch/nscleanup/src/lxml/etree.pyx Fri Dec 1 09:55:17 2006 @@ -298,24 +298,46 @@ self._ns_counter = self._ns_counter + 1 return ns - cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, char* href): + cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, + char* c_href, char* c_prefix): """Get or create namespace structure for a node. """ + cdef int i cdef xmlNs* c_ns + cdef xmlNs* c_doc_ns # look for existing ns - c_ns = tree.xmlSearchNsByHref(self._c_doc, c_node, href) + c_ns = tree.xmlSearchNsByHref(self._c_doc, c_node, c_href) if c_ns is not NULL: return c_ns - # create ns if existing ns cannot be found - # try to simulate ElementTree's namespace prefix creation - prefix = self.buildNewPrefix() - c_ns = tree.xmlNewNs(c_node, href, _cstr(prefix)) + + if c_prefix is NULL or \ + tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is not NULL: + # try to simulate ElementTree's namespace prefix creation + for i from 0 <= i < 10000: + prefix = self.buildNewPrefix() + c_prefix = _cstr(prefix) + # make sure it's not used already + if tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is NULL: + break + if i >= 10000: + # XXX too many prefixes in use - this is pretty bad! + return NULL + + c_ns = tree.xmlNewNs(c_node, c_href, c_prefix) + return c_ns + c_doc_ns = self._c_doc.oldNs + if c_doc_ns is NULL: + # this will create the XML namespace: + c_doc_ns = tree.xmlSearchNs(self._c_doc, c_node, 'xml') + while c_doc_ns.next is not NULL: + c_doc_ns = c_doc_ns.next + c_doc_ns.next = c_ns return c_ns cdef void _setNodeNs(self, xmlNode* c_node, char* href): "Lookup namespace structure and set it for the node." cdef xmlNs* c_ns - c_ns = self._findOrBuildNodeNs(c_node, href) + c_ns = self._findOrBuildNodeNs(c_node, href, NULL) tree.xmlSetNs(c_node, c_ns) cdef void _setNodeNamespaces(self, xmlNode* c_node, Modified: lxml/branch/nscleanup/src/lxml/etree_defs.h ============================================================================== --- lxml/branch/nscleanup/src/lxml/etree_defs.h (original) +++ lxml/branch/nscleanup/src/lxml/etree_defs.h Fri Dec 1 09:55:17 2006 @@ -59,6 +59,11 @@ ((c_node)->type == XML_COMMENT_NODE) || \ ((c_node)->type == XML_PI_NODE)) +#define _isElementOrXInclude(c_node) \ + (_isElement(c_node) || \ + ((c_node)->type == XML_XINCLUDE_START) || \ + ((c_node)->type == XML_XINCLUDE_END)) + #define _getNs(c_node) \ (((c_node)->ns == 0) ? 0 : ((c_node)->ns->href)) Modified: lxml/branch/nscleanup/src/lxml/proxy.pxi ============================================================================== --- lxml/branch/nscleanup/src/lxml/proxy.pxi (original) +++ lxml/branch/nscleanup/src/lxml/proxy.pxi Fri Dec 1 09:55:17 2006 @@ -169,10 +169,11 @@ tree below (including the current node). It also reconciliates namespaces so they're correct inside the new environment. """ - tree.xmlReconciliateNs(doc._c_doc, node._c_node) - if node._doc is not doc: - node._doc = doc - changeDocumentBelow(node._c_node, doc) + _moveNodeToDocument(node, doc) +## tree.xmlReconciliateNs(doc._c_doc, node._c_node) +## if node._doc is not doc: +## node._doc = doc +## changeDocumentBelow(node._c_node, doc) cdef void changeDocumentBelow(xmlNode* c_parent, _Document doc): """Update the Python references in the tree below the node. @@ -187,3 +188,136 @@ if c_node._private is not NULL: (<_NodeBase>c_node._private)._doc = doc tree.END_FOR_EACH_ELEMENT_FROM(c_node) + + +cdef void _moveNodeToDocument(_NodeBase node, _Document doc): + """Fix the xmlNs pointers of a node and its subtree that were moved. + + Mainly copied from libxml2's xmlReconciliateNs(). Expects libxml2 doc + pointers of node to be correct already, but fixes _Document references. + """ + cdef xmlDoc* c_doc + cdef xmlNode* c_element + cdef xmlNode* c_start_node + cdef xmlNode* c_node + cdef xmlNs** c_ns_new_cache + cdef xmlNs** c_ns_old_cache + cdef xmlNs* c_ns + cdef xmlNs* c_new_ns + cdef cstd.size_t i, c_cache_size, c_cache_last + + c_element = node._c_node + c_doc = c_element.doc + + if not tree._isElementOrXInclude(c_element): + return + + c_start_node = c_element + c_ns_new_cache = NULL + c_ns_old_cache = NULL + c_cache_size = 0 + c_cache_last = 0 + + while c_element is not NULL: + # remove namespaces defined here if already known in ancestors + if c_element.nsDef is not NULL: + while c_element.nsDef is not NULL and \ + tree.xmlSearchNsByHref(c_element.doc, c_element.parent, + c_element.nsDef.href) is not NULL: + c_element.nsDef = c_element.nsDef.next + if c_element.nsDef is not NULL: + c_ns = c_element.nsDef + while c_ns.next is not NULL: + if tree.xmlSearchNsByHref(c_element.doc, c_element.parent, + c_ns.next.href) is not NULL: + c_ns.next = c_ns.next.next + else: + c_ns = c_ns.next + + # make sure ns declaration of element and its attributes is stored + # in this document + c_node = c_element + while c_node is not NULL: + if c_node.ns is not NULL: + c_ns = c_node.ns + for i from 0 <= i < c_cache_last: + if c_ns is c_ns_old_cache[i]: + c_node.ns = c_ns_new_cache[i] + c_ns = NULL + break + + if c_ns is not NULL: + # not in cache, must find a replacement from this document + c_new_ns = doc._findOrBuildNodeNs(c_node, c_ns.href, c_ns.prefix) + print "FOUND:", c_new_ns.href + if c_cache_last >= c_cache_size: + # must resize cache + if c_cache_size == 0: + c_cache_size = 20 + else: + c_cache_size = c_cache_size * 2 + c_ns_new_cache = python.PyMem_Realloc( + c_ns_new_cache, c_cache_size * sizeof(xmlNs*)) + if c_ns_new_cache is NULL: + python.PyMem_Free(c_ns_old_cache) + python.PyErr_NoMemory() + c_ns_old_cache = python.PyMem_Realloc( + c_ns_old_cache, c_cache_size * sizeof(xmlNs*)) + if c_ns_old_cache is NULL: + python.PyMem_Free(c_ns_new_cache) + python.PyErr_NoMemory() + c_ns_new_cache[c_cache_last] = c_new_ns + c_ns_old_cache[c_cache_last] = c_node.ns + c_cache_last = c_cache_last + 1 + c_node.ns = c_ns + if c_node is c_element: + c_node = c_element.properties + else: + c_node = c_node.next + + # traverse to next element, start with children + c_node = c_element.children + while c_node is not NULL and \ + not tree._isElementOrXInclude(c_node): + c_node = c_node.next + + if c_node is NULL: + # no children => back off and continue with siblings and parents + + # fix _Document reference (may dealloc the original document!) + if c_element._private is not NULL: + (<_NodeBase>c_element._private)._doc = doc + + if c_element is c_start_node: + break + + # continue with siblings + c_node = c_element.next + while (c_node is not NULL and + not tree._isElementOrXInclude(c_node)): + c_node = c_node.next + # if that didn't help, back off through parents' siblings + while c_node is NULL: + c_element = c_element.parent + if c_element is NULL or not tree._isElementOrXInclude(c_element): + break + + # fix _Document reference (may dealloc the original document!) + if c_element._private is not NULL: + (<_NodeBase>c_element._private)._doc = doc + + if c_element is c_start_node: + break + # parents already done -> look for their siblings + c_node = c_element.next + while (c_node is not NULL and + not tree._isElementOrXInclude(c_node)): + c_node = c_node.next + if c_node is c_start_node: + break + c_element = c_node + + if c_ns_new_cache is not NULL: + python.PyMem_Free(c_ns_new_cache) + if c_ns_old_cache is not NULL: + python.PyMem_Free(c_ns_old_cache) Modified: lxml/branch/nscleanup/src/lxml/public-api.pxi ============================================================================== --- lxml/branch/nscleanup/src/lxml/public-api.pxi (original) +++ lxml/branch/nscleanup/src/lxml/public-api.pxi Fri Dec 1 09:55:17 2006 @@ -138,4 +138,4 @@ char* href) except NULL: if doc is None: raise TypeError - return doc._findOrBuildNodeNs(c_node, href) + return doc._findOrBuildNodeNs(c_node, href, NULL) Modified: lxml/branch/nscleanup/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/nscleanup/src/lxml/tests/test_etree.py (original) +++ lxml/branch/nscleanup/src/lxml/tests/test_etree.py Fri Dec 1 09:55:17 2006 @@ -893,6 +893,56 @@ '', self._writeElement(e)) + def test_namespaces_default_copy_element(self): + etree = self.etree + + r = {None: 'http://ns.infrae.com/foo'} + e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r) + e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r) + + e1.append(e2) + + self.assertEquals( + None, + e1.prefix) + self.assertEquals( + None, + e2.prefix) + self.assertEquals( + '{http://ns.infrae.com/foo}bar', + e1.tag) + self.assertEquals( + '{http://ns.infrae.com/foo}bar', + e2.tag) + + print etree.tostring(e1) + print etree.tostring(e2) + + def test_namespaces_copy_element(self): + etree = self.etree + + r = {None: 'http://ns.infrae.com/BAR'} + e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r) + e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r) + + e1.append(e2) + + self.assertEquals( + None, + e1.prefix) + self.assertNotEquals( + None, + e2.prefix) + self.assertEquals( + '{http://ns.infrae.com/BAR}bar', + e1.tag) + self.assertEquals( + '{http://ns.infrae.com/foo}bar', + e2.tag) + + print etree.tostring(e1) + print etree.tostring(e2) + def test_element_nsmap(self): etree = self.etree Modified: lxml/branch/nscleanup/src/lxml/tree.pxd ============================================================================== --- lxml/branch/nscleanup/src/lxml/tree.pxd (original) +++ lxml/branch/nscleanup/src/lxml/tree.pxd Fri Dec 1 09:55:17 2006 @@ -77,7 +77,6 @@ XML_NAMESPACE_DECL= 18 XML_XINCLUDE_START= 19 XML_XINCLUDE_END= 20 - ctypedef struct xmlNs: char* href @@ -193,7 +192,7 @@ cdef xmlAttr* xmlHasProp(xmlNode* node, char* name) cdef xmlAttr* xmlHasNsProp(xmlNode* node, char* name, char* nameSpace) cdef char* xmlNodeGetContent(xmlNode* cur) - cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, char* nameSpace) + cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, char* prefix) cdef xmlNs* xmlSearchNsByHref(xmlDoc* doc, xmlNode* node, char* href) cdef int xmlIsBlankNode(xmlNode* node) cdef void xmlElemDump(FILE* f, xmlDoc* doc, xmlNode* cur) @@ -206,6 +205,7 @@ cdef xmlNode* xmlCopyNode(xmlNode* node, int extended) cdef xmlNode* xmlDocCopyNode(xmlNode* node, xmlDoc* doc, int extended) cdef int xmlReconciliateNs(xmlDoc* doc, xmlNode* tree) + cdef xmlNs* xmlNewReconciliedNs(xmlDoc* doc, xmlNode* tree, xmlNs* ns) cdef xmlBuffer* xmlBufferCreate() cdef char* xmlBufferContent(xmlBuffer* buf) cdef int xmlBufferLength(xmlBuffer* buf) @@ -260,6 +260,7 @@ cdef extern from "etree_defs.h": cdef int _isElement(xmlNode* node) + cdef int _isElementOrXInclude(xmlNode* node) cdef char* _getNs(xmlNode* node) cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* tree_top, xmlNode* start_node, int inclusive) From faassen at codespeak.net Fri Dec 1 13:46:59 2006 From: faassen at codespeak.net (faassen at codespeak.net) Date: Fri, 1 Dec 2006 13:46:59 +0100 (CET) Subject: [Lxml-checkins] r35199 - lxml/trunk/src/lxml/tests Message-ID: <20061201124659.936D310068@code0.codespeak.net> Author: faassen Date: Fri Dec 1 13:46:56 2006 New Revision: 35199 Modified: lxml/trunk/src/lxml/tests/common_imports.py lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_nsclasses.py lxml/trunk/src/lxml/tests/test_objectify.py Log: Some backwards compatibility work for Python 2.3. Modified: lxml/trunk/src/lxml/tests/common_imports.py ============================================================================== --- lxml/trunk/src/lxml/tests/common_imports.py (original) +++ lxml/trunk/src/lxml/tests/common_imports.py Fri Dec 1 13:46:56 2006 @@ -23,6 +23,11 @@ # we need our own version to make it work (Python 2.3?) import local_doctest as doctest +try: + from operator import itemgetter +except ImportError: + def itemgetter(item): + return lambda obj: obj[item] class HelperTestCase(unittest.TestCase): def parse(self, text): @@ -32,6 +37,12 @@ def _rootstring(self, tree): return etree.tostring(tree.getroot()).replace(' ', '').replace('\n', '') + # assertFalse doesn't exist in Python 2.3 + try: + unittest.TestCase.assertFalse + except AttributeError: + assertFalse = unittest.TestCase.failIf + class SillyFileLike: def __init__(self, xml_data=''): self.xml_data = xml_data Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Fri Dec 1 13:46:56 2006 @@ -11,9 +11,10 @@ import unittest, doctest import os, re, shutil, tempfile, copy -from common_imports import StringIO, etree, ElementTree, HelperTestCase, fileInTestDir, canonicalize +from common_imports import StringIO, etree, ElementTree +from common_imports import HelperTestCase, fileInTestDir, canonicalize -class ETreeTestCaseBase(unittest.TestCase): +class ETreeTestCaseBase(HelperTestCase): etree = None def setUp(self): Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_nsclasses.py (original) +++ lxml/trunk/src/lxml/tests/test_nsclasses.py Fri Dec 1 13:46:56 2006 @@ -10,7 +10,6 @@ from common_imports import etree, HelperTestCase, doctest class ETreeNamespaceClassesTestCase(HelperTestCase): - assertFalse = HelperTestCase.failIf class default_class(etree.ElementBase): pass Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Fri Dec 1 13:46:56 2006 @@ -12,6 +12,7 @@ from common_imports import etree, StringIO, HelperTestCase, fileInTestDir from common_imports import SillyFileLike, canonicalize, doctest +from common_imports import itemgetter from lxml import objectify @@ -119,7 +120,7 @@ self.assertEquals("0", root.c1.c2[0].text) self.assertEquals("1", root.c1.c2[1].text) self.assertEquals("2", root.c1.c2[2].text) - self.assertRaises(IndexError, operator.itemgetter(3), root.c1.c2) + self.assertRaises(IndexError, itemgetter(3), root.c1.c2) def test_child_index_neg(self): root = self.XML(xml_str) @@ -127,7 +128,7 @@ self.assertEquals("0", root.c1.c2[-3].text) self.assertEquals("1", root.c1.c2[-2].text) self.assertEquals("2", root.c1.c2[-1].text) - self.assertRaises(IndexError, operator.itemgetter(-4), root.c1.c2) + self.assertRaises(IndexError, itemgetter(-4), root.c1.c2) def test_child_len(self): root = self.XML(xml_str) From scoder at codespeak.net Fri Dec 1 14:32:34 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 1 Dec 2006 14:32:34 +0100 (CET) Subject: [Lxml-checkins] r35201 - lxml/trunk/src/lxml/tests Message-ID: <20061201133234.0784A10063@code0.codespeak.net> Author: scoder Date: Fri Dec 1 14:32:32 2006 New Revision: 35201 Modified: lxml/trunk/src/lxml/tests/test_objectify.py Log: rewrote test case to check Element identity instead of equality Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Fri Dec 1 14:32:32 2006 @@ -214,8 +214,7 @@ self.assertEquals(1, len(root.findall("c"))) self.assertEquals(2, len(root.findall(".//c"))) self.assertEquals(3, len(root.findall(".//b"))) - self.assertEquals(root.findall(".//b")[:2], - root.getchildren()[:2]) + self.assert_(root.findall(".//b")[1] is root.getchildren()[1]) def test_findall_ns(self): XML = self.XML From scoder at codespeak.net Mon Dec 4 10:20:40 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 4 Dec 2006 10:20:40 +0100 (CET) Subject: [Lxml-checkins] r35240 - in lxml/trunk: . src/lxml Message-ID: <20061204092040.5839610068@code0.codespeak.net> Author: scoder Date: Mon Dec 4 10:20:37 2006 New Revision: 35240 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/ElementInclude.py Log: small fix to actually check if xi:fallback tag is misplaced before complaining Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Dec 4 10:20:37 2006 @@ -8,6 +8,9 @@ Features added -------------- +* ElementInclude module for ElementTree compatible XInclude processing that + honours custom resolvers registered with the source document + * ElementTree.parser property holds the parser used to parse the document * setup.py has been refactored for greater readability and flexibility. Modified: lxml/trunk/src/lxml/ElementInclude.py ============================================================================== --- lxml/trunk/src/lxml/ElementInclude.py (original) +++ lxml/trunk/src/lxml/ElementInclude.py Mon Dec 4 10:20:37 2006 @@ -186,9 +186,11 @@ "unknown parse type in xi:include tag (%r)" % parse ) elif e.tag == XINCLUDE_FALLBACK: - raise FatalIncludeError( - "xi:fallback tag must be child of xi:include (%r)" % e.tag - ) + parent = e.getparent() + if parent is not None and parent.tag != XINCLUDE_INCLUDE: + raise FatalIncludeError( + "xi:fallback tag must be child of xi:include (%r)" % e.tag + ) else: raise FatalIncludeError( "Invalid element found in XInclude namespace (%r)" % e.tag From scoder at codespeak.net Mon Dec 4 10:25:32 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 4 Dec 2006 10:25:32 +0100 (CET) Subject: [Lxml-checkins] r35241 - in lxml/branch/nscleanup/src/lxml: . tests Message-ID: <20061204092532.153BF10068@code0.codespeak.net> Author: scoder Date: Mon Dec 4 10:25:29 2006 New Revision: 35241 Modified: lxml/branch/nscleanup/src/lxml/etree.pyx lxml/branch/nscleanup/src/lxml/proxy.pxi lxml/branch/nscleanup/src/lxml/tests/test_etree.py Log: cleanup, new test case and bugfix for stupid typo Modified: lxml/branch/nscleanup/src/lxml/etree.pyx ============================================================================== --- lxml/branch/nscleanup/src/lxml/etree.pyx (original) +++ lxml/branch/nscleanup/src/lxml/etree.pyx Mon Dec 4 10:25:29 2006 @@ -323,16 +323,7 @@ # XXX too many prefixes in use - this is pretty bad! return NULL - c_ns = tree.xmlNewNs(c_node, c_href, c_prefix) - return c_ns - c_doc_ns = self._c_doc.oldNs - if c_doc_ns is NULL: - # this will create the XML namespace: - c_doc_ns = tree.xmlSearchNs(self._c_doc, c_node, 'xml') - while c_doc_ns.next is not NULL: - c_doc_ns = c_doc_ns.next - c_doc_ns.next = c_ns - return c_ns + return tree.xmlNewNs(c_node, c_href, c_prefix) cdef void _setNodeNs(self, xmlNode* c_node, char* href): "Lookup namespace structure and set it for the node." Modified: lxml/branch/nscleanup/src/lxml/proxy.pxi ============================================================================== --- lxml/branch/nscleanup/src/lxml/proxy.pxi (original) +++ lxml/branch/nscleanup/src/lxml/proxy.pxi Mon Dec 4 10:25:29 2006 @@ -1,4 +1,4 @@ -# Proxy functions +# Proxy functions and low level node allocation stuff # Proxies represent elements, their reference is stored in the C # structure of the respective node to avoid multiple instantiation of @@ -234,7 +234,7 @@ else: c_ns = c_ns.next - # make sure ns declaration of element and its attributes is stored + # make sure the namespace of an element and its attributes is declared # in this document c_node = c_element while c_node is not NULL: @@ -249,7 +249,6 @@ if c_ns is not NULL: # not in cache, must find a replacement from this document c_new_ns = doc._findOrBuildNodeNs(c_node, c_ns.href, c_ns.prefix) - print "FOUND:", c_new_ns.href if c_cache_last >= c_cache_size: # must resize cache if c_cache_size == 0: @@ -269,12 +268,13 @@ c_ns_new_cache[c_cache_last] = c_new_ns c_ns_old_cache[c_cache_last] = c_node.ns c_cache_last = c_cache_last + 1 - c_node.ns = c_ns + c_node.ns = c_new_ns if c_node is c_element: + # after the element, continue with its attributes c_node = c_element.properties else: c_node = c_node.next - + # traverse to next element, start with children c_node = c_element.children while c_node is not NULL and \ @@ -317,6 +317,7 @@ break c_element = c_node + # cleanup if c_ns_new_cache is not NULL: python.PyMem_Free(c_ns_new_cache) if c_ns_old_cache is not NULL: Modified: lxml/branch/nscleanup/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/nscleanup/src/lxml/tests/test_etree.py (original) +++ lxml/branch/nscleanup/src/lxml/tests/test_etree.py Mon Dec 4 10:25:29 2006 @@ -943,6 +943,25 @@ print etree.tostring(e1) print etree.tostring(e2) + def test_namespaces_reuse_after_move(self): + Element = self.etree.Element + ElementTree = self.etree.ElementTree + + ns_href = "http://a.b.c" + one = self.etree.parse( + StringIO('' % ns_href)) + baz = one.getroot()[0][0] + + two = self.etree.parse( + StringIO('' % ns_href)) + two.getroot().append(baz) + del one # make sure the source document is deallocated + + self.assertEquals('{%s}baz' % ns_href, baz.tag) + self.assertEquals( + '' % ns_href, + self.etree.tostring(two)) + def test_element_nsmap(self): etree = self.etree From scoder at codespeak.net Mon Dec 4 17:32:10 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 4 Dec 2006 17:32:10 +0100 (CET) Subject: [Lxml-checkins] r35260 - in lxml/trunk: . src/lxml Message-ID: <20061204163210.1E6CB1007F@code0.codespeak.net> Author: scoder Date: Mon Dec 4 17:32:08 2006 New Revision: 35260 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/etree_defs.h Log: threading crash also appears under 2.4.1, not only 2.3 Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Dec 4 17:32:08 2006 @@ -13,7 +13,7 @@ * ElementTree.parser property holds the parser used to parse the document -* setup.py has been refactored for greater readability and flexibility. +* setup.py has been refactored for greater readability and flexibility * --rpath flag to setup.py to induce automatic linking-in of dynamic library runtime search paths has been renamed to --auto-rpath. This makes it @@ -23,6 +23,9 @@ Bugs fixed ---------- +* Error handling could crash under Python <= 2.4.1 - fixed by disabling thread + support in these environments + * Element.find*() did not accept QName objects as path Modified: lxml/trunk/src/lxml/etree_defs.h ============================================================================== --- lxml/trunk/src/lxml/etree_defs.h (original) +++ lxml/trunk/src/lxml/etree_defs.h Mon Dec 4 17:32:08 2006 @@ -16,8 +16,8 @@ #endif #endif -/* Threading can crash under Python 2.3 */ -#if PY_VERSION_HEX < 0x02040000 +/* Threading can crash under Python <= 2.4.1 */ +#if PY_VERSION_HEX < 0x02040200 #ifndef WITHOUT_THREADING #define WITHOUT_THREADING #endif From scoder at codespeak.net Thu Dec 7 08:59:56 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 7 Dec 2006 08:59:56 +0100 (CET) Subject: [Lxml-checkins] r35415 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20061207075956.18FCC1007B@code0.codespeak.net> Author: scoder Date: Thu Dec 7 08:59:52 2006 New Revision: 35415 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/serializer.pxi lxml/trunk/src/lxml/tests/test_elementtree.py Log: ElementTree.write() did not raise an exception when the file was not writable Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Dec 7 08:59:52 2006 @@ -23,6 +23,8 @@ Bugs fixed ---------- +* ElementTree.write() did not raise an exception when the file was not writable + * Error handling could crash under Python <= 2.4.1 - fixed by disabling thread support in these environments Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Thu Dec 7 08:59:52 2006 @@ -73,7 +73,10 @@ cdef void* PyMem_Malloc(size_t size) cdef void* PyMem_Realloc(void* p, size_t size) cdef void PyMem_Free(void* p) - cdef object PyErr_NoMemory() # always returns NULL to pass on exception + + # these two always return NULL to pass on the exception + cdef object PyErr_NoMemory() + cdef object PyErr_SetFromErrno(object type) ctypedef enum PyGILState_STATE: PyGILState_LOCKED Modified: lxml/trunk/src/lxml/serializer.pxi ============================================================================== --- lxml/trunk/src/lxml/serializer.pxi (original) +++ lxml/trunk/src/lxml/serializer.pxi Thu Dec 7 08:59:52 2006 @@ -166,6 +166,8 @@ filename8 = _encodeFilename(f) c_buffer = tree.xmlOutputBufferCreateFilename( _cstr(filename8), enchandler, 0) + if c_buffer is NULL: + python.PyErr_SetFromErrno(IOError) state = python.PyEval_SaveThread() elif hasattr(f, 'write'): writer = _FilelikeWriter(f) @@ -217,10 +219,11 @@ writer._exc_context._raise_if_stored() if bytes < 0: - if writer is not None and len(writer.error_log): - message = writer.error_log[0].message - else: - message = "C14N failed" + message = "C14N failed" + if writer is not None: + errors = writer.error_log + if len(errors): + message = errors[0].message raise C14NError, message # dump node to file (mainly for debug) Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Thu Dec 7 08:59:52 2006 @@ -623,6 +623,14 @@ self.assertEquals( 'This is a test.' % (i, i), canonicalize(data)) + + def test_write_fail(self): + ElementTree = self.etree.ElementTree + XML = self.etree.XML + + tree = ElementTree( XML('This is a test.') ) + self.assertRaises(IOError, tree.write, + "definitely////\\-\\nonexisting\\-\\////FILE") # this could trigger a crash, apparently because the document # reference was prematurely garbage collected From scoder at codespeak.net Tue Dec 12 10:44:16 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 12 Dec 2006 10:44:16 +0100 (CET) Subject: [Lxml-checkins] r35615 - lxml/branch/nscleanup/src/lxml Message-ID: <20061212094416.ABB4B10070@code0.codespeak.net> Author: scoder Date: Tue Dec 12 10:44:12 2006 New Revision: 35615 Modified: lxml/branch/nscleanup/src/lxml/proxy.pxi Log: fix for namespace cleanup Modified: lxml/branch/nscleanup/src/lxml/proxy.pxi ============================================================================== --- lxml/branch/nscleanup/src/lxml/proxy.pxi (original) +++ lxml/branch/nscleanup/src/lxml/proxy.pxi Tue Dec 12 10:44:12 2006 @@ -45,30 +45,31 @@ # always call _destroyFakeDoc() after use! cdef xmlNode* c_child cdef xmlNode* c_root + cdef xmlNode* c_new_root cdef xmlDoc* c_doc c_root = tree.xmlDocGetRootElement(c_base_doc) if c_root is c_node: # already the root node return c_base_doc - c_doc = _copyDoc(c_base_doc, 0) # non recursive! - c_root = tree.xmlDocCopyNode(c_node, c_doc, 2) # non recursive! - tree.xmlDocSetRootElement(c_doc, c_root) - - c_root.children = c_node.children - c_root.last = c_node.last - c_root.next = c_root.prev = c_root.parent = NULL + c_doc = _copyDoc(c_base_doc, 0) # non recursive! + c_new_root = tree.xmlDocCopyNode(c_node, c_doc, 2) # non recursive! + tree.xmlDocSetRootElement(c_doc, c_new_root) + + c_new_root.children = c_node.children + c_new_root.last = c_node.last + c_new_root.next = c_new_root.prev = c_new_root.parent = NULL # store original node c_doc._private = c_node # divert parent pointers of children - c_child = c_root.children + c_child = c_new_root.children while c_child is not NULL: - c_child.parent = c_root + c_child.parent = c_new_root c_child = c_child.next - c_doc.children = c_root + c_doc.children = c_new_root return c_doc cdef void _destroyFakeDoc(xmlDoc* c_base_doc, xmlDoc* c_doc): @@ -219,20 +220,27 @@ c_cache_last = 0 while c_element is not NULL: - # remove namespaces defined here if already known in ancestors + # remove namespaces defined here that are known in the new ancestors if c_element.nsDef is not NULL: - while c_element.nsDef is not NULL and \ - tree.xmlSearchNsByHref(c_element.doc, c_element.parent, - c_element.nsDef.href) is not NULL: + while c_element.nsDef is not NULL: + c_ns = tree.xmlSearchNsByHref( + c_element.doc, c_element.parent, c_element.nsDef.href) + if c_ns is NULL: + break c_element.nsDef = c_element.nsDef.next if c_element.nsDef is not NULL: - c_ns = c_element.nsDef - while c_ns.next is not NULL: - if tree.xmlSearchNsByHref(c_element.doc, c_element.parent, - c_ns.next.href) is not NULL: - c_ns.next = c_ns.next.next + c_new_ns = c_element.nsDef + while c_new_ns.next is not NULL: + if c_new_ns.next is not c_element.ns: + c_ns = tree.xmlSearchNsByHref( + c_element.doc, c_element.parent, c_new_ns.next.href) + if c_ns is not NULL: + # not known or at least not different + c_new_ns.next = c_new_ns.next.next + else: + c_new_ns = c_new_ns.next else: - c_ns = c_ns.next + c_new_ns = c_new_ns.next # make sure the namespace of an element and its attributes is declared # in this document From scoder at codespeak.net Tue Dec 12 10:44:56 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 12 Dec 2006 10:44:56 +0100 (CET) Subject: [Lxml-checkins] r35616 - lxml/branch/nscleanup/src/lxml/tests Message-ID: <20061212094456.27DC010070@code0.codespeak.net> Author: scoder Date: Tue Dec 12 10:44:53 2006 New Revision: 35616 Modified: lxml/branch/nscleanup/src/lxml/tests/test_etree.py Log: cleanup, new test case for namespace serialisation (fails) Modified: lxml/branch/nscleanup/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/nscleanup/src/lxml/tests/test_etree.py (original) +++ lxml/branch/nscleanup/src/lxml/tests/test_etree.py Tue Dec 12 10:44:53 2006 @@ -907,16 +907,13 @@ e1.prefix) self.assertEquals( None, - e2.prefix) + e1[0].prefix) self.assertEquals( '{http://ns.infrae.com/foo}bar', e1.tag) self.assertEquals( '{http://ns.infrae.com/foo}bar', - e2.tag) - - print etree.tostring(e1) - print etree.tostring(e2) + e1[0].tag) def test_namespaces_copy_element(self): etree = self.etree @@ -940,13 +937,7 @@ '{http://ns.infrae.com/foo}bar', e2.tag) - print etree.tostring(e1) - print etree.tostring(e2) - def test_namespaces_reuse_after_move(self): - Element = self.etree.Element - ElementTree = self.etree.ElementTree - ns_href = "http://a.b.c" one = self.etree.parse( StringIO('' % ns_href)) @@ -962,6 +953,20 @@ '' % ns_href, self.etree.tostring(two)) + def test_namespaces_after_serialize(self): + parse = self.etree.parse + tostring = self.etree.tostring + + ns_href = "http://a.b.c" + one = parse( + StringIO('' % ns_href)) + baz = one.getroot()[0][0] + + print tostring(baz) + parsed = parse(StringIO( tostring(baz) )).getroot() + + self.assertEquals('{%s}baz' % ns_href, parsed.tag) + def test_element_nsmap(self): etree = self.etree From scoder at codespeak.net Tue Dec 12 10:45:24 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 12 Dec 2006 10:45:24 +0100 (CET) Subject: [Lxml-checkins] r35617 - lxml/branch/nscleanup/src/lxml Message-ID: <20061212094524.5ACA510070@code0.codespeak.net> Author: scoder Date: Tue Dec 12 10:45:21 2006 New Revision: 35617 Modified: lxml/branch/nscleanup/src/lxml/parser.pxi Log: enable COMPACT parsing Modified: lxml/branch/nscleanup/src/lxml/parser.pxi ============================================================================== --- lxml/branch/nscleanup/src/lxml/parser.pxi (original) +++ lxml/branch/nscleanup/src/lxml/parser.pxi Tue Dec 12 10:45:21 2006 @@ -614,7 +614,8 @@ cdef int _XML_DEFAULT_PARSE_OPTIONS _XML_DEFAULT_PARSE_OPTIONS = ( xmlparser.XML_PARSE_NOENT | - xmlparser.XML_PARSE_NOCDATA + xmlparser.XML_PARSE_NOCDATA | + xmlparser.XML_PARSE_COMPACT ) cdef class XMLParser(_BaseParser): @@ -754,7 +755,8 @@ ############################################################ cdef int _HTML_DEFAULT_PARSE_OPTIONS -_HTML_DEFAULT_PARSE_OPTIONS = 0 +_HTML_DEFAULT_PARSE_OPTIONS = \ + htmlparser.HTML_PARSE_COMPACT cdef class HTMLParser(_BaseParser): """The HTML parser. This parser allows reading HTML into a normal XML