[Lxml-checkins] r39362 - in lxml/trunk: . src/lxml src/lxml/tests
scoder at codespeak.net
scoder at codespeak.net
Sat Feb 24 17:16:19 CET 2007
Author: scoder
Date: Sat Feb 24 17:16:16 2007
New Revision: 39362
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/proxy.pxi
lxml/trunk/src/lxml/tests/test_etree.py
Log:
merged replacement for libxml2's xmlReconciliateNs() from the 'nscleanup' branch
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sat Feb 24 17:16:16 2007
@@ -20,6 +20,9 @@
Other changes
-------------
+* optimised replacement for libxml2's xmlReconciliateNs(). This gives lxml
+ better handling of namespaces when moving elements between documents.
+
* major restructuring in the documentation
Modified: lxml/trunk/src/lxml/proxy.pxi
==============================================================================
--- lxml/trunk/src/lxml/proxy.pxi (original)
+++ lxml/trunk/src/lxml/proxy.pxi Sat Feb 24 17:16:16 2007
@@ -54,6 +54,7 @@
c_doc = _copyDoc(c_base_doc, 0) # non recursive!
c_root = tree.xmlDocCopyNode(c_node, c_doc, 2) # non recursive!
tree.xmlDocSetRootElement(c_doc, c_root)
+ _copyParentNamespaces(c_node, c_new_root)
c_root.children = c_node.children
c_root.last = c_node.last
@@ -90,6 +91,26 @@
c_root.children = c_root.last = NULL
tree.xmlFreeDoc(c_doc)
+cdef void _copyParentNamespaces(xmlNode* c_from_node, xmlNode* c_to_node):
+ """Copy the namespaces of all ancestors of c_from_node to c_to_node.
+
+ This is used in _fakeRootDoc() to avoid losing namespace declarations.
+ """
+ cdef xmlNode* c_parent
+ cdef xmlNs* c_ns
+ cdef xmlNs* c_new_ns
+ cdef int prefix_known
+ c_parent = c_from_node.parent
+ while c_parent is not NULL and tree._isElementOrXInclude(c_parent):
+ c_new_ns = c_parent.nsDef
+ while c_new_ns is not NULL:
+ # check if prefix is already defined
+ c_ns = tree.xmlSearchNs(c_to_node.doc, c_to_node, c_new_ns.prefix)
+ if c_ns is NULL:
+ tree.xmlNewNs(c_to_node, c_new_ns.href, c_new_ns.prefix)
+ c_new_ns = c_new_ns.next
+ c_parent = c_parent.parent
+
################################################################################
# support for freeing tree elements when proxy objects are destroyed
@@ -159,31 +180,144 @@
tree.xmlFreeDoc(c_doc)
################################################################################
-# change _Document references when a node changes documents
+# fix _Document references and namespaces when a node changes documents
cdef void moveNodeToDocument(_Element node, _Document doc):
- """For a node and all nodes below, change document.
+ """Fix the xmlNs pointers of a node and its subtree that were moved.
- A node can change document in certain operations as an XML
- subtree can move. This updates all possible proxies in the
- tree below (including the current node). It also reconciliates
- namespaces so they're correct inside the new environment.
- """
- tree.xmlReconciliateNs(doc._c_doc, node._c_node)
- if node._doc is not doc:
- node._doc = doc
- changeDocumentBelow(node._c_node, doc)
-
-cdef void changeDocumentBelow(xmlNode* c_parent, _Document doc):
- """Update the Python references in the tree below the node.
- Does not update the node itself.
-
- Note that we expect C pointers to the document to be updated already by
- libxml2.
+ Mainly copied from libxml2's xmlReconciliateNs(). Expects libxml2 doc
+ pointers of node to be correct already, but fixes _Document references.
"""
+ cdef xmlDoc* c_doc
+ cdef xmlNode* c_element
+ cdef xmlNode* c_start_node
cdef xmlNode* c_node
- c_node = c_parent.children
- tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_parent, c_node, 1)
- if c_node._private is not NULL:
- (<_Element>c_node._private)._doc = doc
- tree.END_FOR_EACH_ELEMENT_FROM(c_node)
+ cdef xmlNs** c_ns_new_cache
+ cdef xmlNs** c_ns_old_cache
+ cdef xmlNs* c_ns
+ cdef xmlNs* c_new_ns
+ cdef cstd.size_t i, c_cache_size, c_cache_last
+
+ c_element = node._c_node
+ c_doc = c_element.doc
+
+ if not tree._isElementOrXInclude(c_element):
+ return
+
+ c_start_node = c_element
+ c_ns_new_cache = NULL
+ c_ns_old_cache = NULL
+ c_cache_size = 0
+ c_cache_last = 0
+
+ while c_element is not NULL:
+ # remove namespaces defined here that are known in the new ancestors
+ if c_element.nsDef is not NULL:
+ while c_element.nsDef is not NULL:
+ c_ns = tree.xmlSearchNsByHref(
+ c_element.doc, c_element.parent, c_element.nsDef.href)
+ if c_ns is NULL:
+ break
+ c_element.nsDef = c_element.nsDef.next
+ if c_element.nsDef is not NULL:
+ c_new_ns = c_element.nsDef
+ while c_new_ns.next is not NULL:
+ if c_new_ns.next is not c_element.ns:
+ c_ns = tree.xmlSearchNsByHref(
+ c_element.doc, c_element.parent, c_new_ns.next.href)
+ if c_ns is not NULL:
+ # href already declared by a new ancestor => unlink the redundant declaration
+ c_new_ns.next = c_new_ns.next.next
+ else:
+ c_new_ns = c_new_ns.next
+ else:
+ c_new_ns = c_new_ns.next
+
+ # make sure the namespace of an element and its attributes is declared
+ # in this document
+ c_node = c_element
+ while c_node is not NULL:
+ if c_node.ns is not NULL:
+ c_ns = c_node.ns
+ for i from 0 <= i < c_cache_last:
+ if c_ns is c_ns_old_cache[i]:
+ c_node.ns = c_ns_new_cache[i]
+ c_ns = NULL
+ break
+
+ if c_ns is not NULL:
+ # not in cache, must find a replacement from this document
+ c_new_ns = doc._findOrBuildNodeNs(c_node, c_ns.href, c_ns.prefix)
+ if c_cache_last >= c_cache_size:
+ # must resize cache
+ if c_cache_size == 0:
+ c_cache_size = 20
+ else:
+ c_cache_size = c_cache_size * 2
+ c_ns_new_cache = <xmlNs**> python.PyMem_Realloc(
+ c_ns_new_cache, c_cache_size * sizeof(xmlNs*))
+ if c_ns_new_cache is NULL:
+ python.PyMem_Free(c_ns_old_cache)
+ python.PyErr_NoMemory()
+ c_ns_old_cache = <xmlNs**> python.PyMem_Realloc(
+ c_ns_old_cache, c_cache_size * sizeof(xmlNs*))
+ if c_ns_old_cache is NULL:
+ python.PyMem_Free(c_ns_new_cache)
+ python.PyErr_NoMemory()
+ c_ns_new_cache[c_cache_last] = c_new_ns
+ c_ns_old_cache[c_cache_last] = c_node.ns
+ c_cache_last = c_cache_last + 1
+ c_node.ns = c_new_ns
+ if c_node is c_element:
+ # after the element, continue with its attributes
+ c_node = <xmlNode*>c_element.properties
+ else:
+ c_node = c_node.next
+
+ # traverse to next element, start with children
+ c_node = c_element.children
+ while c_node is not NULL and \
+ not tree._isElementOrXInclude(c_node):
+ c_node = c_node.next
+
+ if c_node is NULL:
+ # no children => back off and continue with siblings and parents
+
+ # fix _Document reference (may dealloc the original document!)
+ if c_element._private is not NULL:
+ (<_NodeBase>c_element._private)._doc = doc
+
+ if c_element is c_start_node:
+ break
+
+ # continue with siblings
+ c_node = c_element.next
+ while (c_node is not NULL and
+ not tree._isElementOrXInclude(c_node)):
+ c_node = c_node.next
+ # if that didn't help, back off through parents' siblings
+ while c_node is NULL:
+ c_element = c_element.parent
+ if c_element is NULL or not tree._isElementOrXInclude(c_element):
+ break
+
+ # fix _Document reference (may dealloc the original document!)
+ if c_element._private is not NULL:
+ (<_NodeBase>c_element._private)._doc = doc
+
+ if c_element is c_start_node:
+ break
+ # parents already done -> look for their siblings
+ c_node = c_element.next
+ while (c_node is not NULL and
+ not tree._isElementOrXInclude(c_node)):
+ c_node = c_node.next
+ if c_node is c_start_node:
+ break
+ c_element = c_node
+
+ # cleanup
+ if c_ns_new_cache is not NULL:
+ python.PyMem_Free(c_ns_new_cache)
+ if c_ns_old_cache is not NULL:
+ python.PyMem_Free(c_ns_old_cache)
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Sat Feb 24 17:16:16 2007
@@ -893,6 +893,81 @@
'<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>',
self._writeElement(e))
+ def test_namespaces_default_copy_element(self):
+ etree = self.etree
+
+ r = {None: 'http://ns.infrae.com/foo'}
+ e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
+ e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
+
+ e1.append(e2)
+
+ self.assertEquals(
+ None,
+ e1.prefix)
+ self.assertEquals(
+ None,
+ e1[0].prefix)
+ self.assertEquals(
+ '{http://ns.infrae.com/foo}bar',
+ e1.tag)
+ self.assertEquals(
+ '{http://ns.infrae.com/foo}bar',
+ e1[0].tag)
+
+ def test_namespaces_copy_element(self):
+ etree = self.etree
+
+ r = {None: 'http://ns.infrae.com/BAR'}
+ e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
+ e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
+
+ e1.append(e2)
+
+ self.assertEquals(
+ None,
+ e1.prefix)
+ self.assertNotEquals(
+ None,
+ e2.prefix)
+ self.assertEquals(
+ '{http://ns.infrae.com/BAR}bar',
+ e1.tag)
+ self.assertEquals(
+ '{http://ns.infrae.com/foo}bar',
+ e2.tag)
+
+ def test_namespaces_reuse_after_move(self):
+ ns_href = "http://a.b.c"
+ one = self.etree.parse(
+ StringIO('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
+ baz = one.getroot()[0][0]
+
+ two = self.etree.parse(
+ StringIO('<root xmlns:ns="%s"/>' % ns_href))
+ two.getroot().append(baz)
+ del one # make sure the source document is deallocated
+
+ self.assertEquals('{%s}baz' % ns_href, baz.tag)
+ self.assertEquals(
+ '<root xmlns:ns="%s"><ns:baz/></root>' % ns_href,
+ self.etree.tostring(two))
+
+ def _test_namespaces_after_serialize(self):
+ # FIXME: this currently fails - fix serializer.pxi!
+ parse = self.etree.parse
+ tostring = self.etree.tostring
+
+ ns_href = "http://a.b.c"
+ one = parse(
+ StringIO('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
+ baz = one.getroot()[0][0]
+
+ print tostring(baz)
+ parsed = parse(StringIO( tostring(baz) )).getroot()
+
+ self.assertEquals('{%s}baz' % ns_href, parsed.tag)
+
def test_element_nsmap(self):
etree = self.etree
More information about the lxml-checkins
mailing list