[Lxml-checkins] r35187 - in lxml/branch/nscleanup/src/lxml: . tests

scoder at codespeak.net scoder at codespeak.net
Fri Dec 1 09:55:25 CET 2006


Author: scoder
Date: Fri Dec  1 09:55:17 2006
New Revision: 35187

Modified:
   lxml/branch/nscleanup/src/lxml/apihelpers.pxi
   lxml/branch/nscleanup/src/lxml/etree.pyx
   lxml/branch/nscleanup/src/lxml/etree_defs.h
   lxml/branch/nscleanup/src/lxml/proxy.pxi
   lxml/branch/nscleanup/src/lxml/public-api.pxi
   lxml/branch/nscleanup/src/lxml/tests/test_etree.py
   lxml/branch/nscleanup/src/lxml/tree.pxd
Log:
preliminary implementation with 3 failing test cases

Modified: lxml/branch/nscleanup/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/branch/nscleanup/src/lxml/apihelpers.pxi	(original)
+++ lxml/branch/nscleanup/src/lxml/apihelpers.pxi	Fri Dec  1 09:55:17 2006
@@ -147,7 +147,7 @@
             if attr_ns_utf is None:
                 tree.xmlNewProp(c_node, _cstr(attr_name_utf), _cstr(value_utf))
             else:
-                c_ns = doc._findOrBuildNodeNs(c_node, _cstr(attr_ns_utf))
+                c_ns = doc._findOrBuildNodeNs(c_node, _cstr(attr_ns_utf), NULL)
                 tree.xmlNewNsProp(c_node, c_ns,
                                   _cstr(attr_name_utf), _cstr(value_utf))
 
@@ -203,7 +203,8 @@
     if ns is None:
         tree.xmlSetProp(element._c_node, c_tag, c_value)
     else:
-        c_ns = element._doc._findOrBuildNodeNs(element._c_node, _cstr(ns))
+        c_ns = element._doc._findOrBuildNodeNs(element._c_node,
+                                               _cstr(ns), NULL)
         tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value)
     return 0
 

Modified: lxml/branch/nscleanup/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/nscleanup/src/lxml/etree.pyx	(original)
+++ lxml/branch/nscleanup/src/lxml/etree.pyx	Fri Dec  1 09:55:17 2006
@@ -298,24 +298,46 @@
         self._ns_counter = self._ns_counter + 1
         return ns
 
-    cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, char* href):
+    cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node,
+                                   char* c_href, char* c_prefix):
         """Get or create namespace structure for a node.
         """
+        cdef int i
         cdef xmlNs* c_ns
+        cdef xmlNs* c_doc_ns
         # look for existing ns
-        c_ns = tree.xmlSearchNsByHref(self._c_doc, c_node, href)
+        c_ns = tree.xmlSearchNsByHref(self._c_doc, c_node, c_href)
         if c_ns is not NULL:
             return c_ns
-        # create ns if existing ns cannot be found
-        # try to simulate ElementTree's namespace prefix creation
-        prefix = self.buildNewPrefix()
-        c_ns = tree.xmlNewNs(c_node, href, _cstr(prefix))
+
+        if c_prefix is NULL or \
+               tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is not NULL:
+            # try to simulate ElementTree's namespace prefix creation
+            for i from 0 <= i < 10000:
+                prefix = self.buildNewPrefix()
+                c_prefix = _cstr(prefix)
+                # make sure it's not used already
+                if tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is NULL:
+                    break
+            if i >= 10000:
+                # XXX too many prefixes in use - this is pretty bad!
+                return NULL
+
+        c_ns = tree.xmlNewNs(c_node, c_href, c_prefix)
+        return c_ns
+        c_doc_ns = self._c_doc.oldNs
+        if c_doc_ns is NULL:
+            # this will create the XML namespace:
+            c_doc_ns = tree.xmlSearchNs(self._c_doc, c_node, 'xml')
+        while c_doc_ns.next is not NULL:
+            c_doc_ns = c_doc_ns.next
+        c_doc_ns.next = c_ns
         return c_ns
 
     cdef void _setNodeNs(self, xmlNode* c_node, char* href):
         "Lookup namespace structure and set it for the node."
         cdef xmlNs* c_ns
-        c_ns = self._findOrBuildNodeNs(c_node, href)
+        c_ns = self._findOrBuildNodeNs(c_node, href, NULL)
         tree.xmlSetNs(c_node, c_ns)
 
     cdef void _setNodeNamespaces(self, xmlNode* c_node,

Modified: lxml/branch/nscleanup/src/lxml/etree_defs.h
==============================================================================
--- lxml/branch/nscleanup/src/lxml/etree_defs.h	(original)
+++ lxml/branch/nscleanup/src/lxml/etree_defs.h	Fri Dec  1 09:55:17 2006
@@ -59,6 +59,11 @@
 	 ((c_node)->type == XML_COMMENT_NODE) || \
          ((c_node)->type == XML_PI_NODE))
 
+#define _isElementOrXInclude(c_node) \
+        (_isElement(c_node)                     || \
+         ((c_node)->type == XML_XINCLUDE_START) || \
+         ((c_node)->type == XML_XINCLUDE_END))
+
 #define _getNs(c_node) \
         (((c_node)->ns == 0) ? 0 : ((c_node)->ns->href))
 

Modified: lxml/branch/nscleanup/src/lxml/proxy.pxi
==============================================================================
--- lxml/branch/nscleanup/src/lxml/proxy.pxi	(original)
+++ lxml/branch/nscleanup/src/lxml/proxy.pxi	Fri Dec  1 09:55:17 2006
@@ -169,10 +169,11 @@
     tree below (including the current node). It also reconciliates
     namespaces so they're correct inside the new environment.
     """
-    tree.xmlReconciliateNs(doc._c_doc, node._c_node)
-    if node._doc is not doc:
-        node._doc = doc
-        changeDocumentBelow(node._c_node, doc)
+    _moveNodeToDocument(node, doc)
+##     tree.xmlReconciliateNs(doc._c_doc, node._c_node)
+##     if node._doc is not doc:
+##         node._doc = doc
+##         changeDocumentBelow(node._c_node, doc)
 
 cdef void changeDocumentBelow(xmlNode* c_parent, _Document doc):
     """Update the Python references in the tree below the node.
@@ -187,3 +188,136 @@
     if c_node._private is not NULL:
         (<_NodeBase>c_node._private)._doc = doc
     tree.END_FOR_EACH_ELEMENT_FROM(c_node)
+
+
+cdef void _moveNodeToDocument(_NodeBase node, _Document doc):
+    """Fix the xmlNs pointers of a node and its subtree that were moved.
+
+    Mainly copied from libxml2's xmlReconciliateNs().  Expects libxml2 doc
+    pointers of node to be correct already, but fixes _Document references.
+    """
+    cdef xmlDoc* c_doc
+    cdef xmlNode* c_element
+    cdef xmlNode* c_start_node
+    cdef xmlNode* c_node
+    cdef xmlNs** c_ns_new_cache
+    cdef xmlNs** c_ns_old_cache
+    cdef xmlNs* c_ns
+    cdef xmlNs* c_new_ns
+    cdef cstd.size_t i, c_cache_size, c_cache_last
+
+    c_element = node._c_node
+    c_doc = c_element.doc
+
+    if not tree._isElementOrXInclude(c_element):
+        return
+
+    c_start_node = c_element
+    c_ns_new_cache = NULL
+    c_ns_old_cache = NULL
+    c_cache_size = 0
+    c_cache_last = 0
+
+    while c_element is not NULL:
+        # remove namespaces defined here if already known in ancestors
+        if c_element.nsDef is not NULL:
+            while c_element.nsDef is not NULL and \
+                      tree.xmlSearchNsByHref(c_element.doc, c_element.parent,
+                                             c_element.nsDef.href) is not NULL:
+                c_element.nsDef = c_element.nsDef.next
+            if c_element.nsDef is not NULL:
+                c_ns = c_element.nsDef
+                while c_ns.next is not NULL:
+                    if tree.xmlSearchNsByHref(c_element.doc, c_element.parent,
+                                              c_ns.next.href) is not NULL:
+                        c_ns.next = c_ns.next.next
+                    else:
+                        c_ns = c_ns.next
+
+        # make sure ns declaration of element and its attributes is stored
+        # in this document
+        c_node = c_element
+        while c_node is not NULL:
+            if c_node.ns is not NULL:
+                c_ns = c_node.ns
+                for i from 0 <= i < c_cache_last:
+                    if c_ns is c_ns_old_cache[i]:
+                        c_node.ns = c_ns_new_cache[i]
+                        c_ns = NULL
+                        break
+
+                if c_ns is not NULL:
+                    # not in cache, must find a replacement from this document
+                    c_new_ns = doc._findOrBuildNodeNs(c_node, c_ns.href, c_ns.prefix)
+                    print "FOUND:", c_new_ns.href
+                    if c_cache_last >= c_cache_size:
+                        # must resize cache
+                        if c_cache_size == 0:
+                            c_cache_size = 20
+                        else:
+                            c_cache_size = c_cache_size * 2
+                        c_ns_new_cache = <xmlNs**> python.PyMem_Realloc(
+                            c_ns_new_cache, c_cache_size * sizeof(xmlNs*))
+                        if c_ns_new_cache is NULL:
+                            python.PyMem_Free(c_ns_old_cache)
+                            python.PyErr_NoMemory()
+                        c_ns_old_cache = <xmlNs**> python.PyMem_Realloc(
+                            c_ns_old_cache, c_cache_size * sizeof(xmlNs*))
+                        if c_ns_old_cache is NULL:
+                            python.PyMem_Free(c_ns_new_cache)
+                            python.PyErr_NoMemory()
+                    c_ns_new_cache[c_cache_last] = c_new_ns
+                    c_ns_old_cache[c_cache_last] = c_node.ns
+                    c_cache_last = c_cache_last + 1
+                    c_node.ns = c_ns
+            if c_node is c_element:
+                c_node = <xmlNode*>c_element.properties
+            else:
+                c_node = c_node.next
+                
+        # traverse to next element, start with children
+        c_node = c_element.children
+        while c_node is not NULL and \
+              not tree._isElementOrXInclude(c_node):
+            c_node = c_node.next
+
+        if c_node is NULL:
+            # no children => back off and continue with siblings and parents
+
+            # fix _Document reference (may dealloc the original document!)
+            if c_element._private is not NULL:
+                (<_NodeBase>c_element._private)._doc = doc
+
+            if c_element is c_start_node:
+                break
+
+            # continue with siblings
+            c_node = c_element.next
+            while (c_node is not NULL and
+                   not tree._isElementOrXInclude(c_node)):
+                c_node = c_node.next
+            # if that didn't help, back off through parents' siblings
+            while c_node is NULL:
+                c_element = c_element.parent
+                if c_element is NULL or not tree._isElementOrXInclude(c_element):
+                    break
+
+                # fix _Document reference (may dealloc the original document!)
+                if c_element._private is not NULL:
+                    (<_NodeBase>c_element._private)._doc = doc
+
+                if c_element is c_start_node:
+                    break
+                # parents already done -> look for their siblings
+                c_node = c_element.next
+                while (c_node is not NULL and
+                       not tree._isElementOrXInclude(c_node)):
+                    c_node = c_node.next
+        if c_node is c_start_node:
+            break
+        c_element = c_node
+
+    if c_ns_new_cache is not NULL:
+        python.PyMem_Free(c_ns_new_cache)
+    if c_ns_old_cache is not NULL:
+        python.PyMem_Free(c_ns_old_cache)

Modified: lxml/branch/nscleanup/src/lxml/public-api.pxi
==============================================================================
--- lxml/branch/nscleanup/src/lxml/public-api.pxi	(original)
+++ lxml/branch/nscleanup/src/lxml/public-api.pxi	Fri Dec  1 09:55:17 2006
@@ -138,4 +138,4 @@
                                           char* href) except NULL:
     if doc is None:
         raise TypeError
-    return doc._findOrBuildNodeNs(c_node, href)
+    return doc._findOrBuildNodeNs(c_node, href, NULL)

Modified: lxml/branch/nscleanup/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/branch/nscleanup/src/lxml/tests/test_etree.py	(original)
+++ lxml/branch/nscleanup/src/lxml/tests/test_etree.py	Fri Dec  1 09:55:17 2006
@@ -893,6 +893,56 @@
             '<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>',
             self._writeElement(e))
 
+    def test_namespaces_default_copy_element(self):
+        etree = self.etree
+
+        r = {None: 'http://ns.infrae.com/foo'}
+        e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
+        e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
+
+        e1.append(e2)
+
+        self.assertEquals(
+            None,
+            e1.prefix)
+        self.assertEquals(
+            None,
+            e2.prefix)
+        self.assertEquals(
+            '{http://ns.infrae.com/foo}bar',
+            e1.tag)
+        self.assertEquals(
+            '{http://ns.infrae.com/foo}bar',
+            e2.tag)
+
+        print etree.tostring(e1)
+        print etree.tostring(e2)
+
+    def test_namespaces_copy_element(self):
+        etree = self.etree
+
+        r = {None: 'http://ns.infrae.com/BAR'}
+        e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
+        e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
+
+        e1.append(e2)
+
+        self.assertEquals(
+            None,
+            e1.prefix)
+        self.assertNotEquals(
+            None,
+            e2.prefix)
+        self.assertEquals(
+            '{http://ns.infrae.com/BAR}bar',
+            e1.tag)
+        self.assertEquals(
+            '{http://ns.infrae.com/foo}bar',
+            e2.tag)
+
+        print etree.tostring(e1)
+        print etree.tostring(e2)
+
     def test_element_nsmap(self):
         etree = self.etree
 

Modified: lxml/branch/nscleanup/src/lxml/tree.pxd
==============================================================================
--- lxml/branch/nscleanup/src/lxml/tree.pxd	(original)
+++ lxml/branch/nscleanup/src/lxml/tree.pxd	Fri Dec  1 09:55:17 2006
@@ -77,7 +77,6 @@
         XML_NAMESPACE_DECL=         18
         XML_XINCLUDE_START=         19
         XML_XINCLUDE_END=           20
-
     
     ctypedef struct xmlNs:
         char* href
@@ -193,7 +192,7 @@
     cdef xmlAttr* xmlHasProp(xmlNode* node, char* name)
     cdef xmlAttr* xmlHasNsProp(xmlNode* node, char* name, char* nameSpace)
     cdef char* xmlNodeGetContent(xmlNode* cur)
-    cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, char* nameSpace)
+    cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, char* prefix)
     cdef xmlNs* xmlSearchNsByHref(xmlDoc* doc, xmlNode* node, char* href)
     cdef int xmlIsBlankNode(xmlNode* node)
     cdef void xmlElemDump(FILE* f, xmlDoc* doc, xmlNode* cur)
@@ -206,6 +205,7 @@
     cdef xmlNode* xmlCopyNode(xmlNode* node, int extended)
     cdef xmlNode* xmlDocCopyNode(xmlNode* node, xmlDoc* doc, int extended)
     cdef int xmlReconciliateNs(xmlDoc* doc, xmlNode* tree)
+    cdef xmlNs* xmlNewReconciliedNs(xmlDoc* doc, xmlNode* tree, xmlNs* ns)
     cdef xmlBuffer* xmlBufferCreate()
     cdef char* xmlBufferContent(xmlBuffer* buf)
     cdef int xmlBufferLength(xmlBuffer* buf)
@@ -260,6 +260,7 @@
 
 cdef extern from "etree_defs.h":
     cdef int _isElement(xmlNode* node)
+    cdef int _isElementOrXInclude(xmlNode* node)
     cdef char* _getNs(xmlNode* node)
     cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* tree_top,
                                           xmlNode* start_node, int inclusive)


More information about the lxml-checkins mailing list