[Lxml-checkins] r35187 - in lxml/branch/nscleanup/src/lxml: . tests
scoder at codespeak.net
scoder at codespeak.net
Fri Dec 1 09:55:25 CET 2006
Author: scoder
Date: Fri Dec 1 09:55:17 2006
New Revision: 35187
Modified:
lxml/branch/nscleanup/src/lxml/apihelpers.pxi
lxml/branch/nscleanup/src/lxml/etree.pyx
lxml/branch/nscleanup/src/lxml/etree_defs.h
lxml/branch/nscleanup/src/lxml/proxy.pxi
lxml/branch/nscleanup/src/lxml/public-api.pxi
lxml/branch/nscleanup/src/lxml/tests/test_etree.py
lxml/branch/nscleanup/src/lxml/tree.pxd
Log:
preliminary implementation with 3 failing test cases
Modified: lxml/branch/nscleanup/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/branch/nscleanup/src/lxml/apihelpers.pxi (original)
+++ lxml/branch/nscleanup/src/lxml/apihelpers.pxi Fri Dec 1 09:55:17 2006
@@ -147,7 +147,7 @@
if attr_ns_utf is None:
tree.xmlNewProp(c_node, _cstr(attr_name_utf), _cstr(value_utf))
else:
- c_ns = doc._findOrBuildNodeNs(c_node, _cstr(attr_ns_utf))
+ c_ns = doc._findOrBuildNodeNs(c_node, _cstr(attr_ns_utf), NULL)
tree.xmlNewNsProp(c_node, c_ns,
_cstr(attr_name_utf), _cstr(value_utf))
@@ -203,7 +203,8 @@
if ns is None:
tree.xmlSetProp(element._c_node, c_tag, c_value)
else:
- c_ns = element._doc._findOrBuildNodeNs(element._c_node, _cstr(ns))
+ c_ns = element._doc._findOrBuildNodeNs(element._c_node,
+ _cstr(ns), NULL)
tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value)
return 0
Modified: lxml/branch/nscleanup/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/nscleanup/src/lxml/etree.pyx (original)
+++ lxml/branch/nscleanup/src/lxml/etree.pyx Fri Dec 1 09:55:17 2006
@@ -298,24 +298,46 @@
self._ns_counter = self._ns_counter + 1
return ns
- cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, char* href):
+ cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node,
+ char* c_href, char* c_prefix):
"""Get or create namespace structure for a node.
"""
+ cdef int i
cdef xmlNs* c_ns
+ cdef xmlNs* c_doc_ns
# look for existing ns
- c_ns = tree.xmlSearchNsByHref(self._c_doc, c_node, href)
+ c_ns = tree.xmlSearchNsByHref(self._c_doc, c_node, c_href)
if c_ns is not NULL:
return c_ns
- # create ns if existing ns cannot be found
- # try to simulate ElementTree's namespace prefix creation
- prefix = self.buildNewPrefix()
- c_ns = tree.xmlNewNs(c_node, href, _cstr(prefix))
+
+ if c_prefix is NULL or \
+ tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is not NULL:
+ # try to simulate ElementTree's namespace prefix creation
+ for i from 0 <= i < 10000:
+ prefix = self.buildNewPrefix()
+ c_prefix = _cstr(prefix)
+ # make sure it's not used already
+ if tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is NULL:
+ break
+ if i >= 10000:
+ # XXX too many prefixes in use - this is pretty bad!
+ return NULL
+
+ c_ns = tree.xmlNewNs(c_node, c_href, c_prefix)
+ return c_ns
+ c_doc_ns = self._c_doc.oldNs
+ if c_doc_ns is NULL:
+ # this will create the XML namespace:
+ c_doc_ns = tree.xmlSearchNs(self._c_doc, c_node, 'xml')
+ while c_doc_ns.next is not NULL:
+ c_doc_ns = c_doc_ns.next
+ c_doc_ns.next = c_ns
return c_ns
cdef void _setNodeNs(self, xmlNode* c_node, char* href):
"Lookup namespace structure and set it for the node."
cdef xmlNs* c_ns
- c_ns = self._findOrBuildNodeNs(c_node, href)
+ c_ns = self._findOrBuildNodeNs(c_node, href, NULL)
tree.xmlSetNs(c_node, c_ns)
cdef void _setNodeNamespaces(self, xmlNode* c_node,
Modified: lxml/branch/nscleanup/src/lxml/etree_defs.h
==============================================================================
--- lxml/branch/nscleanup/src/lxml/etree_defs.h (original)
+++ lxml/branch/nscleanup/src/lxml/etree_defs.h Fri Dec 1 09:55:17 2006
@@ -59,6 +59,11 @@
((c_node)->type == XML_COMMENT_NODE) || \
((c_node)->type == XML_PI_NODE))
+#define _isElementOrXInclude(c_node) \
+ (_isElement(c_node) || \
+ ((c_node)->type == XML_XINCLUDE_START) || \
+ ((c_node)->type == XML_XINCLUDE_END))
+
#define _getNs(c_node) \
(((c_node)->ns == 0) ? 0 : ((c_node)->ns->href))
Modified: lxml/branch/nscleanup/src/lxml/proxy.pxi
==============================================================================
--- lxml/branch/nscleanup/src/lxml/proxy.pxi (original)
+++ lxml/branch/nscleanup/src/lxml/proxy.pxi Fri Dec 1 09:55:17 2006
@@ -169,10 +169,11 @@
tree below (including the current node). It also reconciliates
namespaces so they're correct inside the new environment.
"""
- tree.xmlReconciliateNs(doc._c_doc, node._c_node)
- if node._doc is not doc:
- node._doc = doc
- changeDocumentBelow(node._c_node, doc)
+ _moveNodeToDocument(node, doc)
+## tree.xmlReconciliateNs(doc._c_doc, node._c_node)
+## if node._doc is not doc:
+## node._doc = doc
+## changeDocumentBelow(node._c_node, doc)
cdef void changeDocumentBelow(xmlNode* c_parent, _Document doc):
"""Update the Python references in the tree below the node.
@@ -187,3 +188,136 @@
if c_node._private is not NULL:
(<_NodeBase>c_node._private)._doc = doc
tree.END_FOR_EACH_ELEMENT_FROM(c_node)
+
+
+cdef void _moveNodeToDocument(_NodeBase node, _Document doc):
+ """Fix the xmlNs pointers of a node and its subtree that were moved.
+
+ Mainly copied from libxml2's xmlReconciliateNs(). Expects libxml2 doc
+ pointers of node to be correct already, but fixes _Document references.
+ """
+ cdef xmlDoc* c_doc
+ cdef xmlNode* c_element
+ cdef xmlNode* c_start_node
+ cdef xmlNode* c_node
+ cdef xmlNs** c_ns_new_cache
+ cdef xmlNs** c_ns_old_cache
+ cdef xmlNs* c_ns
+ cdef xmlNs* c_new_ns
+ cdef cstd.size_t i, c_cache_size, c_cache_last
+
+ c_element = node._c_node
+ c_doc = c_element.doc
+
+ if not tree._isElementOrXInclude(c_element):
+ return
+
+ c_start_node = c_element
+ c_ns_new_cache = NULL
+ c_ns_old_cache = NULL
+ c_cache_size = 0
+ c_cache_last = 0
+
+ while c_element is not NULL:
+ # remove namespaces defined here if already known in ancestors
+ if c_element.nsDef is not NULL:
+ while c_element.nsDef is not NULL and \
+ tree.xmlSearchNsByHref(c_element.doc, c_element.parent,
+ c_element.nsDef.href) is not NULL:
+ c_element.nsDef = c_element.nsDef.next
+ if c_element.nsDef is not NULL:
+ c_ns = c_element.nsDef
+ while c_ns.next is not NULL:
+ if tree.xmlSearchNsByHref(c_element.doc, c_element.parent,
+ c_ns.next.href) is not NULL:
+ c_ns.next = c_ns.next.next
+ else:
+ c_ns = c_ns.next
+
+ # make sure ns declaration of element and its attributes is stored
+ # in this document
+ c_node = c_element
+ while c_node is not NULL:
+ if c_node.ns is not NULL:
+ c_ns = c_node.ns
+ for i from 0 <= i < c_cache_last:
+ if c_ns is c_ns_old_cache[i]:
+ c_node.ns = c_ns_new_cache[i]
+ c_ns = NULL
+ break
+
+ if c_ns is not NULL:
+ # not in cache, must find a replacement from this document
+ c_new_ns = doc._findOrBuildNodeNs(c_node, c_ns.href, c_ns.prefix)
+ print "FOUND:", c_new_ns.href
+ if c_cache_last >= c_cache_size:
+ # must resize cache
+ if c_cache_size == 0:
+ c_cache_size = 20
+ else:
+ c_cache_size = c_cache_size * 2
+ c_ns_new_cache = <xmlNs**> python.PyMem_Realloc(
+ c_ns_new_cache, c_cache_size * sizeof(xmlNs*))
+ if c_ns_new_cache is NULL:
+ python.PyMem_Free(c_ns_old_cache)
+ python.PyErr_NoMemory()
+ c_ns_old_cache = <xmlNs**> python.PyMem_Realloc(
+ c_ns_old_cache, c_cache_size * sizeof(xmlNs*))
+ if c_ns_old_cache is NULL:
+ python.PyMem_Free(c_ns_new_cache)
+ python.PyErr_NoMemory()
+ c_ns_new_cache[c_cache_last] = c_new_ns
+ c_ns_old_cache[c_cache_last] = c_node.ns
+ c_cache_last = c_cache_last + 1
+ c_node.ns = c_ns
+ if c_node is c_element:
+ c_node = <xmlNode*>c_element.properties
+ else:
+ c_node = c_node.next
+
+ # traverse to next element, start with children
+ c_node = c_element.children
+ while c_node is not NULL and \
+ not tree._isElementOrXInclude(c_node):
+ c_node = c_node.next
+
+ if c_node is NULL:
+ # no children => back off and continue with siblings and parents
+
+ # fix _Document reference (may dealloc the original document!)
+ if c_element._private is not NULL:
+ (<_NodeBase>c_element._private)._doc = doc
+
+ if c_element is c_start_node:
+ break
+
+ # continue with siblings
+ c_node = c_element.next
+ while (c_node is not NULL and
+ not tree._isElementOrXInclude(c_node)):
+ c_node = c_node.next
+ # if that didn't help, back off through parents' siblings
+ while c_node is NULL:
+ c_element = c_element.parent
+ if c_element is NULL or not tree._isElementOrXInclude(c_element):
+ break
+
+ # fix _Document reference (may dealloc the original document!)
+ if c_element._private is not NULL:
+ (<_NodeBase>c_element._private)._doc = doc
+
+ if c_element is c_start_node:
+ break
+ # parents already done -> look for their siblings
+ c_node = c_element.next
+ while (c_node is not NULL and
+ not tree._isElementOrXInclude(c_node)):
+ c_node = c_node.next
+ if c_node is c_start_node:
+ break
+ c_element = c_node
+
+ if c_ns_new_cache is not NULL:
+ python.PyMem_Free(c_ns_new_cache)
+ if c_ns_old_cache is not NULL:
+ python.PyMem_Free(c_ns_old_cache)
Modified: lxml/branch/nscleanup/src/lxml/public-api.pxi
==============================================================================
--- lxml/branch/nscleanup/src/lxml/public-api.pxi (original)
+++ lxml/branch/nscleanup/src/lxml/public-api.pxi Fri Dec 1 09:55:17 2006
@@ -138,4 +138,4 @@
char* href) except NULL:
if doc is None:
raise TypeError
- return doc._findOrBuildNodeNs(c_node, href)
+ return doc._findOrBuildNodeNs(c_node, href, NULL)
Modified: lxml/branch/nscleanup/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/branch/nscleanup/src/lxml/tests/test_etree.py (original)
+++ lxml/branch/nscleanup/src/lxml/tests/test_etree.py Fri Dec 1 09:55:17 2006
@@ -893,6 +893,56 @@
'<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>',
self._writeElement(e))
+ def test_namespaces_default_copy_element(self):
+ etree = self.etree
+
+ r = {None: 'http://ns.infrae.com/foo'}
+ e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
+ e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
+
+ e1.append(e2)
+
+ self.assertEquals(
+ None,
+ e1.prefix)
+ self.assertEquals(
+ None,
+ e2.prefix)
+ self.assertEquals(
+ '{http://ns.infrae.com/foo}bar',
+ e1.tag)
+ self.assertEquals(
+ '{http://ns.infrae.com/foo}bar',
+ e2.tag)
+
+ print etree.tostring(e1)
+ print etree.tostring(e2)
+
+ def test_namespaces_copy_element(self):
+ etree = self.etree
+
+ r = {None: 'http://ns.infrae.com/BAR'}
+ e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
+ e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
+
+ e1.append(e2)
+
+ self.assertEquals(
+ None,
+ e1.prefix)
+ self.assertNotEquals(
+ None,
+ e2.prefix)
+ self.assertEquals(
+ '{http://ns.infrae.com/BAR}bar',
+ e1.tag)
+ self.assertEquals(
+ '{http://ns.infrae.com/foo}bar',
+ e2.tag)
+
+ print etree.tostring(e1)
+ print etree.tostring(e2)
+
def test_element_nsmap(self):
etree = self.etree
Modified: lxml/branch/nscleanup/src/lxml/tree.pxd
==============================================================================
--- lxml/branch/nscleanup/src/lxml/tree.pxd (original)
+++ lxml/branch/nscleanup/src/lxml/tree.pxd Fri Dec 1 09:55:17 2006
@@ -77,7 +77,6 @@
XML_NAMESPACE_DECL= 18
XML_XINCLUDE_START= 19
XML_XINCLUDE_END= 20
-
ctypedef struct xmlNs:
char* href
@@ -193,7 +192,7 @@
cdef xmlAttr* xmlHasProp(xmlNode* node, char* name)
cdef xmlAttr* xmlHasNsProp(xmlNode* node, char* name, char* nameSpace)
cdef char* xmlNodeGetContent(xmlNode* cur)
- cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, char* nameSpace)
+ cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, char* prefix)
cdef xmlNs* xmlSearchNsByHref(xmlDoc* doc, xmlNode* node, char* href)
cdef int xmlIsBlankNode(xmlNode* node)
cdef void xmlElemDump(FILE* f, xmlDoc* doc, xmlNode* cur)
@@ -206,6 +205,7 @@
cdef xmlNode* xmlCopyNode(xmlNode* node, int extended)
cdef xmlNode* xmlDocCopyNode(xmlNode* node, xmlDoc* doc, int extended)
cdef int xmlReconciliateNs(xmlDoc* doc, xmlNode* tree)
+ cdef xmlNs* xmlNewReconciliedNs(xmlDoc* doc, xmlNode* tree, xmlNs* ns)
cdef xmlBuffer* xmlBufferCreate()
cdef char* xmlBufferContent(xmlBuffer* buf)
cdef int xmlBufferLength(xmlBuffer* buf)
@@ -260,6 +260,7 @@
cdef extern from "etree_defs.h":
cdef int _isElement(xmlNode* node)
+ cdef int _isElementOrXInclude(xmlNode* node)
cdef char* _getNs(xmlNode* node)
cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* tree_top,
xmlNode* start_node, int inclusive)
More information about the lxml-checkins mailing list