[Lxml-checkins] r39364 - lxml/trunk/src/lxml

scoder at codespeak.net scoder at codespeak.net
Sat Feb 24 17:44:55 CET 2007


Author: scoder
Date: Sat Feb 24 17:44:53 2007
New Revision: 39364

Modified:
   lxml/trunk/src/lxml/apihelpers.pxi
   lxml/trunk/src/lxml/etree.pyx
   lxml/trunk/src/lxml/etree_defs.h
   lxml/trunk/src/lxml/parser.pxi
   lxml/trunk/src/lxml/proxy.pxi
   lxml/trunk/src/lxml/public-api.pxi
   lxml/trunk/src/lxml/tree.pxd
Log:
merged replacement for xmlReconciliateNs() from 'nscleanup' branch (part II)

Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi	(original)
+++ lxml/trunk/src/lxml/apihelpers.pxi	Sat Feb 24 17:44:53 2007
@@ -147,7 +147,7 @@
             if attr_ns_utf is None:
                 tree.xmlNewProp(c_node, _cstr(attr_name_utf), _cstr(value_utf))
             else:
-                c_ns = doc._findOrBuildNodeNs(c_node, _cstr(attr_ns_utf))
+                c_ns = doc._findOrBuildNodeNs(c_node, _cstr(attr_ns_utf), NULL)
                 tree.xmlNewNsProp(c_node, c_ns,
                                   _cstr(attr_name_utf), _cstr(value_utf))
 
@@ -203,7 +203,8 @@
     if ns is None:
         tree.xmlSetProp(element._c_node, c_tag, c_value)
     else:
-        c_ns = element._doc._findOrBuildNodeNs(element._c_node, _cstr(ns))
+        c_ns = element._doc._findOrBuildNodeNs(element._c_node,
+                                               _cstr(ns), NULL)
         tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value)
     return 0
 

Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx	(original)
+++ lxml/trunk/src/lxml/etree.pyx	Sat Feb 24 17:44:53 2007
@@ -295,24 +295,38 @@
         self._ns_counter = self._ns_counter + 1
         return ns
 
-    cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, char* href):
-        """Get or create namespace structure for a node.
+    cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node,
+                                   char* c_href, char* c_prefix):
+        """Get or create namespace structure for a node.  Reuses the prefix if
+        possible.
         """
+        cdef int i
         cdef xmlNs* c_ns
+        cdef xmlNs* c_doc_ns
         # look for existing ns
-        c_ns = tree.xmlSearchNsByHref(self._c_doc, c_node, href)
+        c_ns = tree.xmlSearchNsByHref(self._c_doc, c_node, c_href)
         if c_ns is not NULL:
             return c_ns
-        # create ns if existing ns cannot be found
-        # try to simulate ElementTree's namespace prefix creation
-        prefix = self.buildNewPrefix()
-        c_ns = tree.xmlNewNs(c_node, href, _cstr(prefix))
-        return c_ns
+
+        if c_prefix is NULL or \
+               tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is not NULL:
+            # try to simulate ElementTree's namespace prefix creation
+            for i from 0 <= i < 10000:
+                prefix = self.buildNewPrefix()
+                c_prefix = _cstr(prefix)
+                # make sure it's not used already
+                if tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is NULL:
+                    break
+            if i >= 10000:
+                # XXX too many prefixes in use - this is pretty bad!
+                return NULL
+
+        return tree.xmlNewNs(c_node, c_href, c_prefix)
 
     cdef void _setNodeNs(self, xmlNode* c_node, char* href):
         "Lookup namespace structure and set it for the node."
         cdef xmlNs* c_ns
-        c_ns = self._findOrBuildNodeNs(c_node, href)
+        c_ns = self._findOrBuildNodeNs(c_node, href, NULL)
         tree.xmlSetNs(c_node, c_ns)
 
     cdef void _setNodeNamespaces(self, xmlNode* c_node,

Modified: lxml/trunk/src/lxml/etree_defs.h
==============================================================================
--- lxml/trunk/src/lxml/etree_defs.h	(original)
+++ lxml/trunk/src/lxml/etree_defs.h	Sat Feb 24 17:44:53 2007
@@ -65,6 +65,11 @@
 	 ((c_node)->type == XML_COMMENT_NODE) || \
          ((c_node)->type == XML_PI_NODE))
 
+#define _isElementOrXInclude(c_node) \
+        (_isElement(c_node)                     || \
+         ((c_node)->type == XML_XINCLUDE_START) || \
+         ((c_node)->type == XML_XINCLUDE_END))
+
 #define _getNs(c_node) \
         (((c_node)->ns == 0) ? 0 : ((c_node)->ns->href))
 

Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi	(original)
+++ lxml/trunk/src/lxml/parser.pxi	Sat Feb 24 17:44:53 2007
@@ -628,7 +628,8 @@
 cdef int _XML_DEFAULT_PARSE_OPTIONS
 _XML_DEFAULT_PARSE_OPTIONS = (
     xmlparser.XML_PARSE_NOENT |
-    xmlparser.XML_PARSE_NOCDATA
+    xmlparser.XML_PARSE_NOCDATA |
+    xmlparser.XML_PARSE_COMPACT
     )
 
 cdef class XMLParser(_BaseParser):
@@ -768,7 +769,8 @@
 ############################################################
 
 cdef int _HTML_DEFAULT_PARSE_OPTIONS
-_HTML_DEFAULT_PARSE_OPTIONS = 0
+_HTML_DEFAULT_PARSE_OPTIONS = \
+    htmlparser.HTML_PARSE_COMPACT
 
 cdef class HTMLParser(_BaseParser):
     """The HTML parser.  This parser allows reading HTML into a normal XML

Modified: lxml/trunk/src/lxml/proxy.pxi
==============================================================================
--- lxml/trunk/src/lxml/proxy.pxi	(original)
+++ lxml/trunk/src/lxml/proxy.pxi	Sat Feb 24 17:44:53 2007
@@ -1,4 +1,4 @@
-# Proxy functions
+# Proxy functions and low level node allocation stuff
 
 # Proxies represent elements, their reference is stored in the C
 # structure of the respective node to avoid multiple instantiation of
@@ -45,31 +45,33 @@
     # always call _destroyFakeDoc() after use!
     cdef xmlNode* c_child
     cdef xmlNode* c_root
+    cdef xmlNode* c_new_root
     cdef xmlDoc*  c_doc
     c_root = tree.xmlDocGetRootElement(c_base_doc)
     if c_root is c_node:
         # already the root node
         return c_base_doc
 
-    c_doc  = _copyDoc(c_base_doc, 0)               # non recursive!
-    c_root = tree.xmlDocCopyNode(c_node, c_doc, 2) # non recursive!
-    tree.xmlDocSetRootElement(c_doc, c_root)
+    c_doc  = _copyDoc(c_base_doc, 0)                   # non recursive!
+    c_new_root = tree.xmlDocCopyNode(c_node, c_doc, 2) # non recursive!
+    tree.xmlDocSetRootElement(c_doc, c_new_root)
     _copyParentNamespaces(c_node, c_new_root)
+    _copyParentNamespaces(c_node, c_root)
 
-    c_root.children = c_node.children
-    c_root.last = c_node.last
-    c_root.next = c_root.prev = c_root.parent = NULL
+    c_new_root.children = c_node.children
+    c_new_root.last = c_node.last
+    c_new_root.next = c_new_root.prev = c_new_root.parent = NULL
 
     # store original node
     c_doc._private = c_node
 
     # divert parent pointers of children
-    c_child = c_root.children
+    c_child = c_new_root.children
     while c_child is not NULL:
-        c_child.parent = c_root
+        c_child.parent = c_new_root
         c_child = c_child.next
 
-    c_doc.children = c_root
+    c_doc.children = c_new_root
     return c_doc
 
 cdef void _destroyFakeDoc(xmlDoc* c_base_doc, xmlDoc* c_doc):
@@ -247,7 +249,8 @@
 
                 if c_ns is not NULL:
                     # not in cache, must find a replacement from this document
-                    c_new_ns = doc._findOrBuildNodeNs(c_node, c_ns.href, c_ns.prefix)
+                    c_new_ns = doc._findOrBuildNodeNs(c_node,
+                                                      c_ns.href, c_ns.prefix)
                     if c_cache_last >= c_cache_size:
                         # must resize cache
                         if c_cache_size == 0:
@@ -285,7 +288,7 @@
 
             # fix _Document reference (may dealloc the original document!)
             if c_element._private is not NULL:
-                (<_NodeBase>c_element._private)._doc = doc
+                (<_Element>c_element._private)._doc = doc
 
             if c_element is c_start_node:
                 break
@@ -303,7 +306,7 @@
 
                 # fix _Document reference (may dealloc the original document!)
                 if c_element._private is not NULL:
-                    (<_NodeBase>c_element._private)._doc = doc
+                    (<_Element>c_element._private)._doc = doc
 
                 if c_element is c_start_node:
                     break

Modified: lxml/trunk/src/lxml/public-api.pxi
==============================================================================
--- lxml/trunk/src/lxml/public-api.pxi	(original)
+++ lxml/trunk/src/lxml/public-api.pxi	Sat Feb 24 17:44:53 2007
@@ -138,4 +138,4 @@
                                           char* href) except NULL:
     if doc is None:
         raise TypeError
-    return doc._findOrBuildNodeNs(c_node, href)
+    return doc._findOrBuildNodeNs(c_node, href, NULL)

Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd	(original)
+++ lxml/trunk/src/lxml/tree.pxd	Sat Feb 24 17:44:53 2007
@@ -77,7 +77,6 @@
         XML_NAMESPACE_DECL=         18
         XML_XINCLUDE_START=         19
         XML_XINCLUDE_END=           20
-
     
     ctypedef struct xmlNs:
         char* href
@@ -193,7 +192,7 @@
     cdef xmlAttr* xmlHasProp(xmlNode* node, char* name)
     cdef xmlAttr* xmlHasNsProp(xmlNode* node, char* name, char* nameSpace)
     cdef char* xmlNodeGetContent(xmlNode* cur)
-    cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, char* nameSpace)
+    cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, char* prefix)
     cdef xmlNs* xmlSearchNsByHref(xmlDoc* doc, xmlNode* node, char* href)
     cdef int xmlIsBlankNode(xmlNode* node)
     cdef void xmlElemDump(FILE* f, xmlDoc* doc, xmlNode* cur)
@@ -206,6 +205,7 @@
     cdef xmlNode* xmlCopyNode(xmlNode* node, int extended)
     cdef xmlNode* xmlDocCopyNode(xmlNode* node, xmlDoc* doc, int extended)
     cdef int xmlReconciliateNs(xmlDoc* doc, xmlNode* tree)
+    cdef xmlNs* xmlNewReconciliedNs(xmlDoc* doc, xmlNode* tree, xmlNs* ns)
     cdef xmlBuffer* xmlBufferCreate()
     cdef char* xmlBufferContent(xmlBuffer* buf)
     cdef int xmlBufferLength(xmlBuffer* buf)
@@ -260,6 +260,7 @@
 
 cdef extern from "etree_defs.h":
     cdef int _isElement(xmlNode* node)
+    cdef int _isElementOrXInclude(xmlNode* node)
     cdef char* _getNs(xmlNode* node)
     cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* tree_top,
                                           xmlNode* start_node, int inclusive)


More information about the lxml-checkins mailing list