[Lxml-checkins] r54448 - in lxml/trunk: . src/lxml

scoder at codespeak.net scoder at codespeak.net
Mon May 5 21:43:10 CEST 2008


Author: scoder
Date: Mon May  5 21:43:09 2008
New Revision: 54448

Modified:
   lxml/trunk/   (props changed)
   lxml/trunk/CHANGES.txt
   lxml/trunk/src/lxml/apihelpers.pxi
   lxml/trunk/src/lxml/lxml.objectify.pyx
   lxml/trunk/src/lxml/objectpath.pxi
Log:
 r4161 at delle:  sbehnel | 2008-05-05 09:54:55 +0200
 special node matcher for objectify, exploits the fact that all node names come from the document dictionary


Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt	(original)
+++ lxml/trunk/CHANGES.txt	Mon May  5 21:43:09 2008
@@ -19,6 +19,9 @@
 Other changes
 -------------
 
+* Up to several times faster attribute access (i.e. tree traversal) in
+  lxml.objectify.
+
 
 2.1beta2 (2008-05-02)
 =====================

Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi	(original)
+++ lxml/trunk/src/lxml/apihelpers.pxi	Mon May  5 21:43:09 2008
@@ -696,8 +696,8 @@
     elif c_href is NULL:
         if _getNs(c_node) is not NULL:
             return 0
-        return cstd.strcmp(c_node.name, c_name) == 0
-    elif cstd.strcmp(c_node.name, c_name) == 0:
+        return c_node.name == c_name or cstd.strcmp(c_node.name, c_name) == 0
+    elif c_node.name == c_name or cstd.strcmp(c_node.name, c_name) == 0:
         c_node_href = _getNs(c_node)
         if c_node_href is NULL:
             return c_href[0] == c'\0'

Modified: lxml/trunk/src/lxml/lxml.objectify.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.objectify.pyx	(original)
+++ lxml/trunk/src/lxml/lxml.objectify.pyx	Mon May  5 21:43:09 2008
@@ -385,6 +385,17 @@
             prefix = '.'.join(prefix)
         return _buildDescendantPaths(self._c_node, prefix)
 
+cdef inline bint _tagMatches(tree.xmlNode* c_node, char* c_href, char* c_name):
+    cdef char* c_node_href
+    if c_node.name != c_name:
+        return 0
+    if c_href == NULL:
+        return 1
+    c_node_href = tree._getNs(c_node)
+    if c_node_href == NULL:
+        return c_href[0] == c'\0'
+    return cstd.strcmp(c_node_href, c_href) == 0
+
 cdef Py_ssize_t _countSiblings(tree.xmlNode* c_start_node):
     cdef tree.xmlNode* c_node
     cdef char* c_href
@@ -396,13 +407,13 @@
     c_node = c_start_node.next
     while c_node is not NULL:
         if c_node.type == tree.XML_ELEMENT_NODE and \
-               cetree.tagMatches(c_node, c_href, c_tag):
+               _tagMatches(c_node, c_href, c_tag):
             count = count + 1
         c_node = c_node.next
     c_node = c_start_node.prev
     while c_node is not NULL:
         if c_node.type == tree.XML_ELEMENT_NODE and \
-               cetree.tagMatches(c_node, c_href, c_tag):
+               _tagMatches(c_node, c_href, c_tag):
             count = count + 1
         c_node = c_node.prev
     return count
@@ -418,7 +429,7 @@
         next = cetree.previousElement
     while c_node is not NULL:
         if c_node.type == tree.XML_ELEMENT_NODE and \
-               cetree.tagMatches(c_node, href, name):
+               _tagMatches(c_node, href, name):
             index = index - 1
             if index < 0:
                 return c_node
@@ -430,9 +441,12 @@
     cdef tree.xmlNode* c_node
     cdef char* c_href
     cdef char* c_tag
-    ns, tag = cetree.getNsTag(tag)
-    c_tag = _cstr(tag)
     c_node = parent._c_node
+    ns, tag = cetree.getNsTag(tag)
+    c_tag = tree.xmlDictExists(
+        c_node.doc.dict, _cstr(tag), python.PyString_GET_SIZE(tag))
+    if c_tag is NULL:
+        return None
     if ns is None:
         c_href = tree._getNs(c_node)
     else:

Modified: lxml/trunk/src/lxml/objectpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/objectpath.pxi	(original)
+++ lxml/trunk/src/lxml/objectpath.pxi	Mon May  5 21:43:09 2008
@@ -206,7 +206,11 @@
         c_path = c_path + 1
         if c_path[0].href is not NULL:
             c_href = c_path[0].href # otherwise: keep parent namespace
-        c_name = c_path[0].name
+        c_name = tree.xmlDictExists(c_node.doc.dict, c_path[0].name, -1)
+        if c_name is NULL:
+            c_name = c_path[0].name
+            c_node = NULL
+            break
         c_index = c_path[0].index
 
         if c_index < 0:
@@ -253,14 +257,17 @@
         c_path = c_path + 1
         if c_path[0].href is not NULL:
             c_href = c_path[0].href # otherwise: keep parent namespace
-        c_name = c_path[0].name
         c_index = c_path[0].index
-
-        if c_index < 0:
-            c_child = c_node.last
+        c_name = tree.xmlDictExists(c_node.doc.dict, c_path[0].name, -1)
+        if c_name is NULL:
+            c_name = c_path[0].name
+            c_child = NULL
         else:
-            c_child = c_node.children
-        c_child = _findFollowingSibling(c_child, c_href, c_name, c_index)
+            if c_index < 0:
+                c_child = c_node.last
+            else:
+                c_child = c_node.children
+            c_child = _findFollowingSibling(c_child, c_href, c_name, c_index)
 
         if c_child is not NULL:
             c_node = c_child


More information about the lxml-checkins mailing list