[Lxml-checkins] r54488 - in lxml/trunk: . src/lxml

scoder at codespeak.net scoder at codespeak.net
Tue May 6 20:39:49 CEST 2008


Author: scoder
Date: Tue May  6 20:39:46 2008
New Revision: 54488

Modified:
   lxml/trunk/   (props changed)
   lxml/trunk/src/lxml/parser.pxi
Log:
 r4168 at delle:  sbehnel | 2008-05-05 23:41:19 +0200
 fix HTML names to always come from the dictionary


Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi	(original)
+++ lxml/trunk/src/lxml/parser.pxi	Tue May  6 20:39:46 2008
@@ -520,6 +520,8 @@
 
         if well_formed:
             __GLOBAL_PARSER_CONTEXT.initDocDict(result)
+            if c_ctxt.html:
+                _fixHtmlDictNames(result)
         else:
             # free broken document
             tree.xmlFreeDoc(result)
@@ -540,6 +542,31 @@
         result.URL = tree.xmlStrdup(_cstr(filename))
     return result
 
+cdef int _fixHtmlDictNames(xmlDoc* c_doc) except -1:  # swaps element/attribute names not owned by c_doc.dict for dict strings; -1 signals an error
+    cdef char* c_name
+    cdef xmlNode* c_attr
+    cdef xmlNode* c_node = c_doc.children
+    tree.BEGIN_FOR_EACH_ELEMENT_FROM(<xmlNode*>c_doc, c_node, 0)  # NOTE(review): third argument is 0 - confirm the walk still visits c_doc.children itself
+    if c_node.type == tree.XML_ELEMENT_NODE:
+        if not tree.xmlDictOwns(c_doc.dict, c_node.name):  # tag name is not a string of the document dict
+            c_name = tree.xmlDictLookup(c_doc.dict, c_node.name, -1)  # libxml2: returns the dict's copy (added if missing); -1 = compute length
+            if c_name is NULL:
+                python.PyErr_NoMemory()  # a NULL lookup result is reported as MemoryError
+                return -1
+            tree.xmlFree(c_node.name)  # free the old name string before swapping in the dict string
+            c_node.name = c_name
+        c_attr = <xmlNode*>c_node.properties  # head of this element's attribute list, walked via .next
+        while c_attr is not NULL:
+            if not tree.xmlDictOwns(c_doc.dict, c_attr.name):  # same replacement as for the tag name above
+                c_name = tree.xmlDictLookup(c_doc.dict, c_attr.name, -1)
+                if c_name is NULL:
+                    python.PyErr_NoMemory()
+                    return -1
+                tree.xmlFree(c_attr.name)
+                c_attr.name = c_name
+            c_attr = c_attr.next
+    tree.END_FOR_EACH_ELEMENT_FROM(c_node)  # closes the traversal; the success path has no explicit return
+
 
 cdef class _BaseParser:
     cdef ElementClassLookup _class_lookup


More information about the lxml-checkins mailing list