[Lxml-checkins] r44186 - in lxml/branch/lxml-1.3: . src/lxml src/lxml/tests

scoder at codespeak.net scoder at codespeak.net
Tue Jun 12 19:11:48 CEST 2007


Author: scoder
Date: Tue Jun 12 19:11:47 2007
New Revision: 44186

Modified:
   lxml/branch/lxml-1.3/CHANGES.txt
   lxml/branch/lxml-1.3/selftest.py
   lxml/branch/lxml-1.3/selftest2.py
   lxml/branch/lxml-1.3/src/lxml/etree.pyx
   lxml/branch/lxml-1.3/src/lxml/iterparse.pxi
   lxml/branch/lxml-1.3/src/lxml/tests/test_elementtree.py
   lxml/branch/lxml-1.3/src/lxml/xmlparser.pxd
Log:
merged in revs 43159:43235 from trunk

Modified: lxml/branch/lxml-1.3/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-1.3/CHANGES.txt	(original)
+++ lxml/branch/lxml-1.3/CHANGES.txt	Tue Jun 12 19:11:47 2007
@@ -22,6 +22,12 @@
 Bugs fixed
 ----------
 
+* More ET-compatible behaviour when deciding whether to write out an XML declaration
+
+* ``Element.attrib`` was missing ``clear()`` method
+
+* More robust error handling in ``iterparse()``
+
 * Documents lost their top-level PIs and comments on serialisation
 
 * lxml.sax failed on comments and PIs. Comments are now properly ignored and

Modified: lxml/branch/lxml-1.3/selftest.py
==============================================================================
--- lxml/branch/lxml-1.3/selftest.py	(original)
+++ lxml/branch/lxml-1.3/selftest.py	Tue Jun 12 19:11:47 2007
@@ -272,28 +272,31 @@
 ##     '<p>spam<b>egg</b></p>'
 ##     """
 
-## def parseliteral():
-##     r"""
-##     >>> element = ElementTree.XML("<html><body>text</body></html>")
-##     >>> ElementTree.ElementTree(element).write(sys.stdout)
-##     <html><body>text</body></html>
-##     >>> element = ElementTree.fromstring("<html><body>text</body></html>")
-##     >>> ElementTree.ElementTree(element).write(sys.stdout)
-##     <html><body>text</body></html>
-##     >>> print ElementTree.tostring(element)
-##     <html><body>text</body></html>
-##     >>> print ElementTree.tostring(element, "ascii")
-##     <?xml version='1.0' encoding='ascii'?>
-##     <html><body>text</body></html>
-##     >>> _, ids = ElementTree.XMLID("<html><body>text</body></html>")
-##     >>> len(ids)
-##     0
-##     >>> _, ids = ElementTree.XMLID("<html><body id='body'>text</body></html>")
-##     >>> len(ids)
-##     1
-##     >>> ids["body"].tag
-##     'body'
-##     """
+def parseliteral():
+    r"""
+    >>> element = ElementTree.XML("<html><body>text</body></html>")
+    >>> ElementTree.ElementTree(element).write(sys.stdout)
+    <html><body>text</body></html>
+    >>> element = ElementTree.fromstring("<html><body>text</body></html>")
+    >>> ElementTree.ElementTree(element).write(sys.stdout)
+    <html><body>text</body></html>
+    >>> print ElementTree.tostring(element)
+    <html><body>text</body></html>
+
+# looks different in lxml
+#    >>> print ElementTree.tostring(element, "ascii")
+#    <?xml version='1.0' encoding='ascii'?>
+#    <html><body>text</body></html>
+
+    >>> _, ids = ElementTree.XMLID("<html><body>text</body></html>")
+    >>> len(ids)
+    0
+    >>> _, ids = ElementTree.XMLID("<html><body id='body'>text</body></html>")
+    >>> len(ids)
+    1
+    >>> ids["body"].tag
+    'body'
+    """
 
 ## def simpleparsefile():
 ##     """
@@ -519,16 +522,18 @@
 
 ##     """
 
-## def xmllang():
-##     """
-##     This appears to be a problem; in underlying libxml2?
+def xmllang():
+    """
+    This appears to be a problem; in underlying libxml2?
     
-##     1) xml namespace
+    1) xml namespace
 
-##     >>> elem = ElementTree.XML("<tag xml:lang='en' />")
-##     >>> serialize(elem) # 1.1
-##     '<tag xml:lang="en" />'
-##     """
+    >>> elem = ElementTree.XML("<tag xml:lang='en' />")
+    >>> serialize(elem) # 1.1
+    '<tag xml:lang="en"/>'
+
+#   '<tag xml:lang="en" />' # ElementTree produces an extra blank
+    """
     
 def namespace():
     """

Modified: lxml/branch/lxml-1.3/selftest2.py
==============================================================================
--- lxml/branch/lxml-1.3/selftest2.py	(original)
+++ lxml/branch/lxml-1.3/selftest2.py	Tue Jun 12 19:11:47 2007
@@ -133,30 +133,30 @@
     '<tag>text<subtag>subtext</subtag></tag>'
     """
 
-## def encoding():
-##     r"""
-##     Test encoding issues.
+def encoding():
+    r"""
+    Test encoding issues.
 
-##     >>> elem = ElementTree.Element("tag")
-##     >>> elem.text = u"abc"
-##     >>> serialize(elem)
-##     '<tag>abc</tag>'
-##     >>> serialize(elem, "utf-8")
-##     '<tag>abc</tag>'
-##     >>> serialize(elem, "us-ascii")
-##     '<tag>abc</tag>'
-##     >>> serialize(elem, "iso-8859-1")
-##     "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>"
+    >>> elem = ElementTree.Element("tag")
+    >>> elem.text = u"abc"
+    >>> serialize(elem)
+    '<tag>abc</tag>'
+    >>> serialize(elem, "utf-8")
+    '<tag>abc</tag>'
+    >>> serialize(elem, "us-ascii")
+    '<tag>abc</tag>'
+    >>> serialize(elem, "iso-8859-1").lower()
+    "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>"
 
-##     >>> elem.text = "<&\"\'>"
-##     >>> serialize(elem)
-##     '<tag>&lt;&amp;"\'&gt;</tag>'
-##     >>> serialize(elem, "utf-8")
-##     '<tag>&lt;&amp;"\'&gt;</tag>'
-##     >>> serialize(elem, "us-ascii") # cdata characters
-##     '<tag>&lt;&amp;"\'&gt;</tag>'
-##     >>> serialize(elem, "iso-8859-1")
-##     '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag>&lt;&amp;"\'&gt;</tag>'
+    >>> elem.text = "<&\"\'>"
+    >>> serialize(elem)
+    '<tag>&lt;&amp;"\'&gt;</tag>'
+    >>> serialize(elem, "utf-8")
+    '<tag>&lt;&amp;"\'&gt;</tag>'
+    >>> serialize(elem, "us-ascii") # cdata characters
+    '<tag>&lt;&amp;"\'&gt;</tag>'
+    >>> serialize(elem, "iso-8859-1").lower()
+    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag>&lt;&amp;"\'&gt;</tag>'
 
 ##     >>> elem.attrib["key"] = "<&\"\'>"
 ##     >>> elem.text = None
@@ -169,16 +169,16 @@
 ##     >>> serialize(elem, "iso-8859-1")
 ##     '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="&lt;&amp;&quot;&apos;&gt;" />'
 
-##     >>> elem.text = u'\xe5\xf6\xf6<>'
-##     >>> elem.attrib.clear()
-##     >>> serialize(elem)
-##     '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
-##     >>> serialize(elem, "utf-8")
-##     '<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>'
-##     >>> serialize(elem, "us-ascii")
-##     '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
-##     >>> serialize(elem, "iso-8859-1")
-##     "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6&lt;&gt;</tag>"
+    >>> elem.text = u'\xe5\xf6\xf6<>'
+    >>> elem.attrib.clear()
+    >>> serialize(elem)
+    '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
+    >>> serialize(elem, "utf-8")
+    '<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>'
+    >>> serialize(elem, "us-ascii")
+    '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
+    >>> serialize(elem, "iso-8859-1").lower()
+    "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6&lt;&gt;</tag>"
 
 ##     >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>'
 ##     >>> elem.text = None
@@ -191,25 +191,25 @@
 ##     >>> serialize(elem, "iso-8859-1")
 ##     '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="\xe5\xf6\xf6&lt;&gt;" />'
 
-##     """
+    """
 
-## def qname():
-##     """
-##     Test QName handling.
+def qname():
+    """
+    Test QName handling.
 
-##     1) decorated tags
+    1) decorated tags
 
-##     >>> elem = ElementTree.Element("{uri}tag")
-##     >>> serialize(elem) # 1.1
-##     '<ns0:tag xmlns:ns0="uri" />'
+    >>> elem = ElementTree.Element("{uri}tag")
+    >>> serialize(elem) # 1.1
+    '<ns0:tag xmlns:ns0="uri"/>'
 
 ##     2) decorated attributes
 
 ##     >>> elem.attrib["{uri}key"] = "value"
 ##     >>> serialize(elem) # 2.1
-##     '<ns0:tag ns0:key="value" xmlns:ns0="uri" />'
+##     '<ns0:tag ns0:key="value" xmlns:ns0="uri"/>'
 
-##     """
+    """
 
 def cdata():
     """

Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/etree.pyx	(original)
+++ lxml/branch/lxml-1.3/src/lxml/etree.pyx	Tue Jun 12 19:11:47 2007
@@ -1453,6 +1453,12 @@
             _delAttribute(self._element, key)
             return result
 
+    def clear(self):
+        cdef xmlNode* c_node
+        c_node = self._element._c_node
+        while c_node.properties is not NULL:
+            tree.xmlRemoveProp(c_node.properties)
+
     # ACCESSORS
     def __repr__(self):
         return repr(dict( _attributeIteratorFactory(self._element, 3) ))
@@ -1871,17 +1877,15 @@
     """
     cdef int write_declaration
     cdef int c_pretty_print
-    if encoding is None:
-        encoding = 'ASCII'
-    else:
-        encoding = encoding.upper()
     c_pretty_print = bool(pretty_print)
     if xml_declaration is None:
         # by default, write an XML declaration only for non-standard encodings
-        write_declaration = encoding not in \
+        write_declaration = encoding is not None and encoding.upper() not in \
                             ('ASCII', 'UTF-8', 'UTF8', 'US-ASCII')
     else:
         write_declaration = bool(xml_declaration)
+    if encoding is None:
+        encoding = 'ASCII'
 
     if isinstance(element_or_tree, _Element):
         return _tostring(<_Element>element_or_tree,

Modified: lxml/branch/lxml-1.3/src/lxml/iterparse.pxi
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/iterparse.pxi	(original)
+++ lxml/branch/lxml-1.3/src/lxml/iterparse.pxi	Tue Jun 12 19:11:47 2007
@@ -48,7 +48,7 @@
         c_ns = c_ns.next
     return count
 
-cdef class _IterparseResolverContext(_ResolverContext):
+cdef class _IterparseContext(_ResolverContext):
     cdef xmlparser.startElementNsSAX2Func _origSaxStart
     cdef xmlparser.endElementNsSAX2Func   _origSaxEnd
     cdef _Element  _root
@@ -64,8 +64,8 @@
     cdef char*  _tag_href
     cdef char*  _tag_name
 
-    def __init__(self, *args):
-        _ResolverContext.__init__(self, *args)
+    def __init__(self, _ResolverRegistry resolvers):
+        _ResolverContext.__init__(self, resolvers)
         self._ns_stack = []
         self._pop_ns = self._ns_stack.pop
         self._node_stack = []
@@ -90,7 +90,7 @@
                                      ITERPARSE_FILTER_END_NS):
             sax.endElementNs = _saxEnd
 
-    cdef void _setEventFilter(self, events, tag):
+    cdef _setEventFilter(self, events, tag):
         self._event_filter = _buildIterparseEventFilter(events)
         if tag is None or tag == '*':
             self._tag_href  = NULL
@@ -109,8 +109,7 @@
             if self._tag_href is NULL and self._tag_name is NULL:
                 self._tag_tuple = None
 
-    cdef void startNode(self, xmlNode* c_node):
-        cdef _Element node
+    cdef int startNode(self, xmlNode* c_node) except -1:
         cdef xmlNs* c_ns
         cdef int ns_count
         if self._event_filter & ITERPARSE_FILTER_START_NS:
@@ -129,9 +128,9 @@
                 python.PyList_Append(self._node_stack, node)
             if self._event_filter & ITERPARSE_FILTER_START:
                 python.PyList_Append(self._events, ("start", node))
+        return 0
 
-    cdef void endNode(self, xmlNode* c_node):
-        cdef _Element node
+    cdef int endNode(self, xmlNode* c_node) except -1:
         cdef xmlNs* c_ns
         cdef int ns_count
         if self._event_filter & ITERPARSE_FILTER_END:
@@ -141,7 +140,6 @@
                                          ITERPARSE_FILTER_START_NS | \
                                          ITERPARSE_FILTER_END_NS):
                     node = self._pop_node()
-                    assert node._c_node is c_node
                 else:
                     if self._doc is None:
                         self._doc = _documentFactory(c_node.doc, None)
@@ -155,23 +153,36 @@
                 event = ("end-ns", None)
                 for i from 0 <= i < ns_count:
                     python.PyList_Append(self._events, event)
+        return 0
                 
 
 cdef void _pushSaxStartEvent(xmlparser.xmlParserCtxt* c_ctxt, xmlNode* c_node):
-    cdef _IterparseResolverContext context
-    context = <_IterparseResolverContext>c_ctxt._private
-    context.startNode(c_node)
+    cdef _IterparseContext context
+    context = <_IterparseContext>c_ctxt._private
+    try:
+        context.startNode(c_node)
+    except:
+        if c_ctxt.errNo == xmlerror.XML_ERR_OK:
+            c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
+        c_ctxt.disableSAX = 1
+        context._store_raised()
 
 cdef void _pushSaxEndEvent(xmlparser.xmlParserCtxt* c_ctxt, xmlNode* c_node):
-    cdef _IterparseResolverContext context
-    context = <_IterparseResolverContext>c_ctxt._private
-    context.endNode(c_node)
+    cdef _IterparseContext context
+    context = <_IterparseContext>c_ctxt._private
+    try:
+        context.endNode(c_node)
+    except:
+        if c_ctxt.errNo == xmlerror.XML_ERR_OK:
+            c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
+        c_ctxt.disableSAX = 1
+        context._store_raised()
 
 cdef xmlparser.startElementNsSAX2Func _getOrigStart(xmlparser.xmlParserCtxt* c_ctxt):
-    return (<_IterparseResolverContext>c_ctxt._private)._origSaxStart
+    return (<_IterparseContext>c_ctxt._private)._origSaxStart
 
 cdef xmlparser.endElementNsSAX2Func _getOrigEnd(xmlparser.xmlParserCtxt* c_ctxt):
-    return (<_IterparseResolverContext>c_ctxt._private)._origSaxEnd
+    return (<_IterparseContext>c_ctxt._private)._origSaxEnd
 
 cdef void _saxStart(void* ctxt, char* localname, char* prefix, char* URI,
                     int nb_namespaces, char** namespaces,
@@ -230,7 +241,7 @@
     def __init__(self, source, events=("end",), tag=None,
                  attribute_defaults=False, dtd_validation=False,
                  load_dtd=False, no_network=False, remove_blank_text=False):
-        cdef _IterparseResolverContext context
+        cdef _IterparseContext context
         cdef char* c_filename
         cdef int parse_options
         if not hasattr(source, 'read'):
@@ -246,7 +257,7 @@
             c_filename = NULL
 
         self._source = source
-        _BaseParser.__init__(self, _IterparseResolverContext)
+        _BaseParser.__init__(self, _IterparseContext)
 
         parse_options = _XML_DEFAULT_PARSE_OPTIONS
         if load_dtd:
@@ -263,7 +274,7 @@
             parse_options = parse_options | xmlparser.XML_PARSE_NOBLANKS
         self._parse_options = parse_options
 
-        context = <_IterparseResolverContext>self._context
+        context = <_IterparseContext>self._context
         context._setEventFilter(events, tag)
         context._wrapCallbacks(self._parser_ctxt.sax)
         xmlparser.xmlCtxtUseOptions(self._parser_ctxt, parse_options)
@@ -274,12 +285,12 @@
         return self
 
     def __next__(self):
-        cdef _IterparseResolverContext context
+        cdef _IterparseContext context
         cdef int error
         cdef char* c_filename
         if self._source is None:
             raise StopIteration
-        context = <_IterparseResolverContext>self._context
+        context = <_IterparseContext>self._context
         if python.PyList_GET_SIZE(context._events) > context._event_index:
             item = python.PyList_GET_ITEM(context._events, context._event_index)
             python.Py_INCREF(item) # 'borrowed reference' from PyList_GET_ITEM
@@ -291,7 +302,6 @@
         while python.PyList_GET_SIZE(context._events) == 0 and error == 0:
             data = self._source.read(__ITERPARSE_CHUNK_SIZE)
             if not python.PyString_Check(data):
-                #xmlparser.xmlParseChunk(self._parser_ctxt, NULL, 0, 1)
                 self._source = None
                 raise TypeError, "reading file objects must return plain strings"
             elif data:
@@ -307,6 +317,7 @@
             _raiseParseError(self._parser_ctxt, self._filename)
         if python.PyList_GET_SIZE(context._events) == 0:
             self.root = context._root
+            self._source = None
             raise StopIteration
 
         context._event_index = 1
@@ -316,8 +327,8 @@
 
 
 cdef class iterwalk:
-    """A tree walker that generates ``iterparse()`` events from an existing
-    tree as if it was parsing XML data.
+    """A tree walker that generates events from an existing tree as if it was
+    parsing XML data with ``iterparse()``.
     """
     cdef object _node_stack
     cdef object _pop_node

Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tests/test_elementtree.py	(original)
+++ lxml/branch/lxml-1.3/src/lxml/tests/test_elementtree.py	Tue Jun 12 19:11:47 2007
@@ -290,6 +290,27 @@
         self.assertEquals(None, root.get('three'))
         self.assertEquals('foo', root.get('three', 'foo'))
 
+    def test_attrib_clear(self):
+        XML = self.etree.XML
+        
+        root = XML('<doc one="One" two="Two"/>')
+        self.assertEquals('One', root.get('one'))
+        self.assertEquals('Two', root.get('two'))
+        root.attrib.clear()
+        self.assertEquals(None, root.get('one'))
+        self.assertEquals(None, root.get('two'))
+
+    def test_attrib_set_clear(self):
+        Element = self.etree.Element
+        
+        root = Element("root", one="One")
+        root.set("two", "Two")
+        self.assertEquals('One', root.get('one'))
+        self.assertEquals('Two', root.get('two'))
+        root.attrib.clear()
+        self.assertEquals(None, root.get('one'))
+        self.assertEquals(None, root.get('two'))
+
     def test_attribute_update_dict(self):
         XML = self.etree.XML
         

Modified: lxml/branch/lxml-1.3/src/lxml/xmlparser.pxd
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/xmlparser.pxd	(original)
+++ lxml/branch/lxml-1.3/src/lxml/xmlparser.pxd	Tue Jun 12 19:11:47 2007
@@ -52,6 +52,8 @@
         int wellFormed
         int recovery
         int options
+        int disableSAX
+        int errNo
         xmlError lastError
         xmlNode* node
         xmlSAXHandler* sax


More information about the lxml-checkins mailing list