[Lxml-checkins] r43160 - in lxml/trunk: . src/lxml

scoder at codespeak.net scoder at codespeak.net
Fri May 11 11:25:34 CEST 2007


Author: scoder
Date: Fri May 11 11:25:34 2007
New Revision: 43160

Modified:
   lxml/trunk/CHANGES.txt
   lxml/trunk/src/lxml/iterparse.pxi
   lxml/trunk/src/lxml/xmlparser.pxd
Log:
more robust error handling in iterparse()

Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt	(original)
+++ lxml/trunk/CHANGES.txt	Fri May 11 11:25:34 2007
@@ -38,6 +38,8 @@
 Bugs fixed
 ----------
 
+* More robust error handling in ``iterparse()``
+
 * Documents lost their top-level PIs and comments on serialisation
 
 * lxml.sax failed on comments and PIs. Comments are now properly ignored and

Modified: lxml/trunk/src/lxml/iterparse.pxi
==============================================================================
--- lxml/trunk/src/lxml/iterparse.pxi	(original)
+++ lxml/trunk/src/lxml/iterparse.pxi	Fri May 11 11:25:34 2007
@@ -48,7 +48,7 @@
         c_ns = c_ns.next
     return count
 
-cdef class _IterparseResolverContext(_ResolverContext):
+cdef class _IterparseContext(_ResolverContext):
     cdef xmlparser.startElementNsSAX2Func _origSaxStart
     cdef xmlparser.endElementNsSAX2Func   _origSaxEnd
     cdef _Element  _root
@@ -64,8 +64,8 @@
     cdef char*  _tag_href
     cdef char*  _tag_name
 
-    def __init__(self, *args):
-        _ResolverContext.__init__(self, *args)
+    def __init__(self, _ResolverRegistry resolvers):
+        _ResolverContext.__init__(self, resolvers)
         self._ns_stack = []
         self._pop_ns = self._ns_stack.pop
         self._node_stack = []
@@ -90,7 +90,7 @@
                                      ITERPARSE_FILTER_END_NS):
             sax.endElementNs = _saxEnd
 
-    cdef void _setEventFilter(self, events, tag):
+    cdef _setEventFilter(self, events, tag):
         self._event_filter = _buildIterparseEventFilter(events)
         if tag is None or tag == '*':
             self._tag_href  = NULL
@@ -109,8 +109,7 @@
             if self._tag_href is NULL and self._tag_name is NULL:
                 self._tag_tuple = None
 
-    cdef void startNode(self, xmlNode* c_node):
-        cdef _Element node
+    cdef int startNode(self, xmlNode* c_node) except -1:
         cdef xmlNs* c_ns
         cdef int ns_count
         if self._event_filter & ITERPARSE_FILTER_START_NS:
@@ -129,9 +128,9 @@
                 python.PyList_Append(self._node_stack, node)
             if self._event_filter & ITERPARSE_FILTER_START:
                 python.PyList_Append(self._events, ("start", node))
+        return 0
 
-    cdef void endNode(self, xmlNode* c_node):
-        cdef _Element node
+    cdef int endNode(self, xmlNode* c_node) except -1:
         cdef xmlNs* c_ns
         cdef int ns_count
         if self._event_filter & ITERPARSE_FILTER_END:
@@ -141,7 +140,6 @@
                                          ITERPARSE_FILTER_START_NS | \
                                          ITERPARSE_FILTER_END_NS):
                     node = self._pop_node()
-                    assert node._c_node is c_node
                 else:
                     if self._doc is None:
                         self._doc = _documentFactory(c_node.doc, None)
@@ -155,23 +153,36 @@
                 event = ("end-ns", None)
                 for i from 0 <= i < ns_count:
                     python.PyList_Append(self._events, event)
+        return 0
                 
 
 cdef void _pushSaxStartEvent(xmlparser.xmlParserCtxt* c_ctxt, xmlNode* c_node):
-    cdef _IterparseResolverContext context
-    context = <_IterparseResolverContext>c_ctxt._private
-    context.startNode(c_node)
+    cdef _IterparseContext context
+    context = <_IterparseContext>c_ctxt._private
+    try:
+        context.startNode(c_node)
+    except:
+        if c_ctxt.errNo == xmlerror.XML_ERR_OK:
+            c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
+        c_ctxt.disableSAX = 1
+        context._store_raised()
 
 cdef void _pushSaxEndEvent(xmlparser.xmlParserCtxt* c_ctxt, xmlNode* c_node):
-    cdef _IterparseResolverContext context
-    context = <_IterparseResolverContext>c_ctxt._private
-    context.endNode(c_node)
+    cdef _IterparseContext context
+    context = <_IterparseContext>c_ctxt._private
+    try:
+        context.endNode(c_node)
+    except:
+        if c_ctxt.errNo == xmlerror.XML_ERR_OK:
+            c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
+        c_ctxt.disableSAX = 1
+        context._store_raised()
 
 cdef xmlparser.startElementNsSAX2Func _getOrigStart(xmlparser.xmlParserCtxt* c_ctxt):
-    return (<_IterparseResolverContext>c_ctxt._private)._origSaxStart
+    return (<_IterparseContext>c_ctxt._private)._origSaxStart
 
 cdef xmlparser.endElementNsSAX2Func _getOrigEnd(xmlparser.xmlParserCtxt* c_ctxt):
-    return (<_IterparseResolverContext>c_ctxt._private)._origSaxEnd
+    return (<_IterparseContext>c_ctxt._private)._origSaxEnd
 
 cdef void _saxStart(void* ctxt, char* localname, char* prefix, char* URI,
                     int nb_namespaces, char** namespaces,
@@ -230,7 +241,7 @@
     def __init__(self, source, events=("end",), tag=None,
                  attribute_defaults=False, dtd_validation=False,
                  load_dtd=False, no_network=False, remove_blank_text=False):
-        cdef _IterparseResolverContext context
+        cdef _IterparseContext context
         cdef char* c_filename
         cdef int parse_options
         if not hasattr(source, 'read'):
@@ -246,7 +257,7 @@
             c_filename = NULL
 
         self._source = source
-        _BaseParser.__init__(self, _IterparseResolverContext)
+        _BaseParser.__init__(self, _IterparseContext)
 
         parse_options = _XML_DEFAULT_PARSE_OPTIONS
         if load_dtd:
@@ -263,7 +274,7 @@
             parse_options = parse_options | xmlparser.XML_PARSE_NOBLANKS
         self._parse_options = parse_options
 
-        context = <_IterparseResolverContext>self._context
+        context = <_IterparseContext>self._context
         context._setEventFilter(events, tag)
         context._wrapCallbacks(self._parser_ctxt.sax)
         xmlparser.xmlCtxtUseOptions(self._parser_ctxt, parse_options)
@@ -274,12 +285,12 @@
         return self
 
     def __next__(self):
-        cdef _IterparseResolverContext context
+        cdef _IterparseContext context
         cdef int error
         cdef char* c_filename
         if self._source is None:
             raise StopIteration
-        context = <_IterparseResolverContext>self._context
+        context = <_IterparseContext>self._context
         if python.PyList_GET_SIZE(context._events) > context._event_index:
             item = python.PyList_GET_ITEM(context._events, context._event_index)
             python.Py_INCREF(item) # 'borrowed reference' from PyList_GET_ITEM
@@ -291,7 +302,6 @@
         while python.PyList_GET_SIZE(context._events) == 0 and error == 0:
             data = self._source.read(__ITERPARSE_CHUNK_SIZE)
             if not python.PyString_Check(data):
-                #xmlparser.xmlParseChunk(self._parser_ctxt, NULL, 0, 1)
                 self._source = None
                 raise TypeError, "reading file objects must return plain strings"
             elif data:
@@ -307,6 +317,7 @@
             _raiseParseError(self._parser_ctxt, self._filename)
         if python.PyList_GET_SIZE(context._events) == 0:
             self.root = context._root
+            self._source = None
             raise StopIteration
 
         context._event_index = 1
@@ -316,8 +327,8 @@
 
 
 cdef class iterwalk:
-    """A tree walker that generates ``iterparse()`` events from an existing
-    tree as if it was parsing XML data.
+    """A tree walker that generates events from an existing tree as if it was
+    parsing XML data with ``iterparse()``.
     """
     cdef object _node_stack
     cdef object _pop_node

Modified: lxml/trunk/src/lxml/xmlparser.pxd
==============================================================================
--- lxml/trunk/src/lxml/xmlparser.pxd	(original)
+++ lxml/trunk/src/lxml/xmlparser.pxd	Fri May 11 11:25:34 2007
@@ -52,6 +52,8 @@
         int wellFormed
         int recovery
         int options
+        int disableSAX
+        int errNo
         xmlError lastError
         xmlNode* node
         xmlSAXHandler* sax


More information about the lxml-checkins mailing list