[Lxml-checkins] r52878 - in lxml/branch/lxml-2.0: . src/lxml src/lxml/tests

scoder at codespeak.net scoder at codespeak.net
Sun Mar 23 18:52:16 CET 2008


Author: scoder
Date: Sun Mar 23 18:52:15 2008
New Revision: 52878

Modified:
   lxml/branch/lxml-2.0/CHANGES.txt
   lxml/branch/lxml-2.0/src/lxml/iterparse.pxi
   lxml/branch/lxml-2.0/src/lxml/parser.pxi
   lxml/branch/lxml-2.0/src/lxml/tests/test_xmlschema.py
   lxml/branch/lxml-2.0/src/lxml/xmlschema.pxi
Log:
iterparse crash fix merged from trunk rev 52877

Modified: lxml/branch/lxml-2.0/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-2.0/CHANGES.txt	(original)
+++ lxml/branch/lxml-2.0/CHANGES.txt	Sun Mar 23 18:52:15 2008
@@ -16,6 +16,8 @@
 Bugs fixed
 ----------
 
+* Crash when using ``iterparse()`` with XML Schema validation.
+
 * The BeautifulSoup parser (soupparser.py) did not replace entities,
   which made them turn up in text content.
 

Modified: lxml/branch/lxml-2.0/src/lxml/iterparse.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/iterparse.pxi	(original)
+++ lxml/branch/lxml-2.0/src/lxml/iterparse.pxi	Sun Mar 23 18:52:15 2008
@@ -382,7 +382,8 @@
                     error = xmlparser.xmlParseChunk(pctxt, NULL, 0, 1)
                 self._source = None
                 break
-        if error != 0:
+        if error != 0 or (context._validator is not None and
+                          not context._validator.isvalid()):
             self._source = None
             del context._events[:]
             _raiseParseError(pctxt, self._filename, context._error_log)

Modified: lxml/branch/lxml-2.0/src/lxml/parser.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/parser.pxi	(original)
+++ lxml/branch/lxml-2.0/src/lxml/parser.pxi	Sun Mar 23 18:52:15 2008
@@ -387,6 +387,8 @@
     cdef python.PyThread_type_lock _lock
 
     def __dealloc__(self):
+        if self._validator is not None:
+            self._validator.disconnect()
         if self._lock is not NULL:
             python.PyThread_free_lock(self._lock)
         if self._c_ctxt is not NULL:
@@ -425,10 +427,10 @@
         return 0
 
     cdef int cleanup(self) except -1:
-        self._resetParserContext()
-        self.clear()
         if self._validator is not None:
             self._validator.disconnect()
+        self._resetParserContext()
+        self.clear()
         self._error_log.disconnect()
         if config.ENABLE_THREADING and self._lock is not NULL:
             python.PyThread_release_lock(self._lock)

Modified: lxml/branch/lxml-2.0/src/lxml/tests/test_xmlschema.py
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/tests/test_xmlschema.py	(original)
+++ lxml/branch/lxml-2.0/src/lxml/tests/test_xmlschema.py	Sun Mar 23 18:52:15 2008
@@ -66,6 +66,41 @@
         self.assertRaises(etree.XMLSyntaxError,
                           self.parse, '<a><c></c></a>', parser=parser)
 
+    def test_xmlschema_iterparse(self):
+        schema = self.parse('''
+<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
+  <xsd:element name="a" type="AType"/>
+  <xsd:complexType name="AType">
+    <xsd:sequence>
+      <xsd:element name="b" type="xsd:string" />
+    </xsd:sequence>
+  </xsd:complexType>
+</xsd:schema>
+''')
+        schema = etree.XMLSchema(schema)
+        xml = StringIO('<a><b></b></a>')
+        events = [ (event, el.tag)
+                   for (event, el) in etree.iterparse(xml, schema=schema) ]
+
+        self.assertEquals([('end', 'b'), ('end', 'a')],
+                          events)
+
+    def test_xmlschema_iterparse_fail(self):
+        schema = self.parse('''
+<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
+  <xsd:element name="a" type="AType"/>
+  <xsd:complexType name="AType">
+    <xsd:sequence>
+      <xsd:element name="b" type="xsd:string" />
+    </xsd:sequence>
+  </xsd:complexType>
+</xsd:schema>
+''')
+        schema = etree.XMLSchema(schema)
+        self.assertRaises(
+            etree.XMLSyntaxError,
+            list, etree.iterparse(StringIO('<a><c></c></a>'), schema=schema))
+
     def test_xmlschema_elementtree_error(self):
         self.assertRaises(ValueError, etree.XMLSchema, etree.ElementTree())
 

Modified: lxml/branch/lxml-2.0/src/lxml/xmlschema.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/xmlschema.pxi	(original)
+++ lxml/branch/lxml-2.0/src/lxml/xmlschema.pxi	Sun Mar 23 18:52:15 2008
@@ -136,8 +136,7 @@
     cdef xmlschema.xmlSchemaSAXPlugStruct* _sax_plug
 
     def __dealloc__(self):
-        if self._sax_plug:
-            self.disconnect()
+        self.disconnect()
         if self._valid_ctxt:
             xmlschema.xmlSchemaFreeValidCtxt(self._valid_ctxt)
 
@@ -154,8 +153,9 @@
             self._valid_ctxt, &c_ctxt.sax, &c_ctxt.userData)
 
     cdef void disconnect(self):
-        xmlschema.xmlSchemaSAXUnplug(self._sax_plug)
-        self._sax_plug = NULL
+        if self._sax_plug is not NULL:
+            xmlschema.xmlSchemaSAXUnplug(self._sax_plug)
+            self._sax_plug = NULL
 
     cdef bint isvalid(self):
         if self._valid_ctxt is NULL:


More information about the lxml-checkins mailing list