[Lxml-checkins] r43690 - in lxml/trunk: . doc src/lxml

scoder at codespeak.net scoder at codespeak.net
Sat May 26 20:45:09 CEST 2007


Author: scoder
Date: Sat May 26 20:45:08 2007
New Revision: 43690

Modified:
   lxml/trunk/CHANGES.txt
   lxml/trunk/doc/api.txt
   lxml/trunk/doc/xpathxslt.txt
   lxml/trunk/src/lxml/iterparse.pxi
   lxml/trunk/src/lxml/parser.pxi
   lxml/trunk/src/lxml/xmlerror.pxi
Log:
display first error in exception string instead of last one

Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt	(original)
+++ lxml/trunk/CHANGES.txt	Sat May 26 20:45:08 2007
@@ -46,6 +46,9 @@
 Bugs fixed
 ----------
 
+* The text in exceptions raised by XML parsers and XPath evaluators now
+  reports the first error that occurred instead of the last
+
 * XSLT parsing failed to pass resolver context on to imported documents
 
 * ``ETXPath`` was missing the ``regexp`` keyword argument

Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt	(original)
+++ lxml/trunk/doc/api.txt	Sat May 26 20:45:08 2007
@@ -265,18 +265,44 @@
     <test/>
   </root>
 
-By default, lxml (and ElementTree) output the XML declaration only if it is
-required.  You can enable or disable it explicitly by passing another keyword
-argument for the serialisation::
+By default, lxml (just like ElementTree) outputs the XML declaration only if it
+is required by the standard::
 
-  >>> print etree.tostring(root, xml_declaration=True)
-  <?xml version='1.0' encoding='ASCII'?>
-  <root><test/></root>
+  >>> unicode_root = etree.Element(u"t\u1234st")
+  >>> unicode_root.text = u"t\u4321st"
+  >>> etree.tostring(unicode_root, encoding="utf-8")
+  '<t\xe1\x88\xb4st>t\xe4\x8c\xa1st</t\xe1\x88\xb4st>'
+
+  >>> print etree.tostring(unicode_root, encoding="iso-8859-1")
+  <?xml version='1.0' encoding='iso-8859-1'?>
+  <t&#4660;st>t&#17185;st</t&#4660;st>
 
 Also see the general remarks on `Unicode support`_.
 
 .. _`Unicode support`: parsing.html#python-unicode-strings
 
+You can enable or disable the declaration explicitly by passing another
+keyword argument for the serialisation::
+
+  >>> print etree.tostring(root, xml_declaration=True)
+  <?xml version='1.0' encoding='ASCII'?>
+  <root><test/></root>
+
+  >>> etree.tostring(unicode_root, encoding="utf-8",
+  ...                              xml_declaration=False)
+  '<t\xe1\x88\xb4st>t\xe4\x8c\xa1st</t\xe1\x88\xb4st>'
+
+Note that a standards-compliant XML parser will not consider the last line
+well-formed XML if the encoding is not explicitly provided somehow, e.g. in an
+underlying transport protocol::
+
+  >>> notxml = etree.tostring(unicode_root, encoding="utf-8",
+  ...                                       xml_declaration=False)
+  >>> etree.XML(notxml)
+  Traceback (most recent call last):
+    ...
+  XMLSyntaxError: line 1: error parsing attribute name
+
 
 XInclude and ElementInclude
 ---------------------------

Modified: lxml/trunk/doc/xpathxslt.txt
==============================================================================
--- lxml/trunk/doc/xpathxslt.txt	(original)
+++ lxml/trunk/doc/xpathxslt.txt	Sat May 26 20:45:08 2007
@@ -277,7 +277,7 @@
   >>> find = etree.XPath("\\")
   Traceback (most recent call last):
     ...
-  XPathSyntaxError: Error in xpath expression
+  XPathSyntaxError: Invalid expression
 
 lxml will also try to give you a hint what went wrong, so if you pass a more
 complex expression, you may get a somewhat more specific error::

Modified: lxml/trunk/src/lxml/iterparse.pxi
==============================================================================
--- lxml/trunk/src/lxml/iterparse.pxi	(original)
+++ lxml/trunk/src/lxml/iterparse.pxi	Sat May 26 20:45:08 2007
@@ -314,7 +314,7 @@
                 break
         if error != 0:
             self._source = None
-            _raiseParseError(self._parser_ctxt, self._filename)
+            _raiseParseError(self._parser_ctxt, self._filename, None)
         if python.PyList_GET_SIZE(context._events) == 0:
             self.root = context._root
             self._source = None

Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi	(original)
+++ lxml/trunk/src/lxml/parser.pxi	Sat May 26 20:45:08 2007
@@ -429,9 +429,6 @@
         def __get__(self):
             return self._error_log.copy()
 
-    def __dummy(self):
-        pass
-
     def setElementClassLookup(self, ElementClassLookup lookup = None):
         """Set a lookup scheme for element classes generated from this parser.
 
@@ -496,7 +493,8 @@
             python.PyEval_RestoreThread(state)
 
             recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
-            return _handleParseResult(pctxt, result, None, recover)
+            return _handleParseResult(pctxt, result, None,
+                                      self._error_log._first_error, recover)
         finally:
             self._cleanup()
             self._context.clear()
@@ -529,7 +527,8 @@
             python.PyEval_RestoreThread(state)
 
             recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
-            return _handleParseResult(pctxt, result, None, recover)
+            return _handleParseResult(pctxt, result, None,
+                                      self._error_log._first_error, recover)
         finally:
             self._cleanup()
             self._context.clear()
@@ -558,7 +557,8 @@
             python.PyEval_RestoreThread(state)
 
             recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
-            return _handleParseResult(pctxt, result, c_filename, recover)
+            return _handleParseResult(pctxt, result, c_filename,
+                                      self._error_log._first_error, recover)
         finally:
             self._cleanup()
             self._context.clear()
@@ -583,14 +583,15 @@
                 pctxt, self._parse_options, self._parser_type)
 
             recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
-            return _handleParseResult(pctxt, result, filename, recover)
+            return _handleParseResult(pctxt, result, filename,
+                                      self._error_log._first_error, recover)
         finally:
             self._cleanup()
             self._context.clear()
             self._error_log.disconnect()
             self._unlockParser()
 
-cdef int _raiseParseError(xmlParserCtxt* ctxt, filename) except 0:
+cdef int _raiseParseError(xmlParserCtxt* ctxt, filename, error) except 0:
     if filename is not None and \
            ctxt.lastError.domain == xmlerror.XML_FROM_IO:
         if ctxt.lastError.message is not NULL:
@@ -599,16 +600,21 @@
         else:
             message = "Error reading file '%s'" % filename
         raise IOError, message
+    elif error is not None and error.message is not None:
+        message = error.message
+        if error.line > 0:
+            message = "line %d: %s" % (error.line, message)
+        raise XMLSyntaxError, message
     elif ctxt.lastError.message is not NULL:
         message = (ctxt.lastError.message).strip()
-        if ctxt.lastError.line >= 0:
+        if ctxt.lastError.line > 0:
             message = "line %d: %s" % (ctxt.lastError.line, message)
         raise XMLSyntaxError, message
     else:
         raise XMLSyntaxError
 
 cdef xmlDoc* _handleParseResult(xmlParserCtxt* ctxt, xmlDoc* result,
-                                filename, int recover) except NULL:
+                                filename, error, int recover) except NULL:
     cdef _ResolverContext context
     if ctxt.myDoc is not NULL:
         if ctxt.myDoc != result:
@@ -632,7 +638,7 @@
             context._raise_if_stored()
 
     if result is NULL:
-        _raiseParseError(ctxt, filename)
+        _raiseParseError(ctxt, filename, error)
     elif result.URL is NULL and filename is not None:
         result.URL = tree.xmlStrdup(_cstr(filename))
     return result
@@ -715,7 +721,7 @@
         pctxt, c_text, NULL, NULL, options)
     try:
         recover = options & xmlparser.XML_PARSE_RECOVER
-        c_doc = _handleParseResult(pctxt, c_doc, None, recover)
+        c_doc = _handleParseResult(pctxt, c_doc, None, None, recover)
     finally:
         xmlparser.xmlFreeParserCtxt(pctxt)
     return c_doc
@@ -739,7 +745,7 @@
             filename = None
         else:
             filename = c_filename
-        c_doc = _handleParseResult(pctxt, c_doc, filename, recover)
+        c_doc = _handleParseResult(pctxt, c_doc, filename, None, recover)
     finally:
         xmlparser.xmlFreeParserCtxt(pctxt)
     return c_doc

Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi	(original)
+++ lxml/trunk/src/lxml/xmlerror.pxi	Sat May 26 20:45:08 2007
@@ -222,10 +222,13 @@
         return self.filter_from_level(ErrorLevels.WARNING)
 
 cdef class _ErrorLog(_ListErrorLog):
+    cdef object _first_error
     def __init__(self):
+        self._first_error = None
         _ListErrorLog.__init__(self, [])
 
     cdef void connect(self):
+        self._first_error = None
         del self._entries[:]
         connectErrorLog(<void*>self)
 
@@ -233,6 +236,7 @@
         connectErrorLog(NULL)
 
     def clear(self):
+        self._first_error = None
         del self._entries[:]
 
     def copy(self):
@@ -244,6 +248,8 @@
         return iter(self._entries[:])
 
     def receive(self, entry):
+        if self._first_error is None:
+            self._first_error = entry
         python.PyList_Append(self._entries, entry)
 
 cdef class _DomainErrorLog(_ErrorLog):


More information about the lxml-checkins mailing list