[Lxml-checkins] r43690 - in lxml/trunk: . doc src/lxml
scoder at codespeak.net
scoder at codespeak.net
Sat May 26 20:45:09 CEST 2007
Author: scoder
Date: Sat May 26 20:45:08 2007
New Revision: 43690
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/api.txt
lxml/trunk/doc/xpathxslt.txt
lxml/trunk/src/lxml/iterparse.pxi
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/xmlerror.pxi
Log:
display first error in exception string instead of last one
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sat May 26 20:45:08 2007
@@ -46,6 +46,9 @@
Bugs fixed
----------
+* The text in exceptions raised by XML parsers and XPath evaluators now
+ reports the first error that occurred instead of the last
+
* XSLT parsing failed to pass resolver context on to imported documents
* ``ETXPath`` was missing the ``regexp`` keyword argument
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Sat May 26 20:45:08 2007
@@ -265,18 +265,44 @@
<test/>
</root>
-By default, lxml (and ElementTree) output the XML declaration only if it is
-required. You can enable or disable it explicitly by passing another keyword
-argument for the serialisation::
+By default, lxml (just as ElementTree) outputs the XML declaration only if it
+is required by the standard::
- >>> print etree.tostring(root, xml_declaration=True)
- <?xml version='1.0' encoding='ASCII'?>
- <root><test/></root>
+ >>> unicode_root = etree.Element(u"t\u1234st")
+ >>> unicode_root.text = u"t\u4321st"
+ >>> etree.tostring(unicode_root, encoding="utf-8")
+ '<t\xe1\x88\xb4st>t\xe4\x8c\xa1st</t\xe1\x88\xb4st>'
+
+ >>> print etree.tostring(unicode_root, encoding="iso-8859-1")
+ <?xml version='1.0' encoding='iso-8859-1'?>
+ <t&#4660;st>t&#17185;st</t&#4660;st>
Also see the general remarks on `Unicode support`_.
.. _`Unicode support`: parsing.html#python-unicode-strings
+You can enable or disable the declaration explicitly by passing another
+keyword argument for the serialisation::
+
+ >>> print etree.tostring(root, xml_declaration=True)
+ <?xml version='1.0' encoding='ASCII'?>
+ <root><test/></root>
+
+ >>> etree.tostring(unicode_root, encoding="utf-8",
+ ... xml_declaration=False)
+ '<t\xe1\x88\xb4st>t\xe4\x8c\xa1st</t\xe1\x88\xb4st>'
+
+Note that a standard compliant XML parser will not consider the last line
+well-formed XML if the encoding is not explicitly provided somehow, e.g. in an
+underlying transport protocol::
+
+ >>> notxml = etree.tostring(unicode_root, encoding="utf-8",
+ ... xml_declaration=False)
+ >>> etree.XML(notxml)
+ Traceback (most recent call last):
+ ...
+ XMLSyntaxError: line 1: error parsing attribute name
+
XInclude and ElementInclude
---------------------------
Modified: lxml/trunk/doc/xpathxslt.txt
==============================================================================
--- lxml/trunk/doc/xpathxslt.txt (original)
+++ lxml/trunk/doc/xpathxslt.txt Sat May 26 20:45:08 2007
@@ -277,7 +277,7 @@
>>> find = etree.XPath("\\")
Traceback (most recent call last):
...
- XPathSyntaxError: Error in xpath expression
+ XPathSyntaxError: Invalid expression
lxml will also try to give you a hint what went wrong, so if you pass a more
complex expression, you may get a somewhat more specific error::
Modified: lxml/trunk/src/lxml/iterparse.pxi
==============================================================================
--- lxml/trunk/src/lxml/iterparse.pxi (original)
+++ lxml/trunk/src/lxml/iterparse.pxi Sat May 26 20:45:08 2007
@@ -314,7 +314,7 @@
break
if error != 0:
self._source = None
- _raiseParseError(self._parser_ctxt, self._filename)
+ _raiseParseError(self._parser_ctxt, self._filename, None)
if python.PyList_GET_SIZE(context._events) == 0:
self.root = context._root
self._source = None
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Sat May 26 20:45:08 2007
@@ -429,9 +429,6 @@
def __get__(self):
return self._error_log.copy()
- def __dummy(self):
- pass
-
def setElementClassLookup(self, ElementClassLookup lookup = None):
"""Set a lookup scheme for element classes generated from this parser.
@@ -496,7 +493,8 @@
python.PyEval_RestoreThread(state)
recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
- return _handleParseResult(pctxt, result, None, recover)
+ return _handleParseResult(pctxt, result, None,
+ self._error_log._first_error, recover)
finally:
self._cleanup()
self._context.clear()
@@ -529,7 +527,8 @@
python.PyEval_RestoreThread(state)
recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
- return _handleParseResult(pctxt, result, None, recover)
+ return _handleParseResult(pctxt, result, None,
+ self._error_log._first_error, recover)
finally:
self._cleanup()
self._context.clear()
@@ -558,7 +557,8 @@
python.PyEval_RestoreThread(state)
recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
- return _handleParseResult(pctxt, result, c_filename, recover)
+ return _handleParseResult(pctxt, result, c_filename,
+ self._error_log._first_error, recover)
finally:
self._cleanup()
self._context.clear()
@@ -583,14 +583,15 @@
pctxt, self._parse_options, self._parser_type)
recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
- return _handleParseResult(pctxt, result, filename, recover)
+ return _handleParseResult(pctxt, result, filename,
+ self._error_log._first_error, recover)
finally:
self._cleanup()
self._context.clear()
self._error_log.disconnect()
self._unlockParser()
-cdef int _raiseParseError(xmlParserCtxt* ctxt, filename) except 0:
+cdef int _raiseParseError(xmlParserCtxt* ctxt, filename, error) except 0:
if filename is not None and \
ctxt.lastError.domain == xmlerror.XML_FROM_IO:
if ctxt.lastError.message is not NULL:
@@ -599,16 +600,21 @@
else:
message = "Error reading file '%s'" % filename
raise IOError, message
+ elif error is not None and error.message is not None:
+ message = error.message
+ if error.line > 0:
+ message = "line %d: %s" % (error.line, message)
+ raise XMLSyntaxError, message
elif ctxt.lastError.message is not NULL:
message = (ctxt.lastError.message).strip()
- if ctxt.lastError.line >= 0:
+ if ctxt.lastError.line > 0:
message = "line %d: %s" % (ctxt.lastError.line, message)
raise XMLSyntaxError, message
else:
raise XMLSyntaxError
cdef xmlDoc* _handleParseResult(xmlParserCtxt* ctxt, xmlDoc* result,
- filename, int recover) except NULL:
+ filename, error, int recover) except NULL:
cdef _ResolverContext context
if ctxt.myDoc is not NULL:
if ctxt.myDoc != result:
@@ -632,7 +638,7 @@
context._raise_if_stored()
if result is NULL:
- _raiseParseError(ctxt, filename)
+ _raiseParseError(ctxt, filename, error)
elif result.URL is NULL and filename is not None:
result.URL = tree.xmlStrdup(_cstr(filename))
return result
@@ -715,7 +721,7 @@
pctxt, c_text, NULL, NULL, options)
try:
recover = options & xmlparser.XML_PARSE_RECOVER
- c_doc = _handleParseResult(pctxt, c_doc, None, recover)
+ c_doc = _handleParseResult(pctxt, c_doc, None, None, recover)
finally:
xmlparser.xmlFreeParserCtxt(pctxt)
return c_doc
@@ -739,7 +745,7 @@
filename = None
else:
filename = c_filename
- c_doc = _handleParseResult(pctxt, c_doc, filename, recover)
+ c_doc = _handleParseResult(pctxt, c_doc, filename, None, recover)
finally:
xmlparser.xmlFreeParserCtxt(pctxt)
return c_doc
Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi (original)
+++ lxml/trunk/src/lxml/xmlerror.pxi Sat May 26 20:45:08 2007
@@ -222,10 +222,13 @@
return self.filter_from_level(ErrorLevels.WARNING)
cdef class _ErrorLog(_ListErrorLog):
+ cdef object _first_error
def __init__(self):
+ self._first_error = None
_ListErrorLog.__init__(self, [])
cdef void connect(self):
+ self._first_error = None
del self._entries[:]
connectErrorLog(<void*>self)
@@ -233,6 +236,7 @@
connectErrorLog(NULL)
def clear(self):
+ self._first_error = None
del self._entries[:]
def copy(self):
@@ -244,6 +248,8 @@
return iter(self._entries[:])
def receive(self, entry):
+ if self._first_error is None:
+ self._first_error = entry
python.PyList_Append(self._entries, entry)
cdef class _DomainErrorLog(_ErrorLog):
More information about the lxml-checkins
mailing list