From jholg at codespeak.net Mon Feb 1 22:13:32 2010
From: jholg at codespeak.net (jholg at codespeak.net)
Date: Mon, 1 Feb 2010 22:13:32 +0100 (CET)
Subject: [Lxml-checkins] r71046 - in lxml/trunk/src/lxml: isoschematron tests
Message-ID: <20100201211332.D0A2116801B@codespeak.net>
Author: jholg
Date: Mon Feb 1 22:13:32 2010
New Revision: 71046
Modified:
lxml/trunk/src/lxml/isoschematron/__init__.py
lxml/trunk/src/lxml/tests/test_isoschematron.py
Log:
Changes:
* stylesheet_params now raises error for None args, with test
* Schematron class docstring update
Modified: lxml/trunk/src/lxml/isoschematron/__init__.py
==============================================================================
--- lxml/trunk/src/lxml/isoschematron/__init__.py (original)
+++ lxml/trunk/src/lxml/isoschematron/__init__.py Mon Feb 1 22:13:32 2010
@@ -69,10 +69,15 @@
def stylesheet_params(**kwargs):
"""Convert keyword args to a dictionary of stylesheet parameters.
- Conversion follows these rules:
+ XSL stylesheet parameters must be XPath expressions, i.e.:
+ * string expressions, like "'5'"
+ * simple (number) expressions, like "5"
+ * valid XPath expressions, like "/a/b/text()"
+ This function converts native Python keyword arguments to stylesheet
+ parameters following these rules:
If an arg is a string wrap it with XSLT.strparam().
If an arg is an XPath object use its path string.
- If arg is None ignore the parameter.
+ If arg is None raise TypeError.
Else convert arg to string.
"""
result = {}
@@ -80,7 +85,7 @@
if isinstance(val, basestring):
val = _etree.XSLT.strparam(val)
elif val is None:
- continue
+ raise TypeError('None not allowed as a stylesheet parameter')
elif not isinstance(val, _etree.XPath):
val = unicode(val)
result[key] = val
@@ -93,14 +98,11 @@
stylesheet arguments.
kwargsDict entries with a value of None are ignored.
"""
- if paramsDict:
- # beware of changing mutable default arg
- paramsDict = dict(paramsDict)
- for k, v in kwargsDict.items():
- if v is not None: # None values do not override
- paramsDict[k] = v
- else:
- paramsDict = kwargsDict
+ # beware of changing mutable default arg
+ paramsDict = dict(paramsDict)
+ for k, v in kwargsDict.items():
+ if v is not None: # None values do not override
+ paramsDict[k] = v
paramsDict = stylesheet_params(**paramsDict)
return paramsDict
@@ -122,9 +124,12 @@
The ``include`` and ``expand`` keyword arguments can be used to switch off
steps 1) and 2).
- To set parameters for steps 1), 2) and 3) hand dictionaries containing xslt
- parameters to the keyword arguments ``include_params``, ``expand_params``
- or ``compile_params``.
+ To set parameters for steps 1), 2) and 3) hand parameter dictionaries to the
+ keyword arguments ``include_params``, ``expand_params`` or
+ ``compile_params``.
+ For convenience, the compile-step parameter ``phase`` is also exposed as a
+ keyword argument ``phase``. This takes precedence if the parameter is also
+ given in the parameter dictionary.
If ``store_schematron`` is set to True, the (included-and-expanded)
schematron document tree is stored and available through the ``schematron``
property.
Modified: lxml/trunk/src/lxml/tests/test_isoschematron.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_isoschematron.py (original)
+++ lxml/trunk/src/lxml/tests/test_isoschematron.py Mon Feb 1 22:13:32 2010
@@ -762,6 +762,21 @@
self.assert_(relaxng(tree_invalid), relaxng.error_log)
self.assert_(not schematron(tree_invalid))
+ def test_schematron_invalid_args(self):
+ schema = self.parse('''\
+
+
+ mandatory number_of_entries tests
+
+ [ERROR] number_of_entries () must equal the number of entries/entry elements ()
+
+
+
+''')
+ # handing phase as keyword arg will *not* raise the type error
+ self.assertRaises(TypeError, isoschematron.Schematron, schema,
+ compile_params={'phase': None})
+
#TODO: test xslt parameters for inclusion, expand & compile steps (?)
From jholg at codespeak.net Wed Feb 3 15:30:10 2010
From: jholg at codespeak.net (jholg at codespeak.net)
Date: Wed, 3 Feb 2010 15:30:10 +0100 (CET)
Subject: [Lxml-checkins] r71085 - in lxml/trunk/src/lxml: . tests
Message-ID: <20100203143010.0B7F81683D1@codespeak.net>
Author: jholg
Date: Wed Feb 3 15:30:10 2010
New Revision: 71085
Modified:
lxml/trunk/src/lxml/lxml.objectify.pyx
lxml/trunk/src/lxml/tests/test_objectify.py
Log:
Bug #509504: Fixed objectify pickle support, adding support for
ObjectifiedElement subclasses.
Modified: lxml/trunk/src/lxml/lxml.objectify.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.objectify.pyx (original)
+++ lxml/trunk/src/lxml/lxml.objectify.pyx Wed Feb 3 15:30:10 2010
@@ -151,6 +151,10 @@
else:
return textOf(self._c_node) or u''
+ # pickle support for objectified Element
+ def __reduce__(self):
+ return (fromstring, (etree.tostring(self),))
+
property text:
def __get__(self):
return textOf(self._c_node)
@@ -1359,31 +1363,24 @@
################################################################################
-# Pickle support
+# Pickle support for objectified ElementTree
def __unpickleElementTree(data):
return etree.ElementTree(fromstring(data))
-cdef _setupPickle(elementReduceFunction, elementTreeReduceFunction):
+cdef _setupPickle(elementTreeReduceFunction):
if python.IS_PYTHON3:
import copyreg
else:
import copy_reg as copyreg
- copyreg.constructor(fromstring)
- copyreg.constructor(__unpickleElementTree)
- copyreg.pickle(ObjectifiedElement,
- elementReduceFunction, fromstring)
copyreg.pickle(etree._ElementTree,
elementTreeReduceFunction, __unpickleElementTree)
-def pickleReduceElement(obj):
- return (fromstring, (etree.tostring(obj),))
-
def pickleReduceElementTree(obj):
return (__unpickleElementTree, (etree.tostring(obj),))
-_setupPickle(pickleReduceElement, pickleReduceElementTree)
-del pickleReduceElement, pickleReduceElementTree
+_setupPickle(pickleReduceElementTree)
+del pickleReduceElementTree
################################################################################
# Element class lookup
Modified: lxml/trunk/src/lxml/tests/test_objectify.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_objectify.py (original)
+++ lxml/trunk/src/lxml/tests/test_objectify.py Wed Feb 3 15:30:10 2010
@@ -2328,6 +2328,43 @@
etree.tostring(new_tree),
etree.tostring(tree))
+ def test_pickle_intelement(self):
+ self._test_pickle('42')
+ self._test_pickle(objectify.DataElement(42))
+
+ def test_pickle_floattelement(self):
+ self._test_pickle('42.0')
+ self._test_pickle(objectify.DataElement(42.0))
+
+ def test_pickle_strelement(self):
+ self._test_pickle('Pickle me!')
+ self._test_pickle(objectify.DataElement('Pickle me!'))
+
+ def test_pickle_boolelement(self):
+ self._test_pickle('true')
+ self._test_pickle('false')
+ self._test_pickle(objectify.DataElement(True))
+ self._test_pickle(objectify.DataElement(False))
+
+ def test_pickle_noneelement(self):
+ self._test_pickle('''
+''')
+ self._test_pickle(objectify.DataElement(None))
+
+ def _test_pickle(self, stringOrElt):
+ import pickle
+ if isinstance(stringOrElt, (etree._Element, etree._ElementTree)):
+ elt = stringOrElt
+ else:
+ elt = self.XML(stringOrElt)
+ out = BytesIO()
+ pickle.dump(elt, out)
+
+ new_elt = pickle.loads(out.getvalue())
+ self.assertEquals(
+ etree.tostring(new_elt),
+ etree.tostring(elt))
+
# E-Factory tests, need to use sub-elements as root element is always
# type-looked-up as ObjectifiedElement (no annotations)
def test_efactory_int(self):
From jholg at codespeak.net Thu Feb 4 00:16:33 2010
From: jholg at codespeak.net (jholg at codespeak.net)
Date: Thu, 4 Feb 2010 00:16:33 +0100 (CET)
Subject: [Lxml-checkins] r71090 - in lxml/trunk/src/lxml: isoschematron tests
Message-ID: <20100203231633.DF7AC1683C5@codespeak.net>
Author: jholg
Date: Thu Feb 4 00:16:33 2010
New Revision: 71090
Modified:
lxml/trunk/src/lxml/isoschematron/__init__.py
lxml/trunk/src/lxml/tests/test_isoschematron.py
Log:
Expose extract, include, expand, compile steps and the xpath for error
selection from the result document to allow for customization.
Modified: lxml/trunk/src/lxml/isoschematron/__init__.py
==============================================================================
--- lxml/trunk/src/lxml/isoschematron/__init__.py (original)
+++ lxml/trunk/src/lxml/isoschematron/__init__.py Thu Feb 4 00:16:33 2010
@@ -5,6 +5,7 @@
import os.path
from lxml import etree as _etree # due to validator __init__ signature
+
# some compat stuff, borrowed from lxml.html
try:
bytes = __builtins__["bytes"]
@@ -23,6 +24,12 @@
basestring = str
+__all__ = ['extract_xsd', 'extract_rng', 'iso_dsdl_include',
+ 'iso_abstract_expand', 'iso_svrl_for_xslt1',
+ 'svrl_validation_errors', 'schematron_schema_valid',
+ 'stylesheet_params', 'Schematron']
+
+
# some namespaces
#FIXME: Maybe lxml should provide a dedicated place for common namespace
#FIXME: definitions?
@@ -39,9 +46,9 @@
# the iso-schematron skeleton implementation steps aka xsl transformations
-extract_from_xsd = _etree.XSLT(_etree.parse(
+extract_xsd = _etree.XSLT(_etree.parse(
os.path.join(_resources_dir, 'xsl', 'XSD2Schtrn.xsl')))
-extract_from_rng = _etree.XSLT(_etree.parse(
+extract_rng = _etree.XSLT(_etree.parse(
os.path.join(_resources_dir, 'xsl', 'RNG2Schtrn.xsl')))
iso_dsdl_include = _etree.XSLT(_etree.parse(
os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1',
@@ -52,9 +59,6 @@
iso_svrl_for_xslt1 = _etree.XSLT(_etree.parse(
os.path.join(_resources_dir,
'xsl', 'iso-schematron-xslt1', 'iso_svrl_for_xslt1.xsl')))
-# if you want to use another "meta-stylesheet" for compilation to xslt, plug it
-# here
-iso_compile2xslt = iso_svrl_for_xslt1
# svrl result accessors
@@ -117,10 +121,10 @@
implementation, the validator is created as an XSLT 1.0 stylesheet using
these steps:
- 0) (Extract from XML Schema or RelaxNG schema)
- 1) Process inclusions
- 2) Process abstract patterns
- 3) Compile the schematron schema to XSLT
+ 0) (Extract from XML Schema or RelaxNG schema)
+ 1) Process inclusions
+ 2) Process abstract patterns
+ 3) Compile the schematron schema to XSLT
The ``include`` and ``expand`` keyword arguments can be used to switch off
steps 1) and 2).
@@ -178,15 +182,43 @@
1
"""
+ # libxml2 error categorization for validation errors
_domain = _etree.ErrorDomains.SCHEMATRONV
_level = _etree.ErrorLevels.ERROR
_error_type = _etree.ErrorTypes.SCHEMATRONV_ASSERT
+ def _extract(self, element):
+ """Extract embedded schematron schema from non-schematron host schema.
+ This method will only be called by __init__ if the given schema document
+ is not a schematron schema by itself.
+ Must return a schematron schema document tree or None.
+ """
+ schematron = None
+ if element.tag == _xml_schema_root:
+ schematron = self._extract_xsd(element)
+ elif element.nsmap[element.prefix] == RELAXNG_NS:
+ # RelaxNG does not have a single unique root element
+ schematron = self._extract_rng(element)
+ return schematron
+
+ # customization points
+ # etree.XSLT objects that provide the extract, include, expand, compile
+ # steps
+ _extract_xsd = extract_xsd
+ _extract_rng = extract_rng
+ _include = iso_dsdl_include
+ _expand = iso_abstract_expand
+ _compile = iso_svrl_for_xslt1
+ # etree.XPath object that determines input document validity when applied to
+ # the svrl result report; must return a list of result elements (empty if
+ # valid)
+ _validation_errors = svrl_validation_errors
+
def __init__(self, etree=None, file=None, include=True, expand=True,
include_params={}, expand_params={}, compile_params={},
store_schematron=False, store_xslt=False, store_report=False,
phase=None):
- super(self.__class__, self).__init__()
+ super(Schematron, self).__init__()
self._store_report = store_report
self._schematron = None
@@ -210,20 +242,17 @@
raise ValueError("Empty tree")
if root.tag == _schematron_root:
schematron = root
- elif root.tag == _xml_schema_root:
- schematron = extract_from_xsd(root)
- elif root.nsmap[root.prefix] == RELAXNG_NS:
- # RelaxNG does not have a single unique root element
- schematron = extract_from_rng(root)
else:
+ schematron = self._extract(root)
+ if schematron is None:
raise _etree.SchematronParseError(
"Document is not a schematron schema or schematron-extractable")
# perform the iso-schematron skeleton implementation steps to get a
# validating xslt
if include:
- schematron = iso_dsdl_include(schematron, **include_params)
+ schematron = self._include(schematron, **include_params)
if expand:
- schematron = iso_abstract_expand(schematron, **expand_params)
+ schematron = self._expand(schematron, **expand_params)
if not schematron_schema_valid(schematron):
raise _etree.SchematronParseError(
"invalid schematron schema: %s" %
@@ -233,7 +262,7 @@
# add new compile keyword args here if exposing them
compile_kwargs = {'phase': phase}
compile_params = _stylesheet_param_dict(compile_params, compile_kwargs)
- validator_xslt = iso_compile2xslt(schematron, **compile_params)
+ validator_xslt = self._compile(schematron, **compile_params)
if store_xslt:
self._validator_xslt = validator_xslt
self._validator = _etree.XSLT(validator_xslt)
@@ -247,7 +276,7 @@
result = self._validator(etree)
if self._store_report:
self._validation_report = result
- errors = svrl_validation_errors(result)
+ errors = self._validation_errors(result)
if errors:
if isinstance(etree, _etree._Element):
fname = etree.getroottree().docinfo.URL or ''
Modified: lxml/trunk/src/lxml/tests/test_isoschematron.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_isoschematron.py (original)
+++ lxml/trunk/src/lxml/tests/test_isoschematron.py Thu Feb 4 00:16:33 2010
@@ -777,6 +777,66 @@
self.assertRaises(TypeError, isoschematron.Schematron, schema,
compile_params={'phase': None})
+ def test_schematron_customization(self):
+ class MySchematron(isoschematron.Schematron):
+ def _extract(self, root):
+ schematron = (root.xpath(
+ '//sch:schema',
+ namespaces={'sch': "http://purl.oclc.org/dsdl/schematron"})
+ or [None])[0]
+ return schematron
+
+ def _include(self, schematron, **kwargs):
+ raise RuntimeError('inclusion unsupported')
+
+ def _expand(self, schematron, **kwargs):
+ raise RuntimeError('expansion unsupported')
+
+ def _validation_errors(self, validationReport):
+ valid = etree.XPath(
+ 'count(//svrl:successful-report[@flag="critical"])=1',
+ namespaces={'svrl': isoschematron.SVRL_NS})(
+ validationReport)
+ if valid:
+ return []
+ error = etree.Element('Error')
+ error.text = 'missing critical condition report'
+ return [error]
+
+ tree_valid = self.parse('')
+ tree_invalid = self.parse('')
+ schema = self.parse('''\
+
+
+
+ Open Model
+
+ BBB element must be present
+ CCC element must be present
+
+
+
+ Closed model"
+
+ BBB element must be present
+ CCC element must be present
+ Only BBB and CCC children must be present
+
+
+
+
+''')
+ # check if overridden _include is run
+ self.assertRaises(RuntimeError, MySchematron, schema, store_report=True)
+ # check if overridden _expand is run
+ self.assertRaises(RuntimeError, MySchematron, schema, store_report=True,
+ include=False)
+
+ schema = MySchematron(schema, store_report=True, include=False,
+ expand=False)
+ self.assert_(schema.validate(tree_valid))
+ self.assert_(not schema.validate(tree_invalid))
+
#TODO: test xslt parameters for inclusion, expand & compile steps (?)
From scoder at codespeak.net Mon Feb 8 13:14:06 2010
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 8 Feb 2010 13:14:06 +0100 (CET)
Subject: [Lxml-checkins] r71165 - in lxml/trunk: . doc src/lxml
Message-ID: <20100208121406.F31BC282BD8@codespeak.net>
Author: scoder
Date: Mon Feb 8 13:14:05 2010
New Revision: 71165
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/cssselect.txt
lxml/trunk/src/lxml/cssselect.py
Log:
r5463 at lenny: sbehnel | 2010-02-08 11:59:46 +0100
fix typos in exception messages and docs
Modified: lxml/trunk/doc/cssselect.txt
==============================================================================
--- lxml/trunk/doc/cssselect.txt (original)
+++ lxml/trunk/doc/cssselect.txt Mon Feb 8 13:14:05 2010
@@ -74,8 +74,8 @@
* UI states: ``:enabled``, ``:disabled``, ``:indeterminate``
(``:checked`` and ``:unchecked`` *are* available)
-Also, none of the psuedo-elements apply, because the selector only
-returns elements and psuedo-elements select portions of text, like
+Also, none of the pseudo-elements apply, because the selector only
+returns elements and pseudo-elements select portions of text, like
``::first-line``.
Modified: lxml/trunk/src/lxml/cssselect.py
==============================================================================
--- lxml/trunk/src/lxml/cssselect.py (original)
+++ lxml/trunk/src/lxml/cssselect.py Mon Feb 8 13:14:05 2010
@@ -128,11 +128,11 @@
sel_path = self.selector.xpath()
if self.name in self.unsupported:
raise ExpressionError(
- "The psuedo-class %r is not supported" % self.name)
+ "The pseudo-class %r is not supported" % self.name)
method = '_xpath_' + self.name.replace('-', '_')
if not hasattr(self, method):
raise ExpressionError(
- "The psuedo-class %r is unknown" % self.name)
+ "The pseudo-class %r is unknown" % self.name)
method = getattr(self, method)
return method(sel_path, self.expr)
@@ -240,11 +240,11 @@
el_xpath = self.element.xpath()
if self.ident in self.unsupported:
raise ExpressionError(
- "The psuedo-class %r is unsupported" % self.ident)
+ "The pseudo-class %r is unsupported" % self.ident)
method = '_xpath_' + self.ident.replace('-', '_')
if not hasattr(self, method):
raise ExpressionError(
- "The psuedo-class %r is unknown" % self.ident)
+ "The pseudo-class %r is unknown" % self.ident)
method = getattr(self, method)
el_xpath = method(el_xpath)
return el_xpath
From scoder at codespeak.net Sun Feb 28 10:25:58 2010
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 Feb 2010 10:25:58 +0100 (CET)
Subject: [Lxml-checkins] r71545 - in lxml/trunk: . src/lxml
Message-ID: <20100228092558.07520282BDC@codespeak.net>
Author: scoder
Date: Sun Feb 28 10:25:56 2010
New Revision: 71545
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/cssselect.py
Log:
r5469 at lenny: sbehnel | 2010-02-08 14:20:44 +0100
cleanup
Modified: lxml/trunk/src/lxml/cssselect.py
==============================================================================
--- lxml/trunk/src/lxml/cssselect.py (original)
+++ lxml/trunk/src/lxml/cssselect.py Sun Feb 28 10:25:56 2010
@@ -834,17 +834,17 @@
## Tokenizing
############################################################
-_whitespace_re = re.compile(r'\s+', re.UNICODE)
+_match_whitespace = re.compile(r'\s+', re.UNICODE).match
-_comment_re = re.compile(r'/\*.*?\*/', re.DOTALL)
+_replace_comments = re.compile(r'/\*.*?\*/', re.DOTALL).sub
-_count_re = re.compile(r'[+-]?\d*n(?:[+-]\d+)?')
+_match_count_number = re.compile(r'[+-]?\d*n(?:[+-]\d+)?').match
def tokenize(s):
pos = 0
- s = _comment_re.sub('', s)
+ s = _replace_comments('', s)
while 1:
- match = _whitespace_re.match(s, pos=pos)
+ match = _match_whitespace(s, pos=pos)
if match:
preceding_whitespace_pos = pos
pos = match.end()
@@ -852,7 +852,7 @@
preceding_whitespace_pos = 0
if pos >= len(s):
return
- match = _count_re.match(s, pos=pos)
+ match = _match_count_number(s, pos=pos)
if match and match.group() != 'n':
sym = s[pos:match.end()]
yield Symbol(sym, pos)
From scoder at codespeak.net Sun Feb 28 10:26:05 2010
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 Feb 2010 10:26:05 +0100 (CET)
Subject: [Lxml-checkins] r71546 - in lxml/trunk: . src/lxml
Message-ID: <20100228092605.85840282BDC@codespeak.net>
Author: scoder
Date: Sun Feb 28 10:26:01 2010
New Revision: 71546
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.etree.pyx
Log:
r5470 at lenny: sbehnel | 2010-02-08 19:50:50 +0100
added module level __all__ to lxml.etree, minor cleanup
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Sun Feb 28 10:26:01 2010
@@ -4,6 +4,39 @@
__docformat__ = u"restructuredtext en"
+__all__ = [
+ 'AttributeBasedElementClassLookup', 'C14NError', 'CDATA',
+ 'Comment', 'CommentBase', 'CustomElementClassLookup', 'DEBUG',
+ 'DTD', 'DTDError', 'DTDParseError', 'DTDValidateError',
+ 'DocumentInvalid', 'ETCompatXMLParser', 'ETXPath', 'Element',
+ 'ElementBase', 'ElementClassLookup', 'ElementDefaultClassLookup',
+ 'ElementNamespaceClassLookup', 'ElementTree', 'Entity', 'EntityBase',
+ 'Error', 'ErrorDomains', 'ErrorLevels', 'ErrorTypes', 'Extension',
+ 'FallbackElementClassLookup', 'FunctionNamespace', 'HTML',
+ 'HTMLParser', 'LIBXML_COMPILED_VERSION', 'LIBXML_VERSION',
+ 'LIBXSLT_COMPILED_VERSION', 'LIBXSLT_VERSION', 'LXML_VERSION',
+ 'LxmlError', 'LxmlRegistryError', 'LxmlSyntaxError',
+ 'NamespaceRegistryError', 'PI', 'PIBase', 'ParseError',
+ 'ParserBasedElementClassLookup', 'ParserError', 'ProcessingInstruction',
+ 'PyErrorLog', 'PythonElementClassLookup', 'QName', 'RelaxNG',
+ 'RelaxNGError', 'RelaxNGErrorTypes', 'RelaxNGParseError',
+ 'RelaxNGValidateError', 'Resolver', 'Schematron', 'SchematronError',
+ 'SchematronParseError', 'SchematronValidateError', 'SerialisationError',
+ 'SubElement', 'TreeBuilder', 'XInclude', 'XIncludeError', 'XML',
+ 'XMLDTDID', 'XMLID', 'XMLParser', 'XMLSchema', 'XMLSchemaError',
+ 'XMLSchemaParseError', 'XMLSchemaValidateError', 'XMLSyntaxError',
+ 'XPath', 'XPathDocumentEvaluator', 'XPathError', 'XPathEvalError',
+ 'XPathEvaluator', 'XPathFunctionError', 'XPathResultError',
+ 'XPathSyntaxError', 'XSLT', 'XSLTAccessControl', 'XSLTApplyError',
+ 'XSLTError', 'XSLTExtension', 'XSLTExtensionError', 'XSLTParseError',
+ 'XSLTSaveError', 'cleanup_namespaces', 'clear_error_log', 'dump',
+ 'fromstring', 'fromstringlist', 'get_default_parser', 'iselement',
+ 'iterparse', 'iterwalk', 'parse', 'parseid', 'set_default_parser',
+ 'set_element_class_lookup', 'strip_attributes', 'strip_elements',
+ 'strip_tags', 'tostring', 'tostringlist', 'tounicode',
+ 'use_global_python_log'
+ ]
+
cimport tree, python, config
from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement, _getNs
from python cimport callable, _cstr, _isString
@@ -127,11 +160,8 @@
else:
self.error_log = error_log.copy()
-cdef object _LxmlError
-_LxmlError = LxmlError
-
-cdef object error_super_init
-error_super_init = Error.__init__
+cdef object _LxmlError = LxmlError
+cdef object error_super_init = Error.__init__
# superclass for all syntax errors
From scoder at codespeak.net Sun Feb 28 10:26:14 2010
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 Feb 2010 10:26:14 +0100 (CET)
Subject: [Lxml-checkins] r71547 - in lxml/trunk: . src/lxml
Message-ID: <20100228092614.E7A0D282BE1@codespeak.net>
Author: scoder
Date: Sun Feb 28 10:26:12 2010
New Revision: 71547
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/extensions.pxi
Log:
r5471 at lenny: sbehnel | 2010-02-28 09:11:32 +0100
code cleanup
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Sun Feb 28 10:26:12 2010
@@ -556,7 +556,7 @@
result = []
if xpathObj.nodesetval is NULL:
return result
- for i from 0 <= i < xpathObj.nodesetval.nodeNr:
+ for i in range(xpathObj.nodesetval.nodeNr):
c_node = xpathObj.nodesetval.nodeTab[i]
_unpackNodeSetEntry(result, c_node, doc,
smart_string, is_fragment)
From scoder at codespeak.net Sun Feb 28 10:26:17 2010
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 Feb 2010 10:26:17 +0100 (CET)
Subject: [Lxml-checkins] r71548 - in lxml/trunk: . src/lxml/isoschematron
Message-ID: <20100228092617.65F72282BE1@codespeak.net>
Author: scoder
Date: Sun Feb 28 10:26:15 2010
New Revision: 71548
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/isoschematron/__init__.py
Log:
r5472 at lenny: sbehnel | 2010-02-28 10:24:44 +0100
Py3 fix
Modified: lxml/trunk/src/lxml/isoschematron/__init__.py
==============================================================================
--- lxml/trunk/src/lxml/isoschematron/__init__.py (original)
+++ lxml/trunk/src/lxml/isoschematron/__init__.py Sun Feb 28 10:26:15 2010
@@ -2,6 +2,7 @@
of the pure-xslt 'skeleton' implementation.
"""
+import sys
import os.path
from lxml import etree as _etree # due to validator __init__ signature
@@ -235,9 +236,9 @@
root = etree.getroot()
elif file is not None:
root = _etree.parse(file).getroot()
- except Exception, e:
+ except Exception:
raise _etree.SchematronParseError(
- "No tree or file given: %s" % e)
+ "No tree or file given: %s" % sys.exc_info()[1])
if root is None:
raise ValueError("Empty tree")
if root.tag == _schematron_root:
From scoder at codespeak.net Sun Feb 28 10:31:58 2010
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 Feb 2010 10:31:58 +0100 (CET)
Subject: [Lxml-checkins] r71549 - in lxml/trunk/src/lxml: . tests
Message-ID: <20100228093158.1F1E6282BDC@codespeak.net>
Author: scoder
Date: Sun Feb 28 10:31:57 2010
New Revision: 71549
Modified:
lxml/trunk/src/lxml/extensions.pxi
lxml/trunk/src/lxml/tests/test_xpathevaluator.py
lxml/trunk/src/lxml/xpath.pxi
Log:
fix bug #502963: crash when reading smart XPath strings from a document other than the original context document
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Sun Feb 28 10:31:57 2010
@@ -40,6 +40,7 @@
cdef bint _build_smart_strings
# for exception handling and temporary reference keeping:
cdef _TempStore _temp_refs
+ cdef set _temp_documents
cdef _ExceptionContext _exc
def __init__(self, namespaces, extensions, enable_regexp,
@@ -91,6 +92,7 @@
self._extensions = extensions
self._namespaces = namespaces
self._temp_refs = _TempStore()
+ self._temp_documents = set()
self._build_smart_strings = build_smart_strings
if enable_regexp:
@@ -307,7 +309,8 @@
cdef _release_temp_refs(self):
u"Free temporarily referenced objects from this context."
self._temp_refs.clear()
-
+ self._temp_documents.clear()
+
cdef _hold(self, obj):
u"""A way to temporarily hold references to nodes in the evaluator.
@@ -318,7 +321,7 @@
cdef _Element element
if isinstance(obj, _Element):
self._temp_refs.add(obj)
- self._temp_refs.add((<_Element>obj)._doc)
+ self._temp_documents.add((<_Element>obj)._doc)
return
elif _isString(obj) or not python.PySequence_Check(obj):
return
@@ -327,7 +330,19 @@
#print "Holding element:", element._c_node
self._temp_refs.add(o)
#print "Holding document:", element._doc._c_doc
- self._temp_refs.add((<_Element>o)._doc)
+ self._temp_documents.add((<_Element>o)._doc)
+
+ cdef _Document _findDocumentForNode(self, xmlNode* c_node):
+ u"""If an XPath expression returns an element from a different
+ document than the current context document, we call this to
+ see if it was possibly created by an extension and is a known
+ document instance.
+ """
+ cdef _Document doc
+ for doc in self._temp_documents:
+ if doc._c_doc is c_node.doc:
+ return doc
+ return None
def Extension(module, function_mapping=None, *, ns=None):
u"""Extension(module, function_mapping=None, ns=None)
@@ -520,18 +535,18 @@
return xpath.xmlXPathWrapNodeSet(resultSet)
cdef object _unwrapXPathObject(xpath.xmlXPathObject* xpathObj,
- _Document doc, bint smart_string):
+ _Document doc, _BaseContext context):
if xpathObj.type == xpath.XPATH_UNDEFINED:
raise XPathResultError, u"Undefined xpath result"
elif xpathObj.type == xpath.XPATH_NODESET:
- return _createNodeSetResult(xpathObj, doc, smart_string, 0)
+ return _createNodeSetResult(xpathObj, doc, context)
elif xpathObj.type == xpath.XPATH_BOOLEAN:
return xpathObj.boolval
elif xpathObj.type == xpath.XPATH_NUMBER:
return xpathObj.floatval
elif xpathObj.type == xpath.XPATH_STRING:
stringval = funicode(xpathObj.stringval)
- if smart_string:
+ if context._build_smart_strings:
stringval = _elementStringResultFactory(
stringval, None, None, 0)
return stringval
@@ -544,12 +559,12 @@
elif xpathObj.type == xpath.XPATH_USERS:
raise NotImplementedError, u"XPATH_USERS"
elif xpathObj.type == xpath.XPATH_XSLT_TREE:
- return _createNodeSetResult(xpathObj, doc, smart_string, 1)
+ return _createNodeSetResult(xpathObj, doc, context)
else:
raise XPathResultError, u"Unknown xpath result %s" % unicode(xpathObj.type)
cdef object _createNodeSetResult(xpath.xmlXPathObject* xpathObj, _Document doc,
- bint smart_string, bint is_fragment):
+ _BaseContext context):
cdef xmlNode* c_node
cdef int i
cdef list result
@@ -558,12 +573,12 @@
return result
for i in range(xpathObj.nodesetval.nodeNr):
c_node = xpathObj.nodesetval.nodeTab[i]
- _unpackNodeSetEntry(result, c_node, doc,
- smart_string, is_fragment)
+ _unpackNodeSetEntry(result, c_node, doc, context,
+ xpathObj.type == xpath.XPATH_XSLT_TREE)
return result
cdef _unpackNodeSetEntry(list results, xmlNode* c_node, _Document doc,
- bint smart_string, bint is_fragment):
+ _BaseContext context, bint is_fragment):
cdef xmlNode* c_child
cdef char* s
if _isElement(c_node):
@@ -573,13 +588,14 @@
# -> we store Python refs to these, so that is OK
# XSLT: can it leak when merging trees from multiple sources?
c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1)
+ # FIXME: call _instantiateElementFromXPath() instead?
results.append(
_fakeDocElementFactory(doc, c_node))
elif c_node.type == tree.XML_TEXT_NODE or \
c_node.type == tree.XML_CDATA_SECTION_NODE or \
c_node.type == tree.XML_ATTRIBUTE_NODE:
results.append(
- _buildElementStringResult(doc, c_node, smart_string))
+ _buildElementStringResult(doc, c_node, context))
elif c_node.type == tree.XML_NAMESPACE_DECL:
s = (c_node).href
if s is NULL:
@@ -598,8 +614,7 @@
if is_fragment:
c_child = c_node.children
while c_child is not NULL:
- _unpackNodeSetEntry(results, c_child, doc,
- smart_string, is_fragment)
+ _unpackNodeSetEntry(results, c_child, doc, context, 0)
c_child = c_child.next
elif c_node.type == tree.XML_XINCLUDE_START or \
c_node.type == tree.XML_XINCLUDE_END:
@@ -617,6 +632,20 @@
xpathObj.nodesetval = NULL
xpath.xmlXPathFreeObject(xpathObj)
+cdef _Element _instantiateElementFromXPath(xmlNode* c_node, _Document doc,
+ _BaseContext context):
+ # NOTE: this may copy the element - only call this when it can't leak
+ if c_node.doc != doc._c_doc and c_node.doc._private is NULL:
+ # not from the context document and not from a fake document
+ # either => may still be from a known document, e.g. one
+ # created by an extension function
+ doc = context._findDocumentForNode(c_node)
+ if doc is None:
+ # not from a known document at all! => can only make a
+ # safety copy here
+ c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1)
+ return _fakeDocElementFactory(doc, c_node)
+
################################################################################
# special str/unicode subclasses
@@ -664,7 +693,7 @@
return uresult
cdef object _buildElementStringResult(_Document doc, xmlNode* c_node,
- bint smart_string):
+ _BaseContext context):
cdef _Element parent = None
cdef object attrname = None
cdef xmlNode* c_element
@@ -687,7 +716,7 @@
c_element = _previousElement(c_node)
is_tail = c_element is not NULL
- if not smart_string:
+ if not context._build_smart_strings:
return value
if c_element is NULL:
@@ -697,12 +726,11 @@
c_element = c_element.parent
if c_element is not NULL:
- parent = _fakeDocElementFactory(doc, c_element)
+ parent = _instantiateElementFromXPath(c_element, doc, context)
return _elementStringResultFactory(
value, parent, attrname, is_tail)
-
################################################################################
# callbacks for XPath/XSLT extension functions
@@ -717,7 +745,7 @@
args = []
for i in range(nargs):
obj = xpath.valuePop(ctxt)
- o = _unwrapXPathObject(obj, doc, context._build_smart_strings)
+ o = _unwrapXPathObject(obj, doc, context)
_freeXPathObject(obj)
args.append(o)
args.reverse()
Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original)
+++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Sun Feb 28 10:31:57 2010
@@ -137,6 +137,24 @@
self.assertEquals(False, hasattr(results[0], 'getparent'))
self.assertEquals(False, hasattr(results[0], 'attrname'))
+ def test_xpath_text_from_other_document(self):
+ xml_data = '''
+
+ '''
+
+ def lookup(dummy, id):
+ return etree.XML(xml_data).xpath('id(%r)' % id)
+ functions = {(None, 'lookup') : lookup}
+
+ root = etree.XML('')
+ values = root.xpath("lookup('k1')/value/text()",
+ extensions=functions)
+ self.assertEquals(['v1'], values)
+ self.assertEquals('value', values[0].getparent().tag)
+
def test_xpath_list_comment(self):
tree = self.parse('')
self.assertEquals([''],
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Sun Feb 28 10:31:57 2010
@@ -212,8 +212,7 @@
self._raise_eval_error()
try:
- result = _unwrapXPathObject(xpathObj, doc,
- self._context._build_smart_strings)
+ result = _unwrapXPathObject(xpathObj, doc, self._context)
finally:
_freeXPathObject(xpathObj)
self._context._release_temp_refs()
From scoder at codespeak.net Sun Feb 28 10:34:35 2010
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 Feb 2010 10:34:35 +0100 (CET)
Subject: [Lxml-checkins] r71550 - in lxml/branch/lxml-2.2: . src/lxml
src/lxml/tests
Message-ID: <20100228093435.88B27282BDC@codespeak.net>
Author: scoder
Date: Sun Feb 28 10:34:33 2010
New Revision: 71550
Modified:
lxml/branch/lxml-2.2/ (props changed)
lxml/branch/lxml-2.2/CHANGES.txt
lxml/branch/lxml-2.2/INSTALL.txt (props changed)
lxml/branch/lxml-2.2/src/lxml/extensions.pxi
lxml/branch/lxml-2.2/src/lxml/tests/test_xpathevaluator.py
lxml/branch/lxml-2.2/src/lxml/xpath.pxi
Log:
trunk merge for bug #502963: fix crash when reading smart XPath strings from a document other than the original context document
Modified: lxml/branch/lxml-2.2/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-2.2/CHANGES.txt (original)
+++ lxml/branch/lxml-2.2/CHANGES.txt Sun Feb 28 10:34:33 2010
@@ -14,6 +14,9 @@
Bugs fixed
----------
+* Crash in XPath evaluation when reading smart strings from a document
+ other than the original context document.
+
* Support recent versions of html5lib by not requiring its
``XHTMLParser`` in ``htmlparser.py`` anymore.
Modified: lxml/branch/lxml-2.2/src/lxml/extensions.pxi
==============================================================================
--- lxml/branch/lxml-2.2/src/lxml/extensions.pxi (original)
+++ lxml/branch/lxml-2.2/src/lxml/extensions.pxi Sun Feb 28 10:34:33 2010
@@ -40,6 +40,7 @@
cdef bint _build_smart_strings
# for exception handling and temporary reference keeping:
cdef _TempStore _temp_refs
+ cdef set _temp_documents
cdef _ExceptionContext _exc
def __init__(self, namespaces, extensions, enable_regexp,
@@ -91,6 +92,7 @@
self._extensions = extensions
self._namespaces = namespaces
self._temp_refs = _TempStore()
+ self._temp_documents = set()
self._build_smart_strings = build_smart_strings
if enable_regexp:
@@ -307,7 +309,8 @@
cdef _release_temp_refs(self):
u"Free temporarily referenced objects from this context."
self._temp_refs.clear()
-
+ self._temp_documents.clear()
+
cdef _hold(self, obj):
u"""A way to temporarily hold references to nodes in the evaluator.
@@ -318,7 +321,7 @@
cdef _Element element
if isinstance(obj, _Element):
self._temp_refs.add(obj)
- self._temp_refs.add((<_Element>obj)._doc)
+ self._temp_documents.add((<_Element>obj)._doc)
return
elif _isString(obj) or not python.PySequence_Check(obj):
return
@@ -327,7 +330,19 @@
#print "Holding element:", element._c_node
self._temp_refs.add(o)
#print "Holding document:", element._doc._c_doc
- self._temp_refs.add((<_Element>o)._doc)
+ self._temp_documents.add((<_Element>o)._doc)
+
+ cdef _Document _findDocumentForNode(self, xmlNode* c_node):
+ u"""If an XPath expression returns an element from a different
+ document than the current context document, we call this to
+ see if it was possibly created by an extension and is a known
+ document instance.
+ """
+ cdef _Document doc
+ for doc in self._temp_documents:
+ if doc._c_doc is c_node.doc:
+ return doc
+ return None
def Extension(module, function_mapping=None, *, ns=None):
u"""Extension(module, function_mapping=None, ns=None)
@@ -487,18 +502,18 @@
return xpath.xmlXPathWrapNodeSet(resultSet)
cdef object _unwrapXPathObject(xpath.xmlXPathObject* xpathObj,
- _Document doc, bint smart_string):
+ _Document doc, _BaseContext context):
if xpathObj.type == xpath.XPATH_UNDEFINED:
raise XPathResultError, u"Undefined xpath result"
elif xpathObj.type == xpath.XPATH_NODESET:
- return _createNodeSetResult(xpathObj, doc, smart_string, 0)
+ return _createNodeSetResult(xpathObj, doc, context)
elif xpathObj.type == xpath.XPATH_BOOLEAN:
return xpathObj.boolval
elif xpathObj.type == xpath.XPATH_NUMBER:
return xpathObj.floatval
elif xpathObj.type == xpath.XPATH_STRING:
stringval = funicode(xpathObj.stringval)
- if smart_string:
+ if context._build_smart_strings:
stringval = _elementStringResultFactory(
stringval, None, 0, 0)
return stringval
@@ -511,12 +526,12 @@
elif xpathObj.type == xpath.XPATH_USERS:
raise NotImplementedError, u"XPATH_USERS"
elif xpathObj.type == xpath.XPATH_XSLT_TREE:
- return _createNodeSetResult(xpathObj, doc, smart_string, 1)
+ return _createNodeSetResult(xpathObj, doc, context)
else:
raise XPathResultError, u"Unknown xpath result %s" % unicode(xpathObj.type)
cdef object _createNodeSetResult(xpath.xmlXPathObject* xpathObj, _Document doc,
- bint smart_string, bint is_fragment):
+ _BaseContext context):
cdef xmlNode* c_node
cdef int i
cdef list result
@@ -525,12 +540,12 @@
return result
for i from 0 <= i < xpathObj.nodesetval.nodeNr:
c_node = xpathObj.nodesetval.nodeTab[i]
- _unpackNodeSetEntry(result, c_node, doc,
- smart_string, is_fragment)
+ _unpackNodeSetEntry(result, c_node, doc, context,
+ xpathObj.type == xpath.XPATH_XSLT_TREE)
return result
cdef _unpackNodeSetEntry(list results, xmlNode* c_node, _Document doc,
- bint smart_string, bint is_fragment):
+ _BaseContext context, bint is_fragment):
cdef xmlNode* c_child
cdef char* s
if _isElement(c_node):
@@ -540,13 +555,14 @@
# -> we store Python refs to these, so that is OK
# XSLT: can it leak when merging trees from multiple sources?
c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1)
+ # FIXME: call _instantiateElementFromXPath() instead?
results.append(
_fakeDocElementFactory(doc, c_node))
elif c_node.type == tree.XML_TEXT_NODE or \
c_node.type == tree.XML_CDATA_SECTION_NODE or \
c_node.type == tree.XML_ATTRIBUTE_NODE:
results.append(
- _buildElementStringResult(doc, c_node, smart_string))
+ _buildElementStringResult(doc, c_node, context))
elif c_node.type == tree.XML_NAMESPACE_DECL:
s = (c_node).href
if s is NULL:
@@ -565,8 +581,7 @@
if is_fragment:
c_child = c_node.children
while c_child is not NULL:
- _unpackNodeSetEntry(results, c_child, doc,
- smart_string, is_fragment)
+ _unpackNodeSetEntry(results, c_child, doc, context, 0)
c_child = c_child.next
elif c_node.type == tree.XML_XINCLUDE_START or \
c_node.type == tree.XML_XINCLUDE_END:
@@ -584,6 +599,20 @@
xpathObj.nodesetval = NULL
xpath.xmlXPathFreeObject(xpathObj)
+cdef _Element _instantiateElementFromXPath(xmlNode* c_node, _Document doc,
+ _BaseContext context):
+ # NOTE: this may copy the element - only call this when it can't leak
+ if c_node.doc != doc._c_doc and c_node.doc._private is NULL:
+ # not from the context document and not from a fake document
+ # either => may still be from a known document, e.g. one
+ # created by an extension function
+ doc = context._findDocumentForNode(c_node)
+ if doc is None:
+ # not from a known document at all! => can only make a
+ # safety copy here
+ c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1)
+ return _fakeDocElementFactory(doc, c_node)
+
################################################################################
# special str/unicode subclasses
@@ -627,7 +656,7 @@
return uresult
cdef object _buildElementStringResult(_Document doc, xmlNode* c_node,
- bint smart_string):
+ _BaseContext context):
cdef _Element parent
cdef xmlNode* c_element
cdef char* s
@@ -650,7 +679,7 @@
c_element = _previousElement(c_node)
is_tail = c_element is not NULL
- if not smart_string:
+ if not context._build_smart_strings:
return value
if c_element is NULL:
@@ -660,12 +689,11 @@
c_element = c_element.parent
if c_element is not NULL:
- parent = _fakeDocElementFactory(doc, c_element)
+ parent = _instantiateElementFromXPath(c_element, doc, context)
return _elementStringResultFactory(
value, parent, is_attribute, is_tail)
-
################################################################################
# callbacks for XPath/XSLT extension functions
@@ -680,7 +708,7 @@
args = []
for i from 0 <= i < nargs:
obj = xpath.valuePop(ctxt)
- o = _unwrapXPathObject(obj, doc, context._build_smart_strings)
+ o = _unwrapXPathObject(obj, doc, context)
_freeXPathObject(obj)
args.append(o)
args.reverse()
Modified: lxml/branch/lxml-2.2/src/lxml/tests/test_xpathevaluator.py
==============================================================================
--- lxml/branch/lxml-2.2/src/lxml/tests/test_xpathevaluator.py (original)
+++ lxml/branch/lxml-2.2/src/lxml/tests/test_xpathevaluator.py Sun Feb 28 10:34:33 2010
@@ -129,6 +129,24 @@
self.assertEquals('CqWeRtZuI', results[0])
self.assertEquals(False, hasattr(results[0], 'getparent'))
+ def test_xpath_text_from_other_document(self):
+ xml_data = '''
+
+ '''
+
+ def lookup(dummy, id):
+ return etree.XML(xml_data).xpath('id(%r)' % id)
+ functions = {(None, 'lookup') : lookup}
+
+ root = etree.XML('')
+ values = root.xpath("lookup('k1')/value/text()",
+ extensions=functions)
+ self.assertEquals(['v1'], values)
+ self.assertEquals('value', values[0].getparent().tag)
+
def test_xpath_list_comment(self):
tree = self.parse('')
self.assertEquals([''],
Modified: lxml/branch/lxml-2.2/src/lxml/xpath.pxi
==============================================================================
--- lxml/branch/lxml-2.2/src/lxml/xpath.pxi (original)
+++ lxml/branch/lxml-2.2/src/lxml/xpath.pxi Sun Feb 28 10:34:33 2010
@@ -212,8 +212,7 @@
self._raise_eval_error()
try:
- result = _unwrapXPathObject(xpathObj, doc,
- self._context._build_smart_strings)
+ result = _unwrapXPathObject(xpathObj, doc, self._context)
finally:
_freeXPathObject(xpathObj)
self._context._release_temp_refs()
From scoder at codespeak.net Sun Feb 28 10:35:07 2010
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 Feb 2010 10:35:07 +0100 (CET)
Subject: [Lxml-checkins] r71551 - lxml/trunk
Message-ID: <20100228093507.4FD67282BDC@codespeak.net>
Author: scoder
Date: Sun Feb 28 10:35:05 2010
New Revision: 71551
Modified:
lxml/trunk/CHANGES.txt
Log:
changelog
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sun Feb 28 10:35:05 2010
@@ -55,6 +55,9 @@
Bugs fixed
----------
+* Crash in XPath evaluation when reading smart strings from a document
+ other than the original context document.
+
* Parsing broken fragments in lxml.html could fail if the fragment
contained an orphaned closing '' tag.
From scoder at codespeak.net Sun Feb 28 11:00:47 2010
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 Feb 2010 11:00:47 +0100 (CET)
Subject: [Lxml-checkins] r71552 - in lxml/branch/lxml-2.2: . doc
Message-ID: <20100228100047.28D5C282BDC@codespeak.net>
Author: scoder
Date: Sun Feb 28 11:00:45 2010
New Revision: 71552
Modified:
lxml/branch/lxml-2.2/CHANGES.txt
lxml/branch/lxml-2.2/doc/main.txt
lxml/branch/lxml-2.2/setup.py
lxml/branch/lxml-2.2/version.txt
Log:
prepare release of lxml 2.2.5
Modified: lxml/branch/lxml-2.2/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-2.2/CHANGES.txt (original)
+++ lxml/branch/lxml-2.2/CHANGES.txt Sun Feb 28 11:00:45 2010
@@ -2,7 +2,7 @@
lxml changelog
==============
-2.2.5 (?)
+2.2.5 (2010-02-28)
==================
Features added
Modified: lxml/branch/lxml-2.2/doc/main.txt
==============================================================================
--- lxml/branch/lxml-2.2/doc/main.txt (original)
+++ lxml/branch/lxml-2.2/doc/main.txt Sun Feb 28 11:00:45 2010
@@ -147,8 +147,8 @@
source release. If you can't wait, consider trying a less recent
release version first.
-The latest version is `lxml 2.2.4`_, released 2009-11-11
-(`changes for 2.2.4`_). `Older versions`_ are listed below.
+The latest version is `lxml 2.2.5`_, released 2010-02-28
+(`changes for 2.2.5`_). `Older versions`_ are listed below.
Please take a look at the `installation instructions`_!
@@ -221,7 +221,9 @@
`_ and the `current in-development
version `_.
-.. _`PDF documentation`: lxmldoc-2.2.4.pdf
+.. _`PDF documentation`: lxmldoc-2.2.5.pdf
+
+* `lxml 2.2.4`_, released 2009-11-11 (`changes for 2.2.4`_)
* `lxml 2.2.3`_, released 2009-10-30 (`changes for 2.2.3`_)
@@ -327,6 +329,7 @@
* `lxml 0.5`_, released 2005-04-08
+.. _`lxml 2.2.5`: lxml-2.2.5.tgz
.. _`lxml 2.2.4`: lxml-2.2.4.tgz
.. _`lxml 2.2.3`: lxml-2.2.3.tgz
.. _`lxml 2.2.2`: lxml-2.2.2.tgz
@@ -381,6 +384,7 @@
.. _`lxml 0.5`: lxml-0.5.tgz
.. _`changes for 2.2.4`: changes-2.2.4.html
+.. _`changes for 2.2.5`: changes-2.2.5.html
.. _`changes for 2.2.3`: changes-2.2.3.html
.. _`changes for 2.2.2`: changes-2.2.2.html
.. _`changes for 2.2.1`: changes-2.2.1.html
Modified: lxml/branch/lxml-2.2/setup.py
==============================================================================
--- lxml/branch/lxml-2.2/setup.py (original)
+++ lxml/branch/lxml-2.2/setup.py Sun Feb 28 11:00:45 2010
@@ -90,7 +90,7 @@
Running ``easy_install lxml==dev`` will install it from
http://codespeak.net/svn/lxml/trunk#egg=lxml-dev
-""" + branch_link) % { "branch_version" : versioninfo.branch_version() }) +
+""" + branch_link) % { "branch_version" : versioninfo.branch_version() }) + '\n' +
versioninfo.changes()),
classifiers = [
versioninfo.dev_status(),
Modified: lxml/branch/lxml-2.2/version.txt
==============================================================================
--- lxml/branch/lxml-2.2/version.txt (original)
+++ lxml/branch/lxml-2.2/version.txt Sun Feb 28 11:00:45 2010
@@ -1 +1 @@
-2.2.4
+2.2.5
From scoder at codespeak.net Sun Feb 28 11:02:29 2010
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 Feb 2010 11:02:29 +0100 (CET)
Subject: [Lxml-checkins] r71553 - lxml/tag/lxml-2.2.5
Message-ID: <20100228100229.D059F282BDC@codespeak.net>
Author: scoder
Date: Sun Feb 28 11:02:28 2010
New Revision: 71553
Added:
lxml/tag/lxml-2.2.5/
- copied from r71552, lxml/branch/lxml-2.2/
Log:
new tag for lxml 2.2.5
From scoder at codespeak.net Sun Feb 28 12:15:17 2010
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 Feb 2010 12:15:17 +0100 (CET)
Subject: [Lxml-checkins] r71554 - lxml/trunk
Message-ID: <20100228111517.1B73451054@codespeak.net>
Author: scoder
Date: Sun Feb 28 12:15:16 2010
New Revision: 71554
Modified:
lxml/trunk/ (props changed)
lxml/trunk/setup.py
Log:
r5481 at lenny: sbehnel | 2010-02-28 12:15:02 +0100
link fix
Modified: lxml/trunk/setup.py
==============================================================================
--- lxml/trunk/setup.py (original)
+++ lxml/trunk/setup.py Sun Feb 28 12:15:16 2010
@@ -82,7 +82,7 @@
RelaxNG, XML Schema, XSLT, C14N and much more.
To contact the project, go to the `project home page
-`_ or see our bug tracker at
+`_ or see our bug tracker at
https://launchpad.net/lxml
In case you want to use the current in-development version of lxml, you can
From scoder at codespeak.net Sun Feb 28 13:45:50 2010
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 Feb 2010 13:45:50 +0100 (CET)
Subject: [Lxml-checkins] r71555 - in lxml/trunk: . src/lxml
Message-ID: <20100228124550.1DC802E2BA4@codespeak.net>
Author: scoder
Date: Sun Feb 28 13:45:48 2010
New Revision: 71555
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/extensions.pxi
Log:
r5485 at lenny: sbehnel | 2010-02-28 13:40:53 +0100
code cleanup
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Sun Feb 28 13:45:48 2010
@@ -777,5 +777,5 @@
else:
fref = rctxt.function
xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR)
- exception = XPathFunctionError(u"XPath function '%s' not found" % fref)
- context._exc._store_exception(exception)
+ context._exc._store_exception(
+ XPathFunctionError(u"XPath function '%s' not found" % fref))
From scoder at codespeak.net Sun Feb 28 13:45:53 2010
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 28 Feb 2010 13:45:53 +0100 (CET)
Subject: [Lxml-checkins] r71556 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20100228124553.E11AB2E2BA4@codespeak.net>
Author: scoder
Date: Sun Feb 28 13:45:51 2010
New Revision: 71556
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/etree_defs.h
lxml/trunk/src/lxml/tests/test_xpathevaluator.py
lxml/trunk/src/lxml/xpath.pxd
lxml/trunk/src/lxml/xpath.pxi
lxml/trunk/src/lxml/xslt.pxd
Log:
r5486 at lenny: sbehnel | 2010-02-28 13:45:43 +0100
enable various EXSLT functions in XPath with libxslt 1.1.26+
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sun Feb 28 13:45:51 2010
@@ -8,6 +8,9 @@
Features added
--------------
+* During regular XPath evaluation, various EXSLT functions are
+ available within their namespace when using libxslt 1.1.26 or later.
+
* Support passing a readily configured logger instance into
``PyErrorLog``, instead of a logger name.
Modified: lxml/trunk/src/lxml/etree_defs.h
==============================================================================
--- lxml/trunk/src/lxml/etree_defs.h (original)
+++ lxml/trunk/src/lxml/etree_defs.h Sun Feb 28 13:45:51 2010
@@ -131,6 +131,13 @@
# define xmlSchematronSetValidStructuredErrors(ctxt, errorfunc, data)
#endif
+/* libexslt 1.1.25+ supports EXSLT functions in XPath */
+#if LIBXSLT_VERSION < 10125
+#define exsltDateXpathCtxtRegister(ctxt, prefix)
+#define exsltSetsXpathCtxtRegister(ctxt, prefix)
+#define exsltMathXpathCtxtRegister(ctxt, prefix)
+#define exsltStrXpathCtxtRegister(ctxt, prefix)
+#endif
/* work around MSDEV 6.0 */
#if (_MSC_VER == 1200) && (WINVER < 0x0500)
Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original)
+++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Sun Feb 28 13:45:51 2010
@@ -566,6 +566,35 @@
def test_xpath_elementtree_error(self):
self.assertRaises(ValueError, etree.XPath('*'), etree.ElementTree())
+
+class ETreeXPathExsltTestCase(HelperTestCase):
+ "Tests for the EXSLT support in XPath (requires libxslt 1.1.25+)"
+
+ NSMAP = dict(
+ date = "http://exslt.org/dates-and-times",
+ math = "http://exslt.org/math",
+ set = "http://exslt.org/sets",
+ str = "http://exslt.org/strings",
+ )
+
+ def test_xpath_exslt_functions_date(self):
+ tree = self.parse('2009-11-122008-12-11')
+
+ match_dates = tree.xpath('//b[date:year(string()) = 2009]',
+ namespaces=self.NSMAP)
+ self.assertTrue(match_dates, str(match_dates))
+ self.assertEquals(len(match_dates), 1, str(match_dates))
+ self.assertEquals(match_dates[0].text, '2009-11-12')
+
+ def test_xpath_exslt_functions_strings(self):
+ tree = self.parse('2009-11-122008-12-11')
+
+ match_date = tree.xpath('str:replace(//b[1], "-", "*")',
+ namespaces=self.NSMAP)
+ self.assertTrue(match_date, str(match_date))
+ self.assertEquals(match_date, '2009*11*12')
+
+
class ETreeETXPathClassTestCase(HelperTestCase):
"Tests for the ETXPath class"
def test_xpath_compile_ns(self):
@@ -703,11 +732,13 @@
" lxml.etree.XPathResultError")
xpath.__doc__ = xpath.__doc__.replace(" exactly 2 arguments",
" exactly 2 positional arguments")
-
+
def test_suite():
suite = unittest.TestSuite()
suite.addTests([unittest.makeSuite(ETreeXPathTestCase)])
suite.addTests([unittest.makeSuite(ETreeXPathClassTestCase)])
+ if etree.LIBXSLT_COMPILED_VERSION >= (1,1,25):
+ suite.addTests([unittest.makeSuite(ETreeXPathExsltTestCase)])
suite.addTests([unittest.makeSuite(ETreeETXPathClassTestCase)])
suite.addTests([doctest.DocTestSuite()])
suite.addTests(
Modified: lxml/trunk/src/lxml/xpath.pxd
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxd (original)
+++ lxml/trunk/src/lxml/xpath.pxd Sun Feb 28 13:45:51 2010
@@ -55,6 +55,7 @@
tree.xmlDoc* doc
tree.xmlNode* node
tree.xmlDict* dict
+ tree.xmlHashTable* nsHash
char* function
char* functionURI
# actually signature is void (*error)(void*, xmlError*)
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Sun Feb 28 13:45:51 2010
@@ -65,6 +65,7 @@
self._register_context(doc)
self.registerGlobalNamespaces()
self.registerGlobalFunctions(self._xpathCtxt, _register_xpath_function)
+ self.registerExsltFunctions()
if self._variables is not None:
self.registerVariables(self._variables)
@@ -75,6 +76,17 @@
xpath.xmlXPathRegisteredVariablesCleanup(self._xpathCtxt)
self._cleanup_context()
+ cdef void registerExsltFunctions(self):
+ cdef xpath.xmlXPathContext* ctxt = self._xpathCtxt
+ cdef int i
+ cdef char* c_href
+ if xslt.LIBXSLT_VERSION < 10125:
+ # we'd only execute dummy functions anyway
+ return
+ tree.xmlHashScan(
+ self._xpathCtxt.nsHash, _registerExsltFunctionsForNamespaces,
+ self._xpathCtxt)
+
cdef registerVariables(self, variable_dict):
for name, value in variable_dict.items():
name_utf = self._to_utf(name)
@@ -93,6 +105,20 @@
cdef void _setupDict(self, xpath.xmlXPathContext* xpathCtxt):
__GLOBAL_PARSER_CONTEXT.initXPathParserDict(xpathCtxt)
+cdef void _registerExsltFunctionsForNamespaces(
+ void* _c_href, void* _ctxt, char* c_prefix):
+ cdef char* c_href = _c_href
+ cdef xpath.xmlXPathContext* ctxt = _ctxt
+
+ if cstd.strcmp(c_href, xslt.EXSLT_DATE_NAMESPACE) == 0:
+ xslt.exsltDateXpathCtxtRegister(ctxt, c_prefix)
+ elif cstd.strcmp(c_href, xslt.EXSLT_SETS_NAMESPACE) == 0:
+ xslt.exsltSetsXpathCtxtRegister(ctxt, c_prefix)
+ elif cstd.strcmp(c_href, xslt.EXSLT_MATH_NAMESPACE) == 0:
+ xslt.exsltMathXpathCtxtRegister(ctxt, c_prefix)
+ elif cstd.strcmp(c_href, xslt.EXSLT_STRINGS_NAMESPACE) == 0:
+ xslt.exsltStrXpathCtxtRegister(ctxt, c_prefix)
+
cdef bint _XPATH_VERSION_WARNING_REQUIRED
if _LIBXML_VERSION_INT == 20627:
_XPATH_VERSION_WARNING_REQUIRED = 1
@@ -115,7 +141,8 @@
u"Use it at your own risk.")
self._error_log = _ErrorLog()
self._context = _XPathContext(namespaces, extensions,
- enable_regexp, None, smart_strings)
+ enable_regexp, None,
+ smart_strings)
if config.ENABLE_THREADING:
self._eval_lock = python.PyThread_allocate_lock()
if self._eval_lock is NULL:
@@ -309,7 +336,8 @@
extensions=None, regexp=True, smart_strings=True):
XPathElementEvaluator.__init__(
self, etree._context_node, namespaces=namespaces,
- extensions=extensions, regexp=regexp, smart_strings=smart_strings)
+ extensions=extensions, regexp=regexp,
+ smart_strings=smart_strings)
def __call__(self, _path, **_variables):
u"""__call__(self, _path, **_variables)
@@ -453,7 +481,7 @@
_find_namespaces = re.compile('({[^}]+})').findall
cdef class ETXPath(XPath):
- u"""ETXPath(self, path, extensions=None, regexp=True)
+ u"""ETXPath(self, path, extensions=None, regexp=True, smart_strings=True)
Special XPath class that supports the ElementTree {uri} notation for namespaces.
Note that this class does not accept the ``namespace`` keyword
@@ -461,7 +489,8 @@
string. Smart strings will be returned for string results unless
you pass ``smart_strings=False``.
"""
- def __init__(self, path, *, extensions=None, regexp=True, smart_strings=True):
+ def __init__(self, path, *, extensions=None, regexp=True,
+ smart_strings=True):
path, namespaces = self._nsextract_path(path)
XPath.__init__(self, path, namespaces=namespaces,
extensions=extensions, regexp=regexp,
Modified: lxml/trunk/src/lxml/xslt.pxd
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxd (original)
+++ lxml/trunk/src/lxml/xslt.pxd Sun Feb 28 13:45:51 2010
@@ -161,3 +161,15 @@
cdef extern from "libexslt/exslt.h":
cdef void exsltRegisterAll() nogil
+
+ # libexslt 1.1.25+
+ char* EXSLT_DATE_NAMESPACE
+ char* EXSLT_SETS_NAMESPACE
+ char* EXSLT_MATH_NAMESPACE
+ char* EXSLT_STRINGS_NAMESPACE
+
+ cdef int exsltDateXpathCtxtRegister(xmlXPathContext* ctxt, char* prefix)
+ cdef int exsltSetsXpathCtxtRegister(xmlXPathContext* ctxt, char* prefix)
+ cdef int exsltMathXpathCtxtRegister(xmlXPathContext* ctxt, char* prefix)
+ cdef int exsltStrXpathCtxtRegister(xmlXPathContext* ctxt, char* prefix)
+