From jholg at codespeak.net Mon Feb 1 22:13:32 2010 From: jholg at codespeak.net (jholg at codespeak.net) Date: Mon, 1 Feb 2010 22:13:32 +0100 (CET) Subject: [Lxml-checkins] r71046 - in lxml/trunk/src/lxml: isoschematron tests Message-ID: <20100201211332.D0A2116801B@codespeak.net> Author: jholg Date: Mon Feb 1 22:13:32 2010 New Revision: 71046 Modified: lxml/trunk/src/lxml/isoschematron/__init__.py lxml/trunk/src/lxml/tests/test_isoschematron.py Log: Changes: * stylesheet_params now raises error for None args, with test * Schematron class docstring update Modified: lxml/trunk/src/lxml/isoschematron/__init__.py ============================================================================== --- lxml/trunk/src/lxml/isoschematron/__init__.py (original) +++ lxml/trunk/src/lxml/isoschematron/__init__.py Mon Feb 1 22:13:32 2010 @@ -69,10 +69,15 @@ def stylesheet_params(**kwargs): """Convert keyword args to a dictionary of stylesheet parameters. - Conversion follows these rules: + XSL stylesheet parameters must be XPath expressions, i.e.: + * string expressions, like "'5'" + * simple (number) expressions, like "5" + * valid XPath expressions, like "/a/b/text()" + This function converts native Python keyword arguments to stylesheet + parameters following these rules: If an arg is a string wrap it with XSLT.strparam(). If an arg is an XPath object use its path string. - If arg is None ignore the parameter. + If arg is None raise TypeError. Else convert arg to string. """ result = {} @@ -80,7 +85,7 @@ if isinstance(val, basestring): val = _etree.XSLT.strparam(val) elif val is None: - continue + raise TypeError('None not allowed as a stylesheet parameter') elif not isinstance(val, _etree.XPath): val = unicode(val) result[key] = val @@ -93,14 +98,11 @@ stylesheet arguments. kwargsDict entries with a value of None are ignored. 
""" - if paramsDict: - # beware of changing mutable default arg - paramsDict = dict(paramsDict) - for k, v in kwargsDict.items(): - if v is not None: # None values do not override - paramsDict[k] = v - else: - paramsDict = kwargsDict + # beware of changing mutable default arg + paramsDict = dict(paramsDict) + for k, v in kwargsDict.items(): + if v is not None: # None values do not override + paramsDict[k] = v paramsDict = stylesheet_params(**paramsDict) return paramsDict @@ -122,9 +124,12 @@ The ``include`` and ``expand`` keyword arguments can be used to switch off steps 1) and 2). - To set parameters for steps 1), 2) and 3) hand dictionaries containing xslt - parameters to the keyword arguments ``include_params``, ``expand_params`` - or ``compile_params``. + To set parameters for steps 1), 2) and 3) hand parameter dictionaries to the + keyword arguments ``include_params``, ``expand_params`` or + ``compile_params``. + For convenience, the compile-step parameter ``phase`` is also exposed as a + keyword argument ``phase``. This takes precedence if the parameter is also + given in the parameter dictionary. If ``store_schematron`` is set to True, the (included-and-expanded) schematron document tree is stored and available through the ``schematron`` property. 
Modified: lxml/trunk/src/lxml/tests/test_isoschematron.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_isoschematron.py (original) +++ lxml/trunk/src/lxml/tests/test_isoschematron.py Mon Feb 1 22:13:32 2010 @@ -762,6 +762,21 @@ self.assert_(relaxng(tree_invalid), relaxng.error_log) self.assert_(not schematron(tree_invalid)) + def test_schematron_invalid_args(self): + schema = self.parse('''\ + + + mandatory number_of_entries tests + + [ERROR] number_of_entries () must equal the number of entries/entry elements () + + + +''') + # handing phase as keyword arg will *not* raise the type error + self.assertRaises(TypeError, isoschematron.Schematron, schema, + compile_params={'phase': None}) + #TODO: test xslt parameters for inclusion, expand & compile steps (?) From jholg at codespeak.net Wed Feb 3 15:30:10 2010 From: jholg at codespeak.net (jholg at codespeak.net) Date: Wed, 3 Feb 2010 15:30:10 +0100 (CET) Subject: [Lxml-checkins] r71085 - in lxml/trunk/src/lxml: . tests Message-ID: <20100203143010.0B7F81683D1@codespeak.net> Author: jholg Date: Wed Feb 3 15:30:10 2010 New Revision: 71085 Modified: lxml/trunk/src/lxml/lxml.objectify.pyx lxml/trunk/src/lxml/tests/test_objectify.py Log: Bug #509504: Fixed objectify pickle support, adding support for ObjectifiedElement subclasses. 
Modified: lxml/trunk/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.objectify.pyx (original) +++ lxml/trunk/src/lxml/lxml.objectify.pyx Wed Feb 3 15:30:10 2010 @@ -151,6 +151,10 @@ else: return textOf(self._c_node) or u'' + # pickle support for objectified Element + def __reduce__(self): + return (fromstring, (etree.tostring(self),)) + property text: def __get__(self): return textOf(self._c_node) @@ -1359,31 +1363,24 @@ ################################################################################ -# Pickle support +# Pickle support for objectified ElementTree def __unpickleElementTree(data): return etree.ElementTree(fromstring(data)) -cdef _setupPickle(elementReduceFunction, elementTreeReduceFunction): +cdef _setupPickle(elementTreeReduceFunction): if python.IS_PYTHON3: import copyreg else: import copy_reg as copyreg - copyreg.constructor(fromstring) - copyreg.constructor(__unpickleElementTree) - copyreg.pickle(ObjectifiedElement, - elementReduceFunction, fromstring) copyreg.pickle(etree._ElementTree, elementTreeReduceFunction, __unpickleElementTree) -def pickleReduceElement(obj): - return (fromstring, (etree.tostring(obj),)) - def pickleReduceElementTree(obj): return (__unpickleElementTree, (etree.tostring(obj),)) -_setupPickle(pickleReduceElement, pickleReduceElementTree) -del pickleReduceElement, pickleReduceElementTree +_setupPickle(pickleReduceElementTree) +del pickleReduceElementTree ################################################################################ # Element class lookup Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Wed Feb 3 15:30:10 2010 @@ -2328,6 +2328,43 @@ etree.tostring(new_tree), etree.tostring(tree)) + def test_pickle_intelement(self): + 
self._test_pickle('42') + self._test_pickle(objectify.DataElement(42)) + + def test_pickle_floattelement(self): + self._test_pickle('42.0') + self._test_pickle(objectify.DataElement(42.0)) + + def test_pickle_strelement(self): + self._test_pickle('Pickle me!') + self._test_pickle(objectify.DataElement('Pickle me!')) + + def test_pickle_boolelement(self): + self._test_pickle('true') + self._test_pickle('false') + self._test_pickle(objectify.DataElement(True)) + self._test_pickle(objectify.DataElement(False)) + + def test_pickle_noneelement(self): + self._test_pickle(''' +''') + self._test_pickle(objectify.DataElement(None)) + + def _test_pickle(self, stringOrElt): + import pickle + if isinstance(stringOrElt, (etree._Element, etree._ElementTree)): + elt = stringOrElt + else: + elt = self.XML(stringOrElt) + out = BytesIO() + pickle.dump(elt, out) + + new_elt = pickle.loads(out.getvalue()) + self.assertEquals( + etree.tostring(new_elt), + etree.tostring(elt)) + # E-Factory tests, need to use sub-elements as root element is always # type-looked-up as ObjectifiedElement (no annotations) def test_efactory_int(self): From jholg at codespeak.net Thu Feb 4 00:16:33 2010 From: jholg at codespeak.net (jholg at codespeak.net) Date: Thu, 4 Feb 2010 00:16:33 +0100 (CET) Subject: [Lxml-checkins] r71090 - in lxml/trunk/src/lxml: isoschematron tests Message-ID: <20100203231633.DF7AC1683C5@codespeak.net> Author: jholg Date: Thu Feb 4 00:16:33 2010 New Revision: 71090 Modified: lxml/trunk/src/lxml/isoschematron/__init__.py lxml/trunk/src/lxml/tests/test_isoschematron.py Log: Expose extract, include, expand, compile steps and the xpath for error selection from the result document to allow for customization. 
Modified: lxml/trunk/src/lxml/isoschematron/__init__.py ============================================================================== --- lxml/trunk/src/lxml/isoschematron/__init__.py (original) +++ lxml/trunk/src/lxml/isoschematron/__init__.py Thu Feb 4 00:16:33 2010 @@ -5,6 +5,7 @@ import os.path from lxml import etree as _etree # due to validator __init__ signature + # some compat stuff, borrowed from lxml.html try: bytes = __builtins__["bytes"] @@ -23,6 +24,12 @@ basestring = str +__all__ = ['extract_xsd', 'extract_rng', 'iso_dsdl_include', + 'iso_abstract_expand', 'iso_svrl_for_xslt1', + 'svrl_validation_errors', 'schematron_schema_valid', + 'stylesheet_params', 'Schematron'] + + # some namespaces #FIXME: Maybe lxml should provide a dedicated place for common namespace #FIXME: definitions? @@ -39,9 +46,9 @@ # the iso-schematron skeleton implementation steps aka xsl transformations -extract_from_xsd = _etree.XSLT(_etree.parse( +extract_xsd = _etree.XSLT(_etree.parse( os.path.join(_resources_dir, 'xsl', 'XSD2Schtrn.xsl'))) -extract_from_rng = _etree.XSLT(_etree.parse( +extract_rng = _etree.XSLT(_etree.parse( os.path.join(_resources_dir, 'xsl', 'RNG2Schtrn.xsl'))) iso_dsdl_include = _etree.XSLT(_etree.parse( os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1', @@ -52,9 +59,6 @@ iso_svrl_for_xslt1 = _etree.XSLT(_etree.parse( os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1', 'iso_svrl_for_xslt1.xsl'))) -# if you want to use another "meta-stylesheet" for compilation to xslt, plug it -# here -iso_compile2xslt = iso_svrl_for_xslt1 # svrl result accessors @@ -117,10 +121,10 @@ implementation, the validator is created as an XSLT 1.0 stylesheet using these steps: - 0) (Extract from XML Schema or RelaxNG schema) - 1) Process inclusions - 2) Process abstract patterns - 3) Compile the schematron schema to XSLT + 0) (Extract from XML Schema or RelaxNG schema) + 1) Process inclusions + 2) Process abstract patterns + 3) Compile the schematron schema to XSLT 
The ``include`` and ``expand`` keyword arguments can be used to switch off steps 1) and 2). @@ -178,15 +182,43 @@ 1 """ + # libxml2 error categorization for validation errors _domain = _etree.ErrorDomains.SCHEMATRONV _level = _etree.ErrorLevels.ERROR _error_type = _etree.ErrorTypes.SCHEMATRONV_ASSERT + def _extract(self, element): + """Extract embedded schematron schema from non-schematron host schema. + This method will only be called by __init__ if the given schema document + is not a schematron schema by itself. + Must return a schematron schema document tree or None. + """ + schematron = None + if element.tag == _xml_schema_root: + schematron = self._extract_xsd(element) + elif element.nsmap[element.prefix] == RELAXNG_NS: + # RelaxNG does not have a single unique root element + schematron = self._extract_rng(element) + return schematron + + # customization points + # etree.XSLT objects that provide the extract, include, expand, compile + # steps + _extract_xsd = extract_xsd + _extract_rng = extract_rng + _include = iso_dsdl_include + _expand = iso_abstract_expand + _compile = iso_svrl_for_xslt1 + # etree.XPath object that determines input document validity when applied to + # the svrl result report; must return a list of result elements (empty if + # valid) + _validation_errors = svrl_validation_errors + def __init__(self, etree=None, file=None, include=True, expand=True, include_params={}, expand_params={}, compile_params={}, store_schematron=False, store_xslt=False, store_report=False, phase=None): - super(self.__class__, self).__init__() + super(Schematron, self).__init__() self._store_report = store_report self._schematron = None @@ -210,20 +242,17 @@ raise ValueError("Empty tree") if root.tag == _schematron_root: schematron = root - elif root.tag == _xml_schema_root: - schematron = extract_from_xsd(root) - elif root.nsmap[root.prefix] == RELAXNG_NS: - # RelaxNG does not have a single unique root element - schematron = extract_from_rng(root) else: + 
schematron = self._extract(root) + if schematron is None: raise _etree.SchematronParseError( "Document is not a schematron schema or schematron-extractable") # perform the iso-schematron skeleton implementation steps to get a # validating xslt if include: - schematron = iso_dsdl_include(schematron, **include_params) + schematron = self._include(schematron, **include_params) if expand: - schematron = iso_abstract_expand(schematron, **expand_params) + schematron = self._expand(schematron, **expand_params) if not schematron_schema_valid(schematron): raise _etree.SchematronParseError( "invalid schematron schema: %s" % @@ -233,7 +262,7 @@ # add new compile keyword args here if exposing them compile_kwargs = {'phase': phase} compile_params = _stylesheet_param_dict(compile_params, compile_kwargs) - validator_xslt = iso_compile2xslt(schematron, **compile_params) + validator_xslt = self._compile(schematron, **compile_params) if store_xslt: self._validator_xslt = validator_xslt self._validator = _etree.XSLT(validator_xslt) @@ -247,7 +276,7 @@ result = self._validator(etree) if self._store_report: self._validation_report = result - errors = svrl_validation_errors(result) + errors = self._validation_errors(result) if errors: if isinstance(etree, _etree._Element): fname = etree.getroottree().docinfo.URL or '' Modified: lxml/trunk/src/lxml/tests/test_isoschematron.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_isoschematron.py (original) +++ lxml/trunk/src/lxml/tests/test_isoschematron.py Thu Feb 4 00:16:33 2010 @@ -777,6 +777,66 @@ self.assertRaises(TypeError, isoschematron.Schematron, schema, compile_params={'phase': None}) + def test_schematron_customization(self): + class MySchematron(isoschematron.Schematron): + def _extract(self, root): + schematron = (root.xpath( + '//sch:schema', + namespaces={'sch': "http://purl.oclc.org/dsdl/schematron"}) + or [None])[0] + return schematron + + def _include(self, 
schematron, **kwargs): + raise RuntimeError('inclusion unsupported') + + def _expand(self, schematron, **kwargs): + raise RuntimeError('expansion unsupported') + + def _validation_errors(self, validationReport): + valid = etree.XPath( + 'count(//svrl:successful-report[@flag="critical"])=1', + namespaces={'svrl': isoschematron.SVRL_NS})( + validationReport) + if valid: + return [] + error = etree.Element('Error') + error.text = 'missing critical condition report' + return [error] + + tree_valid = self.parse('') + tree_invalid = self.parse('') + schema = self.parse('''\ + + + + Open Model + + BBB element must be present + CCC element must be present + + + + Closed model" + + BBB element must be present + CCC element must be present + Only BBB and CCC children must be present + + + + +''') + # check if overridden _include is run + self.assertRaises(RuntimeError, MySchematron, schema, store_report=True) + # check if overridden _expand is run + self.assertRaises(RuntimeError, MySchematron, schema, store_report=True, + include=False) + + schema = MySchematron(schema, store_report=True, include=False, + expand=False) + self.assert_(schema.validate(tree_valid)) + self.assert_(not schema.validate(tree_invalid)) + #TODO: test xslt parameters for inclusion, expand & compile steps (?) From scoder at codespeak.net Mon Feb 8 13:14:06 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 8 Feb 2010 13:14:06 +0100 (CET) Subject: [Lxml-checkins] r71165 - in lxml/trunk: . 
doc src/lxml Message-ID: <20100208121406.F31BC282BD8@codespeak.net> Author: scoder Date: Mon Feb 8 13:14:05 2010 New Revision: 71165 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/cssselect.txt lxml/trunk/src/lxml/cssselect.py Log: r5463 at lenny: sbehnel | 2010-02-08 11:59:46 +0100 fix typos in exception messages and docs Modified: lxml/trunk/doc/cssselect.txt ============================================================================== --- lxml/trunk/doc/cssselect.txt (original) +++ lxml/trunk/doc/cssselect.txt Mon Feb 8 13:14:05 2010 @@ -74,8 +74,8 @@ * UI states: ``:enabled``, ``:disabled``, ``:indeterminate`` (``:checked`` and ``:unchecked`` *are* available) -Also, none of the psuedo-elements apply, because the selector only -returns elements and psuedo-elements select portions of text, like +Also, none of the pseudo-elements apply, because the selector only +returns elements and pseudo-elements select portions of text, like ``::first-line``. Modified: lxml/trunk/src/lxml/cssselect.py ============================================================================== --- lxml/trunk/src/lxml/cssselect.py (original) +++ lxml/trunk/src/lxml/cssselect.py Mon Feb 8 13:14:05 2010 @@ -128,11 +128,11 @@ sel_path = self.selector.xpath() if self.name in self.unsupported: raise ExpressionError( - "The psuedo-class %r is not supported" % self.name) + "The pseudo-class %r is not supported" % self.name) method = '_xpath_' + self.name.replace('-', '_') if not hasattr(self, method): raise ExpressionError( - "The psuedo-class %r is unknown" % self.name) + "The pseudo-class %r is unknown" % self.name) method = getattr(self, method) return method(sel_path, self.expr) @@ -240,11 +240,11 @@ el_xpath = self.element.xpath() if self.ident in self.unsupported: raise ExpressionError( - "The psuedo-class %r is unsupported" % self.ident) + "The pseudo-class %r is unsupported" % self.ident) method = '_xpath_' + self.ident.replace('-', '_') if not hasattr(self, method): raise 
ExpressionError( - "The psuedo-class %r is unknown" % self.ident) + "The pseudo-class %r is unknown" % self.ident) method = getattr(self, method) el_xpath = method(el_xpath) return el_xpath From scoder at codespeak.net Sun Feb 28 10:25:58 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 28 Feb 2010 10:25:58 +0100 (CET) Subject: [Lxml-checkins] r71545 - in lxml/trunk: . src/lxml Message-ID: <20100228092558.07520282BDC@codespeak.net> Author: scoder Date: Sun Feb 28 10:25:56 2010 New Revision: 71545 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/cssselect.py Log: r5469 at lenny: sbehnel | 2010-02-08 14:20:44 +0100 cleanup Modified: lxml/trunk/src/lxml/cssselect.py ============================================================================== --- lxml/trunk/src/lxml/cssselect.py (original) +++ lxml/trunk/src/lxml/cssselect.py Sun Feb 28 10:25:56 2010 @@ -834,17 +834,17 @@ ## Tokenizing ############################################################ -_whitespace_re = re.compile(r'\s+', re.UNICODE) +_match_whitespace = re.compile(r'\s+', re.UNICODE).match -_comment_re = re.compile(r'/\*.*?\*/', re.DOTALL) +_replace_comments = re.compile(r'/\*.*?\*/', re.DOTALL).sub -_count_re = re.compile(r'[+-]?\d*n(?:[+-]\d+)?') +_match_count_number = re.compile(r'[+-]?\d*n(?:[+-]\d+)?').match def tokenize(s): pos = 0 - s = _comment_re.sub('', s) + s = _replace_comments('', s) while 1: - match = _whitespace_re.match(s, pos=pos) + match = _match_whitespace(s, pos=pos) if match: preceding_whitespace_pos = pos pos = match.end() @@ -852,7 +852,7 @@ preceding_whitespace_pos = 0 if pos >= len(s): return - match = _count_re.match(s, pos=pos) + match = _match_count_number(s, pos=pos) if match and match.group() != 'n': sym = s[pos:match.end()] yield Symbol(sym, pos) From scoder at codespeak.net Sun Feb 28 10:26:05 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 28 Feb 2010 10:26:05 +0100 (CET) Subject: [Lxml-checkins] r71546 - in 
lxml/trunk: . src/lxml Message-ID: <20100228092605.85840282BDC@codespeak.net> Author: scoder Date: Sun Feb 28 10:26:01 2010 New Revision: 71546 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.etree.pyx Log: r5470 at lenny: sbehnel | 2010-02-08 19:50:50 +0100 added module level __all__ to lxml.etree, minor cleanup Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Sun Feb 28 10:26:01 2010 @@ -4,6 +4,39 @@ __docformat__ = u"restructuredtext en" +__all__ = [ + 'AttributeBasedElementClassLookup', 'C14NError', 'CDATA', + 'Comment', 'CommentBase', 'CustomElementClassLookup', 'DEBUG', + 'DTD', 'DTDError', 'DTDParseError', 'DTDValidateError', + 'DocumentInvalid', 'ETCompatXMLParser', 'ETXPath', 'Element', + 'ElementBase', 'ElementClassLookup', 'ElementDefaultClassLookup', + 'ElementNamespaceClassLookup', 'ElementTree', 'Entity', 'EntityBase', + 'Error', 'ErrorDomains', 'ErrorLevels', 'ErrorTypes', 'Extension', + 'FallbackElementClassLookup', 'FunctionNamespace', 'HTML', + 'HTMLParser', 'LIBXML_COMPILED_VERSION', 'LIBXML_VERSION', + 'LIBXSLT_COMPILED_VERSION', 'LIBXSLT_VERSION', 'LXML_VERSION', + 'LxmlError', 'LxmlRegistryError', 'LxmlSyntaxError', + 'NamespaceRegistryError', 'PI', 'PIBase', 'ParseError', + 'ParserBasedElementClassLookup', 'ParserError', 'ProcessingInstruction', + 'PyErrorLog', 'PythonElementClassLookup', 'QName', 'RelaxNG', + 'RelaxNGError', 'RelaxNGErrorTypes', 'RelaxNGParseError', + 'RelaxNGValidateError', 'Resolver', 'Schematron', 'SchematronError', + 'SchematronParseError', 'SchematronValidateError', 'SerialisationError', + 'SubElement', 'TreeBuilder', 'XInclude', 'XIncludeError', 'XML', + 'XMLDTDID', 'XMLID', 'XMLParser', 'XMLSchema', 'XMLSchemaError', + 'XMLSchemaParseError', 'XMLSchemaValidateError', 'XMLSyntaxError', + 'XPath', 'XPathDocumentEvaluator', 'XPathError', 
'XPathEvalError', + 'XPathEvaluator', 'XPathFunctionError', 'XPathResultError', + 'XPathSyntaxError', 'XSLT', 'XSLTAccessControl', 'XSLTApplyError', + 'XSLTError', 'XSLTExtension', 'XSLTExtensionError', 'XSLTParseError', + 'XSLTSaveError', 'cleanup_namespaces', 'clear_error_log', 'dump', + 'fromstring', 'fromstringlist', 'get_default_parser', 'iselement', + 'iterparse', 'iterwalk', 'parse', 'parseid', 'set_default_parser', + 'set_element_class_lookup', 'strip_attributes', 'strip_elements', + 'strip_tags', 'tostring', 'tostringlist', 'tounicode', + 'use_global_python_log' + ] + cimport tree, python, config from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement, _getNs from python cimport callable, _cstr, _isString @@ -127,11 +160,8 @@ else: self.error_log = error_log.copy() -cdef object _LxmlError -_LxmlError = LxmlError - -cdef object error_super_init -error_super_init = Error.__init__ +cdef object _LxmlError = LxmlError +cdef object error_super_init = Error.__init__ # superclass for all syntax errors From scoder at codespeak.net Sun Feb 28 10:26:14 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 28 Feb 2010 10:26:14 +0100 (CET) Subject: [Lxml-checkins] r71547 - in lxml/trunk: . 
src/lxml Message-ID: <20100228092614.E7A0D282BE1@codespeak.net> Author: scoder Date: Sun Feb 28 10:26:12 2010 New Revision: 71547 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/extensions.pxi Log: r5471 at lenny: sbehnel | 2010-02-28 09:11:32 +0100 code cleanup Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Sun Feb 28 10:26:12 2010 @@ -556,7 +556,7 @@ result = [] if xpathObj.nodesetval is NULL: return result - for i from 0 <= i < xpathObj.nodesetval.nodeNr: + for i in range(xpathObj.nodesetval.nodeNr): c_node = xpathObj.nodesetval.nodeTab[i] _unpackNodeSetEntry(result, c_node, doc, smart_string, is_fragment) From scoder at codespeak.net Sun Feb 28 10:26:17 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 28 Feb 2010 10:26:17 +0100 (CET) Subject: [Lxml-checkins] r71548 - in lxml/trunk: . src/lxml/isoschematron Message-ID: <20100228092617.65F72282BE1@codespeak.net> Author: scoder Date: Sun Feb 28 10:26:15 2010 New Revision: 71548 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/isoschematron/__init__.py Log: r5472 at lenny: sbehnel | 2010-02-28 10:24:44 +0100 Py3 fix Modified: lxml/trunk/src/lxml/isoschematron/__init__.py ============================================================================== --- lxml/trunk/src/lxml/isoschematron/__init__.py (original) +++ lxml/trunk/src/lxml/isoschematron/__init__.py Sun Feb 28 10:26:15 2010 @@ -2,6 +2,7 @@ of the pure-xslt 'skeleton' implementation. 
""" +import sys import os.path from lxml import etree as _etree # due to validator __init__ signature @@ -235,9 +236,9 @@ root = etree.getroot() elif file is not None: root = _etree.parse(file).getroot() - except Exception, e: + except Exception: raise _etree.SchematronParseError( - "No tree or file given: %s" % e) + "No tree or file given: %s" % sys.exc_info()[1]) if root is None: raise ValueError("Empty tree") if root.tag == _schematron_root: From scoder at codespeak.net Sun Feb 28 10:31:58 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 28 Feb 2010 10:31:58 +0100 (CET) Subject: [Lxml-checkins] r71549 - in lxml/trunk/src/lxml: . tests Message-ID: <20100228093158.1F1E6282BDC@codespeak.net> Author: scoder Date: Sun Feb 28 10:31:57 2010 New Revision: 71549 Modified: lxml/trunk/src/lxml/extensions.pxi lxml/trunk/src/lxml/tests/test_xpathevaluator.py lxml/trunk/src/lxml/xpath.pxi Log: fix bug #502963: crash when reading smart XPath strings from a document other than the original context document Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Sun Feb 28 10:31:57 2010 @@ -40,6 +40,7 @@ cdef bint _build_smart_strings # for exception handling and temporary reference keeping: cdef _TempStore _temp_refs + cdef set _temp_documents cdef _ExceptionContext _exc def __init__(self, namespaces, extensions, enable_regexp, @@ -91,6 +92,7 @@ self._extensions = extensions self._namespaces = namespaces self._temp_refs = _TempStore() + self._temp_documents = set() self._build_smart_strings = build_smart_strings if enable_regexp: @@ -307,7 +309,8 @@ cdef _release_temp_refs(self): u"Free temporarily referenced objects from this context." self._temp_refs.clear() - + self._temp_documents.clear() + cdef _hold(self, obj): u"""A way to temporarily hold references to nodes in the evaluator. 
@@ -318,7 +321,7 @@ cdef _Element element if isinstance(obj, _Element): self._temp_refs.add(obj) - self._temp_refs.add((<_Element>obj)._doc) + self._temp_documents.add((<_Element>obj)._doc) return elif _isString(obj) or not python.PySequence_Check(obj): return @@ -327,7 +330,19 @@ #print "Holding element:", element._c_node self._temp_refs.add(o) #print "Holding document:", element._doc._c_doc - self._temp_refs.add((<_Element>o)._doc) + self._temp_documents.add((<_Element>o)._doc) + + cdef _Document _findDocumentForNode(self, xmlNode* c_node): + u"""If an XPath expression returns an element from a different + document than the current context document, we call this to + see if it was possibly created by an extension and is a known + document instance. + """ + cdef _Document doc + for doc in self._temp_documents: + if doc._c_doc is c_node.doc: + return doc + return None def Extension(module, function_mapping=None, *, ns=None): u"""Extension(module, function_mapping=None, ns=None) @@ -520,18 +535,18 @@ return xpath.xmlXPathWrapNodeSet(resultSet) cdef object _unwrapXPathObject(xpath.xmlXPathObject* xpathObj, - _Document doc, bint smart_string): + _Document doc, _BaseContext context): if xpathObj.type == xpath.XPATH_UNDEFINED: raise XPathResultError, u"Undefined xpath result" elif xpathObj.type == xpath.XPATH_NODESET: - return _createNodeSetResult(xpathObj, doc, smart_string, 0) + return _createNodeSetResult(xpathObj, doc, context) elif xpathObj.type == xpath.XPATH_BOOLEAN: return xpathObj.boolval elif xpathObj.type == xpath.XPATH_NUMBER: return xpathObj.floatval elif xpathObj.type == xpath.XPATH_STRING: stringval = funicode(xpathObj.stringval) - if smart_string: + if context._build_smart_strings: stringval = _elementStringResultFactory( stringval, None, None, 0) return stringval @@ -544,12 +559,12 @@ elif xpathObj.type == xpath.XPATH_USERS: raise NotImplementedError, u"XPATH_USERS" elif xpathObj.type == xpath.XPATH_XSLT_TREE: - return _createNodeSetResult(xpathObj, 
doc, smart_string, 1) + return _createNodeSetResult(xpathObj, doc, context) else: raise XPathResultError, u"Unknown xpath result %s" % unicode(xpathObj.type) cdef object _createNodeSetResult(xpath.xmlXPathObject* xpathObj, _Document doc, - bint smart_string, bint is_fragment): + _BaseContext context): cdef xmlNode* c_node cdef int i cdef list result @@ -558,12 +573,12 @@ return result for i in range(xpathObj.nodesetval.nodeNr): c_node = xpathObj.nodesetval.nodeTab[i] - _unpackNodeSetEntry(result, c_node, doc, - smart_string, is_fragment) + _unpackNodeSetEntry(result, c_node, doc, context, + xpathObj.type == xpath.XPATH_XSLT_TREE) return result cdef _unpackNodeSetEntry(list results, xmlNode* c_node, _Document doc, - bint smart_string, bint is_fragment): + _BaseContext context, bint is_fragment): cdef xmlNode* c_child cdef char* s if _isElement(c_node): @@ -573,13 +588,14 @@ # -> we store Python refs to these, so that is OK # XSLT: can it leak when merging trees from multiple sources? c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1) + # FIXME: call _instantiateElementFromXPath() instead? 
results.append( _fakeDocElementFactory(doc, c_node)) elif c_node.type == tree.XML_TEXT_NODE or \ c_node.type == tree.XML_CDATA_SECTION_NODE or \ c_node.type == tree.XML_ATTRIBUTE_NODE: results.append( - _buildElementStringResult(doc, c_node, smart_string)) + _buildElementStringResult(doc, c_node, context)) elif c_node.type == tree.XML_NAMESPACE_DECL: s = (c_node).href if s is NULL: @@ -598,8 +614,7 @@ if is_fragment: c_child = c_node.children while c_child is not NULL: - _unpackNodeSetEntry(results, c_child, doc, - smart_string, is_fragment) + _unpackNodeSetEntry(results, c_child, doc, context, 0) c_child = c_child.next elif c_node.type == tree.XML_XINCLUDE_START or \ c_node.type == tree.XML_XINCLUDE_END: @@ -617,6 +632,20 @@ xpathObj.nodesetval = NULL xpath.xmlXPathFreeObject(xpathObj) +cdef _Element _instantiateElementFromXPath(xmlNode* c_node, _Document doc, + _BaseContext context): + # NOTE: this may copy the element - only call this when it can't leak + if c_node.doc != doc._c_doc and c_node.doc._private is NULL: + # not from the context document and not from a fake document + # either => may still be from a known document, e.g. one + # created by an extension function + doc = context._findDocumentForNode(c_node) + if doc is None: + # not from a known document at all! 
=> can only make a + # safety copy here + c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1) + return _fakeDocElementFactory(doc, c_node) + ################################################################################ # special str/unicode subclasses @@ -664,7 +693,7 @@ return uresult cdef object _buildElementStringResult(_Document doc, xmlNode* c_node, - bint smart_string): + _BaseContext context): cdef _Element parent = None cdef object attrname = None cdef xmlNode* c_element @@ -687,7 +716,7 @@ c_element = _previousElement(c_node) is_tail = c_element is not NULL - if not smart_string: + if not context._build_smart_strings: return value if c_element is NULL: @@ -697,12 +726,11 @@ c_element = c_element.parent if c_element is not NULL: - parent = _fakeDocElementFactory(doc, c_element) + parent = _instantiateElementFromXPath(c_element, doc, context) return _elementStringResultFactory( value, parent, attrname, is_tail) - ################################################################################ # callbacks for XPath/XSLT extension functions @@ -717,7 +745,7 @@ args = [] for i in range(nargs): obj = xpath.valuePop(ctxt) - o = _unwrapXPathObject(obj, doc, context._build_smart_strings) + o = _unwrapXPathObject(obj, doc, context) _freeXPathObject(obj) args.append(o) args.reverse() Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Sun Feb 28 10:31:57 2010 @@ -137,6 +137,24 @@ self.assertEquals(False, hasattr(results[0], 'getparent')) self.assertEquals(False, hasattr(results[0], 'attrname')) + def test_xpath_text_from_other_document(self): + xml_data = ''' + + v1 + v2 +
+ ''' + + def lookup(dummy, id): + return etree.XML(xml_data).xpath('id(%r)' % id) + functions = {(None, 'lookup') : lookup} + + root = etree.XML('') + values = root.xpath("lookup('k1')/value/text()", + extensions=functions) + self.assertEquals(['v1'], values) + self.assertEquals('value', values[0].getparent().tag) + def test_xpath_list_comment(self): tree = self.parse('') self.assertEquals([''], Modified: lxml/trunk/src/lxml/xpath.pxi ============================================================================== --- lxml/trunk/src/lxml/xpath.pxi (original) +++ lxml/trunk/src/lxml/xpath.pxi Sun Feb 28 10:31:57 2010 @@ -212,8 +212,7 @@ self._raise_eval_error() try: - result = _unwrapXPathObject(xpathObj, doc, - self._context._build_smart_strings) + result = _unwrapXPathObject(xpathObj, doc, self._context) finally: _freeXPathObject(xpathObj) self._context._release_temp_refs() From scoder at codespeak.net Sun Feb 28 10:34:35 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 28 Feb 2010 10:34:35 +0100 (CET) Subject: [Lxml-checkins] r71550 - in lxml/branch/lxml-2.2: . 
src/lxml src/lxml/tests Message-ID: <20100228093435.88B27282BDC@codespeak.net> Author: scoder Date: Sun Feb 28 10:34:33 2010 New Revision: 71550 Modified: lxml/branch/lxml-2.2/ (props changed) lxml/branch/lxml-2.2/CHANGES.txt lxml/branch/lxml-2.2/INSTALL.txt (props changed) lxml/branch/lxml-2.2/src/lxml/extensions.pxi lxml/branch/lxml-2.2/src/lxml/tests/test_xpathevaluator.py lxml/branch/lxml-2.2/src/lxml/xpath.pxi Log: trunk merge for bug #502963: fix crash when reading smart XPath strings from a document other than the original context document Modified: lxml/branch/lxml-2.2/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.2/CHANGES.txt (original) +++ lxml/branch/lxml-2.2/CHANGES.txt Sun Feb 28 10:34:33 2010 @@ -14,6 +14,9 @@ Bugs fixed ---------- +* Crash in XPath evaluation when reading smart strings from a document + other than the original context document. + * Support recent versions of html5lib by not requiring its ``XHTMLParser`` in ``htmlparser.py`` anymore. Modified: lxml/branch/lxml-2.2/src/lxml/extensions.pxi ============================================================================== --- lxml/branch/lxml-2.2/src/lxml/extensions.pxi (original) +++ lxml/branch/lxml-2.2/src/lxml/extensions.pxi Sun Feb 28 10:34:33 2010 @@ -40,6 +40,7 @@ cdef bint _build_smart_strings # for exception handling and temporary reference keeping: cdef _TempStore _temp_refs + cdef set _temp_documents cdef _ExceptionContext _exc def __init__(self, namespaces, extensions, enable_regexp, @@ -91,6 +92,7 @@ self._extensions = extensions self._namespaces = namespaces self._temp_refs = _TempStore() + self._temp_documents = set() self._build_smart_strings = build_smart_strings if enable_regexp: @@ -307,7 +309,8 @@ cdef _release_temp_refs(self): u"Free temporarily referenced objects from this context." 
self._temp_refs.clear() - + self._temp_documents.clear() + cdef _hold(self, obj): u"""A way to temporarily hold references to nodes in the evaluator. @@ -318,7 +321,7 @@ cdef _Element element if isinstance(obj, _Element): self._temp_refs.add(obj) - self._temp_refs.add((<_Element>obj)._doc) + self._temp_documents.add((<_Element>obj)._doc) return elif _isString(obj) or not python.PySequence_Check(obj): return @@ -327,7 +330,19 @@ #print "Holding element:", element._c_node self._temp_refs.add(o) #print "Holding document:", element._doc._c_doc - self._temp_refs.add((<_Element>o)._doc) + self._temp_documents.add((<_Element>o)._doc) + + cdef _Document _findDocumentForNode(self, xmlNode* c_node): + u"""If an XPath expression returns an element from a different + document than the current context document, we call this to + see if it was possibly created by an extension and is a known + document instance. + """ + cdef _Document doc + for doc in self._temp_documents: + if doc._c_doc is c_node.doc: + return doc + return None def Extension(module, function_mapping=None, *, ns=None): u"""Extension(module, function_mapping=None, ns=None) @@ -487,18 +502,18 @@ return xpath.xmlXPathWrapNodeSet(resultSet) cdef object _unwrapXPathObject(xpath.xmlXPathObject* xpathObj, - _Document doc, bint smart_string): + _Document doc, _BaseContext context): if xpathObj.type == xpath.XPATH_UNDEFINED: raise XPathResultError, u"Undefined xpath result" elif xpathObj.type == xpath.XPATH_NODESET: - return _createNodeSetResult(xpathObj, doc, smart_string, 0) + return _createNodeSetResult(xpathObj, doc, context) elif xpathObj.type == xpath.XPATH_BOOLEAN: return xpathObj.boolval elif xpathObj.type == xpath.XPATH_NUMBER: return xpathObj.floatval elif xpathObj.type == xpath.XPATH_STRING: stringval = funicode(xpathObj.stringval) - if smart_string: + if context._build_smart_strings: stringval = _elementStringResultFactory( stringval, None, 0, 0) return stringval @@ -511,12 +526,12 @@ elif xpathObj.type == 
xpath.XPATH_USERS: raise NotImplementedError, u"XPATH_USERS" elif xpathObj.type == xpath.XPATH_XSLT_TREE: - return _createNodeSetResult(xpathObj, doc, smart_string, 1) + return _createNodeSetResult(xpathObj, doc, context) else: raise XPathResultError, u"Unknown xpath result %s" % unicode(xpathObj.type) cdef object _createNodeSetResult(xpath.xmlXPathObject* xpathObj, _Document doc, - bint smart_string, bint is_fragment): + _BaseContext context): cdef xmlNode* c_node cdef int i cdef list result @@ -525,12 +540,12 @@ return result for i from 0 <= i < xpathObj.nodesetval.nodeNr: c_node = xpathObj.nodesetval.nodeTab[i] - _unpackNodeSetEntry(result, c_node, doc, - smart_string, is_fragment) + _unpackNodeSetEntry(result, c_node, doc, context, + xpathObj.type == xpath.XPATH_XSLT_TREE) return result cdef _unpackNodeSetEntry(list results, xmlNode* c_node, _Document doc, - bint smart_string, bint is_fragment): + _BaseContext context, bint is_fragment): cdef xmlNode* c_child cdef char* s if _isElement(c_node): @@ -540,13 +555,14 @@ # -> we store Python refs to these, so that is OK # XSLT: can it leak when merging trees from multiple sources? c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1) + # FIXME: call _instantiateElementFromXPath() instead? 
results.append( _fakeDocElementFactory(doc, c_node)) elif c_node.type == tree.XML_TEXT_NODE or \ c_node.type == tree.XML_CDATA_SECTION_NODE or \ c_node.type == tree.XML_ATTRIBUTE_NODE: results.append( - _buildElementStringResult(doc, c_node, smart_string)) + _buildElementStringResult(doc, c_node, context)) elif c_node.type == tree.XML_NAMESPACE_DECL: s = (c_node).href if s is NULL: @@ -565,8 +581,7 @@ if is_fragment: c_child = c_node.children while c_child is not NULL: - _unpackNodeSetEntry(results, c_child, doc, - smart_string, is_fragment) + _unpackNodeSetEntry(results, c_child, doc, context, 0) c_child = c_child.next elif c_node.type == tree.XML_XINCLUDE_START or \ c_node.type == tree.XML_XINCLUDE_END: @@ -584,6 +599,20 @@ xpathObj.nodesetval = NULL xpath.xmlXPathFreeObject(xpathObj) +cdef _Element _instantiateElementFromXPath(xmlNode* c_node, _Document doc, + _BaseContext context): + # NOTE: this may copy the element - only call this when it can't leak + if c_node.doc != doc._c_doc and c_node.doc._private is NULL: + # not from the context document and not from a fake document + # either => may still be from a known document, e.g. one + # created by an extension function + doc = context._findDocumentForNode(c_node) + if doc is None: + # not from a known document at all! 
=> can only make a + # safety copy here + c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1) + return _fakeDocElementFactory(doc, c_node) + ################################################################################ # special str/unicode subclasses @@ -627,7 +656,7 @@ return uresult cdef object _buildElementStringResult(_Document doc, xmlNode* c_node, - bint smart_string): + _BaseContext context): cdef _Element parent cdef xmlNode* c_element cdef char* s @@ -650,7 +679,7 @@ c_element = _previousElement(c_node) is_tail = c_element is not NULL - if not smart_string: + if not context._build_smart_strings: return value if c_element is NULL: @@ -660,12 +689,11 @@ c_element = c_element.parent if c_element is not NULL: - parent = _fakeDocElementFactory(doc, c_element) + parent = _instantiateElementFromXPath(c_element, doc, context) return _elementStringResultFactory( value, parent, is_attribute, is_tail) - ################################################################################ # callbacks for XPath/XSLT extension functions @@ -680,7 +708,7 @@ args = [] for i from 0 <= i < nargs: obj = xpath.valuePop(ctxt) - o = _unwrapXPathObject(obj, doc, context._build_smart_strings) + o = _unwrapXPathObject(obj, doc, context) _freeXPathObject(obj) args.append(o) args.reverse() Modified: lxml/branch/lxml-2.2/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/branch/lxml-2.2/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/branch/lxml-2.2/src/lxml/tests/test_xpathevaluator.py Sun Feb 28 10:34:33 2010 @@ -129,6 +129,24 @@ self.assertEquals('CqWeRtZuI', results[0]) self.assertEquals(False, hasattr(results[0], 'getparent')) + def test_xpath_text_from_other_document(self): + xml_data = ''' + + v1 + v2 +
+ ''' + + def lookup(dummy, id): + return etree.XML(xml_data).xpath('id(%r)' % id) + functions = {(None, 'lookup') : lookup} + + root = etree.XML('') + values = root.xpath("lookup('k1')/value/text()", + extensions=functions) + self.assertEquals(['v1'], values) + self.assertEquals('value', values[0].getparent().tag) + def test_xpath_list_comment(self): tree = self.parse('') self.assertEquals([''], Modified: lxml/branch/lxml-2.2/src/lxml/xpath.pxi ============================================================================== --- lxml/branch/lxml-2.2/src/lxml/xpath.pxi (original) +++ lxml/branch/lxml-2.2/src/lxml/xpath.pxi Sun Feb 28 10:34:33 2010 @@ -212,8 +212,7 @@ self._raise_eval_error() try: - result = _unwrapXPathObject(xpathObj, doc, - self._context._build_smart_strings) + result = _unwrapXPathObject(xpathObj, doc, self._context) finally: _freeXPathObject(xpathObj) self._context._release_temp_refs() From scoder at codespeak.net Sun Feb 28 10:35:07 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 28 Feb 2010 10:35:07 +0100 (CET) Subject: [Lxml-checkins] r71551 - lxml/trunk Message-ID: <20100228093507.4FD67282BDC@codespeak.net> Author: scoder Date: Sun Feb 28 10:35:05 2010 New Revision: 71551 Modified: lxml/trunk/CHANGES.txt Log: changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Feb 28 10:35:05 2010 @@ -55,6 +55,9 @@ Bugs fixed ---------- +* Crash in XPath evaluation when reading smart strings from a document + other than the original context document. + * Parsing broken fragments in lxml.html could fail if the fragment contained an orphaned closing '' tag. From scoder at codespeak.net Sun Feb 28 11:00:47 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 28 Feb 2010 11:00:47 +0100 (CET) Subject: [Lxml-checkins] r71552 - in lxml/branch/lxml-2.2: . 
doc Message-ID: <20100228100047.28D5C282BDC@codespeak.net> Author: scoder Date: Sun Feb 28 11:00:45 2010 New Revision: 71552 Modified: lxml/branch/lxml-2.2/CHANGES.txt lxml/branch/lxml-2.2/doc/main.txt lxml/branch/lxml-2.2/setup.py lxml/branch/lxml-2.2/version.txt Log: prepare release of lxml 2.2.5 Modified: lxml/branch/lxml-2.2/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.2/CHANGES.txt (original) +++ lxml/branch/lxml-2.2/CHANGES.txt Sun Feb 28 11:00:45 2010 @@ -2,7 +2,7 @@ lxml changelog ============== -2.2.5 (?) +2.2.5 (2010-02-28) ================== Features added Modified: lxml/branch/lxml-2.2/doc/main.txt ============================================================================== --- lxml/branch/lxml-2.2/doc/main.txt (original) +++ lxml/branch/lxml-2.2/doc/main.txt Sun Feb 28 11:00:45 2010 @@ -147,8 +147,8 @@ source release. If you can't wait, consider trying a less recent release version first. -The latest version is `lxml 2.2.4`_, released 2009-11-11 -(`changes for 2.2.4`_). `Older versions`_ are listed below. +The latest version is `lxml 2.2.5`_, released 2010-02-28 +(`changes for 2.2.5`_). `Older versions`_ are listed below. Please take a look at the `installation instructions`_! @@ -221,7 +221,9 @@ `_ and the `current in-development version `_. -.. _`PDF documentation`: lxmldoc-2.2.4.pdf +.. _`PDF documentation`: lxmldoc-2.2.5.pdf + +* `lxml 2.2.4`_, released 2009-11-11 (`changes for 2.2.4`_) * `lxml 2.2.3`_, released 2009-10-30 (`changes for 2.2.3`_) @@ -327,6 +329,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.2.5`: lxml-2.2.5.tgz .. _`lxml 2.2.4`: lxml-2.2.4.tgz .. _`lxml 2.2.3`: lxml-2.2.3.tgz .. _`lxml 2.2.2`: lxml-2.2.2.tgz @@ -381,6 +384,7 @@ .. _`lxml 0.5`: lxml-0.5.tgz .. _`changes for 2.2.4`: changes-2.2.4.html +.. _`changes for 2.2.5`: changes-2.2.5.html .. _`changes for 2.2.3`: changes-2.2.3.html .. _`changes for 2.2.2`: changes-2.2.2.html .. 
_`changes for 2.2.1`: changes-2.2.1.html Modified: lxml/branch/lxml-2.2/setup.py ============================================================================== --- lxml/branch/lxml-2.2/setup.py (original) +++ lxml/branch/lxml-2.2/setup.py Sun Feb 28 11:00:45 2010 @@ -90,7 +90,7 @@ Running ``easy_install lxml==dev`` will install it from http://codespeak.net/svn/lxml/trunk#egg=lxml-dev -""" + branch_link) % { "branch_version" : versioninfo.branch_version() }) + +""" + branch_link) % { "branch_version" : versioninfo.branch_version() }) + '\n' + versioninfo.changes()), classifiers = [ versioninfo.dev_status(), Modified: lxml/branch/lxml-2.2/version.txt ============================================================================== --- lxml/branch/lxml-2.2/version.txt (original) +++ lxml/branch/lxml-2.2/version.txt Sun Feb 28 11:00:45 2010 @@ -1 +1 @@ -2.2.4 +2.2.5 From scoder at codespeak.net Sun Feb 28 11:02:29 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 28 Feb 2010 11:02:29 +0100 (CET) Subject: [Lxml-checkins] r71553 - lxml/tag/lxml-2.2.5 Message-ID: <20100228100229.D059F282BDC@codespeak.net> Author: scoder Date: Sun Feb 28 11:02:28 2010 New Revision: 71553 Added: lxml/tag/lxml-2.2.5/ - copied from r71552, lxml/branch/lxml-2.2/ Log: new tag for lxml 2.2.5 From scoder at codespeak.net Sun Feb 28 12:15:17 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 28 Feb 2010 12:15:17 +0100 (CET) Subject: [Lxml-checkins] r71554 - lxml/trunk Message-ID: <20100228111517.1B73451054@codespeak.net> Author: scoder Date: Sun Feb 28 12:15:16 2010 New Revision: 71554 Modified: lxml/trunk/ (props changed) lxml/trunk/setup.py Log: r5481 at lenny: sbehnel | 2010-02-28 12:15:02 +0100 link fix Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Sun Feb 28 12:15:16 2010 @@ -82,7 +82,7 @@ RelaxNG, XML Schema, XSLT, C14N and much 
more. To contact the project, go to the `project home page -`_ or see our bug tracker at +`_ or see our bug tracker at https://launchpad.net/lxml In case you want to use the current in-development version of lxml, you can From scoder at codespeak.net Sun Feb 28 13:45:50 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 28 Feb 2010 13:45:50 +0100 (CET) Subject: [Lxml-checkins] r71555 - in lxml/trunk: . src/lxml Message-ID: <20100228124550.1DC802E2BA4@codespeak.net> Author: scoder Date: Sun Feb 28 13:45:48 2010 New Revision: 71555 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/extensions.pxi Log: r5485 at lenny: sbehnel | 2010-02-28 13:40:53 +0100 code cleanup Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Sun Feb 28 13:45:48 2010 @@ -777,5 +777,5 @@ else: fref = rctxt.function xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR) - exception = XPathFunctionError(u"XPath function '%s' not found" % fref) - context._exc._store_exception(exception) + context._exc._store_exception( + XPathFunctionError(u"XPath function '%s' not found" % fref)) From scoder at codespeak.net Sun Feb 28 13:45:53 2010 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 28 Feb 2010 13:45:53 +0100 (CET) Subject: [Lxml-checkins] r71556 - in lxml/trunk: . 
src/lxml src/lxml/tests Message-ID: <20100228124553.E11AB2E2BA4@codespeak.net> Author: scoder Date: Sun Feb 28 13:45:51 2010 New Revision: 71556 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/etree_defs.h lxml/trunk/src/lxml/tests/test_xpathevaluator.py lxml/trunk/src/lxml/xpath.pxd lxml/trunk/src/lxml/xpath.pxi lxml/trunk/src/lxml/xslt.pxd Log: r5486 at lenny: sbehnel | 2010-02-28 13:45:43 +0100 enable various EXSLT functions in XPath with libxslt 1.1.26+ Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Feb 28 13:45:51 2010 @@ -8,6 +8,9 @@ Features added -------------- +* During regular XPath evaluation, various EXSLT functions are + available within their namespace when using libxslt 1.1.26 or later. + * Support passing a readily configured logger instance into ``PyErrorLog``, instead of a logger name. Modified: lxml/trunk/src/lxml/etree_defs.h ============================================================================== --- lxml/trunk/src/lxml/etree_defs.h (original) +++ lxml/trunk/src/lxml/etree_defs.h Sun Feb 28 13:45:51 2010 @@ -131,6 +131,13 @@ # define xmlSchematronSetValidStructuredErrors(ctxt, errorfunc, data) #endif +/* libexslt 1.1.25+ support EXSLT functions in XPath */ +#if LIBXSLT_VERSION < 10125 +#define exsltDateXpathCtxtRegister(ctxt, prefix) +#define exsltSetsXpathCtxtRegister(ctxt, prefix) +#define exsltMathXpathCtxtRegister(ctxt, prefix) +#define exsltStrXpathCtxtRegister(ctxt, prefix) +#endif /* work around MSDEV 6.0 */ #if (_MSC_VER == 1200) && (WINVER < 0x0500) Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Sun Feb 28 13:45:51 2010 @@ -566,6 +566,35 @@ def
test_xpath_elementtree_error(self): self.assertRaises(ValueError, etree.XPath('*'), etree.ElementTree()) + +class ETreeXPathExsltTestCase(HelperTestCase): + "Tests for the EXSLT support in XPath (requires libxslt 1.1.25+)" + + NSMAP = dict( + date = "http://exslt.org/dates-and-times", + math = "http://exslt.org/math", + set = "http://exslt.org/sets", + str = "http://exslt.org/strings", + ) + + def test_xpath_exslt_functions_date(self): + tree = self.parse('2009-11-122008-12-11') + + match_dates = tree.xpath('//b[date:year(string()) = 2009]', + namespaces=self.NSMAP) + self.assertTrue(match_dates, str(match_dates)) + self.assertEquals(len(match_dates), 1, str(match_dates)) + self.assertEquals(match_dates[0].text, '2009-11-12') + + def test_xpath_exslt_functions_strings(self): + tree = self.parse('2009-11-122008-12-11') + + match_date = tree.xpath('str:replace(//b[1], "-", "*")', + namespaces=self.NSMAP) + self.assertTrue(match_date, str(match_date)) + self.assertEquals(match_date, '2009*11*12') + + class ETreeETXPathClassTestCase(HelperTestCase): "Tests for the ETXPath class" def test_xpath_compile_ns(self): @@ -703,11 +732,13 @@ " lxml.etree.XPathResultError") xpath.__doc__ = xpath.__doc__.replace(" exactly 2 arguments", " exactly 2 positional arguments") - + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeXPathTestCase)]) suite.addTests([unittest.makeSuite(ETreeXPathClassTestCase)]) + if etree.LIBXSLT_COMPILED_VERSION >= (1,1,25): + suite.addTests([unittest.makeSuite(ETreeXPathExsltTestCase)]) suite.addTests([unittest.makeSuite(ETreeETXPathClassTestCase)]) suite.addTests([doctest.DocTestSuite()]) suite.addTests( Modified: lxml/trunk/src/lxml/xpath.pxd ============================================================================== --- lxml/trunk/src/lxml/xpath.pxd (original) +++ lxml/trunk/src/lxml/xpath.pxd Sun Feb 28 13:45:51 2010 @@ -55,6 +55,7 @@ tree.xmlDoc* doc tree.xmlNode* node tree.xmlDict* dict + tree.xmlHashTable* 
nsHash char* function char* functionURI # actually signature is void (*error)(void*, xmlError*) Modified: lxml/trunk/src/lxml/xpath.pxi ============================================================================== --- lxml/trunk/src/lxml/xpath.pxi (original) +++ lxml/trunk/src/lxml/xpath.pxi Sun Feb 28 13:45:51 2010 @@ -65,6 +65,7 @@ self._register_context(doc) self.registerGlobalNamespaces() self.registerGlobalFunctions(self._xpathCtxt, _register_xpath_function) + self.registerExsltFunctions() if self._variables is not None: self.registerVariables(self._variables) @@ -75,6 +76,17 @@ xpath.xmlXPathRegisteredVariablesCleanup(self._xpathCtxt) self._cleanup_context() + cdef void registerExsltFunctions(self): + cdef xpath.xmlXPathContext* ctxt = self._xpathCtxt + cdef int i + cdef char* c_href + if xslt.LIBXSLT_VERSION < 10125: + # we'd only execute dummy functions anyway + return + tree.xmlHashScan( + self._xpathCtxt.nsHash, _registerExsltFunctionsForNamespaces, + self._xpathCtxt) + cdef registerVariables(self, variable_dict): for name, value in variable_dict.items(): name_utf = self._to_utf(name) @@ -93,6 +105,20 @@ cdef void _setupDict(self, xpath.xmlXPathContext* xpathCtxt): __GLOBAL_PARSER_CONTEXT.initXPathParserDict(xpathCtxt) +cdef void _registerExsltFunctionsForNamespaces( + void* _c_href, void* _ctxt, char* c_prefix): + cdef char* c_href = _c_href + cdef xpath.xmlXPathContext* ctxt = _ctxt + + if cstd.strcmp(c_href, xslt.EXSLT_DATE_NAMESPACE) == 0: + xslt.exsltDateXpathCtxtRegister(ctxt, c_prefix) + elif cstd.strcmp(c_href, xslt.EXSLT_SETS_NAMESPACE) == 0: + xslt.exsltSetsXpathCtxtRegister(ctxt, c_prefix) + elif cstd.strcmp(c_href, xslt.EXSLT_MATH_NAMESPACE) == 0: + xslt.exsltMathXpathCtxtRegister(ctxt, c_prefix) + elif cstd.strcmp(c_href, xslt.EXSLT_STRINGS_NAMESPACE) == 0: + xslt.exsltStrXpathCtxtRegister(ctxt, c_prefix) + cdef bint _XPATH_VERSION_WARNING_REQUIRED if _LIBXML_VERSION_INT == 20627: _XPATH_VERSION_WARNING_REQUIRED = 1 @@ -115,7 +141,8 @@ u"Use 
it at your own risk.") self._error_log = _ErrorLog() self._context = _XPathContext(namespaces, extensions, - enable_regexp, None, smart_strings) + enable_regexp, None, + smart_strings) if config.ENABLE_THREADING: self._eval_lock = python.PyThread_allocate_lock() if self._eval_lock is NULL: @@ -309,7 +336,8 @@ extensions=None, regexp=True, smart_strings=True): XPathElementEvaluator.__init__( self, etree._context_node, namespaces=namespaces, - extensions=extensions, regexp=regexp, smart_strings=smart_strings) + extensions=extensions, regexp=regexp, + smart_strings=smart_strings) def __call__(self, _path, **_variables): u"""__call__(self, _path, **_variables) @@ -453,7 +481,7 @@ _find_namespaces = re.compile('({[^}]+})').findall cdef class ETXPath(XPath): - u"""ETXPath(self, path, extensions=None, regexp=True) + u"""ETXPath(self, path, extensions=None, regexp=True, smart_strings=True) Special XPath class that supports the ElementTree {uri} notation for namespaces. Note that this class does not accept the ``namespace`` keyword @@ -461,7 +489,8 @@ string. Smart strings will be returned for string results unless you pass ``smart_strings=False``. 
""" - def __init__(self, path, *, extensions=None, regexp=True, smart_strings=True): + def __init__(self, path, *, extensions=None, regexp=True, + smart_strings=True): path, namespaces = self._nsextract_path(path) XPath.__init__(self, path, namespaces=namespaces, extensions=extensions, regexp=regexp, Modified: lxml/trunk/src/lxml/xslt.pxd ============================================================================== --- lxml/trunk/src/lxml/xslt.pxd (original) +++ lxml/trunk/src/lxml/xslt.pxd Sun Feb 28 13:45:51 2010 @@ -161,3 +161,15 @@ cdef extern from "libexslt/exslt.h": cdef void exsltRegisterAll() nogil + + # libexslt 1.1.25+ + char* EXSLT_DATE_NAMESPACE + char* EXSLT_SETS_NAMESPACE + char* EXSLT_MATH_NAMESPACE + char* EXSLT_STRINGS_NAMESPACE + + cdef int exsltDateXpathCtxtRegister(xmlXPathContext* ctxt, char* prefix) + cdef int exsltSetsXpathCtxtRegister(xmlXPathContext* ctxt, char* prefix) + cdef int exsltMathXpathCtxtRegister(xmlXPathContext* ctxt, char* prefix) + cdef int exsltStrXpathCtxtRegister(xmlXPathContext* ctxt, char* prefix) +