From lxml-checkins at codespeak.net Sat May 2 12:31:57 2009 From: lxml-checkins at codespeak.net (Mariela Manda) Date: Sat, 2 May 2009 16:31:57 +0600 Subject: [Lxml-checkins] Don't make we wait! Message-ID: <20090502103441.BACA5168506@codespeak.net> All night is yours, do as many girls as you want, not as your body can http://okgkv.wiylifit.cn/ -------------- next part -------------- An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20090502/2808451d/attachment.htm From lxml-checkins at codespeak.net Sun May 10 22:56:25 2009 From: lxml-checkins at codespeak.net (Anibal Dearinger) Date: Sun, 10 May 2009 17:56:25 -0300 Subject: [Lxml-checkins] Hello, what's up? Message-ID: <20090510205714.8EBFE169E20@codespeak.net> Make manhood ready for the long and intense night attacks http://www.tixxuvuw.cn/ -------------- next part -------------- An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20090510/8670af1d/attachment.htm From lxml-checkins at codespeak.net Mon May 25 03:50:48 2009 From: lxml-checkins at codespeak.net (Lym Darnell) Date: Mon, 25 May 2009 03:50:48 +0200 (CEST) Subject: [Lxml-checkins] Don't be late, 5 p.m. Message-ID: An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20090525/384d716d/attachment-0001.htm From scoder at codespeak.net Sun May 31 14:13:51 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 31 May 2009 14:13:51 +0200 (CEST) Subject: [Lxml-checkins] r65511 - in lxml/trunk: . src/lxml Message-ID: <20090531121351.77988169F29@codespeak.net> Author: scoder Date: Sun May 31 14:13:49 2009 New Revision: 65511 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/iterparse.pxi Log: r5120 at delle: sbehnel | 2009-05-12 19:58:31 +0200 support optional PARSE_HUGE option also for iterparse() Modified: lxml/trunk/src/lxml/iterparse.pxi ============================================================================== --- lxml/trunk/src/lxml/iterparse.pxi (original) +++ lxml/trunk/src/lxml/iterparse.pxi Sun May 31 14:13:49 2009 @@ -309,7 +309,7 @@ return c_ctxt.node.next cdef class iterparse(_BaseParser): - u"""iterparse(self, source, events=("end",), tag=None, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, remove_blank_text=False, remove_comments=False, remove_pis=False, encoding=None, html=False, schema=None) + u"""iterparse(self, source, events=("end",), tag=None, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, remove_blank_text=False, remove_comments=False, remove_pis=False, encoding=None, html=False, huge_tree=False, schema=None) Incremental parser. @@ -346,6 +346,8 @@ - strip_cdata: replace CDATA sections by normal text content (default: True) - compact: safe memory for short text content (default: True) - resolve_entities: replace entities by their text value (default: True) + - huge_tree: disable security restrictions and support very deep trees + and very long text content (only affects libxml2 2.7+) Other keyword arguments: - encoding: override the document encoding @@ -363,7 +365,7 @@ load_dtd=False, no_network=True, remove_blank_text=False, compact=True, resolve_entities=True, remove_comments=False, remove_pis=False, strip_cdata=True, encoding=None, - html=False, XMLSchema schema=None): + html=False, huge_tree=False, XMLSchema schema=None): cdef _IterparseContext context cdef char* c_encoding cdef int parse_options @@ -395,6 +397,8 @@ xmlparser.XML_PARSE_DTDLOAD if remove_blank_text: parse_options = parse_options | xmlparser.XML_PARSE_NOBLANKS + if huge_tree: + parse_options = parse_options | xmlparser.XML_PARSE_HUGE if not no_network: parse_options = parse_options ^ xmlparser.XML_PARSE_NONET if not compact: From scoder at codespeak.net Sun May 31 14:13:54 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 31 May 2009 14:13:54 +0200 (CEST) Subject: [Lxml-checkins] r65512 - in lxml/trunk: . src/lxml Message-ID: <20090531121354.4E3FF169F3C@codespeak.net> Author: scoder Date: Sun May 31 14:13:53 2009 New Revision: 65512 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/lxml.objectify.pyx Log: r5121 at delle: sbehnel | 2009-05-16 10:23:04 +0200 some more Cython typing in objectify Modified: lxml/trunk/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.objectify.pyx (original) +++ lxml/trunk/src/lxml/lxml.objectify.pyx Sun May 31 14:13:53 2009 @@ -21,7 +21,7 @@ cdef object re import re -cdef object IGNORABLE_ERRORS +cdef tuple IGNORABLE_ERRORS IGNORABLE_ERRORS = (ValueError, TypeError) cdef object islice @@ -482,7 +482,7 @@ c_href = _cstr(ns) return cetree.namespacedNameFromNsName(c_href, c_tag) -cdef object _replaceElement(_Element element, value): +cdef _replaceElement(_Element element, value): cdef _Element new_element if isinstance(value, _Element): # deep copy the new element @@ -497,7 +497,7 @@ _setElementValue(new_element, value) element.getparent().replace(element, new_element) -cdef object _appendValue(_Element parent, tag, value): +cdef _appendValue(_Element parent, tag, value): cdef _Element new_element if isinstance(value, _Element): # deep copy the new element @@ -885,7 +885,7 @@ else: return -1 -cdef inline _parseNumber(NumberElement element): +cdef inline object _parseNumber(NumberElement element): return element._parse_value(textOf(element._c_node)) cdef inline object _strValueOf(obj): @@ -935,7 +935,7 @@ cdef readonly object type_check cdef readonly object stringify cdef object _type - cdef object _schema_types + cdef list _schema_types def __init__(self, name, type_check, type_class, stringify=None): if python.PyString_Check(name): name = python.PyUnicode_FromEncodedObject(name, 'ASCII', NULL) @@ -1024,13 +1024,13 @@ self._schema_types = list(map(unicode, types)) -cdef object _PYTYPE_DICT +cdef dict _PYTYPE_DICT _PYTYPE_DICT = {} -cdef object _SCHEMA_TYPE_DICT +cdef dict _SCHEMA_TYPE_DICT _SCHEMA_TYPE_DICT = {} -cdef object _TYPE_CHECKS +cdef list _TYPE_CHECKS _TYPE_CHECKS = [] cdef _lower_bool(b): @@ -1090,7 +1090,7 @@ pytype.register() # non-registered PyType for inner tree elements -cdef object TREE_PYTYPE +cdef PyType TREE_PYTYPE TREE_PYTYPE = PyType(TREE_PYTYPE_NAME, None, ObjectifiedElement) _registerPyTypes() @@ -1837,7 +1837,7 @@ parser = objectify_parser return _parse(f, parser, base_url=base_url) -cdef object _DEFAULT_NSMAP +cdef dict _DEFAULT_NSMAP _DEFAULT_NSMAP = { u"py" : PYTYPE_NAMESPACE, u"xsi" : XML_SCHEMA_INSTANCE_NS, u"xsd" : XML_SCHEMA_NS} From scoder at codespeak.net Sun May 31 14:13:58 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 31 May 2009 14:13:58 +0200 (CEST) Subject: [Lxml-checkins] r65513 - in lxml/trunk: . src/lxml Message-ID: <20090531121358.24D7E169F29@codespeak.net> Author: scoder Date: Sun May 31 14:13:57 2009 New Revision: 65513 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/xmlschema.pxi Log: r5122 at delle: sbehnel | 2009-05-31 14:04:23 +0200 pass invalid schemas on to libxml2 in 2.6.24 and later Modified: lxml/trunk/src/lxml/xmlschema.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlschema.pxi (original) +++ lxml/trunk/src/lxml/xmlschema.pxi Sun May 31 14:13:57 2009 @@ -42,12 +42,12 @@ root_node = _rootNodeOrRaise(etree) # work around for libxml2 bug if document is not XML schema at all - #if _LIBXML_VERSION_INT < 20624: - c_node = root_node._c_node - c_href = _getNs(c_node) - if c_href is NULL or \ - cstd.strcmp(c_href, 'http://www.w3.org/2001/XMLSchema') != 0: - raise XMLSchemaParseError, u"Document is not XML Schema" + if _LIBXML_VERSION_INT < 20624: + c_node = root_node._c_node + c_href = _getNs(c_node) + if c_href is NULL or \ + cstd.strcmp(c_href, 'http://www.w3.org/2001/XMLSchema') != 0: + raise XMLSchemaParseError, u"Document is not XML Schema" fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) self._error_log.connect() From scoder at codespeak.net Sun May 31 14:14:00 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 31 May 2009 14:14:00 +0200 (CEST) Subject: [Lxml-checkins] r65514 - lxml/trunk Message-ID: <20090531121400.E09B7169F3C@codespeak.net> Author: scoder Date: Sun May 31 14:14:00 2009 New Revision: 65514 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r5123 at delle: sbehnel | 2009-05-31 14:10:38 +0200 changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun May 31 14:14:00 2009 @@ -14,6 +14,9 @@ Bugs fixed ---------- +* ``XMLSchema()`` also passes invalid schema documents on to libxml2 + for parsing (which could lead to a crash before release 2.6.24) + Other changes ------------- From lxml-checkins at codespeak.net Sun May 31 21:59:16 2009 From: lxml-checkins at codespeak.net (Jesse) Date: Sun, 31 May 2009 21:59:16 +0200 (CEST) Subject: [Lxml-checkins] Writing from home Message-ID: <371745844517780.GIETRFSNPPTLJWS@200-103-137-99.ctame705.dsl.brasiltelecom.net.br> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20090531/6228a51d/attachment.htm From scoder at codespeak.net Sun May 31 22:50:08 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 31 May 2009 22:50:08 +0200 (CEST) Subject: [Lxml-checkins] r65519 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20090531205008.96299169F8E@codespeak.net> Author: scoder Date: Sun May 31 22:50:08 2009 New Revision: 65519 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/tests/test_xmlschema.py lxml/trunk/src/lxml/xmlschema.pxd lxml/trunk/src/lxml/xmlschema.pxi Log: r5128 at delle: sbehnel | 2009-05-31 22:46:55 +0200 support injecting XML Schema default attributes into a document during validation Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun May 31 22:50:08 2009 @@ -8,6 +8,9 @@ Features added -------------- +* Injecting default attributes into a document during XML Schema + validation (also at parse time). + * Pass ``huge_tree`` parser option to disable parser security restrictions imposed by libxml2 2.7. Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Sun May 31 22:50:08 2009 @@ -630,6 +630,14 @@ result.URL = tree.xmlStrdup(_cstr(filename)) if result.encoding is NULL: result.encoding = tree.xmlStrdup("UTF-8") + + if context._validator is not None and \ + context._validator._add_default_attributes: + # we currently need to do this here as libxml2 does not + # support inserting default attributes during parse-time + # validation + context._validator.inject_default_attributes(result) + return result cdef int _fixHtmlDictNames(tree.xmlDict* c_dict, xmlDoc* c_doc) nogil: @@ -717,7 +725,8 @@ self._parser_context = self._createContext(self._target) if self._schema is not None: self._parser_context._validator = \ - self._schema._newSaxValidator() + self._schema._newSaxValidator( + self._parse_options & xmlparser.XML_PARSE_DTDATTR) pctxt = self._newParserCtxt() if pctxt is NULL: python.PyErr_NoMemory() @@ -737,7 +746,8 @@ self._push_parser_context = self._createContext(self._target) if self._schema is not None: self._push_parser_context._validator = \ - self._schema._newSaxValidator() + self._schema._newSaxValidator( + self._parse_options & xmlparser.XML_PARSE_DTDATTR) pctxt = self._newPushParserCtxt() if pctxt is NULL: python.PyErr_NoMemory() @@ -1174,14 +1184,16 @@ 'set_default_parser'. New parsers can be created at any time without a major run-time overhead. - The keyword arguments in the constructor are mainly based on the libxml2 - parser configuration. A DTD will also be loaded if validation or - attribute default values are requested. + The keyword arguments in the constructor are mainly based on the + libxml2 parser configuration. A DTD will also be loaded if DTD + validation or attribute default values are requested (unless you + additionally provide an XMLSchema from which the default + attributes can be read). Available boolean keyword arguments: - - attribute_defaults - read default attributes from DTD - - dtd_validation - validate (if DTD is available) + - attribute_defaults - inject default attributes from DTD or XMLSchema + - dtd_validation - validate against a DTD referenced by the document - load_dtd - use DTD for parsing - no_network - prevent network access for related files (default: True) - ns_clean - clean up redundant namespace declarations @@ -1219,8 +1231,9 @@ parse_options = parse_options | xmlparser.XML_PARSE_DTDVALID | \ xmlparser.XML_PARSE_DTDLOAD if attribute_defaults: - parse_options = parse_options | xmlparser.XML_PARSE_DTDATTR | \ - xmlparser.XML_PARSE_DTDLOAD + parse_options = parse_options | xmlparser.XML_PARSE_DTDATTR + if schema is None: + parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD if ns_clean: parse_options = parse_options | xmlparser.XML_PARSE_NSCLEAN if recover: Modified: lxml/trunk/src/lxml/tests/test_xmlschema.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xmlschema.py (original) +++ lxml/trunk/src/lxml/tests/test_xmlschema.py Sun May 31 22:50:08 2009 @@ -31,6 +31,38 @@ self.assert_(schema.validate(tree_valid)) self.assert_(not schema.validate(tree_invalid)) + def test_xmlschema_default_attributes(self): + schema = self.parse(''' + + + + + + + + + + + +''') + schema = etree.XMLSchema(schema, attribute_defaults=True) + + tree = self.parse('') + + root = tree.getroot() + self.assertEquals('ho', root[0].get('hardy')) + self.assertEquals(None, root[1].get('hardy')) + self.assertEquals('ho', root[2].get('hardy')) + self.assertEquals(None, root[3].get('hardy')) + + self.assert_(schema(tree)) + + root = tree.getroot() + self.assertEquals('ho', root[0].get('hardy')) + self.assertEquals('hey', root[1].get('hardy')) + self.assertEquals('ho', root[2].get('hardy')) + self.assertEquals('hey', root[3].get('hardy')) + def test_xmlschema_parse(self): schema = self.parse(''' @@ -51,6 +83,83 @@ self.assertRaises(etree.XMLSyntaxError, self.parse, '', parser=parser) + def test_xmlschema_parse_default_attributes(self): + # does not work as of libxml2 2.7.3 + schema = self.parse(''' + + + + + + + + + + + +''') + schema = etree.XMLSchema(schema) + parser = etree.XMLParser(schema=schema, attribute_defaults=True) + + tree_valid = self.parse('', + parser=parser) + root = tree_valid.getroot() + self.assertEquals('ho', root[0].get('hardy')) + self.assertEquals('hey', root[1].get('hardy')) + self.assertEquals('ho', root[2].get('hardy')) + self.assertEquals('hey', root[3].get('hardy')) + + def test_xmlschema_parse_default_attributes_schema_config(self): + # does not work as of libxml2 2.7.3 + schema = self.parse(''' + + + + + + + + + + + +''') + schema = etree.XMLSchema(schema, attribute_defaults=True) + parser = etree.XMLParser(schema=schema) + + tree_valid = self.parse('', + parser=parser) + root = tree_valid.getroot() + self.assertEquals('ho', root[0].get('hardy')) + self.assertEquals('hey', root[1].get('hardy')) + self.assertEquals('ho', root[2].get('hardy')) + self.assertEquals('hey', root[3].get('hardy')) + + def test_xmlschema_parse_fixed_attributes(self): + # does not work as of libxml2 2.7.3 + schema = self.parse(''' + + + + + + + + + + + +''') + schema = etree.XMLSchema(schema) + parser = etree.XMLParser(schema=schema, attribute_defaults=True) + + tree_valid = self.parse('', + parser=parser) + root = tree_valid.getroot() + self.assertEquals('hey', root[0].get('hardy')) + self.assertEquals('hey', root[1].get('hardy')) + self.assertEquals('hey', root[2].get('hardy')) + def test_xmlschema_stringio(self): schema_file = BytesIO(''' Modified: lxml/trunk/src/lxml/xmlschema.pxd ============================================================================== --- lxml/trunk/src/lxml/xmlschema.pxd (original) +++ lxml/trunk/src/lxml/xmlschema.pxd Sun May 31 22:50:08 2009 @@ -8,6 +8,9 @@ ctypedef struct xmlSchemaSAXPlugStruct ctypedef struct xmlSchemaValidCtxt + ctypedef enum xmlSchemaValidOption: + XML_SCHEMA_VAL_VC_I_CREATE = 1 + cdef xmlSchemaValidCtxt* xmlSchemaNewValidCtxt(xmlSchema* schema) nogil cdef int xmlSchemaValidateDoc(xmlSchemaValidCtxt* ctxt, xmlDoc* doc) nogil cdef xmlSchema* xmlSchemaParse(xmlSchemaParserCtxt* ctxt) nogil @@ -16,6 +19,8 @@ cdef void xmlSchemaFree(xmlSchema* schema) nogil cdef void xmlSchemaFreeParserCtxt(xmlSchemaParserCtxt* ctxt) nogil cdef void xmlSchemaFreeValidCtxt(xmlSchemaValidCtxt* ctxt) nogil + cdef int xmlSchemaSetValidOptions(xmlSchemaValidCtxt* ctxt, + int options) nogil cdef xmlSchemaSAXPlugStruct* xmlSchemaSAXPlug(xmlSchemaValidCtxt* ctxt, xmlSAXHandler** sax, Modified: lxml/trunk/src/lxml/xmlschema.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlschema.pxi (original) +++ lxml/trunk/src/lxml/xmlschema.pxi Sun May 31 22:50:08 2009 @@ -19,23 +19,36 @@ ################################################################################ # XMLSchema +cdef XPath _check_for_default_attributes = XPath( + u"boolean(//xs:attribute[@default or @fixed][1])", + namespaces={u'xs': u'http://www.w3.org/2001/XMLSchema'}) + cdef class XMLSchema(_Validator): u"""XMLSchema(self, etree=None, file=None) Turn a document into an XML Schema validator. Either pass a schema as Element or ElementTree, or pass a file or filename through the ``file`` keyword argument. + + Passing the ``attribute_defaults`` boolean option will make the + schema insert default/fixed attributes into validated documents. """ cdef xmlschema.xmlSchema* _c_schema - def __init__(self, etree=None, *, file=None): + cdef bint _has_default_attributes + cdef bint _add_attribute_defaults + + def __init__(self, etree=None, *, file=None, attribute_defaults=False): cdef _Document doc cdef _Element root_node cdef xmlDoc* fake_c_doc cdef xmlNode* c_node cdef char* c_href cdef xmlschema.xmlSchemaParserCtxt* parser_ctxt - _Validator.__init__(self) + + self._has_default_attributes = True # play safe + self._add_attribute_defaults = attribute_defaults self._c_schema = NULL + _Validator.__init__(self) fake_c_doc = NULL if etree is not None: doc = _documentOrRaise(etree) @@ -92,6 +105,11 @@ u"Document is not valid XML Schema"), self._error_log) + if doc is not None: + self._has_default_attributes = _check_for_default_attributes(doc) + self._add_attribute_defaults = attribute_defaults and \ + self._has_default_attributes + def __dealloc__(self): xmlschema.xmlSchemaFree(self._c_schema) @@ -117,6 +135,10 @@ self._error_log.disconnect() return python.PyErr_NoMemory() + if self._add_attribute_defaults: + xmlschema.xmlSchemaSetValidOptions( + valid_ctxt, xmlschema.XML_SCHEMA_VAL_VC_I_CREATE) + c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) with nogil: ret = xmlschema.xmlSchemaValidateDoc(valid_ctxt, c_doc) @@ -134,18 +156,22 @@ else: return False - cdef _ParserSchemaValidationContext _newSaxValidator(self): + cdef _ParserSchemaValidationContext _newSaxValidator( + self, bint add_default_attributes): cdef _ParserSchemaValidationContext context context = NEW_SCHEMA_CONTEXT(_ParserSchemaValidationContext) context._schema = self context._valid_ctxt = NULL context._sax_plug = NULL + context._add_default_attributes = (self._has_default_attributes and ( + add_default_attributes or self._add_attribute_defaults)) return context cdef class _ParserSchemaValidationContext: cdef XMLSchema _schema cdef xmlschema.xmlSchemaValidCtxt* _valid_ctxt cdef xmlschema.xmlSchemaSAXPlugStruct* _sax_plug + cdef bint _add_default_attributes def __dealloc__(self): self.disconnect() @@ -153,7 +179,16 @@ xmlschema.xmlSchemaFreeValidCtxt(self._valid_ctxt) cdef _ParserSchemaValidationContext copy(self): - return self._schema._newSaxValidator() + return self._schema._newSaxValidator( + self._add_default_attributes) + + cdef void inject_default_attributes(self, xmlDoc* c_doc): + # we currently need to insert default attributes manually + # after parsing, as libxml2 does not support this at parse + # time + if self._add_default_attributes: + with nogil: + xmlschema.xmlSchemaValidateDoc(self._valid_ctxt, c_doc) cdef int connect(self, xmlparser.xmlParserCtxt* c_ctxt) except -1: if self._valid_ctxt is NULL: @@ -161,6 +196,10 @@ self._schema._c_schema) if self._valid_ctxt is NULL: return python.PyErr_NoMemory() + if self._add_default_attributes: + xmlschema.xmlSchemaSetValidOptions( + self._valid_ctxt, + xmlschema.XML_SCHEMA_VAL_VC_I_CREATE) self._sax_plug = xmlschema.xmlSchemaSAXPlug( self._valid_ctxt, &c_ctxt.sax, &c_ctxt.userData)