From scoder at codespeak.net Sat Apr 7 09:13:07 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 7 Apr 2007 09:13:07 +0200 (CEST) Subject: [Lxml-checkins] r41950 - in lxml/trunk: . doc src/lxml src/lxml/tests Message-ID: <20070407071307.320AA10072@code0.codespeak.net> Author: scoder Date: Sat Apr 7 09:13:05 2007 New Revision: 41950 Modified: lxml/trunk/CHANGES.txt lxml/trunk/doc/objectify.txt lxml/trunk/src/lxml/objectify.pyx lxml/trunk/src/lxml/tests/test_objectify.py Log: support '.' as identity ObjectPath Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sat Apr 7 09:13:05 2007 @@ -8,6 +8,8 @@ Features added -------------- +* '.' represents empty ObjectPath (identity) + * EXSLT RegExp support in standard XPath (not only XSLT) * ``lxml.pyclasslookup`` module that can access the entire tree in read-only Modified: lxml/trunk/doc/objectify.txt ============================================================================== --- lxml/trunk/doc/objectify.txt (original) +++ lxml/trunk/doc/objectify.txt Sat Apr 7 09:13:05 2007 @@ -372,8 +372,8 @@ >>> print find(root).tag {ns}b -You can also use relative paths starting with a '.' that ignore the actual -root element and only inherit its namespace:: +You can also use relative paths starting with a '.' to ignore the actual root +element and only inherit its namespace:: >>> find = objectify.ObjectPath(".b[1]") >>> print find(root).tag @@ -395,6 +395,12 @@ ... 
AttributeError: no such child: {other}unknown +For convenience, a single dot represents the empty ObjectPath (identity):: + + >>> find = objectify.ObjectPath(".") + >>> print find(root).tag + {ns}root + ObjectPath objects can be used to manipulate trees:: >>> root = objectify.Element("{ns}root") Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Sat Apr 7 09:13:05 2007 @@ -1166,6 +1166,9 @@ r"(\.?)\s*(?:\{([^}]*)\})?\s*([^.{}\[\]\s]+)\s*(?:\[\s*([-0-9]+)\s*\])?", re.U).match +cdef object _RELATIVE_PATH_SEGMENT +_RELATIVE_PATH_SEGMENT = (None, None, 0) + cdef _parseObjectPathString(path): """Parse object path string into a 'hrefOnameOhrefOnameOOO' string and an index list. The index list is None if no index was used in the path. @@ -1173,6 +1176,8 @@ cdef int has_dot new_path = [] path = cetree.utf8(path.strip()) + if path == '.': + return [_RELATIVE_PATH_SEGMENT] path_pos = 0 while python.PyString_GET_SIZE(path) > 0: match = __MATCH_PATH_SEGMENT(path, path_pos) @@ -1188,7 +1193,7 @@ if python.PyList_GET_SIZE(new_path) == 0: if has_dot: # path '.child' => ignore root - python.PyList_Append(new_path, (None, None, 0)) + python.PyList_Append(new_path, _RELATIVE_PATH_SEGMENT) elif index != 0: raise ValueError, "index not allowed on root node" elif not has_dot: @@ -1234,9 +1239,7 @@ if python.PyList_GET_SIZE(new_path) == 0 and index != 0: raise ValueError, "index not allowed on root node" python.PyList_Append(new_path, (ns, name, index)) - if python.PyList_GET_SIZE(new_path) == 0 or \ - (python.PyList_GET_SIZE(new_path) == 1 and \ - new_path[0] == (None, None, 0)): + if python.PyList_GET_SIZE(new_path) == 0: raise ValueError, "invalid path" return new_path Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- 
lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Sat Apr 7 09:13:05 2007 @@ -593,6 +593,16 @@ path = objectify.ObjectPath( "root.c1[1].c2" ) self.assertFalse(path.hasattr(root)) + def test_object_path_dot(self): + root = self.XML(xml_str) + path = objectify.ObjectPath( "." ) + self.assertEquals(root.c1.c2.text, path(root).c1.c2.text) + + def test_object_path_dot_list(self): + root = self.XML(xml_str) + path = objectify.ObjectPath( [''] ) + self.assertEquals(root.c1.c2.text, path(root).c1.c2.text) + def test_object_path_dot_root(self): root = self.XML(xml_str) path = objectify.ObjectPath( ".c1.c2" ) @@ -652,9 +662,7 @@ ['root[2]', 'c1', 'c2']) self.assertRaises(ValueError, objectify.ObjectPath, - ".") - self.assertRaises(ValueError, objectify.ObjectPath, - ['']) + []) self.assertRaises(ValueError, objectify.ObjectPath, ['', '', '']) From scoder at codespeak.net Sat Apr 7 22:17:30 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 7 Apr 2007 22:17:30 +0200 (CEST) Subject: [Lxml-checkins] r41955 - in lxml/trunk: . src/lxml Message-ID: <20070407201730.4C33710072@code0.codespeak.net> Author: scoder Date: Sat Apr 7 22:17:29 2007 New Revision: 41955 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/sax.py Log: support for custom Element class instantiation in lxml.sax Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sat Apr 7 22:17:29 2007 @@ -8,6 +8,8 @@ Features added -------------- +* Support for custom Element class instantiation in lxml.sax + * '.' 
represents empty ObjectPath (identity) * EXSLT RegExp support in standard XPath (not only XSLT) Modified: lxml/trunk/src/lxml/sax.py ============================================================================== --- lxml/trunk/src/lxml/sax.py (original) +++ lxml/trunk/src/lxml/sax.py Sat Apr 7 22:17:29 2007 @@ -13,12 +13,15 @@ class ElementTreeContentHandler(object, ContentHandler): """Build an lxml ElementTree from SAX events. """ - def __init__(self): + def __init__(self, makeelement=None): self._root = None self._element_stack = [] self._default_ns = None self._ns_mapping = { None : [None] } self._new_mappings = {} + if makeelement is None: + makeelement = Element + self._makeelement = makeelement def _get_etree(self): "Contains the generated ElementTree after parsing is finished." @@ -77,7 +80,8 @@ element_stack = self._element_stack if self._root is None: - element = self._root = Element(el_name, attrs, self._new_mappings) + element = self._root = \ + self._makeelement(el_name, attrs, self._new_mappings) else: element = SubElement(element_stack[-1], el_name, attrs, self._new_mappings) From scoder at codespeak.net Tue Apr 10 13:30:34 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 10 Apr 2007 13:30:34 +0200 (CEST) Subject: [Lxml-checkins] r41985 - lxml/trunk/src/lxml Message-ID: <20070410113034.B4C918065@code0.codespeak.net> Author: scoder Date: Tue Apr 10 13:30:33 2007 New Revision: 41985 Modified: lxml/trunk/src/lxml/xslt.pxi Log: exception message cleanup Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Tue Apr 10 13:30:33 2007 @@ -296,7 +296,7 @@ if self._error_log.last_error is not None: raise XSLTParseError, self._error_log.last_error.message else: - raise XSLTParseError, "Cannot parse style sheet" + raise XSLTParseError, "Cannot parse stylesheet" c_doc._private = NULL # no longer used! 
self._c_style = c_style @@ -344,7 +344,7 @@ transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc) if transform_ctxt is NULL: _destroyFakeDoc(input_doc._c_doc, c_doc) - raise XSLTApplyError, "Error preparing stylesheet run" + python.PyErr_NoMemory() initTransformDict(transform_ctxt) @@ -383,7 +383,7 @@ message = "%s, line %d" % (error.message, error.line) else: message = error.message - elif error.line >= 0: + elif error is not None and error.line >= 0: message = "Error applying stylesheet, line %d" % error.line else: message = "Error applying stylesheet" From scoder at codespeak.net Tue Apr 10 14:11:23 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 10 Apr 2007 14:11:23 +0200 (CEST) Subject: [Lxml-checkins] r41986 - lxml/trunk/src/lxml/tests Message-ID: <20070410121123.BAB5C807A@code0.codespeak.net> Author: scoder Date: Tue Apr 10 14:11:23 2007 New Revision: 41986 Modified: lxml/trunk/src/lxml/tests/test_xslt.py Log: cleanup in test cases, test case on XSLT parsing errors Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Tue Apr 10 14:11:23 2007 @@ -34,6 +34,23 @@ def test_xslt_elementtree_error(self): self.assertRaises(ValueError, etree.XSLT, etree.ElementTree()) + def test_xslt_input_none(self): + self.assertRaises(TypeError, etree.XSLT, None) + + def test_xslt_invalid_stylesheet(self): + if etree.LIBXSLT_VERSION < (1,1,15): + return # no error from libxslt? 
+ + style = self.parse('''\ + + + + +''') + + self.assertRaises(etree.XSLTParseError, etree.XSLT, style) + def test_xslt_utf8(self): tree = self.parse(u'\uF8D2\uF8D2') style = self.parse('''\ @@ -144,10 +161,9 @@ -B--C- ''', - st.tostring(res)) + str(res)) def test_xslt_input(self): - tree = self.parse('BC') style = self.parse('''\ @@ -159,7 +175,6 @@ st = etree.XSLT(style) st = etree.XSLT(style.getroot()) - self.assertRaises(TypeError, etree.XSLT, None) def test_xslt_input_partial_doc(self): style = self.parse('''\ @@ -206,8 +221,10 @@ ''', st.tostring(res)) - def _test_xslt_parameter_missing(self): - # DISABLED - NOT RELIABLE? + def test_xslt_parameter_missing(self): + # DISABLED - NOT RELIABLE! + if etree.LIBXSLT_VERSION >= (1,1,18) and etree.LIBXSLT_VERSION < (1,1,20): + return # no error from libxslt? # apply() without needed parameter will lead to XSLTApplyError tree = self.parse('BC') style = self.parse('''\ From scoder at codespeak.net Tue Apr 10 21:01:51 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 10 Apr 2007 21:01:51 +0200 (CEST) Subject: [Lxml-checkins] r41995 - in lxml/trunk: . doc src/lxml src/lxml/tests Message-ID: <20070410190151.306B680A0@code0.codespeak.net> Author: scoder Date: Tue Apr 10 21:01:50 2007 New Revision: 41995 Modified: lxml/trunk/CHANGES.txt lxml/trunk/doc/objectify.txt lxml/trunk/src/lxml/objectify.pyx lxml/trunk/src/lxml/tests/test_objectify.py Log: Holger's patch for type annotations in objectify Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Apr 10 21:01:50 2007 @@ -8,6 +8,9 @@ Features added -------------- +* Extended type annotation in objectify: cleaner annotation namespace setup + plus new ``xsiannotate()`` and ``deannotate()`` functions + * Support for custom Element class instantiation in lxml.sax * '.' 
represents empty ObjectPath (identity) Modified: lxml/trunk/doc/objectify.txt ============================================================================== --- lxml/trunk/doc/objectify.txt (original) +++ lxml/trunk/doc/objectify.txt Tue Apr 10 21:01:50 2007 @@ -699,6 +699,61 @@ s = '5' [StringElement] * xsi:type = 'string' +Again, there is a utility function ``xsiannotate()`` that recursively +generates the "xsi:type" attribute for the elements of a tree:: + + >>> root = objectify.fromstring('''\ + ... test5true + ... ''') + >>> print objectify.dump(root) + root = None [ObjectifiedElement] + a = 'test' [StringElement] + b = 5 [IntElement] + c = True [BoolElement] + + >>> objectify.xsiannotate(root) + + >>> print objectify.dump(root) + root = None [ObjectifiedElement] + a = 'test' [StringElement] + * xsi:type = 'string' + b = 5 [IntElement] + * xsi:type = 'int' + c = True [BoolElement] + * xsi:type = 'boolean' + +Note, however, that ``xsiannotate()`` will always use the first XML Schema +datatype that is defined for any given Python type, see also +`Defining additional data classes`_. + +The utility function ``deannotate()`` can be used to get rid of 'py:pytype' +and/or 'xsi:type' information:: + + >>> root = objectify.fromstring('''\ + ... + ... 5 + ... 5 + ... 5 + ... ''') + >>> objectify.annotate(root) + >>> print objectify.dump(root) + root = None [ObjectifiedElement] + d = 5.0 [FloatElement] + * xsi:type = 'double' + * py:pytype = 'float' + l = 5L [LongElement] + * xsi:type = 'long' + * py:pytype = 'long' + s = '5' [StringElement] + * xsi:type = 'string' + * py:pytype = 'str' + >>> objectify.deannotate(root) + >>> print objectify.dump(root) + root = None [ObjectifiedElement] + d = 5 [IntElement] + l = 5 [IntElement] + s = 5 [IntElement] + For convenience, the ``DataElement()`` factory creates an Element with a Python value in one step. 
You can pass the required Python type name or the XSI type name:: @@ -720,8 +775,8 @@ >>> root.x = objectify.DataElement(5, _xsi="integer") >>> print objectify.dump(root) root = None [ObjectifiedElement] - x = 5 [IntElement] - * py:pytype = 'int' + x = 5L [LongElement] + * py:pytype = 'long' * xsi:type = 'integer' There is a side effect of the type lookup. If you assign a string value using Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Tue Apr 10 21:01:50 2007 @@ -707,17 +707,21 @@ """Boolean type base on string values: 'true' or 'false'. """ cdef int _boolval(self) except -1: + cdef char* c_str text = textOf(self._c_node) if text is None: return 0 - text = text.lower() - if text == 'false': - return 0 - elif text == 'true': - return 1 - else: - raise ValueError, "Invalid boolean value: '%s'" % text - + c_str = _cstr(text) + if c_str[0] == c'0' or c_str[0] == c'f' or c_str[0] == c'F': + if c_str[1] == c'\0' or text.lower() == "false": + # '0' or 'f' or 'false' + return 0 + elif c_str[0] == c'1' or c_str[0] == c't' or c_str[0] == c'T': + if c_str[1] == c'\0' or text.lower() == 'true'): + # '1' or 't' or 'true' + return 1 + raise ValueError, "Invalid boolean value: '%s'" % text + def __nonzero__(self): if self._boolval(): return True @@ -882,13 +886,15 @@ cdef _registerPyTypes(): pytype = PyType('int', int, IntElement) - pytype.xmlSchemaTypes = ("integer", "positiveInteger", "negativeInteger", - "nonNegativeInteger", "nonPositiveInteger", - "int", "unsignedInt", "short", "unsignedShort") + pytype.xmlSchemaTypes = ("int", "short", "byte", "unsignedShort", + "unsignedByte",) + pytype.register() pytype = PyType('long', long, LongElement) - pytype.xmlSchemaTypes = ("long", "unsignedLong") + pytype.xmlSchemaTypes = ("integer", "nonPositiveInteger", "negativeInteger", + "long", "nonNegativeInteger", 
"unsignedLong", + "unsignedInt", "positiveInteger",) pytype.register() pytype = PyType('float', float, FloatElement) @@ -900,7 +906,9 @@ pytype.register() pytype = PyType('str', None, StringElement) - pytype.xmlSchemaTypes = ("string", "normalizedString") + pytype.xmlSchemaTypes = ("string", "normalizedString", "token", "language", + "Name", "NCName", "ID", "IDREF", "ENTITY", + "NMTOKEN", ) pytype.register() pytype = PyType('none', None, NoneElement) @@ -936,12 +944,25 @@ python.PyList_Append(types, pytype) return types +cdef PyType _guessPyType(value, PyType defaulttype): + if value is None: + return None + for type_check, tested_pytype in _TYPE_CHECKS: + try: + type_check(value) + return tested_pytype + except IGNORABLE_ERRORS: + # could not be parsed as the specififed type => ignore + pass + return defaulttype + cdef object _guessElementClass(tree.xmlNode* c_node): value = textOf(c_node) if value is None: return None if value == '': return StringElement + for type_check, pytype in _TYPE_CHECKS: try: type_check(value) @@ -1424,11 +1445,26 @@ ################################################################################ # Type annotations +cdef PyType _check_type(tree.xmlNode* c_node, PyType pytype): + # StrType does not have a typecheck but is the default anyway, + # so just accept it if given as type information + if pytype is None: + return pytype + value = textOf(c_node) + try: + pytype.type_check(value) + return pytype + except IGNORABLE_ERRORS: + # could not be parsed as the specified type => ignore + pass + return None + + def annotate(element_or_tree, ignore_old=True): """Recursively annotates the elements of an XML tree with 'pytype' attributes. - If the 'ignore_old' keyword argument is True (the default), current + If the 'ignore_old' keyword argument is True (the default), current 'pytype' attributes will be ignored and replaced. Otherwise, they will be checked and only replaced if they no longer fit the current text value. 
""" @@ -1438,11 +1474,13 @@ cdef tree.xmlNode* c_node cdef tree.xmlNs* c_ns cdef python.PyObject* dict_result + cdef PyType pytype element = cetree.rootNodeOrRaise(element_or_tree) doc = element._doc ignore = bool(ignore_old) - StrType = _PYTYPE_DICT.get('str') + StrType = _PYTYPE_DICT.get('str') + NoneType = _PYTYPE_DICT.get('none') c_node = element._c_node tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) pytype = None @@ -1452,20 +1490,19 @@ old_value = cetree.attributeValueFromNsName( c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) if old_value is not None and old_value != TREE_PYTYPE: - pytype = _PYTYPE_DICT.get(old_value) - if pytype is not None: - value = textOf(c_node) - try: - if not (pytype).type_check(value): - pytype = None - except ValueError: - pytype = None + dict_result = python.PyDict_GetItem(_PYTYPE_DICT, old_value) + if dict_result is not NULL: + pytype = dict_result + if pytype is not StrType: + # StrType does not have a typecheck but is the default anyway, + # so just accept it if given as type information + pytype = _check_type(c_node, pytype) if pytype is None: - # if element is defined as xsi:nil, return NoneElement class + # if element is defined as xsi:nil, represent it as None if cetree.attributeValueFromNsName( c_node, _XML_SCHEMA_INSTANCE_NS, "nil") == "true": - pytype = _PYTYPE_DICT.get("none") + pytype = NoneType if pytype is None: # check for XML Schema type hint @@ -1481,18 +1518,7 @@ # try to guess type if cetree.findChildForwards(c_node, 0) is NULL: # element has no children => data class - if value is None: - value = textOf(c_node) - if value is not None: - for type_check, tested_pytype in _TYPE_CHECKS: - try: - if type_check(value) is not False: - pytype = tested_pytype - break - except ValueError: - pass - else: - pytype = StrType + pytype = _guessPyType(textOf(c_node), StrType) if pytype is None: # delete attribute if it exists @@ -1505,6 +1531,124 @@ _cstr(pytype.name)) tree.END_FOR_EACH_ELEMENT_FROM(c_node) +def 
xsiannotate(element_or_tree, ignore_old=True): + """Recursively annotates the elements of an XML tree with 'xsi:type' + attributes. + + If the 'ignore_old' keyword argument is True (the default), current + 'xsi:type' attributes will be ignored and replaced. Otherwise, they will be + checked and only replaced if they no longer fit the current text value. + + Note that tha mapping from Python types to XSI types is usually ambiguous. + Currently, only the first XSI type name in the corresponding PyType + definition will be used for annotation. Thus, you should consider naming + the widest type first here if you define additional types. + """ + cdef _Element element + cdef _Document doc + cdef int ignore + cdef int istree + cdef tree.xmlNode* c_node + cdef tree.xmlNs* c_ns + cdef python.PyObject* dict_result + cdef PyType pytype + element = cetree.rootNodeOrRaise(element_or_tree) + doc = element._doc + ignore = bool(ignore_old) + + StrType = _PYTYPE_DICT.get('str') + c_node = element._c_node + tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) + typename = None + pytype = None + value = None + istree = 0 + if not ignore: + # check that old value is valid + typename = cetree.attributeValueFromNsName( + c_node, _XML_SCHEMA_INSTANCE_NS, "type") + if typename is not None: + dict_result = python.PyDict_GetItem(_SCHEMA_TYPE_DICT, typename) + if dict_result is not NULL: + pytype = dict_result + if pytype is not StrType: + # StrType does not have a typecheck but is the default anyway, + # so just accept it if given as type information + pytype = _check_type(c_node, pytype) + if pytype is None: + typename = None + + if typename is None: + if pytype is None: + # check for pytype hint + value = cetree.attributeValueFromNsName( + c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) + + if value is not None: + if value == TREE_PYTYPE: + istree = 1 + else: + dict_result = python.PyDict_GetItem(_PYTYPE_DICT, value) + if dict_result is not NULL: + pytype = dict_result + if pytype is 
not StrType: + pytype = _check_type(c_node, pytype) + + if not istree and pytype is None: + # try to guess type + if cetree.findChildForwards(c_node, 0) is NULL: + # element has no children => data class + pytype = _guessPyType(textOf(c_node), StrType) + else: + istree = 1 + + if typename is None and not istree and pytype is not None: + if python.PyList_GET_SIZE(pytype._schema_types) > 0: + # pytype->xsi:type is a 1:n mapping so simply take the first + typename = pytype._schema_types[0] + + if typename is None or istree: + # delete attribute if it exists + cetree.delAttributeFromNsName(c_node, _XML_SCHEMA_INSTANCE_NS, "type") + else: + # update or create attribute + c_ns = cetree.findOrBuildNodeNs(doc, c_node, _XML_SCHEMA_INSTANCE_NS) + tree.xmlSetNsProp(c_node, c_ns, "type", _cstr(typename)) + tree.END_FOR_EACH_ELEMENT_FROM(c_node) + + +def deannotate(element_or_tree, pytype=True, xsi=True): + """Recursively de-annotate the elements of an XML tree by removing 'pytype' + and/or 'type' attributes. + + If the 'pytype' keyword argument is True (the default), 'pytype' attributes + will be removed. If the 'xsi' keyword argument is True (the default), + 'xsi:type' attributes will be removed. 
+ """ + cdef _Element element + cdef tree.xmlNode* c_node + + element = cetree.rootNodeOrRaise(element_or_tree) + c_node = element._c_node + if pytype and xsi: + tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) + cetree.delAttributeFromNsName( + c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) + cetree.delAttributeFromNsName( + c_node, _XML_SCHEMA_INSTANCE_NS, "type") + tree.END_FOR_EACH_ELEMENT_FROM(c_node) + elif pytype: + tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) + cetree.delAttributeFromNsName( + c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) + tree.END_FOR_EACH_ELEMENT_FROM(c_node) + else: + tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) + cetree.delAttributeFromNsName( + c_node, _XML_SCHEMA_INSTANCE_NS, "type") + tree.END_FOR_EACH_ELEMENT_FROM(c_node) + + ################################################################################ # Module level parser setup @@ -1549,6 +1693,9 @@ XML = fromstring +cdef object _DEFAULT_NSMAP +_DEFAULT_NSMAP = { "py": PYTYPE_NAMESPACE, "xsi": XML_SCHEMA_INSTANCE_NS } + def Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes): """Objectify specific version of the lxml.etree Element() factory that always creates a structural (tree) element. @@ -1561,6 +1708,8 @@ _attributes = attrib if _pytype is None: _pytype = TREE_PYTYPE + if nsmap is None: + nsmap = _DEFAULT_NSMAP _attributes[PYTYPE_ATTRIBUTE] = _pytype return _makeElement(_tag, None, _attributes, nsmap) @@ -1569,11 +1718,10 @@ """Create a new element with a Python value and XML attributes taken from keyword arguments or a dictionary passed as second argument. - Automatically adds a 'pyval' attribute for the Python type of the value, - if the type can be identified. If '_pyval' or '_xsi' are among the + Automatically adds a 'pytype' attribute for the Python type of the value, + if the type can be identified. If '_pytype' or '_xsi' are among the keyword arguments, they will be used instead. 
""" - cdef _Element element if attrib is not None: if python.PyDict_Size(_attributes): attrib.update(_attributes) @@ -1581,7 +1729,10 @@ if _xsi is not None: python.PyDict_SetItem(_attributes, XML_SCHEMA_INSTANCE_TYPE_ATTR, _xsi) if _pytype is None: - _pytype = _SCHEMA_TYPE_DICT[_xsi].name + # allow for s.o. using unregistered or even wrong xsi:type names + pytype_lookup = _SCHEMA_TYPE_DICT.get(_xsi) + if pytype_lookup is not None: + _pytype = pytype_lookup.name if python._isString(_value): strval = _value Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Tue Apr 10 21:01:50 2007 @@ -13,6 +13,10 @@ from lxml import objectify +XML_SCHEMA_INSTANCE_NS = "http://www.w3.org/2001/XMLSchema-instance" +XML_SCHEMA_INSTANCE_TYPE_ATTR = "{%s}type" % XML_SCHEMA_INSTANCE_NS +XML_SCHEMA_NIL_ATTR = "{%s}nil" % XML_SCHEMA_INSTANCE_NS + xml_str = '''\ @@ -28,7 +32,7 @@ """Test cases for lxml.objectify """ etree = etree - + def XML(self, xml): return self.etree.XML(xml, self.parser) @@ -356,20 +360,69 @@ XML = self.XML root = XML('''\ - 5 - 5 - 5 + true + false + 1 + 0 + + 5 + 5 + + 5 + 5 + 5 + 5 + 5 + 5 + 5 + 5 + 5 + 5 + + 5 + 5 + 5 + 5 + 5 + 5 + 5 + 5 + + 5 + 5 + 5 + 5 + 5 + + ''') - self.assert_(isinstance(root.a[0], objectify.IntElement)) - self.assertEquals(5, root.a[0]) - - self.assert_(isinstance(root.a[1], objectify.StringElement)) - self.assertEquals("5", root.a[1]) - - self.assert_(isinstance(root.a[2], objectify.FloatElement)) - self.assertEquals(5.0, root.a[2]) + for b in root.b: + self.assert_(isinstance(b, objectify.BoolElement)) + self.assertEquals(True, root.b[0]) + self.assertEquals(False, root.b[1]) + self.assertEquals(True, root.b[2]) + self.assertEquals(False, root.b[3]) + + for f in root.f: + self.assert_(isinstance(f, objectify.FloatElement)) + self.assertEquals(5, f) + + 
for s in root.s: + self.assert_(isinstance(s, objectify.StringElement)) + self.assertEquals("5", s) + + for l in root.l: + self.assert_(isinstance(l, objectify.LongElement)) + self.assertEquals(5l, l) + + for i in root.i: + self.assert_(isinstance(i, objectify.IntElement)) + self.assertEquals(5, i) + + self.assert_(isinstance(root.n, objectify.NoneElement)) + self.assertEquals(None, root.n) def test_type_str_sequence(self): XML = self.XML @@ -444,10 +497,131 @@ root.b = False self.assertFalse(root.b) - def test_type_annotation(self): + def test_pytype_annotation(self): XML = self.XML root = XML(u'''\ - + + 5 + test + 1.1 + \uF8D2 + true + + + 5 + 5 + 23 + 42 + 300 + 2 + + ''') + objectify.annotate(root) + + child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE) + for c in root.iterchildren() ] + self.assertEquals("int", child_types[0]) + self.assertEquals("str", child_types[1]) + self.assertEquals("float", child_types[2]) + self.assertEquals("str", child_types[3]) + self.assertEquals("bool", child_types[4]) + self.assertEquals("none", child_types[5]) + self.assertEquals(None, child_types[6]) + self.assertEquals("float", child_types[7]) + self.assertEquals("float", child_types[8]) + self.assertEquals("str", child_types[9]) + self.assertEquals("int", child_types[10]) + self.assertEquals("int", child_types[11]) + self.assertEquals("int", child_types[12]) + + self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR)) + + def test_pytype_annotation_use_old(self): + XML = self.XML + root = XML(u'''\ + + 5 + test + 1.1 + \uF8D2 + true + + + 5 + 5 + 23 + 42 + 300 + 2 + + ''') + objectify.annotate(root, ignore_old=False) + + child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE) + for c in root.iterchildren() ] + self.assertEquals("int", child_types[0]) + self.assertEquals("str", child_types[1]) + self.assertEquals("float", child_types[2]) + self.assertEquals("str", child_types[3]) + self.assertEquals("bool", child_types[4]) + self.assertEquals("none", child_types[5]) + 
self.assertEquals(None, child_types[6]) + self.assertEquals("float", child_types[7]) + self.assertEquals("float", child_types[8]) + self.assertEquals("str", child_types[9]) + self.assertEquals("str", child_types[10]) + self.assertEquals("float", child_types[11]) + self.assertEquals("long", child_types[12]) + + self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR)) + + def test_xsitype_annotation(self): + XML = self.XML + root = XML(u'''\ + + 5 + test + 1.1 + \uF8D2 + true + + + 5 + 5 + 23 + 42 + 300 + 2 + + ''') + objectify.xsiannotate(root) + + child_types = [ c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR) + for c in root.iterchildren() ] + self.assertEquals("int", child_types[0]) + self.assertEquals("string", child_types[1]) + self.assertEquals("float", child_types[2]) + self.assertEquals("string", child_types[3]) + self.assertEquals("boolean", child_types[4]) + self.assertEquals(None, child_types[5]) + self.assertEquals(None, child_types[6]) + self.assertEquals("int", child_types[7]) + self.assertEquals("int", child_types[8]) + self.assertEquals("int", child_types[9]) + self.assertEquals("string", child_types[10]) + self.assertEquals("float", child_types[11]) + self.assertEquals("integer", child_types[12]) + + self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR)) + + def test_xsitype_annotation_use_old(self): + XML = self.XML + root = XML(u'''\ + 5 test 1.1 @@ -456,9 +630,127 @@ 5 + 5 + 23 + 42 + 300 + 2 + + ''') + objectify.xsiannotate(root, ignore_old=False) + + child_types = [ c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR) + for c in root.iterchildren() ] + self.assertEquals("int", child_types[0]) + self.assertEquals("string", child_types[1]) + self.assertEquals("float", child_types[2]) + self.assertEquals("string", child_types[3]) + self.assertEquals("boolean", child_types[4]) + self.assertEquals(None, child_types[5]) + self.assertEquals(None, child_types[6]) + self.assertEquals("double", child_types[7]) + self.assertEquals("float", child_types[8]) + 
self.assertEquals("string", child_types[9]) + self.assertEquals("string", child_types[10]) + self.assertEquals("float", child_types[11]) + self.assertEquals("integer", child_types[12]) + + self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR)) + + def test_deannotate(self): + XML = self.XML + root = XML(u'''\ + + 5 + test + 1.1 + \uF8D2 + true + + + 5 + 5 + 23 + 42 + 300 + 2 + + ''') + objectify.deannotate(root) + + for c in root.getiterator(): + self.assertEquals(None, c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR)) + self.assertEquals(None, c.get(objectify.PYTYPE_ATTRIBUTE)) + + self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR)) + + def test_pytype_deannotate(self): + XML = self.XML + root = XML(u'''\ + + 5 + test + 1.1 + \uF8D2 + true + + + 5 + 5 + 23 + 42 + 300 + 2 + + ''') + objectify.xsiannotate(root) + objectify.deannotate(root, xsi=False) + + child_types = [ c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR) + for c in root.iterchildren() ] + self.assertEquals("int", child_types[0]) + self.assertEquals("string", child_types[1]) + self.assertEquals("float", child_types[2]) + self.assertEquals("string", child_types[3]) + self.assertEquals("boolean", child_types[4]) + self.assertEquals(None, child_types[5]) + self.assertEquals(None, child_types[6]) + self.assertEquals("int", child_types[7]) + self.assertEquals("int", child_types[8]) + self.assertEquals("int", child_types[9]) + self.assertEquals("string", child_types[10]) + self.assertEquals("float", child_types[11]) + self.assertEquals("integer", child_types[12]) + + self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR)) + + for c in root.getiterator(): + self.assertEquals(None, c.get(objectify.PYTYPE_ATTRIBUTE)) + + def test_xsitype_deannotate(self): + XML = self.XML + root = XML(u'''\ + + 5 + test + 1.1 + \uF8D2 + true + + + 5 + 5 + 23 + 42 + 300 + 2 ''') objectify.annotate(root) + objectify.deannotate(root, pytype=False) child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE) for c in root.iterchildren() ] @@ -470,6 
+762,16 @@ self.assertEquals("none", child_types[5]) self.assertEquals(None, child_types[6]) self.assertEquals("float", child_types[7]) + self.assertEquals("float", child_types[8]) + self.assertEquals("str", child_types[9]) + self.assertEquals("int", child_types[10]) + self.assertEquals("int", child_types[11]) + self.assertEquals("int", child_types[12]) + + self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR)) + + for c in root.getiterator(): + self.assertEquals(None, c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR)) def test_change_pytype_attribute(self): XML = self.XML @@ -890,7 +1192,6 @@ etree.tostring(new_root), etree.tostring(root)) - def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ObjectifyTestCase)]) From scoder at codespeak.net Tue Apr 10 21:02:28 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 10 Apr 2007 21:02:28 +0200 (CEST) Subject: [Lxml-checkins] r41996 - lxml/trunk/src/lxml Message-ID: <20070410190228.DBA9F80A0@code0.codespeak.net> Author: scoder Date: Tue Apr 10 21:02:27 2007 New Revision: 41996 Modified: lxml/trunk/src/lxml/objectify.pyx Log: typo Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Tue Apr 10 21:02:27 2007 @@ -717,7 +717,7 @@ # '0' or 'f' or 'false' return 0 elif c_str[0] == c'1' or c_str[0] == c't' or c_str[0] == c'T': - if c_str[1] == c'\0' or text.lower() == 'true'): + if c_str[1] == c'\0' or text.lower() == 'true': # '1' or 't' or 'true' return 1 raise ValueError, "Invalid boolean value: '%s'" % text From scoder at codespeak.net Wed Apr 11 09:59:20 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 11 Apr 2007 09:59:20 +0200 (CEST) Subject: [Lxml-checkins] r41997 - in lxml/trunk/src/lxml: . 
tests Message-ID: <20070411075920.DD22180A3@code0.codespeak.net> Author: scoder Date: Wed Apr 11 09:59:19 2007 New Revision: 41997 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/objectify.pyx lxml/trunk/src/lxml/tests/test_objectify.py Log: cleanups Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Apr 11 09:59:19 2007 @@ -1924,7 +1924,7 @@ include "dtd.pxi" # DTD include "relaxng.pxi" # RelaxNG include "xmlschema.pxi" # XMLSchema -#include "schematron.pxi" # Schematron +#include "schematron.pxi" # Schematron (requires libxml2 2.6.21+) ################################################################################ # Public C API Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Wed Apr 11 09:59:19 2007 @@ -713,11 +713,11 @@ return 0 c_str = _cstr(text) if c_str[0] == c'0' or c_str[0] == c'f' or c_str[0] == c'F': - if c_str[1] == c'\0' or text.lower() == "false": + if c_str[1] == c'\0' or text == "false" or text.lower() == "false": # '0' or 'f' or 'false' return 0 elif c_str[0] == c'1' or c_str[0] == c't' or c_str[0] == c'T': - if c_str[1] == c'\0' or text.lower() == 'true': + if c_str[1] == c'\0' or text == "true" or text.lower() == "true": # '1' or 't' or 'true' return 1 raise ValueError, "Invalid boolean value: '%s'" % text Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Wed Apr 11 09:59:19 2007 @@ -710,19 +710,19 @@ child_types = [ c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR) for c in root.iterchildren() ] - self.assertEquals("int", child_types[0]) - 
self.assertEquals("string", child_types[1]) - self.assertEquals("float", child_types[2]) - self.assertEquals("string", child_types[3]) - self.assertEquals("boolean", child_types[4]) - self.assertEquals(None, child_types[5]) - self.assertEquals(None, child_types[6]) - self.assertEquals("int", child_types[7]) - self.assertEquals("int", child_types[8]) - self.assertEquals("int", child_types[9]) - self.assertEquals("string", child_types[10]) - self.assertEquals("float", child_types[11]) - self.assertEquals("integer", child_types[12]) + self.assertEquals("int", child_types[ 0]) + self.assertEquals("string", child_types[ 1]) + self.assertEquals("float", child_types[ 2]) + self.assertEquals("string", child_types[ 3]) + self.assertEquals("boolean", child_types[ 4]) + self.assertEquals(None, child_types[ 5]) + self.assertEquals(None, child_types[ 6]) + self.assertEquals("int", child_types[ 7]) + self.assertEquals("int", child_types[ 8]) + self.assertEquals("int", child_types[ 9]) + self.assertEquals("string", child_types[10]) + self.assertEquals("float", child_types[11]) + self.assertEquals("integer", child_types[12]) self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR)) @@ -754,19 +754,19 @@ child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE) for c in root.iterchildren() ] - self.assertEquals("int", child_types[0]) - self.assertEquals("str", child_types[1]) - self.assertEquals("float", child_types[2]) - self.assertEquals("str", child_types[3]) - self.assertEquals("bool", child_types[4]) - self.assertEquals("none", child_types[5]) - self.assertEquals(None, child_types[6]) - self.assertEquals("float", child_types[7]) - self.assertEquals("float", child_types[8]) - self.assertEquals("str", child_types[9]) - self.assertEquals("int", child_types[10]) - self.assertEquals("int", child_types[11]) - self.assertEquals("int", child_types[12]) + self.assertEquals("int", child_types[ 0]) + self.assertEquals("str", child_types[ 1]) + self.assertEquals("float", child_types[ 2]) + 
self.assertEquals("str", child_types[ 3]) + self.assertEquals("bool", child_types[ 4]) + self.assertEquals("none", child_types[ 5]) + self.assertEquals(None, child_types[ 6]) + self.assertEquals("float", child_types[ 7]) + self.assertEquals("float", child_types[ 8]) + self.assertEquals("str", child_types[ 9]) + self.assertEquals("int", child_types[10]) + self.assertEquals("int", child_types[11]) + self.assertEquals("int", child_types[12]) self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR)) From scoder at codespeak.net Sun Apr 15 11:43:32 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 15 Apr 2007 11:43:32 +0200 (CEST) Subject: [Lxml-checkins] r42062 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20070415094332.D4C4480B0@code0.codespeak.net> Author: scoder Date: Sun Apr 15 11:43:31 2007 New Revision: 42062 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tests/test_etree.py Log: support for element.attrib.pop() Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Apr 15 11:43:31 2007 @@ -8,6 +8,8 @@ Features added -------------- +* Element.attrib now has a ``pop()`` method + * Extended type annotation in objectify: cleaner annotation namespace setup plus new ``xsiannotate()`` and ``deannotate()`` functions Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Apr 15 11:43:31 2007 @@ -1411,6 +1411,20 @@ for key, value in sequence_or_dict: _setAttributeValue(self._element, key, value) + def pop(self, key, *default): + if python.PyTuple_GET_SIZE(default) > 1: + raise TypeError, "pop expected at most 2 arguments, got %d" % \ + (python.PyTuple_GET_SIZE(default)+1) + result = _getAttributeValue(self._element, key, None) + if 
result is None: + if python.PyTuple_GET_SIZE(default) == 0: + raise KeyError, key + else: + return python.PyTuple_GET_ITEM(default, 0) + else: + _delAttribute(self._element, key) + return result + # ACCESSORS def __repr__(self): return repr(dict( _attributeIteratorFactory(self._element, 3) )) Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Sun Apr 15 11:43:31 2007 @@ -70,6 +70,39 @@ self.assertEquals("TEST", root.get("attr")) self.assertRaises(TypeError, root.set, "newattr", 5) + def test_attrib_pop(self): + ElementTree = self.etree.ElementTree + + f = StringIO('') + doc = ElementTree(file=f) + root = doc.getroot() + self.assertEquals('One', root.attrib['one']) + self.assertEquals('Two', root.attrib['two']) + + self.assertEquals('One', root.attrib.pop('one')) + + self.assertEquals(None, root.attrib.get('one')) + self.assertEquals('Two', root.attrib['two']) + + def test_attrib_pop_unknown(self): + root = self.etree.XML('') + self.assertRaises(KeyError, root.attrib.pop, 'NONE') + + self.assertEquals('One', root.attrib['one']) + self.assertEquals('Two', root.attrib['two']) + + def test_attrib_pop_default(self): + root = self.etree.XML('') + self.assertEquals('Three', root.attrib.pop('three', 'Three')) + + def test_attrib_pop_empty_default(self): + root = self.etree.XML('') + self.assertEquals('Three', root.attrib.pop('three', 'Three')) + + def test_attrib_pop_invalid_args(self): + root = self.etree.XML('') + self.assertRaises(TypeError, root.attrib.pop, 'One', None, None) + def test_pi(self): # lxml.etree separates target and text Element = self.etree.Element From scoder at codespeak.net Fri Apr 20 09:53:08 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Apr 2007 09:53:08 +0200 (CEST) Subject: [Lxml-checkins] r42190 - in lxml/trunk/doc: . 
html Message-ID: <20070420075308.D65DC8090@code0.codespeak.net> Author: scoder Date: Fri Apr 20 09:53:08 2007 New Revision: 42190 Modified: lxml/trunk/doc/html/style.css lxml/trunk/doc/mkhtml.py Log: work arounds for IE Modified: lxml/trunk/doc/html/style.css ============================================================================== --- lxml/trunk/doc/html/style.css (original) +++ lxml/trunk/doc/html/style.css Fri Apr 20 09:53:08 2007 @@ -1,15 +1,15 @@ body { - /* CSS Hack for IE that does not respect the "margin: auto" rule at the - * document level */ + font: 13px Arial, Verdana, Helvetica, sans-serif; text-align: center; - padding: 1em; } - @media screen { + body { + padding: 1em 1em 1em 21em; + } + div.document { width: 45em; - padding-left: 21em; background-color: white; } } @@ -26,7 +26,6 @@ } div.document { - font: 13px Arial, Verdana, Helvetica, sans-serif; margin: 1em auto 1em auto; color: #222; text-align: left; @@ -50,7 +49,7 @@ /*** side menu ***/ div.sidemenu { - position: fixed; + position: absolute; top: 0px; left: 0px; width: 22em; @@ -62,6 +61,11 @@ background-color: #FFFAFA; } +html > body div.sidemenu { + /* ignored by IE -> everyone else knows 'fixed', right? 
*/ + position: fixed; +} + div.sidemenu span.section.title { line-height: 1.5em; font-size: 130%; Modified: lxml/trunk/doc/mkhtml.py ============================================================================== --- lxml/trunk/doc/mkhtml.py (original) +++ lxml/trunk/doc/mkhtml.py Fri Apr 20 09:53:08 2007 @@ -55,7 +55,7 @@ def merge_menu(tree, menu, name): menu_root = copy.deepcopy(menu) - tree.getroot()[1][0].append(menu_root) # html->body->div[class=document] + tree.getroot()[1][0].insert(0, menu_root) # html->body->div[class=document] for el in menu_root.getiterator(): tag = el.tag if tag[0] != '{': From scoder at codespeak.net Fri Apr 20 09:54:43 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Apr 2007 09:54:43 +0200 (CEST) Subject: [Lxml-checkins] r42192 - lxml/trunk/benchmark Message-ID: <20070420075443.243F48090@code0.codespeak.net> Author: scoder Date: Fri Apr 20 09:54:42 2007 New Revision: 42192 Modified: lxml/trunk/benchmark/bench_etree.py lxml/trunk/benchmark/benchbase.py Log: benchmark fix: remove child iteration overhead from benchmark loops to restrict timings to the benchmark target Modified: lxml/trunk/benchmark/bench_etree.py ============================================================================== --- lxml/trunk/benchmark/bench_etree.py (original) +++ lxml/trunk/benchmark/bench_etree.py Fri Apr 20 09:54:42 2007 @@ -3,7 +3,7 @@ from StringIO import StringIO import benchbase -from benchbase import with_attributes, with_text, onlylib, serialized +from benchbase import with_attributes, with_text, onlylib, serialized, children ############################################################ # Benchmarks @@ -77,8 +77,10 @@ root1.append(el) def bench_insert_from_document(self, root1, root2): + pos = len(root1)/2 for el in root2: - root1.insert(len(root1)/2, el) + root1.insert(pos, el) + pos = pos + 1 def bench_rotate_children(self, root): # == "1 2 3" # runs on any single tree independently @@ -102,18 +104,21 @@ def 
bench_clear(self, root): root.clear() - def bench_has_children(self, root): - for child in root: + @children + def bench_has_children(self, children): + for child in children: if child and child and child and child and child: pass - def bench_len(self, root): - for child in root: + @children + def bench_len(self, children): + for child in children: map(len, repeat(child, 20)) - def bench_create_subelements(self, root): + @children + def bench_create_subelements(self, children): SubElement = self.etree.SubElement - for child in root: + for child in children: SubElement(child, '{test}test') def bench_append_elements(self, root): @@ -122,103 +127,120 @@ el = Element('{test}test') child.append(el) - def bench_makeelement(self, root): + @children + def bench_makeelement(self, children): empty_attrib = {} - for child in root: + for child in children: child.makeelement('{test}test', empty_attrib) - def bench_create_elements(self, root): + @children + def bench_create_elements(self, children): Element = self.etree.Element - for child in root: + for child in children: Element('{test}test') - def bench_replace_children_element(self, root): + @children + def bench_replace_children_element(self, children): Element = self.etree.Element - for child in root: + for child in children: el = Element('{test}test') child[:] = [el] - def bench_replace_children(self, root): - Element = self.etree.Element - for child in root: - child[:] = [ child[0] ] + @children + def bench_replace_children(self, children): + els = [ self.etree.Element("newchild") ] + for child in children: + child[:] = els def bench_remove_children(self, root): for child in root: root.remove(child) def bench_remove_children_reversed(self, root): - for child in reversed(root[:]): + for child in reversed(root): root.remove(child) - def bench_set_attributes(self, root): - for child in root: + @children + def bench_set_attributes(self, children): + for child in children: child.set('a', 'bla') @with_attributes(True) - def 
bench_get_attributes(self, root): - for child in root: + @children + def bench_get_attributes(self, children): + for child in children: child.get('bla1') child.get('{attr}test1') - def bench_setget_attributes(self, root): - for child in root: + @children + def bench_setget_attributes(self, children): + for child in children: child.set('a', 'bla') - for child in root: + for child in children: child.get('a') def bench_root_getchildren(self, root): root.getchildren() - def bench_getchildren(self, root): - for child in root: + @children + def bench_getchildren(self, children): + for child in children: child.getchildren() - def bench_get_children_slice(self, root): - for child in root: + @children + def bench_get_children_slice(self, children): + for child in children: child[:] - def bench_get_children_slice_2x(self, root): - for child in root: - children = child[:] + @children + def bench_get_children_slice_2x(self, children): + for child in children: + child[:] child[:] - def bench_deepcopy(self, root): - for child in root: + @children + def bench_deepcopy(self, children): + for child in children: copy.deepcopy(child) def bench_deepcopy_all(self, root): copy.deepcopy(root) - def bench_tag(self, root): - for child in root: + @children + def bench_tag(self, children): + for child in children: child.tag - def bench_tag_repeat(self, root): - for child in root: + @children + def bench_tag_repeat(self, children): + for child in children: for i in repeat(0, 100): child.tag @with_text(utext=True, text=True, no_text=True) - def bench_text(self, root): - for child in root: + @children + def bench_text(self, children): + for child in children: child.text @with_text(utext=True, text=True, no_text=True) - def bench_text_repeat(self, root): + @children + def bench_text_repeat(self, children): repeat = range(500) - for child in root: + for child in children: for i in repeat: child.text - def bench_set_text(self, root): + @children + def bench_set_text(self, children): text = TEXT - 
for child in root: + for child in children: child.text = text - def bench_set_utext(self, root): + @children + def bench_set_utext(self, children): text = UTEXT - for child in root: + for child in children: child.text = text @onlylib('lxe') Modified: lxml/trunk/benchmark/benchbase.py ============================================================================== --- lxml/trunk/benchmark/benchbase.py (original) +++ lxml/trunk/benchmark/benchbase.py Fri Apr 20 09:54:42 2007 @@ -78,6 +78,11 @@ function.STRING = True return function +def children(function): + "Decorator for benchmarks that require a list of root children" + function.CHILDREN = True + return function + ############################################################ # benchmark baseclass ############################################################ @@ -105,13 +110,18 @@ deepcopy = copy.deepcopy def set_property(root, fname): xml = self._serialize_tree(root) - setattr(self, fname, lambda : etree.XML(xml, etree_parser)) + if etree_parser is not None: + setattr(self, fname, lambda : etree.XML(xml, etree_parser)) + else: + setattr(self, fname, lambda : deepcopy(root)) setattr(self, fname + '_xml', lambda : xml) + setattr(self, fname + '_children', lambda : root[:]) else: def set_property(root, fname): setattr(self, fname, self.et_make_clone_factory(root)) xml = self._serialize_tree(root) setattr(self, fname + '_xml', lambda : xml) + setattr(self, fname + '_children', lambda : root[:]) attribute_list = list(izip(count(), ({}, _ATTRIBUTES))) text_list = list(izip(count(), (None, _TEXT, _UTEXT))) @@ -131,10 +141,12 @@ def _tree_builder_name(self, tree, tn, an): return '_root%d_T%d_A%d' % (tree, tn, an) - def tree_builder(self, tree, tn, an, serial): + def tree_builder(self, tree, tn, an, serial, children): name = self._tree_builder_name(tree, tn, an) if serial: name += '_xml' + elif children: + name += '_children' return getattr(self, name) def _serialize_tree(self, root): @@ -270,13 +282,14 @@ arg_count = 1 
tree_tuples = self._permutations(all_trees, arg_count) - serialized = getattr(method, 'STRING', False) + serialized = getattr(method, 'STRING', False) + children = getattr(method, 'CHILDREN', False) for tree_tuple in tree_tuples: for tn in sorted(getattr(method, 'TEXT', (0,))): for an in sorted(getattr(method, 'ATTRIBUTES', (0,))): benchmarks.append((name, method_call, tree_tuple, - tn, an, serialized)) + tn, an, serialized, children)) return benchmarks @@ -315,11 +328,12 @@ return (benchmark_suites, benchmarks) -def build_treeset_name(trees, tn, an, serialized): +def build_treeset_name(trees, tn, an, serialized, children): text = {0:'-', 1:'S', 2:'U'}[tn] attr = {0:'-', 1:'A'}[an] ser = {True:'X', False:'T'}[serialized] - return "%s%s%s T%s" % (text, attr, ser, ',T'.join(imap(str, trees))[:6]) + chd = {True:'C', False:'R'}[children] + return "%s%s%s%s T%s" % (text, attr, ser, chd, ',T'.join(imap(str, trees))[:6]) def printSetupTimes(benchmark_suites): print "Setup times for trees in seconds:" @@ -327,20 +341,20 @@ print "%-3s: " % b.lib_name, for an in (0,1): for tn in (0,1,2): - print ' %s ' % build_treeset_name((), tn, an, False)[:2], + print ' %s ' % build_treeset_name((), tn, an, False, False)[:2], print for i, tree_times in enumerate(b.setup_times): print " T%d:" % (i+1), ' '.join("%6.4f" % t for t in tree_times) print -def runBench(suite, method_name, method_call, tree_set, tn, an, serial): +def runBench(suite, method_name, method_call, tree_set, tn, an, serial, children): if method_call is None: raise SkippedTest current_time = time.time call_repeat = range(10) - tree_builders = [ suite.tree_builder(tree, tn, an, serial) + tree_builders = [ suite.tree_builder(tree, tn, an, serial, children) for tree in tree_set ] times = [] @@ -364,7 +378,7 @@ for bench_calls in izip(*benchmarks): for lib, (bench, benchmark_setup) in enumerate(izip(benchmark_suites, bench_calls)): bench_name = benchmark_setup[0] - tree_set_name = build_treeset_name(*benchmark_setup[-4:]) + 
tree_set_name = build_treeset_name(*benchmark_setup[-5:]) print "%-3s: %-28s" % (bench.lib_name, bench_name[6:34]), print "(%-10s)" % tree_set_name, sys.stdout.flush() From scoder at codespeak.net Fri Apr 20 10:48:29 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Apr 2007 10:48:29 +0200 (CEST) Subject: [Lxml-checkins] r42195 - lxml/trunk/benchmark Message-ID: <20070420084829.BD66780AD@code0.codespeak.net> Author: scoder Date: Fri Apr 20 10:48:29 2007 New Revision: 42195 Modified: lxml/trunk/benchmark/benchbase.py Log: take minimum time instead of averaging over repeated runs Modified: lxml/trunk/benchmark/benchbase.py ============================================================================== --- lxml/trunk/benchmark/benchbase.py (original) +++ lxml/trunk/benchmark/benchbase.py Fri Apr 20 10:48:29 2007 @@ -362,13 +362,16 @@ for i in range(3): gc.collect() gc.disable() - t = 0 + t = -1 for i in call_repeat: args = [ build() for build in tree_builders ] t_one_call = current_time() method_call(*args) - t += current_time() - t_one_call - t = 1000.0 * t / len(call_repeat) + t_one_call = 1000.0 * (current_time() - t_one_call) + if t < 0: + t = t_one_call + else: + t = min(t, t_one_call) times.append(t) gc.enable() del args From scoder at codespeak.net Fri Apr 20 10:54:42 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Apr 2007 10:54:42 +0200 (CEST) Subject: [Lxml-checkins] r42196 - lxml/trunk/benchmark Message-ID: <20070420085442.A7BD780AD@code0.codespeak.net> Author: scoder Date: Fri Apr 20 10:54:42 2007 New Revision: 42196 Modified: lxml/trunk/benchmark/benchbase.py Log: cleanup Modified: lxml/trunk/benchmark/benchbase.py ============================================================================== --- lxml/trunk/benchmark/benchbase.py (original) +++ lxml/trunk/benchmark/benchbase.py Fri Apr 20 10:54:42 2007 @@ -367,12 +367,12 @@ args = [ build() for build in tree_builders ] t_one_call = current_time() 
method_call(*args) - t_one_call = 1000.0 * (current_time() - t_one_call) + t_one_call = current_time() - t_one_call if t < 0: t = t_one_call else: t = min(t, t_one_call) - times.append(t) + times.append(1000.0 * t) gc.enable() del args return times From scoder at codespeak.net Fri Apr 20 12:42:01 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Apr 2007 12:42:01 +0200 (CEST) Subject: [Lxml-checkins] r42199 - lxml/trunk/benchmark Message-ID: <20070420104201.CBE0A8090@code0.codespeak.net> Author: scoder Date: Fri Apr 20 12:42:01 2007 New Revision: 42199 Modified: lxml/trunk/benchmark/bench_objectify.py lxml/trunk/benchmark/bench_xpath.py Log: cleanup in benchmarks, use children where appropriate Modified: lxml/trunk/benchmark/bench_objectify.py ============================================================================== --- lxml/trunk/benchmark/bench_objectify.py (original) +++ lxml/trunk/benchmark/bench_objectify.py Fri Apr 20 12:42:01 2007 @@ -10,6 +10,9 @@ ############################################################ class BenchMark(benchbase.BenchMarkBase): + repeat1000 = range(1000) + repeat3000 = range(3000) + def __init__(self, lib): from lxml import etree, objectify self.objectify = objectify @@ -20,37 +23,37 @@ def bench_attribute(self, root): "1 2 4" - for i in repeat(None, 3000): + for i in self.repeat3000: root.zzzzz def bench_attribute_cached(self, root): "1 2 4" cache = root.zzzzz - for i in repeat(None, 3000): + for i in self.repeat3000: root.zzzzz def bench_attributes_deep(self, root): "1 2 4" - for i in repeat(None, 3000): + for i in self.repeat3000: root.zzzzz['{cdefg}z00000'] def bench_attributes_deep_cached(self, root): "1 2 4" cache1 = root.zzzzz cache2 = cache1['{cdefg}z00000'] - for i in repeat(None, 3000): + for i in self.repeat3000: root.zzzzz['{cdefg}z00000'] def bench_objectpath(self, root): "1 2 4" path = self.objectify.ObjectPath(".zzzzz") - for i in repeat(None, 3000): + for i in self.repeat3000: path(root) def 
bench_objectpath_deep(self, root): "1 2 4" path = self.objectify.ObjectPath(".zzzzz.{cdefg}z00000") - for i in repeat(None, 3000): + for i in self.repeat3000: path(root) def bench_objectpath_deep_cached(self, root): @@ -58,7 +61,7 @@ cache1 = root.zzzzz cache2 = cache1['{cdefg}z00000'] path = self.objectify.ObjectPath(".zzzzz.{cdefg}z00000") - for i in repeat(None, 3000): + for i in self.repeat3000: path(root) @with_text(text=True, utext=True, no_text=True) @@ -72,7 +75,7 @@ def bench_type_inference(self, root): "1 2 4" el = root.aaaaa - for i in repeat(None, 1000): + for i in self.repeat1000: el.getchildren() @with_text(text=True) @@ -80,7 +83,7 @@ "1 2 4" el = root.aaaaa self.objectify.annotate(el) - for i in repeat(None, 1000): + for i in self.repeat1000: el.getchildren() Modified: lxml/trunk/benchmark/bench_xpath.py ============================================================================== --- lxml/trunk/benchmark/bench_xpath.py (original) +++ lxml/trunk/benchmark/bench_xpath.py Fri Apr 20 12:42:01 2007 @@ -3,7 +3,7 @@ from StringIO import StringIO import benchbase -from benchbase import with_attributes, with_text, onlylib, serialized +from benchbase import with_attributes, with_text, onlylib, serialized, children ############################################################ # Benchmarks @@ -11,14 +11,16 @@ class XPathBenchMark(benchbase.BenchMarkBase): @onlylib('lxe') - def bench_xpath_class(self, root): + @children + def bench_xpath_class(self, children): xpath = self.etree.XPath("./*[0]") - for child in root: + for child in children: xpath(child) @onlylib('lxe') - def bench_xpath_class_repeat(self, root): - for child in root: + @children + def bench_xpath_class_repeat(self, children): + for child in children: xpath = self.etree.XPath("./*[0]") xpath(child) @@ -29,12 +31,14 @@ xpath.evaluate("./*[0]") @onlylib('lxe') - def bench_xpath_method(self, root): - for child in root: + @children + def bench_xpath_method(self, children): + for child in children: 
child.xpath("./*[0]") @onlylib('lxe') - def bench_xpath_old_extensions(self, root): + @children + def bench_xpath_old_extensions(self, children): def return_child(_, elements): if elements: return elements[0][0] @@ -43,11 +47,12 @@ extensions = {("test", "child") : return_child} xpath = self.etree.XPath("t:child(.)", namespaces={"test":"t"}, extensions=extensions) - for child in root: + for child in children: xpath(child) @onlylib('lxe') - def bench_xpath_extensions(self, root): + @children + def bench_xpath_extensions(self, children): def return_child(_, elements): if elements: return elements[0][0] @@ -57,7 +62,7 @@ try: xpath = self.etree.XPath("test:t(.)", {"test":"testns"}) - for child in root: + for child in children: xpath(child) finally: del self.etree.FunctionNamespace("testns")["t"] From scoder at codespeak.net Fri Apr 20 12:43:22 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Apr 2007 12:43:22 +0200 (CEST) Subject: [Lxml-checkins] r42200 - lxml/trunk/doc Message-ID: <20070420104322.449FB80AA@code0.codespeak.net> Author: scoder Date: Fri Apr 20 12:43:22 2007 New Revision: 42200 Modified: lxml/trunk/doc/performance.txt Log: updated benchmark results for pre-1.3 Modified: lxml/trunk/doc/performance.txt ============================================================================== --- lxml/trunk/doc/performance.txt (original) +++ lxml/trunk/doc/performance.txt Fri Apr 20 12:43:22 2007 @@ -14,21 +14,26 @@ .. _ElementTree: http://effbot.org/zone/element-index.htm .. _cElementTree: http://effbot.org/zone/celementtree.htm -The statements made here are backed by the benchmark script `bench.py`_ that -comes with the lxml source distribution. The timings cited below compare lxml -1.0 (with libxml2 2.6.24), ElementTree 1.2.6 and cElementTree 1.0.5 under -CPython 2.4.2 on a 1.6GHz AMD64 machine. - -.. 
_`bench.py`: http://codespeak.net/svn/lxml/branch/lxml-1.0/bench.py - -The ``bench.py`` script runs a number of simple tests on the different -libraries, using different XML tree configurations: different tree sizes, with -or without attributes (-/A) and with or without ASCII or unicode text (-/S/U). -In the result extracts cited below, T1 refers to a 3-level tree with many -children at the third level, T2 is swapped around to have many children at the -root element, T3 is a deep tree with few children at each level and T4 is a -small tree, slightly broader than deep. Most benchmarks run in a loop over -all children of the tree root. +The statements made here are backed by the benchmark scripts +`bench_etree.py`_, `bench_xpath.py`_ and `bench_objectify.py`_ that come with +the lxml source distribution. The timings cited below compare lxml 1.3 (with +libxml2 2.6.26) to the ElementTree and cElementTree versions shipped with +CPython 2.5 (based on ElementTree 1.2.6). They were run single-threaded on a +1.8GHz Intel Core Duo machine. + +.. _`bench_etree.py`: http://codespeak.net/svn/lxml/branch/lxml-1.3/benchmark/bench_etree.py +.. _`bench_xpath.py`: http://codespeak.net/svn/lxml/branch/lxml-1.3/benchmark/bench_xpath.py +.. _`bench_objectify.py`: http://codespeak.net/svn/lxml/branch/lxml-1.3/benchmark/bench_objectify.py + +The scripts run a number of simple tests on the different libraries, using +different XML tree configurations: different tree sizes, with or without +attributes (-/A), with or without ASCII or unicode text (-/S/U), and either +against a tree or its serialised form (T/X). In the result extracts cited +below, T1 refers to a 3-level tree with many children at the third level, T2 +is swapped around to have many children at the root element, T3 is a deep tree +with few children at each level and T4 is a small tree, slightly broader than +deep. 
If repetition is involved, this usually means running the benchmark in +a loop over all children of the tree root. .. contents:: .. @@ -37,6 +42,7 @@ 3 The ElementTree API 4 Tree traversal 5 XPath + 6 lxml.objectify Bad things first @@ -57,45 +63,57 @@ results are rather impressive. Compared to cElementTree, lxml is about 20 to 40 times faster on serialisation:: - lxe: tostring_utf16 (SA T2) 30.9846 msec/pass - cET: tostring_utf16 (SA T2) 715.5002 msec/pass - ET : tostring_utf16 (SA T2) 758.5271 msec/pass - - lxe: tostring_utf16 (U- T3) 3.0509 msec/pass - cET: tostring_utf16 (U- T3) 72.4721 msec/pass - ET : tostring_utf16 (U- T3) 87.0735 msec/pass - - lxe: tostring_utf8 (UA T2) 26.8996 msec/pass - cET: tostring_utf8 (UA T2) 700.4889 msec/pass - ET : tostring_utf8 (UA T2) 745.3317 msec/pass - - lxe: tostring_utf8 (S- T3) 2.1876 msec/pass - cET: tostring_utf8 (S- T3) 71.1290 msec/pass - ET : tostring_utf8 (S- T3) 87.1525 msec/pass + lxe: tostring_utf16 (SATR T1) 21.9206 msec/pass + cET: tostring_utf16 (SATR T1) 461.9428 msec/pass + ET : tostring_utf16 (SATR T1) 486.8946 msec/pass + + lxe: tostring_utf16 (UATR T1) 22.7508 msec/pass + cET: tostring_utf16 (UATR T1) 526.3446 msec/pass + ET : tostring_utf16 (UATR T1) 496.0767 msec/pass + + lxe: tostring_utf16 (S-TR T2) 23.8452 msec/pass + cET: tostring_utf16 (S-TR T2) 537.9200 msec/pass + ET : tostring_utf16 (S-TR T2) 504.4273 msec/pass + + lxe: tostring_utf8 (S-TR T2) 18.2550 msec/pass + cET: tostring_utf8 (S-TR T2) 528.3908 msec/pass + ET : tostring_utf8 (S-TR T2) 549.7071 msec/pass + + lxe: tostring_utf8 (U-TR T3) 2.5497 msec/pass + cET: tostring_utf8 (U-TR T3) 49.8495 msec/pass + ET : tostring_utf8 (U-TR T3) 62.6927 msec/pass For parsing, the difference between the libraries is smaller. 
The (c)ET libraries use the expat parser, which is known to be extremely fast:: - lxe: parse_stringIO (SA T2) 197.7678 msec/pass - cET: parse_stringIO (SA T2) 38.9390 msec/pass - ET : parse_stringIO (SA T2) 364.3468 msec/pass - - lxe: parse_stringIO (UA T3) 48.6735 msec/pass - cET: parse_stringIO (UA T3) 39.7455 msec/pass - ET : parse_stringIO (UA T3) 237.9971 msec/pass + lxe: parse_stringIO (SAXR T1) 150.2380 msec/pass + cET: parse_stringIO (SAXR T1) 25.9311 msec/pass + ET : parse_stringIO (SAXR T1) 222.9431 msec/pass + + lxe: parse_stringIO (S-XR T3) 5.9490 msec/pass + cET: parse_stringIO (S-XR T3) 5.4519 msec/pass + ET : parse_stringIO (S-XR T3) 76.4120 msec/pass + + lxe: parse_stringIO (UAXR T3) 29.3601 msec/pass + cET: parse_stringIO (UAXR T3) 28.9941 msec/pass + ET : parse_stringIO (UAXR T3) 163.5361 msec/pass The expat parser allows cET to be up to 80% faster than lxml on plain parser -performance. The same applies to the ``iterparse()`` function. However, if -you take a complete serialize-parse cycle, the numbers will look similar to -these:: - - lxe: write_utf8_parse_stringIO (S- T1) 187.0444 msec/pass - cET: write_utf8_parse_stringIO (S- T1) 828.4068 msec/pass - ET : write_utf8_parse_stringIO (S- T1) 1181.0658 msec/pass - - lxe: write_utf8_parse_stringIO (UA T2) 213.6599 msec/pass - cET: write_utf8_parse_stringIO (UA T2) 927.2374 msec/pass - ET : write_utf8_parse_stringIO (UA T2) 1297.9678 msec/pass +performance. Similar timings can be observed for the ``iterparse()`` +function.
However, if you take a complete serialize-parse cycle, the numbers +will look similar to these:: + + lxe: write_utf8_parse_stringIO (S-TR T1) 316.6230 msec/pass + cET: write_utf8_parse_stringIO (S-TR T1) 592.1209 msec/pass + ET : write_utf8_parse_stringIO (S-TR T1) 817.9121 msec/pass + + lxe: write_utf8_parse_stringIO (UATR T3) 49.9680 msec/pass + cET: write_utf8_parse_stringIO (UATR T3) 434.6111 msec/pass + ET : write_utf8_parse_stringIO (UATR T3) 574.1441 msec/pass + + lxe: write_utf8_parse_stringIO (SATR T4) 1.2789 msec/pass + cET: write_utf8_parse_stringIO (SATR T4) 12.2640 msec/pass + ET : write_utf8_parse_stringIO (SATR T4) 15.6620 msec/pass For applications that require a high parser throughput and do little serialization, cET is the best choice. Also for iterparse applications that @@ -114,22 +132,20 @@ (given in seconds):: lxe: -- S- U- -A SA UA - T1: 0.1360 0.1214 0.1214 0.1217 0.1232 0.1226 - T2: 0.1258 0.1257 0.1250 0.1348 0.1359 0.1358 - T3: 0.0354 0.0282 0.0288 0.0850 0.0860 0.0862 - T4: 0.0006 0.0006 0.0006 0.0019 0.0018 0.0019 - + T1: 0.1029 0.1005 0.0998 0.1003 0.0998 0.1002 + T2: 0.1035 0.1013 0.1015 0.1090 0.1089 0.1090 + T3: 0.0276 0.0270 0.0273 0.0679 0.0673 0.0673 + T4: 0.0004 0.0004 0.0004 0.0013 0.0013 0.0013 cET: -- S- U- -A SA UA - T1: 0.0417 0.0409 0.0403 0.0410 0.0410 0.0415 - T2: 0.0413 0.0414 0.0413 0.0417 0.0411 0.0417 - T3: 0.0097 0.0100 0.0099 0.0187 0.0142 0.0146 + T1: 0.0277 0.0273 0.0273 0.0272 0.0278 0.0275 + T2: 0.0281 0.0347 0.0281 0.0285 0.0284 0.0284 + T3: 0.0074 0.0074 0.0074 0.0122 0.0102 0.0101 T4: 0.0001 0.0001 0.0001 0.0001 0.0001 0.0001 - ET : -- S- U- -A SA UA - T1: 0.2189 0.2832 0.2210 0.2646 0.2905 0.2214 - T2: 0.3022 0.2322 0.2868 0.3192 0.2290 0.3075 - T3: 0.0519 0.0553 0.0527 0.0601 0.0572 0.0911 - T4: 0.0009 0.0008 0.0008 0.0008 0.0009 0.0009 + T1: 0.1349 0.1962 0.2356 0.1288 0.2642 0.1351 + T2: 0.3104 0.1344 0.3566 0.3857 0.1354 0.4677 + T3: 0.0313 0.0325 0.0312 0.0356 0.3803 0.0364 + T4: 0.0005 0.0005 0.0008 
0.0006 0.0007 0.0006 While lxml is still faster than ET in most cases (30-60%), cET can be up to three times faster than lxml here. One of the reasons is that lxml must @@ -141,29 +157,29 @@ Where ET and cET can quickly create a shallow copy of their list of children, lxml has to create a Python object for each child and collect them in a list:: - lxe: root_getchildren (-- T2 ) 6.3981 msec/pass - cET: root_getchildren (-- T2 ) 0.0651 msec/pass - ET : root_getchildren (-- T2 ) 0.0224 msec/pass + lxe: root_getchildren (--TR T2) 0.3500 msec/pass + cET: root_getchildren (--TR T2) 0.0150 msec/pass + ET : root_getchildren (--TR T2) 0.0091 msec/pass As opposed to ET, libxml2 has a notion of documents that each element must be in. This results in a major performance difference for creating independent Elements that end up in independently created documents:: - lxe: create_elements (-- T2 ) 22.0083 msec/pass - cET: create_elements (-- T2 ) 0.3920 msec/pass - ET : create_elements (-- T2 ) 3.0865 msec/pass + lxe: create_elements (--TC T2) 3.7301 msec/pass + cET: create_elements (--TC T2) 0.1960 msec/pass + ET : create_elements (--TC T2) 1.4279 msec/pass Therefore, it is always preferable to create Elements for the document they are supposed to end up in, either as SubElements of an Element or using the explicit ``Element.makeelement()`` call:: - lxe: makeelement (-- T2 ) 4.2658 msec/pass - cET: makeelement (-- T2 ) 0.5658 msec/pass - ET : makeelement (-- T2 ) 3.7136 msec/pass - - lxe: create_subelements (-- T2 ) 3.7640 msec/pass - cET: create_subelements (-- T2 ) 0.5332 msec/pass - ET : create_subelements (-- T2 ) 6.5937 msec/pass + lxe: makeelement (--TC T2) 2.5990 msec/pass + cET: makeelement (--TC T2) 0.3128 msec/pass + ET : makeelement (--TC T2) 1.6940 msec/pass + + lxe: create_subelements (--TC T2) 2.3072 msec/pass + cET: create_subelements (--TC T2) 0.2370 msec/pass + ET : create_subelements (--TC T2) 3.2189 msec/pass So, if the main performance bottleneck of an 
application is creating large XML trees in memory through calls to Element and SubElement, cET is the best @@ -176,13 +192,13 @@ The following benchmark appends all root children of the second tree to the root of the first tree:: - lxe: append_from_document (-- T1,T2) 11.7905 msec/pass - cET: append_from_document (-- T1,T2) 0.4673 msec/pass - ET : append_from_document (-- T1,T2) 2.0460 msec/pass - - lxe: append_from_document (-- T3,T4) 0.1582 msec/pass - cET: append_from_document (-- T3,T4) 0.0224 msec/pass - ET : append_from_document (-- T3,T4) 0.1618 msec/pass + lxe: append_from_document (--TR T1,T2) 4.3468 msec/pass + cET: append_from_document (--TR T1,T2) 0.2608 msec/pass + ET : append_from_document (--TR T1,T2) 1.2310 msec/pass + + lxe: append_from_document (--TR T3,T4) 0.0679 msec/pass + cET: append_from_document (--TR T3,T4) 0.0148 msec/pass + ET : append_from_document (--TR T3,T4) 0.0880 msec/pass Although these are fairly small numbers compared to parsing, this easily shows the different performance classes for lxml and (c)ET. 
Where the latter do not @@ -193,26 +209,26 @@ This difference is not always as visible, but applies to most parts of the API, like inserting newly created elements:: - lxe: insert_from_document (-- T1,T2) 16.2342 msec/pass - cET: insert_from_document (-- T1,T2) 1.1786 msec/pass - ET : insert_from_document (-- T1,T2) 3.6107 msec/pass + lxe: insert_from_document (--TR T1,T2) 6.3150 msec/pass + cET: insert_from_document (--TR T1,T2) 0.4039 msec/pass + ET : insert_from_document (--TR T1,T2) 1.4770 msec/pass Or replacing the child slice by a new element:: - lxe: replace_children_element (-- T1 ) 9.1834 msec/pass - cET: replace_children_element (-- T1 ) 0.9731 msec/pass - ET : replace_children_element (-- T1 ) 14.8213 msec/pass + lxe: replace_children_element (--TC T1) 0.2608 msec/pass + cET: replace_children_element (--TC T1) 0.0238 msec/pass + ET : replace_children_element (--TC T1) 0.1628 msec/pass You should keep this difference in mind when you merge very large trees. On the other hand, deep copying a tree is fast in lxml:: - lxe: deepcopy (-- T1 ) 24.7359 msec/pass - cET: deepcopy (-- T1 ) 450.5479 msec/pass - ET : deepcopy (-- T1 ) 717.8308 msec/pass - - lxe: deepcopy (-- T3 ) 2.1182 msec/pass - cET: deepcopy (-- T3 ) 107.2124 msec/pass - ET : deepcopy (-- T3 ) 173.9782 msec/pass + lxe: deepcopy (--TC T1) 10.6010 msec/pass + cET: deepcopy (--TC T1) 220.2251 msec/pass + ET : deepcopy (--TC T1) 463.7730 msec/pass + + lxe: deepcopy (--TC T3) 8.2979 msec/pass + cET: deepcopy (--TC T3) 53.8740 msec/pass + ET : deepcopy (--TC T3) 118.2799 msec/pass So, for example, if you often need to create independent subtrees from a large tree that you have parsed in, lxml is by far the best choice here. 
@@ -226,39 +242,39 @@ especially if few elements are of interest or the element tag name is known, lxml is a good choice:: - lxe: getiterator_all (-- T2 ) 22.5847 msec/pass - cET: getiterator_all (-- T2 ) 36.8212 msec/pass - ET : getiterator_all (-- T2 ) 46.2846 msec/pass - - lxe: getiterator_islice (-- T2 ) 2.0421 msec/pass - cET: getiterator_islice (-- T2 ) 0.3343 msec/pass - ET : getiterator_islice (-- T2 ) 44.5898 msec/pass - - lxe: getiterator_tag (-- T2 ) 1.9593 msec/pass - cET: getiterator_tag (-- T2 ) 11.7767 msec/pass - ET : getiterator_tag (-- T2 ) 37.5661 msec/pass - - lxe: getiterator_tag_all (-- T2 ) 4.5667 msec/pass - cET: getiterator_tag_all (-- T2 ) 33.5681 msec/pass - ET : getiterator_tag_all (-- T2 ) 37.6200 msec/pass + lxe: getiterator_all (--TR T2) 10.3800 msec/pass + cET: getiterator_all (--TR T2) 28.2831 msec/pass + ET : getiterator_all (--TR T2) 26.0720 msec/pass + + lxe: getiterator_islice (--TR T2) 0.1140 msec/pass + cET: getiterator_islice (--TR T2) 0.2460 msec/pass + ET : getiterator_islice (--TR T2) 26.6550 msec/pass + + lxe: getiterator_tag (--TR T2) 0.3879 msec/pass + cET: getiterator_tag (--TR T2) 9.3720 msec/pass + ET : getiterator_tag (--TR T2) 22.8221 msec/pass + + lxe: getiterator_tag_all (--TR T2) 0.8819 msec/pass + cET: getiterator_tag_all (--TR T2) 27.2939 msec/pass + ET : getiterator_tag_all (--TR T2) 22.8271 msec/pass This similarly shows in ``Element.findall()``:: - lxe: findall (-- T2 ) 26.9907 msec/pass - cET: findall (-- T2 ) 39.1728 msec/pass - ET : findall (-- T2 ) 50.9692 msec/pass - - lxe: findall (-- T3 ) 3.6452 msec/pass - cET: findall (-- T3 ) 12.0210 msec/pass - ET : findall (-- T3 ) 11.2570 msec/pass - - lxe: findall_tag (-- T2 ) 4.6065 msec/pass - cET: findall_tag (-- T2 ) 34.0267 msec/pass - ET : findall_tag (-- T2 ) 36.7813 msec/pass - - lxe: findall_tag (-- T3 ) 0.5884 msec/pass - cET: findall_tag (-- T3 ) 7.6307 msec/pass - ET : findall_tag (-- T3 ) 9.2943 msec/pass + lxe: findall (--TR T2) 10.9370 msec/pass 
+ cET: findall (--TR T2) 28.8639 msec/pass + ET : findall (--TR T2) 27.1060 msec/pass + + lxe: findall (--TR T3) 2.1989 msec/pass + cET: findall (--TR T3) 8.9881 msec/pass + ET : findall (--TR T3) 6.4890 msec/pass + + lxe: findall_tag (--TR T2) 0.9520 msec/pass + cET: findall_tag (--TR T2) 27.2651 msec/pass + ET : findall_tag (--TR T2) 22.7208 msec/pass + + lxe: findall_tag (--TR T3) 0.1700 msec/pass + cET: findall_tag (--TR T3) 6.4540 msec/pass + ET : findall_tag (--TR T3) 5.4770 msec/pass Note that all three libraries currently use the same Python implementation for ``findall()``, except for their native tree iterator. @@ -267,48 +283,52 @@ XPath ----- +The following timings are based on the benchmark script `bench_xpath.py`_. + This part of lxml does not have an equivalent in ElementTree. However, lxml provides more than one way of accessing it and you should take care which part of the lxml API you use. The most straightforward way is to call the ``xpath()`` method on an Element or ElementTree:: - lxe: xpath_method (-- T1) 9.9304 msec/pass - lxe: xpath_method (-- T2) 29.3595 msec/pass - lxe: xpath_method (-- T3) 0.2791 msec/pass - lxe: xpath_method (-- T4) 0.9906 msec/pass + lxe: xpath_method (--TC T1) 1.0180 msec/pass + lxe: xpath_method (--TC T2) 20.3521 msec/pass + lxe: xpath_method (--TC T3) 0.1259 msec/pass + lxe: xpath_method (--TC T4) 1.0169 msec/pass This is well suited for testing and when the XPath expressions are as diverse as the trees they are called on. 
However, if you have a single XPath expression that you want to apply to a larger number of different elements, the ``XPath`` class is the most efficient way to do it:: - lxe: xpath_class (-- T1) 4.7921 msec/pass - lxe: xpath_class (-- T2) 9.6187 msec/pass - lxe: xpath_class (-- T3) 0.2215 msec/pass - lxe: xpath_class (-- T4) 0.2697 msec/pass + lxe: xpath_class (--TC T1) 0.1891 msec/pass + lxe: xpath_class (--TC T2) 3.0179 msec/pass + lxe: xpath_class (--TC T3) 0.0570 msec/pass + lxe: xpath_class (--TC T4) 0.1910 msec/pass Note that this still allows you to use variables in the expression, so you can parse it once and then adapt it through variables at call time. In other cases, where you have a fixed Element or ElementTree and want to run different expressions on it, you should consider the ``XPathEvaluator``:: - lxe: xpath_element (-- T1) 5.3826 msec/pass - lxe: xpath_element (-- T2) 11.3929 msec/pass - lxe: xpath_element (-- T3) 0.2514 msec/pass - lxe: xpath_element (-- T4) 0.3038 msec/pass + lxe: xpath_element (--TR T1) 0.4089 msec/pass + lxe: xpath_element (--TR T2) 5.9960 msec/pass + lxe: xpath_element (--TR T3) 0.1230 msec/pass + lxe: xpath_element (--TR T4) 0.3440 msec/pass While it looks slightly slower, creating an XPath object for each of the expressions generates a much higher overhead here:: - lxe: xpath_class_repeat (-- T1) 6.8099 msec/pass - lxe: xpath_class_repeat (-- T2) 26.7462 msec/pass - lxe: xpath_class_repeat (-- T3) 0.3126 msec/pass - lxe: xpath_class_repeat (-- T4) 1.1111 msec/pass + lxe: xpath_class_repeat (--TC T1) 1.0259 msec/pass + lxe: xpath_class_repeat (--TC T2) 20.4861 msec/pass + lxe: xpath_class_repeat (--TC T3) 0.1280 msec/pass + lxe: xpath_class_repeat (--TC T4) 1.0269 msec/pass lxml.objectify -------------- +The following timings are based on the benchmark script `bench_objectify.py`_. + Objectify is a data-binding API for XML based on lxml.etree, that was added in version 1.1. 
It uses standard Python attribute access to traverse the XML tree. It also features ObjectPath, a fast path language based on the same @@ -325,21 +345,21 @@ tree. It avoids step-by-step Python element instantiations along the path, which can substantially improve the access time:: - lxe: attribute (--T T1) 14.8621 msec/pass - lxe: attribute (--T T2) 61.8820 msec/pass - lxe: attribute (--T T4) 14.9317 msec/pass - - lxe: objectpath (--T T1) 13.7311 msec/pass - lxe: objectpath (--T T2) 58.5930 msec/pass - lxe: objectpath (--T T4) 8.0961 msec/pass - - lxe: attributes_deep (--T T1) 81.4488 msec/pass - lxe: attributes_deep (--T T2) 77.0266 msec/pass - lxe: attributes_deep (--T T4) 27.1226 msec/pass - - lxe: objectpath_deep (--T T1) 63.1915 msec/pass - lxe: objectpath_deep (--T T2) 65.2469 msec/pass - lxe: objectpath_deep (--T T4) 11.0138 msec/pass + lxe: attribute (--TR T1) 10.6189 msec/pass + lxe: attribute (--TR T2) 53.7431 msec/pass + lxe: attribute (--TR T4) 10.3359 msec/pass + + lxe: objectpath (--TR T1) 5.8351 msec/pass + lxe: objectpath (--TR T2) 48.1579 msec/pass + lxe: objectpath (--TR T4) 5.6930 msec/pass + + lxe: attributes_deep (--TR T1) 58.7430 msec/pass + lxe: attributes_deep (--TR T2) 63.0901 msec/pass + lxe: attributes_deep (--TR T4) 17.4620 msec/pass + + lxe: objectpath_deep (--TR T1) 52.1719 msec/pass + lxe: objectpath_deep (--TR T2) 52.9201 msec/pass + lxe: objectpath_deep (--TR T4) 7.5650 msec/pass Note, however, that parsing ObjectPath expressions is not for free either, so this is most effective for frequently accessing the same element. 
@@ -361,13 +381,17 @@ subtrees and elements) to cache, you can trade memory usage against access speed:: - lxe: attribute_cached (--T T1) 10.8343 msec/pass - lxe: attribute_cached (--T T2) 55.5890 msec/pass - lxe: attribute_cached (--T T4) 10.9514 msec/pass - - lxe: attributes_deep_cached (--T T1) 63.7080 msec/pass - lxe: attributes_deep_cached (--T T2) 65.6838 msec/pass - lxe: attributes_deep_cached (--T T4) 15.4514 msec/pass + lxe: attribute_cached (--TR T1) 7.9739 msec/pass + lxe: attribute_cached (--TR T2) 50.9331 msec/pass + lxe: attribute_cached (--TR T4) 7.8540 msec/pass + + lxe: attributes_deep_cached (--TR T1) 51.1391 msec/pass + lxe: attributes_deep_cached (--TR T2) 55.7129 msec/pass + lxe: attributes_deep_cached (--TR T4) 10.7968 msec/pass + + lxe: objectpath_deep_cached (--TR T1) 47.6151 msec/pass + lxe: objectpath_deep_cached (--TR T2) 48.0802 msec/pass + lxe: objectpath_deep_cached (--TR T4) 4.0281 msec/pass Things to note: you cannot currently use ``weakref.WeakKeyDictionary`` objects for this as lxml's element objects do not support weak references (which are From scoder at codespeak.net Fri Apr 20 15:35:35 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Apr 2007 15:35:35 +0200 (CEST) Subject: [Lxml-checkins] r42202 - lxml/trunk/doc Message-ID: <20070420133535.AD43180B2@code0.codespeak.net> Author: scoder Date: Fri Apr 20 15:35:34 2007 New Revision: 42202 Modified: lxml/trunk/doc/api.txt Log: some cleanup, new API section on serialisation Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Fri Apr 20 15:35:34 2007 @@ -31,8 +31,9 @@ 3 Trees and Documents 4 Iteration 5 Error handling on exceptions - 6 xinclude - 7 write_c14n on ElementTree + 6 Serialisation + 7 xinclude + 8 write_c14n on ElementTree lxml.etree @@ -62,17 +63,16 @@ While lxml.etree itself uses the ElementTree API, it is possible to 
replace the Element implementation by `custom element subclasses`_. This has been -used to implement well-known XML APIs on top of lxml. The ``lxml.elements`` -package contains examples. Currently, there is a data-binding implementation -called `objectify`_, which is similar to the `Amara bindery`_ tool. - -Additionally, the `lxml.elements.classlookup`_ module provides a number of -different schemes to customize the mapping between libxml2 nodes and the -Element classes used by lxml.etree. +used to implement well-known XML APIs on top of lxml. For example, lxml ships +with a data-binding implementation called `objectify`_, which is similar to +the `Amara bindery`_ tool. + +lxml.etree comes with a number of `different lookup schemes`_ to customize the +mapping between libxml2 nodes and the Element classes used by lxml.etree. .. _`custom element subclasses`: namespace_extensions.html .. _`objectify`: objectify.html -.. _`lxml.elements.classlookup`: elements.html#lxml.elements.classlookup +.. _`different lookup schemes`: element_classes.html#setting-up-a-class-lookup-scheme .. _`Amara bindery`: http://uche.ogbuji.net/tech/4suite/amara/ @@ -228,6 +228,31 @@ etc. which are described in their respective sections below. +Serialisation +------------- + +lxml.etree has direct support for pretty printing XML output. Functions like +``ElementTree.write()`` and ``tostring()`` support it through a keyword +argument:: + + >>> root = etree.XML("") + >>> print etree.tostring(root) + + + >>> print etree.tostring(root, pretty_print=True) + + + + +By default, lxml (and ElementTree) output the XML declaration only if it is +required. 
You can enable or disable it explicitly by passing another keyword +argument for the serialisation:: + + >>> print etree.tostring(root, xml_declaration=True) + + + + xinclude -------- From scoder at codespeak.net Fri Apr 20 15:48:28 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Apr 2007 15:48:28 +0200 (CEST) Subject: [Lxml-checkins] r42203 - lxml/trunk/doc Message-ID: <20070420134828.3B28D80B8@code0.codespeak.net> Author: scoder Date: Fri Apr 20 15:48:27 2007 New Revision: 42203 Modified: lxml/trunk/doc/api.txt Log: doc link from serialisation to the unicode section Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Fri Apr 20 15:48:27 2007 @@ -252,8 +252,12 @@ +Also see the general remarks on `Unicode support`_. -xinclude +.. _`Unicode support`: parsing.html#python-unicode-strings + + +XInclude -------- Simple XInclude support exists. You can let lxml process xinclude statements From scoder at codespeak.net Sun Apr 22 20:43:12 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 22 Apr 2007 20:43:12 +0200 (CEST) Subject: [Lxml-checkins] r42242 - lxml/trunk/src/lxml/tests Message-ID: <20070422184312.4C2AB809D@code0.codespeak.net> Author: scoder Date: Sun Apr 22 20:43:11 2007 New Revision: 42242 Modified: lxml/trunk/src/lxml/tests/test_xslt.py Log: libxslt 1.1.20 also seems to not raise an error when parameters are missing Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Sun Apr 22 20:43:11 2007 @@ -223,7 +223,7 @@ def test_xslt_parameter_missing(self): # DISABLED - NOT RELIABLE! 
- if etree.LIBXSLT_VERSION >= (1,1,18) and etree.LIBXSLT_VERSION < (1,1,20): + if etree.LIBXSLT_VERSION >= (1,1,18): return # no error from libxslt? # apply() without needed parameter will lead to XSLTApplyError tree = self.parse('BC') From scoder at codespeak.net Sun Apr 22 20:58:16 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 22 Apr 2007 20:58:16 +0200 (CEST) Subject: [Lxml-checkins] r42243 - lxml/trunk/doc Message-ID: <20070422185816.D24938095@code0.codespeak.net> Author: scoder Date: Sun Apr 22 20:58:16 2007 New Revision: 42243 Modified: lxml/trunk/doc/api.txt Log: note on ElementInclude support Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Sun Apr 22 20:58:16 2007 @@ -32,7 +32,7 @@ 4 Iteration 5 Error handling on exceptions 6 Serialisation - 7 xinclude + 7 XInclude and ElementInclude 8 write_c14n on ElementTree @@ -257,11 +257,11 @@ .. _`Unicode support`: parsing.html#python-unicode-strings -XInclude --------- +XInclude and ElementInclude +--------------------------- -Simple XInclude support exists. You can let lxml process xinclude statements -in a document by calling the xinclude() method on a tree:: +You can let lxml process xinclude statements in a document by calling the +xinclude() method on a tree:: >>> data = StringIO('''\ ... @@ -274,6 +274,14 @@ >>> etree.tostring(tree.getroot()) '\n\n\n' +Note that the ElementTree compatible ElementInclude_ module is also supported +as ``lxml.ElementInclude``. It has the additional advantage of supporting +custom `URL resolvers`_ at the Python level. The normal XInclude mechanism +cannot deploy these. If you need ElementTree compatibility or custom +resolvers, you have to stick to the external Python module. + +.. 
_ElementInclude: http://effbot.org/zone/element-xinclude.htm + write_c14n on ElementTree ------------------------- From scoder at codespeak.net Sun Apr 22 21:01:43 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 22 Apr 2007 21:01:43 +0200 (CEST) Subject: [Lxml-checkins] r42244 - lxml/trunk/doc Message-ID: <20070422190143.C70C98095@code0.codespeak.net> Author: scoder Date: Sun Apr 22 21:01:36 2007 New Revision: 42244 Modified: lxml/trunk/doc/api.txt Log: small cleanup in docs Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Sun Apr 22 21:01:36 2007 @@ -271,8 +271,11 @@ >>> tree = etree.parse(data) >>> tree.xinclude() - >>> etree.tostring(tree.getroot()) - '\n\n\n' + >>> print etree.tostring(tree.getroot()) + + + + Note that the ElementTree compatible ElementInclude_ module is also supported as ``lxml.ElementInclude``. It has the additional advantage of supporting From scoder at codespeak.net Sun Apr 22 22:04:11 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 22 Apr 2007 22:04:11 +0200 (CEST) Subject: [Lxml-checkins] r42246 - lxml/trunk/src/lxml Message-ID: <20070422200411.CD08F809D@code0.codespeak.net> Author: scoder Date: Sun Apr 22 22:04:11 2007 New Revision: 42246 Modified: lxml/trunk/src/lxml/objectify.pyx Log: define XMLSchema namespace in objectify and provide default nsmap in DataElement Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Sun Apr 22 22:04:11 2007 @@ -101,7 +101,12 @@ setPytypeAttributeTag() -# namespace for XML Schema instance +# namespaces for XML Schema +cdef object XML_SCHEMA_NS +XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema" +cdef char* _XML_SCHEMA_NS +_XML_SCHEMA_NS = _cstr(XML_SCHEMA_NS) + cdef object 
XML_SCHEMA_INSTANCE_NS XML_SCHEMA_INSTANCE_NS = "http://www.w3.org/2001/XMLSchema-instance" cdef char* _XML_SCHEMA_INSTANCE_NS @@ -1694,7 +1699,9 @@ XML = fromstring cdef object _DEFAULT_NSMAP -_DEFAULT_NSMAP = { "py": PYTYPE_NAMESPACE, "xsi": XML_SCHEMA_INSTANCE_NS } +_DEFAULT_NSMAP = { "py" : PYTYPE_NAMESPACE, + "xsi" : XML_SCHEMA_INSTANCE_NS, + "xsd" : XML_SCHEMA_NS} def Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes): """Objectify specific version of the lxml.etree Element() factory that @@ -1722,6 +1729,8 @@ if the type can be identified. If '_pytype' or '_xsi' are among the keyword arguments, they will be used instead. """ + if nsmap is None: + nsmap = _DEFAULT_NSMAP if attrib is not None: if python.PyDict_Size(_attributes): attrib.update(_attributes)