From scoder at codespeak.net Sat Apr 7 09:13:07 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 7 Apr 2007 09:13:07 +0200 (CEST)
Subject: [Lxml-checkins] r41950 - in lxml/trunk: . doc src/lxml
src/lxml/tests
Message-ID: <20070407071307.320AA10072@code0.codespeak.net>
Author: scoder
Date: Sat Apr 7 09:13:05 2007
New Revision: 41950
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/objectify.txt
lxml/trunk/src/lxml/objectify.pyx
lxml/trunk/src/lxml/tests/test_objectify.py
Log:
support '.' as identity ObjectPath
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sat Apr 7 09:13:05 2007
@@ -8,6 +8,8 @@
Features added
--------------
+* '.' represents empty ObjectPath (identity)
+
* EXSLT RegExp support in standard XPath (not only XSLT)
* ``lxml.pyclasslookup`` module that can access the entire tree in read-only
Modified: lxml/trunk/doc/objectify.txt
==============================================================================
--- lxml/trunk/doc/objectify.txt (original)
+++ lxml/trunk/doc/objectify.txt Sat Apr 7 09:13:05 2007
@@ -372,8 +372,8 @@
>>> print find(root).tag
{ns}b
-You can also use relative paths starting with a '.' that ignore the actual
-root element and only inherit its namespace::
+You can also use relative paths starting with a '.' to ignore the actual root
+element and only inherit its namespace::
>>> find = objectify.ObjectPath(".b[1]")
>>> print find(root).tag
@@ -395,6 +395,12 @@
...
AttributeError: no such child: {other}unknown
+For convenience, a single dot represents the empty ObjectPath (identity)::
+
+ >>> find = objectify.ObjectPath(".")
+ >>> print find(root).tag
+ {ns}root
+
ObjectPath objects can be used to manipulate trees::
>>> root = objectify.Element("{ns}root")
Modified: lxml/trunk/src/lxml/objectify.pyx
==============================================================================
--- lxml/trunk/src/lxml/objectify.pyx (original)
+++ lxml/trunk/src/lxml/objectify.pyx Sat Apr 7 09:13:05 2007
@@ -1166,6 +1166,9 @@
r"(\.?)\s*(?:\{([^}]*)\})?\s*([^.{}\[\]\s]+)\s*(?:\[\s*([-0-9]+)\s*\])?",
re.U).match
+cdef object _RELATIVE_PATH_SEGMENT
+_RELATIVE_PATH_SEGMENT = (None, None, 0)
+
cdef _parseObjectPathString(path):
"""Parse object path string into a 'hrefOnameOhrefOnameOOO' string and an
index list. The index list is None if no index was used in the path.
@@ -1173,6 +1176,8 @@
cdef int has_dot
new_path = []
path = cetree.utf8(path.strip())
+ if path == '.':
+ return [_RELATIVE_PATH_SEGMENT]
path_pos = 0
while python.PyString_GET_SIZE(path) > 0:
match = __MATCH_PATH_SEGMENT(path, path_pos)
@@ -1188,7 +1193,7 @@
if python.PyList_GET_SIZE(new_path) == 0:
if has_dot:
# path '.child' => ignore root
- python.PyList_Append(new_path, (None, None, 0))
+ python.PyList_Append(new_path, _RELATIVE_PATH_SEGMENT)
elif index != 0:
raise ValueError, "index not allowed on root node"
elif not has_dot:
@@ -1234,9 +1239,7 @@
if python.PyList_GET_SIZE(new_path) == 0 and index != 0:
raise ValueError, "index not allowed on root node"
python.PyList_Append(new_path, (ns, name, index))
- if python.PyList_GET_SIZE(new_path) == 0 or \
- (python.PyList_GET_SIZE(new_path) == 1 and \
- new_path[0] == (None, None, 0)):
+ if python.PyList_GET_SIZE(new_path) == 0:
raise ValueError, "invalid path"
return new_path
Modified: lxml/trunk/src/lxml/tests/test_objectify.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_objectify.py (original)
+++ lxml/trunk/src/lxml/tests/test_objectify.py Sat Apr 7 09:13:05 2007
@@ -593,6 +593,16 @@
path = objectify.ObjectPath( "root.c1[1].c2" )
self.assertFalse(path.hasattr(root))
+ def test_object_path_dot(self):
+ root = self.XML(xml_str)
+ path = objectify.ObjectPath( "." )
+ self.assertEquals(root.c1.c2.text, path(root).c1.c2.text)
+
+ def test_object_path_dot_list(self):
+ root = self.XML(xml_str)
+ path = objectify.ObjectPath( [''] )
+ self.assertEquals(root.c1.c2.text, path(root).c1.c2.text)
+
def test_object_path_dot_root(self):
root = self.XML(xml_str)
path = objectify.ObjectPath( ".c1.c2" )
@@ -652,9 +662,7 @@
['root[2]', 'c1', 'c2'])
self.assertRaises(ValueError, objectify.ObjectPath,
- ".")
- self.assertRaises(ValueError, objectify.ObjectPath,
- [''])
+ [])
self.assertRaises(ValueError, objectify.ObjectPath,
['', '', ''])
From scoder at codespeak.net Sat Apr 7 22:17:30 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 7 Apr 2007 22:17:30 +0200 (CEST)
Subject: [Lxml-checkins] r41955 - in lxml/trunk: . src/lxml
Message-ID: <20070407201730.4C33710072@code0.codespeak.net>
Author: scoder
Date: Sat Apr 7 22:17:29 2007
New Revision: 41955
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/sax.py
Log:
support for custom Element class instantiation in lxml.sax
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sat Apr 7 22:17:29 2007
@@ -8,6 +8,8 @@
Features added
--------------
+* Support for custom Element class instantiation in lxml.sax
+
* '.' represents empty ObjectPath (identity)
* EXSLT RegExp support in standard XPath (not only XSLT)
Modified: lxml/trunk/src/lxml/sax.py
==============================================================================
--- lxml/trunk/src/lxml/sax.py (original)
+++ lxml/trunk/src/lxml/sax.py Sat Apr 7 22:17:29 2007
@@ -13,12 +13,15 @@
class ElementTreeContentHandler(object, ContentHandler):
"""Build an lxml ElementTree from SAX events.
"""
- def __init__(self):
+ def __init__(self, makeelement=None):
self._root = None
self._element_stack = []
self._default_ns = None
self._ns_mapping = { None : [None] }
self._new_mappings = {}
+ if makeelement is None:
+ makeelement = Element
+ self._makeelement = makeelement
def _get_etree(self):
"Contains the generated ElementTree after parsing is finished."
@@ -77,7 +80,8 @@
element_stack = self._element_stack
if self._root is None:
- element = self._root = Element(el_name, attrs, self._new_mappings)
+ element = self._root = \
+ self._makeelement(el_name, attrs, self._new_mappings)
else:
element = SubElement(element_stack[-1], el_name,
attrs, self._new_mappings)
From scoder at codespeak.net Tue Apr 10 13:30:34 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 10 Apr 2007 13:30:34 +0200 (CEST)
Subject: [Lxml-checkins] r41985 - lxml/trunk/src/lxml
Message-ID: <20070410113034.B4C918065@code0.codespeak.net>
Author: scoder
Date: Tue Apr 10 13:30:33 2007
New Revision: 41985
Modified:
lxml/trunk/src/lxml/xslt.pxi
Log:
exception message cleanup
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Tue Apr 10 13:30:33 2007
@@ -296,7 +296,7 @@
if self._error_log.last_error is not None:
raise XSLTParseError, self._error_log.last_error.message
else:
- raise XSLTParseError, "Cannot parse style sheet"
+ raise XSLTParseError, "Cannot parse stylesheet"
c_doc._private = NULL # no longer used!
self._c_style = c_style
@@ -344,7 +344,7 @@
transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc)
if transform_ctxt is NULL:
_destroyFakeDoc(input_doc._c_doc, c_doc)
- raise XSLTApplyError, "Error preparing stylesheet run"
+ python.PyErr_NoMemory()
initTransformDict(transform_ctxt)
@@ -383,7 +383,7 @@
message = "%s, line %d" % (error.message, error.line)
else:
message = error.message
- elif error.line >= 0:
+ elif error is not None and error.line >= 0:
message = "Error applying stylesheet, line %d" % error.line
else:
message = "Error applying stylesheet"
From scoder at codespeak.net Tue Apr 10 14:11:23 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 10 Apr 2007 14:11:23 +0200 (CEST)
Subject: [Lxml-checkins] r41986 - lxml/trunk/src/lxml/tests
Message-ID: <20070410121123.BAB5C807A@code0.codespeak.net>
Author: scoder
Date: Tue Apr 10 14:11:23 2007
New Revision: 41986
Modified:
lxml/trunk/src/lxml/tests/test_xslt.py
Log:
cleanup in test cases, test case on XSLT parsing errors
Modified: lxml/trunk/src/lxml/tests/test_xslt.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xslt.py (original)
+++ lxml/trunk/src/lxml/tests/test_xslt.py Tue Apr 10 14:11:23 2007
@@ -34,6 +34,23 @@
def test_xslt_elementtree_error(self):
self.assertRaises(ValueError, etree.XSLT, etree.ElementTree())
+ def test_xslt_input_none(self):
+ self.assertRaises(TypeError, etree.XSLT, None)
+
+ def test_xslt_invalid_stylesheet(self):
+ if etree.LIBXSLT_VERSION < (1,1,15):
+ return # no error from libxslt?
+
+ style = self.parse('''\
+
+
+
+
+''')
+
+ self.assertRaises(etree.XSLTParseError, etree.XSLT, style)
+
def test_xslt_utf8(self):
tree = self.parse(u'\uF8D2\uF8D2')
style = self.parse('''\
@@ -144,10 +161,9 @@
-B--C-
''',
- st.tostring(res))
+ str(res))
def test_xslt_input(self):
- tree = self.parse('BC')
style = self.parse('''\
@@ -159,7 +175,6 @@
st = etree.XSLT(style)
st = etree.XSLT(style.getroot())
- self.assertRaises(TypeError, etree.XSLT, None)
def test_xslt_input_partial_doc(self):
style = self.parse('''\
@@ -206,8 +221,10 @@
''',
st.tostring(res))
- def _test_xslt_parameter_missing(self):
- # DISABLED - NOT RELIABLE?
+ def test_xslt_parameter_missing(self):
+ # DISABLED - NOT RELIABLE!
+ if etree.LIBXSLT_VERSION >= (1,1,18) and etree.LIBXSLT_VERSION < (1,1,20):
+ return # no error from libxslt?
# apply() without needed parameter will lead to XSLTApplyError
tree = self.parse('BC')
style = self.parse('''\
From scoder at codespeak.net Tue Apr 10 21:01:51 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 10 Apr 2007 21:01:51 +0200 (CEST)
Subject: [Lxml-checkins] r41995 - in lxml/trunk: . doc src/lxml
src/lxml/tests
Message-ID: <20070410190151.306B680A0@code0.codespeak.net>
Author: scoder
Date: Tue Apr 10 21:01:50 2007
New Revision: 41995
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/objectify.txt
lxml/trunk/src/lxml/objectify.pyx
lxml/trunk/src/lxml/tests/test_objectify.py
Log:
Holger's patch for type annotations in objectify
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Tue Apr 10 21:01:50 2007
@@ -8,6 +8,9 @@
Features added
--------------
+* Extended type annotation in objectify: cleaner annotation namespace setup
+ plus new ``xsiannotate()`` and ``deannotate()`` functions
+
* Support for custom Element class instantiation in lxml.sax
* '.' represents empty ObjectPath (identity)
Modified: lxml/trunk/doc/objectify.txt
==============================================================================
--- lxml/trunk/doc/objectify.txt (original)
+++ lxml/trunk/doc/objectify.txt Tue Apr 10 21:01:50 2007
@@ -699,6 +699,61 @@
s = '5' [StringElement]
* xsi:type = 'string'
+Again, there is a utility function ``xsiannotate()`` that recursively
+generates the "xsi:type" attribute for the elements of a tree::
+
+ >>> root = objectify.fromstring('''\
+ ... test5true
+ ... ''')
+ >>> print objectify.dump(root)
+ root = None [ObjectifiedElement]
+ a = 'test' [StringElement]
+ b = 5 [IntElement]
+ c = True [BoolElement]
+
+ >>> objectify.xsiannotate(root)
+
+ >>> print objectify.dump(root)
+ root = None [ObjectifiedElement]
+ a = 'test' [StringElement]
+ * xsi:type = 'string'
+ b = 5 [IntElement]
+ * xsi:type = 'int'
+ c = True [BoolElement]
+ * xsi:type = 'boolean'
+
+Note, however, that ``xsiannotate()`` will always use the first XML Schema
+datatype that is defined for any given Python type, see also
+`Defining additional data classes`_.
+
+The utility function ``deannotate()`` can be used to get rid of 'py:pytype'
+and/or 'xsi:type' information::
+
+ >>> root = objectify.fromstring('''\
+ ...
+ ... 5
+ ... 5
+ ... 5
+ ... ''')
+ >>> objectify.annotate(root)
+ >>> print objectify.dump(root)
+ root = None [ObjectifiedElement]
+ d = 5.0 [FloatElement]
+ * xsi:type = 'double'
+ * py:pytype = 'float'
+ l = 5L [LongElement]
+ * xsi:type = 'long'
+ * py:pytype = 'long'
+ s = '5' [StringElement]
+ * xsi:type = 'string'
+ * py:pytype = 'str'
+ >>> objectify.deannotate(root)
+ >>> print objectify.dump(root)
+ root = None [ObjectifiedElement]
+ d = 5 [IntElement]
+ l = 5 [IntElement]
+ s = 5 [IntElement]
+
For convenience, the ``DataElement()`` factory creates an Element with a
Python value in one step. You can pass the required Python type name or the
XSI type name::
@@ -720,8 +775,8 @@
>>> root.x = objectify.DataElement(5, _xsi="integer")
>>> print objectify.dump(root)
root = None [ObjectifiedElement]
- x = 5 [IntElement]
- * py:pytype = 'int'
+ x = 5L [LongElement]
+ * py:pytype = 'long'
* xsi:type = 'integer'
There is a side effect of the type lookup. If you assign a string value using
Modified: lxml/trunk/src/lxml/objectify.pyx
==============================================================================
--- lxml/trunk/src/lxml/objectify.pyx (original)
+++ lxml/trunk/src/lxml/objectify.pyx Tue Apr 10 21:01:50 2007
@@ -707,17 +707,21 @@
"""Boolean type based on string values: 'true' or 'false'.
"""
cdef int _boolval(self) except -1:
+ cdef char* c_str
text = textOf(self._c_node)
if text is None:
return 0
- text = text.lower()
- if text == 'false':
- return 0
- elif text == 'true':
- return 1
- else:
- raise ValueError, "Invalid boolean value: '%s'" % text
-
+ c_str = _cstr(text)
+ if c_str[0] == c'0' or c_str[0] == c'f' or c_str[0] == c'F':
+ if c_str[1] == c'\0' or text.lower() == "false":
+ # '0' or 'f' or 'false'
+ return 0
+ elif c_str[0] == c'1' or c_str[0] == c't' or c_str[0] == c'T':
+ if c_str[1] == c'\0' or text.lower() == 'true'):
+ # '1' or 't' or 'true'
+ return 1
+ raise ValueError, "Invalid boolean value: '%s'" % text
+
def __nonzero__(self):
if self._boolval():
return True
@@ -882,13 +886,15 @@
cdef _registerPyTypes():
pytype = PyType('int', int, IntElement)
- pytype.xmlSchemaTypes = ("integer", "positiveInteger", "negativeInteger",
- "nonNegativeInteger", "nonPositiveInteger",
- "int", "unsignedInt", "short", "unsignedShort")
+ pytype.xmlSchemaTypes = ("int", "short", "byte", "unsignedShort",
+ "unsignedByte",)
+
pytype.register()
pytype = PyType('long', long, LongElement)
- pytype.xmlSchemaTypes = ("long", "unsignedLong")
+ pytype.xmlSchemaTypes = ("integer", "nonPositiveInteger", "negativeInteger",
+ "long", "nonNegativeInteger", "unsignedLong",
+ "unsignedInt", "positiveInteger",)
pytype.register()
pytype = PyType('float', float, FloatElement)
@@ -900,7 +906,9 @@
pytype.register()
pytype = PyType('str', None, StringElement)
- pytype.xmlSchemaTypes = ("string", "normalizedString")
+ pytype.xmlSchemaTypes = ("string", "normalizedString", "token", "language",
+ "Name", "NCName", "ID", "IDREF", "ENTITY",
+ "NMTOKEN", )
pytype.register()
pytype = PyType('none', None, NoneElement)
@@ -936,12 +944,25 @@
python.PyList_Append(types, pytype)
return types
+cdef PyType _guessPyType(value, PyType defaulttype):
+ if value is None:
+ return None
+ for type_check, tested_pytype in _TYPE_CHECKS:
+ try:
+ type_check(value)
+ return tested_pytype
+ except IGNORABLE_ERRORS:
+ # could not be parsed as the specified type => ignore
+ pass
+ return defaulttype
+
cdef object _guessElementClass(tree.xmlNode* c_node):
value = textOf(c_node)
if value is None:
return None
if value == '':
return StringElement
+
for type_check, pytype in _TYPE_CHECKS:
try:
type_check(value)
@@ -1424,11 +1445,26 @@
################################################################################
# Type annotations
+cdef PyType _check_type(tree.xmlNode* c_node, PyType pytype):
+ # StrType does not have a typecheck but is the default anyway,
+ # so just accept it if given as type information
+ if pytype is None:
+ return pytype
+ value = textOf(c_node)
+ try:
+ pytype.type_check(value)
+ return pytype
+ except IGNORABLE_ERRORS:
+ # could not be parsed as the specified type => ignore
+ pass
+ return None
+
+
def annotate(element_or_tree, ignore_old=True):
"""Recursively annotates the elements of an XML tree with 'pytype'
attributes.
- If the 'ignore_old' keyword argument is True (the default), current
+ If the 'ignore_old' keyword argument is True (the default), current 'pytype'
attributes will be ignored and replaced. Otherwise, they will be checked
and only replaced if they no longer fit the current text value.
"""
@@ -1438,11 +1474,13 @@
cdef tree.xmlNode* c_node
cdef tree.xmlNs* c_ns
cdef python.PyObject* dict_result
+ cdef PyType pytype
element = cetree.rootNodeOrRaise(element_or_tree)
doc = element._doc
ignore = bool(ignore_old)
- StrType = _PYTYPE_DICT.get('str')
+ StrType = _PYTYPE_DICT.get('str')
+ NoneType = _PYTYPE_DICT.get('none')
c_node = element._c_node
tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
pytype = None
@@ -1452,20 +1490,19 @@
old_value = cetree.attributeValueFromNsName(
c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
if old_value is not None and old_value != TREE_PYTYPE:
- pytype = _PYTYPE_DICT.get(old_value)
- if pytype is not None:
- value = textOf(c_node)
- try:
- if not (pytype).type_check(value):
- pytype = None
- except ValueError:
- pytype = None
+ dict_result = python.PyDict_GetItem(_PYTYPE_DICT, old_value)
+ if dict_result is not NULL:
+ pytype = dict_result
+ if pytype is not StrType:
+ # StrType does not have a typecheck but is the default anyway,
+ # so just accept it if given as type information
+ pytype = _check_type(c_node, pytype)
if pytype is None:
- # if element is defined as xsi:nil, return NoneElement class
+ # if element is defined as xsi:nil, represent it as None
if cetree.attributeValueFromNsName(
c_node, _XML_SCHEMA_INSTANCE_NS, "nil") == "true":
- pytype = _PYTYPE_DICT.get("none")
+ pytype = NoneType
if pytype is None:
# check for XML Schema type hint
@@ -1481,18 +1518,7 @@
# try to guess type
if cetree.findChildForwards(c_node, 0) is NULL:
# element has no children => data class
- if value is None:
- value = textOf(c_node)
- if value is not None:
- for type_check, tested_pytype in _TYPE_CHECKS:
- try:
- if type_check(value) is not False:
- pytype = tested_pytype
- break
- except ValueError:
- pass
- else:
- pytype = StrType
+ pytype = _guessPyType(textOf(c_node), StrType)
if pytype is None:
# delete attribute if it exists
@@ -1505,6 +1531,124 @@
_cstr(pytype.name))
tree.END_FOR_EACH_ELEMENT_FROM(c_node)
+def xsiannotate(element_or_tree, ignore_old=True):
+ """Recursively annotates the elements of an XML tree with 'xsi:type'
+ attributes.
+
+ If the 'ignore_old' keyword argument is True (the default), current
+ 'xsi:type' attributes will be ignored and replaced. Otherwise, they will be
+ checked and only replaced if they no longer fit the current text value.
+
+ Note that the mapping from Python types to XSI types is usually ambiguous.
+ Currently, only the first XSI type name in the corresponding PyType
+ definition will be used for annotation. Thus, you should consider naming
+ the widest type first here if you define additional types.
+ """
+ cdef _Element element
+ cdef _Document doc
+ cdef int ignore
+ cdef int istree
+ cdef tree.xmlNode* c_node
+ cdef tree.xmlNs* c_ns
+ cdef python.PyObject* dict_result
+ cdef PyType pytype
+ element = cetree.rootNodeOrRaise(element_or_tree)
+ doc = element._doc
+ ignore = bool(ignore_old)
+
+ StrType = _PYTYPE_DICT.get('str')
+ c_node = element._c_node
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
+ typename = None
+ pytype = None
+ value = None
+ istree = 0
+ if not ignore:
+ # check that old value is valid
+ typename = cetree.attributeValueFromNsName(
+ c_node, _XML_SCHEMA_INSTANCE_NS, "type")
+ if typename is not None:
+ dict_result = python.PyDict_GetItem(_SCHEMA_TYPE_DICT, typename)
+ if dict_result is not NULL:
+ pytype = dict_result
+ if pytype is not StrType:
+ # StrType does not have a typecheck but is the default anyway,
+ # so just accept it if given as type information
+ pytype = _check_type(c_node, pytype)
+ if pytype is None:
+ typename = None
+
+ if typename is None:
+ if pytype is None:
+ # check for pytype hint
+ value = cetree.attributeValueFromNsName(
+ c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
+
+ if value is not None:
+ if value == TREE_PYTYPE:
+ istree = 1
+ else:
+ dict_result = python.PyDict_GetItem(_PYTYPE_DICT, value)
+ if dict_result is not NULL:
+ pytype = dict_result
+ if pytype is not StrType:
+ pytype = _check_type(c_node, pytype)
+
+ if not istree and pytype is None:
+ # try to guess type
+ if cetree.findChildForwards(c_node, 0) is NULL:
+ # element has no children => data class
+ pytype = _guessPyType(textOf(c_node), StrType)
+ else:
+ istree = 1
+
+ if typename is None and not istree and pytype is not None:
+ if python.PyList_GET_SIZE(pytype._schema_types) > 0:
+ # pytype->xsi:type is a 1:n mapping so simply take the first
+ typename = pytype._schema_types[0]
+
+ if typename is None or istree:
+ # delete attribute if it exists
+ cetree.delAttributeFromNsName(c_node, _XML_SCHEMA_INSTANCE_NS, "type")
+ else:
+ # update or create attribute
+ c_ns = cetree.findOrBuildNodeNs(doc, c_node, _XML_SCHEMA_INSTANCE_NS)
+ tree.xmlSetNsProp(c_node, c_ns, "type", _cstr(typename))
+ tree.END_FOR_EACH_ELEMENT_FROM(c_node)
+
+
+def deannotate(element_or_tree, pytype=True, xsi=True):
+ """Recursively de-annotate the elements of an XML tree by removing 'pytype'
+ and/or 'xsi:type' attributes.
+
+ If the 'pytype' keyword argument is True (the default), 'pytype' attributes
+ will be removed. If the 'xsi' keyword argument is True (the default),
+ 'xsi:type' attributes will be removed.
+ """
+ cdef _Element element
+ cdef tree.xmlNode* c_node
+
+ element = cetree.rootNodeOrRaise(element_or_tree)
+ c_node = element._c_node
+ if pytype and xsi:
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
+ cetree.delAttributeFromNsName(
+ c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
+ cetree.delAttributeFromNsName(
+ c_node, _XML_SCHEMA_INSTANCE_NS, "type")
+ tree.END_FOR_EACH_ELEMENT_FROM(c_node)
+ elif pytype:
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
+ cetree.delAttributeFromNsName(
+ c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
+ tree.END_FOR_EACH_ELEMENT_FROM(c_node)
+ else:
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
+ cetree.delAttributeFromNsName(
+ c_node, _XML_SCHEMA_INSTANCE_NS, "type")
+ tree.END_FOR_EACH_ELEMENT_FROM(c_node)
+
+
################################################################################
# Module level parser setup
@@ -1549,6 +1693,9 @@
XML = fromstring
+cdef object _DEFAULT_NSMAP
+_DEFAULT_NSMAP = { "py": PYTYPE_NAMESPACE, "xsi": XML_SCHEMA_INSTANCE_NS }
+
def Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes):
"""Objectify specific version of the lxml.etree Element() factory that
always creates a structural (tree) element.
@@ -1561,6 +1708,8 @@
_attributes = attrib
if _pytype is None:
_pytype = TREE_PYTYPE
+ if nsmap is None:
+ nsmap = _DEFAULT_NSMAP
_attributes[PYTYPE_ATTRIBUTE] = _pytype
return _makeElement(_tag, None, _attributes, nsmap)
@@ -1569,11 +1718,10 @@
"""Create a new element with a Python value and XML attributes taken from
keyword arguments or a dictionary passed as second argument.
- Automatically adds a 'pyval' attribute for the Python type of the value,
- if the type can be identified. If '_pyval' or '_xsi' are among the
+ Automatically adds a 'pytype' attribute for the Python type of the value,
+ if the type can be identified. If '_pytype' or '_xsi' are among the
keyword arguments, they will be used instead.
"""
- cdef _Element element
if attrib is not None:
if python.PyDict_Size(_attributes):
attrib.update(_attributes)
@@ -1581,7 +1729,10 @@
if _xsi is not None:
python.PyDict_SetItem(_attributes, XML_SCHEMA_INSTANCE_TYPE_ATTR, _xsi)
if _pytype is None:
- _pytype = _SCHEMA_TYPE_DICT[_xsi].name
+ # allow for someone using unregistered or even wrong xsi:type names
+ pytype_lookup = _SCHEMA_TYPE_DICT.get(_xsi)
+ if pytype_lookup is not None:
+ _pytype = pytype_lookup.name
if python._isString(_value):
strval = _value
Modified: lxml/trunk/src/lxml/tests/test_objectify.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_objectify.py (original)
+++ lxml/trunk/src/lxml/tests/test_objectify.py Tue Apr 10 21:01:50 2007
@@ -13,6 +13,10 @@
from lxml import objectify
+XML_SCHEMA_INSTANCE_NS = "http://www.w3.org/2001/XMLSchema-instance"
+XML_SCHEMA_INSTANCE_TYPE_ATTR = "{%s}type" % XML_SCHEMA_INSTANCE_NS
+XML_SCHEMA_NIL_ATTR = "{%s}nil" % XML_SCHEMA_INSTANCE_NS
+
xml_str = '''\
@@ -28,7 +32,7 @@
"""Test cases for lxml.objectify
"""
etree = etree
-
+
def XML(self, xml):
return self.etree.XML(xml, self.parser)
@@ -356,20 +360,69 @@
XML = self.XML
root = XML('''\
- 5
- 5
- 5
+ true
+ false
+ 1
+ 0
+
+ 5
+ 5
+
+ 5
+ 5
+ 5
+ 5
+ 5
+ 5
+ 5
+ 5
+ 5
+ 5
+
+ 5
+ 5
+ 5
+ 5
+ 5
+ 5
+ 5
+ 5
+
+ 5
+ 5
+ 5
+ 5
+ 5
+
+
''')
- self.assert_(isinstance(root.a[0], objectify.IntElement))
- self.assertEquals(5, root.a[0])
-
- self.assert_(isinstance(root.a[1], objectify.StringElement))
- self.assertEquals("5", root.a[1])
-
- self.assert_(isinstance(root.a[2], objectify.FloatElement))
- self.assertEquals(5.0, root.a[2])
+ for b in root.b:
+ self.assert_(isinstance(b, objectify.BoolElement))
+ self.assertEquals(True, root.b[0])
+ self.assertEquals(False, root.b[1])
+ self.assertEquals(True, root.b[2])
+ self.assertEquals(False, root.b[3])
+
+ for f in root.f:
+ self.assert_(isinstance(f, objectify.FloatElement))
+ self.assertEquals(5, f)
+
+ for s in root.s:
+ self.assert_(isinstance(s, objectify.StringElement))
+ self.assertEquals("5", s)
+
+ for l in root.l:
+ self.assert_(isinstance(l, objectify.LongElement))
+ self.assertEquals(5l, l)
+
+ for i in root.i:
+ self.assert_(isinstance(i, objectify.IntElement))
+ self.assertEquals(5, i)
+
+ self.assert_(isinstance(root.n, objectify.NoneElement))
+ self.assertEquals(None, root.n)
def test_type_str_sequence(self):
XML = self.XML
@@ -444,10 +497,131 @@
root.b = False
self.assertFalse(root.b)
- def test_type_annotation(self):
+ def test_pytype_annotation(self):
XML = self.XML
root = XML(u'''\
-
+
+ 5
+ test
+ 1.1
+ \uF8D2
+ true
+
+
+ 5
+ 5
+ 23
+ 42
+ 300
+ 2
+
+ ''')
+ objectify.annotate(root)
+
+ child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE)
+ for c in root.iterchildren() ]
+ self.assertEquals("int", child_types[0])
+ self.assertEquals("str", child_types[1])
+ self.assertEquals("float", child_types[2])
+ self.assertEquals("str", child_types[3])
+ self.assertEquals("bool", child_types[4])
+ self.assertEquals("none", child_types[5])
+ self.assertEquals(None, child_types[6])
+ self.assertEquals("float", child_types[7])
+ self.assertEquals("float", child_types[8])
+ self.assertEquals("str", child_types[9])
+ self.assertEquals("int", child_types[10])
+ self.assertEquals("int", child_types[11])
+ self.assertEquals("int", child_types[12])
+
+ self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR))
+
+ def test_pytype_annotation_use_old(self):
+ XML = self.XML
+ root = XML(u'''\
+
+ 5
+ test
+ 1.1
+ \uF8D2
+ true
+
+
+ 5
+ 5
+ 23
+ 42
+ 300
+ 2
+
+ ''')
+ objectify.annotate(root, ignore_old=False)
+
+ child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE)
+ for c in root.iterchildren() ]
+ self.assertEquals("int", child_types[0])
+ self.assertEquals("str", child_types[1])
+ self.assertEquals("float", child_types[2])
+ self.assertEquals("str", child_types[3])
+ self.assertEquals("bool", child_types[4])
+ self.assertEquals("none", child_types[5])
+ self.assertEquals(None, child_types[6])
+ self.assertEquals("float", child_types[7])
+ self.assertEquals("float", child_types[8])
+ self.assertEquals("str", child_types[9])
+ self.assertEquals("str", child_types[10])
+ self.assertEquals("float", child_types[11])
+ self.assertEquals("long", child_types[12])
+
+ self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR))
+
+ def test_xsitype_annotation(self):
+ XML = self.XML
+ root = XML(u'''\
+
+ 5
+ test
+ 1.1
+ \uF8D2
+ true
+
+
+ 5
+ 5
+ 23
+ 42
+ 300
+ 2
+
+ ''')
+ objectify.xsiannotate(root)
+
+ child_types = [ c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR)
+ for c in root.iterchildren() ]
+ self.assertEquals("int", child_types[0])
+ self.assertEquals("string", child_types[1])
+ self.assertEquals("float", child_types[2])
+ self.assertEquals("string", child_types[3])
+ self.assertEquals("boolean", child_types[4])
+ self.assertEquals(None, child_types[5])
+ self.assertEquals(None, child_types[6])
+ self.assertEquals("int", child_types[7])
+ self.assertEquals("int", child_types[8])
+ self.assertEquals("int", child_types[9])
+ self.assertEquals("string", child_types[10])
+ self.assertEquals("float", child_types[11])
+ self.assertEquals("integer", child_types[12])
+
+ self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR))
+
+ def test_xsitype_annotation_use_old(self):
+ XML = self.XML
+ root = XML(u'''\
+
5
test
1.1
@@ -456,9 +630,127 @@
5
+ 5
+ 23
+ 42
+ 300
+ 2
+
+ ''')
+ objectify.xsiannotate(root, ignore_old=False)
+
+ child_types = [ c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR)
+ for c in root.iterchildren() ]
+ self.assertEquals("int", child_types[0])
+ self.assertEquals("string", child_types[1])
+ self.assertEquals("float", child_types[2])
+ self.assertEquals("string", child_types[3])
+ self.assertEquals("boolean", child_types[4])
+ self.assertEquals(None, child_types[5])
+ self.assertEquals(None, child_types[6])
+ self.assertEquals("double", child_types[7])
+ self.assertEquals("float", child_types[8])
+ self.assertEquals("string", child_types[9])
+ self.assertEquals("string", child_types[10])
+ self.assertEquals("float", child_types[11])
+ self.assertEquals("integer", child_types[12])
+
+ self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR))
+
+ def test_deannotate(self):
+ XML = self.XML
+ root = XML(u'''\
+
+ 5
+ test
+ 1.1
+ \uF8D2
+ true
+
+
+ 5
+ 5
+ 23
+ 42
+ 300
+ 2
+
+ ''')
+ objectify.deannotate(root)
+
+ for c in root.getiterator():
+ self.assertEquals(None, c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR))
+ self.assertEquals(None, c.get(objectify.PYTYPE_ATTRIBUTE))
+
+ self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR))
+
+ def test_pytype_deannotate(self):
+ XML = self.XML
+ root = XML(u'''\
+
+ 5
+ test
+ 1.1
+ \uF8D2
+ true
+
+
+ 5
+ 5
+ 23
+ 42
+ 300
+ 2
+
+ ''')
+ objectify.xsiannotate(root)
+ objectify.deannotate(root, xsi=False)
+
+ child_types = [ c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR)
+ for c in root.iterchildren() ]
+ self.assertEquals("int", child_types[0])
+ self.assertEquals("string", child_types[1])
+ self.assertEquals("float", child_types[2])
+ self.assertEquals("string", child_types[3])
+ self.assertEquals("boolean", child_types[4])
+ self.assertEquals(None, child_types[5])
+ self.assertEquals(None, child_types[6])
+ self.assertEquals("int", child_types[7])
+ self.assertEquals("int", child_types[8])
+ self.assertEquals("int", child_types[9])
+ self.assertEquals("string", child_types[10])
+ self.assertEquals("float", child_types[11])
+ self.assertEquals("integer", child_types[12])
+
+ self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR))
+
+ for c in root.getiterator():
+ self.assertEquals(None, c.get(objectify.PYTYPE_ATTRIBUTE))
+
+ def test_xsitype_deannotate(self):
+ XML = self.XML
+ root = XML(u'''\
+
+ 5
+ test
+ 1.1
+ \uF8D2
+ true
+
+
+ 5
+ 5
+ 23
+ 42
+ 300
+ 2
''')
objectify.annotate(root)
+ objectify.deannotate(root, pytype=False)
child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE)
for c in root.iterchildren() ]
@@ -470,6 +762,16 @@
self.assertEquals("none", child_types[5])
self.assertEquals(None, child_types[6])
self.assertEquals("float", child_types[7])
+ self.assertEquals("float", child_types[8])
+ self.assertEquals("str", child_types[9])
+ self.assertEquals("int", child_types[10])
+ self.assertEquals("int", child_types[11])
+ self.assertEquals("int", child_types[12])
+
+ self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR))
+
+ for c in root.getiterator():
+ self.assertEquals(None, c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR))
def test_change_pytype_attribute(self):
XML = self.XML
@@ -890,7 +1192,6 @@
etree.tostring(new_root),
etree.tostring(root))
-
def test_suite():
suite = unittest.TestSuite()
suite.addTests([unittest.makeSuite(ObjectifyTestCase)])
From scoder at codespeak.net Tue Apr 10 21:02:28 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 10 Apr 2007 21:02:28 +0200 (CEST)
Subject: [Lxml-checkins] r41996 - lxml/trunk/src/lxml
Message-ID: <20070410190228.DBA9F80A0@code0.codespeak.net>
Author: scoder
Date: Tue Apr 10 21:02:27 2007
New Revision: 41996
Modified:
lxml/trunk/src/lxml/objectify.pyx
Log:
typo
Modified: lxml/trunk/src/lxml/objectify.pyx
==============================================================================
--- lxml/trunk/src/lxml/objectify.pyx (original)
+++ lxml/trunk/src/lxml/objectify.pyx Tue Apr 10 21:02:27 2007
@@ -717,7 +717,7 @@
# '0' or 'f' or 'false'
return 0
elif c_str[0] == c'1' or c_str[0] == c't' or c_str[0] == c'T':
- if c_str[1] == c'\0' or text.lower() == 'true'):
+ if c_str[1] == c'\0' or text.lower() == 'true':
# '1' or 't' or 'true'
return 1
raise ValueError, "Invalid boolean value: '%s'" % text
From scoder at codespeak.net Wed Apr 11 09:59:20 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 11 Apr 2007 09:59:20 +0200 (CEST)
Subject: [Lxml-checkins] r41997 - in lxml/trunk/src/lxml: . tests
Message-ID: <20070411075920.DD22180A3@code0.codespeak.net>
Author: scoder
Date: Wed Apr 11 09:59:19 2007
New Revision: 41997
Modified:
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/objectify.pyx
lxml/trunk/src/lxml/tests/test_objectify.py
Log:
cleanups
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Wed Apr 11 09:59:19 2007
@@ -1924,7 +1924,7 @@
include "dtd.pxi" # DTD
include "relaxng.pxi" # RelaxNG
include "xmlschema.pxi" # XMLSchema
-#include "schematron.pxi" # Schematron
+#include "schematron.pxi" # Schematron (requires libxml2 2.6.21+)
################################################################################
# Public C API
Modified: lxml/trunk/src/lxml/objectify.pyx
==============================================================================
--- lxml/trunk/src/lxml/objectify.pyx (original)
+++ lxml/trunk/src/lxml/objectify.pyx Wed Apr 11 09:59:19 2007
@@ -713,11 +713,11 @@
return 0
c_str = _cstr(text)
if c_str[0] == c'0' or c_str[0] == c'f' or c_str[0] == c'F':
- if c_str[1] == c'\0' or text.lower() == "false":
+ if c_str[1] == c'\0' or text == "false" or text.lower() == "false":
# '0' or 'f' or 'false'
return 0
elif c_str[0] == c'1' or c_str[0] == c't' or c_str[0] == c'T':
- if c_str[1] == c'\0' or text.lower() == 'true':
+ if c_str[1] == c'\0' or text == "true" or text.lower() == "true":
# '1' or 't' or 'true'
return 1
raise ValueError, "Invalid boolean value: '%s'" % text
Modified: lxml/trunk/src/lxml/tests/test_objectify.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_objectify.py (original)
+++ lxml/trunk/src/lxml/tests/test_objectify.py Wed Apr 11 09:59:19 2007
@@ -710,19 +710,19 @@
child_types = [ c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR)
for c in root.iterchildren() ]
- self.assertEquals("int", child_types[0])
- self.assertEquals("string", child_types[1])
- self.assertEquals("float", child_types[2])
- self.assertEquals("string", child_types[3])
- self.assertEquals("boolean", child_types[4])
- self.assertEquals(None, child_types[5])
- self.assertEquals(None, child_types[6])
- self.assertEquals("int", child_types[7])
- self.assertEquals("int", child_types[8])
- self.assertEquals("int", child_types[9])
- self.assertEquals("string", child_types[10])
- self.assertEquals("float", child_types[11])
- self.assertEquals("integer", child_types[12])
+ self.assertEquals("int", child_types[ 0])
+ self.assertEquals("string", child_types[ 1])
+ self.assertEquals("float", child_types[ 2])
+ self.assertEquals("string", child_types[ 3])
+ self.assertEquals("boolean", child_types[ 4])
+ self.assertEquals(None, child_types[ 5])
+ self.assertEquals(None, child_types[ 6])
+ self.assertEquals("int", child_types[ 7])
+ self.assertEquals("int", child_types[ 8])
+ self.assertEquals("int", child_types[ 9])
+ self.assertEquals("string", child_types[10])
+ self.assertEquals("float", child_types[11])
+ self.assertEquals("integer", child_types[12])
self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR))
@@ -754,19 +754,19 @@
child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE)
for c in root.iterchildren() ]
- self.assertEquals("int", child_types[0])
- self.assertEquals("str", child_types[1])
- self.assertEquals("float", child_types[2])
- self.assertEquals("str", child_types[3])
- self.assertEquals("bool", child_types[4])
- self.assertEquals("none", child_types[5])
- self.assertEquals(None, child_types[6])
- self.assertEquals("float", child_types[7])
- self.assertEquals("float", child_types[8])
- self.assertEquals("str", child_types[9])
- self.assertEquals("int", child_types[10])
- self.assertEquals("int", child_types[11])
- self.assertEquals("int", child_types[12])
+ self.assertEquals("int", child_types[ 0])
+ self.assertEquals("str", child_types[ 1])
+ self.assertEquals("float", child_types[ 2])
+ self.assertEquals("str", child_types[ 3])
+ self.assertEquals("bool", child_types[ 4])
+ self.assertEquals("none", child_types[ 5])
+ self.assertEquals(None, child_types[ 6])
+ self.assertEquals("float", child_types[ 7])
+ self.assertEquals("float", child_types[ 8])
+ self.assertEquals("str", child_types[ 9])
+ self.assertEquals("int", child_types[10])
+ self.assertEquals("int", child_types[11])
+ self.assertEquals("int", child_types[12])
self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR))
From scoder at codespeak.net Sun Apr 15 11:43:32 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 15 Apr 2007 11:43:32 +0200 (CEST)
Subject: [Lxml-checkins] r42062 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20070415094332.D4C4480B0@code0.codespeak.net>
Author: scoder
Date: Sun Apr 15 11:43:31 2007
New Revision: 42062
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tests/test_etree.py
Log:
support for element.attrib.pop()
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sun Apr 15 11:43:31 2007
@@ -8,6 +8,8 @@
Features added
--------------
+* Element.attrib now has a ``pop()`` method
+
* Extended type annotation in objectify: cleaner annotation namespace setup
plus new ``xsiannotate()`` and ``deannotate()`` functions
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Sun Apr 15 11:43:31 2007
@@ -1411,6 +1411,20 @@
for key, value in sequence_or_dict:
_setAttributeValue(self._element, key, value)
+ def pop(self, key, *default):
+ if python.PyTuple_GET_SIZE(default) > 1:
+ raise TypeError, "pop expected at most 2 arguments, got %d" % \
+ (python.PyTuple_GET_SIZE(default)+1)
+ result = _getAttributeValue(self._element, key, None)
+ if result is None:
+ if python.PyTuple_GET_SIZE(default) == 0:
+ raise KeyError, key
+ else:
+ return python.PyTuple_GET_ITEM(default, 0)
+ else:
+ _delAttribute(self._element, key)
+ return result
+
# ACCESSORS
def __repr__(self):
return repr(dict( _attributeIteratorFactory(self._element, 3) ))
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Sun Apr 15 11:43:31 2007
@@ -70,6 +70,39 @@
self.assertEquals("TEST", root.get("attr"))
self.assertRaises(TypeError, root.set, "newattr", 5)
+ def test_attrib_pop(self):
+ ElementTree = self.etree.ElementTree
+
+ f = StringIO('<doc one="One" two="Two"/>')
+ doc = ElementTree(file=f)
+ root = doc.getroot()
+ self.assertEquals('One', root.attrib['one'])
+ self.assertEquals('Two', root.attrib['two'])
+
+ self.assertEquals('One', root.attrib.pop('one'))
+
+ self.assertEquals(None, root.attrib.get('one'))
+ self.assertEquals('Two', root.attrib['two'])
+
+ def test_attrib_pop_unknown(self):
+ root = self.etree.XML('<doc one="One" two="Two"/>')
+ self.assertRaises(KeyError, root.attrib.pop, 'NONE')
+
+ self.assertEquals('One', root.attrib['one'])
+ self.assertEquals('Two', root.attrib['two'])
+
+ def test_attrib_pop_default(self):
+ root = self.etree.XML('<doc one="One" two="Two"/>')
+ self.assertEquals('Three', root.attrib.pop('three', 'Three'))
+
+ def test_attrib_pop_empty_default(self):
+ root = self.etree.XML('<doc/>')
+ self.assertEquals('Three', root.attrib.pop('three', 'Three'))
+
+ def test_attrib_pop_invalid_args(self):
+ root = self.etree.XML('<doc one="One" two="Two"/>')
+ self.assertRaises(TypeError, root.attrib.pop, 'One', None, None)
+
def test_pi(self):
# lxml.etree separates target and text
Element = self.etree.Element
From scoder at codespeak.net Fri Apr 20 09:53:08 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 20 Apr 2007 09:53:08 +0200 (CEST)
Subject: [Lxml-checkins] r42190 - in lxml/trunk/doc: . html
Message-ID: <20070420075308.D65DC8090@code0.codespeak.net>
Author: scoder
Date: Fri Apr 20 09:53:08 2007
New Revision: 42190
Modified:
lxml/trunk/doc/html/style.css
lxml/trunk/doc/mkhtml.py
Log:
work arounds for IE
Modified: lxml/trunk/doc/html/style.css
==============================================================================
--- lxml/trunk/doc/html/style.css (original)
+++ lxml/trunk/doc/html/style.css Fri Apr 20 09:53:08 2007
@@ -1,15 +1,15 @@
body {
- /* CSS Hack for IE that does not respect the "margin: auto" rule at the
- * document level */
+ font: 13px Arial, Verdana, Helvetica, sans-serif;
text-align: center;
- padding: 1em;
}
-
@media screen {
+ body {
+ padding: 1em 1em 1em 21em;
+ }
+
div.document {
width: 45em;
- padding-left: 21em;
background-color: white;
}
}
@@ -26,7 +26,6 @@
}
div.document {
- font: 13px Arial, Verdana, Helvetica, sans-serif;
margin: 1em auto 1em auto;
color: #222;
text-align: left;
@@ -50,7 +49,7 @@
/*** side menu ***/
div.sidemenu {
- position: fixed;
+ position: absolute;
top: 0px;
left: 0px;
width: 22em;
@@ -62,6 +61,11 @@
background-color: #FFFAFA;
}
+html > body div.sidemenu {
+ /* ignored by IE -> everyone else knows 'fixed', right? */
+ position: fixed;
+}
+
div.sidemenu span.section.title {
line-height: 1.5em;
font-size: 130%;
Modified: lxml/trunk/doc/mkhtml.py
==============================================================================
--- lxml/trunk/doc/mkhtml.py (original)
+++ lxml/trunk/doc/mkhtml.py Fri Apr 20 09:53:08 2007
@@ -55,7 +55,7 @@
def merge_menu(tree, menu, name):
menu_root = copy.deepcopy(menu)
- tree.getroot()[1][0].append(menu_root) # html->body->div[class=document]
+ tree.getroot()[1][0].insert(0, menu_root) # html->body->div[class=document]
for el in menu_root.getiterator():
tag = el.tag
if tag[0] != '{':
From scoder at codespeak.net Fri Apr 20 09:54:43 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 20 Apr 2007 09:54:43 +0200 (CEST)
Subject: [Lxml-checkins] r42192 - lxml/trunk/benchmark
Message-ID: <20070420075443.243F48090@code0.codespeak.net>
Author: scoder
Date: Fri Apr 20 09:54:42 2007
New Revision: 42192
Modified:
lxml/trunk/benchmark/bench_etree.py
lxml/trunk/benchmark/benchbase.py
Log:
benchmark fix: remove child iteration overhead from benchmark loops to restrict timings to the benchmark target
Modified: lxml/trunk/benchmark/bench_etree.py
==============================================================================
--- lxml/trunk/benchmark/bench_etree.py (original)
+++ lxml/trunk/benchmark/bench_etree.py Fri Apr 20 09:54:42 2007
@@ -3,7 +3,7 @@
from StringIO import StringIO
import benchbase
-from benchbase import with_attributes, with_text, onlylib, serialized
+from benchbase import with_attributes, with_text, onlylib, serialized, children
############################################################
# Benchmarks
@@ -77,8 +77,10 @@
root1.append(el)
def bench_insert_from_document(self, root1, root2):
+ pos = len(root1)/2
for el in root2:
- root1.insert(len(root1)/2, el)
+ root1.insert(pos, el)
+ pos = pos + 1
def bench_rotate_children(self, root):
# == "1 2 3" # runs on any single tree independently
@@ -102,18 +104,21 @@
def bench_clear(self, root):
root.clear()
- def bench_has_children(self, root):
- for child in root:
+ @children
+ def bench_has_children(self, children):
+ for child in children:
if child and child and child and child and child:
pass
- def bench_len(self, root):
- for child in root:
+ @children
+ def bench_len(self, children):
+ for child in children:
map(len, repeat(child, 20))
- def bench_create_subelements(self, root):
+ @children
+ def bench_create_subelements(self, children):
SubElement = self.etree.SubElement
- for child in root:
+ for child in children:
SubElement(child, '{test}test')
def bench_append_elements(self, root):
@@ -122,103 +127,120 @@
el = Element('{test}test')
child.append(el)
- def bench_makeelement(self, root):
+ @children
+ def bench_makeelement(self, children):
empty_attrib = {}
- for child in root:
+ for child in children:
child.makeelement('{test}test', empty_attrib)
- def bench_create_elements(self, root):
+ @children
+ def bench_create_elements(self, children):
Element = self.etree.Element
- for child in root:
+ for child in children:
Element('{test}test')
- def bench_replace_children_element(self, root):
+ @children
+ def bench_replace_children_element(self, children):
Element = self.etree.Element
- for child in root:
+ for child in children:
el = Element('{test}test')
child[:] = [el]
- def bench_replace_children(self, root):
- Element = self.etree.Element
- for child in root:
- child[:] = [ child[0] ]
+ @children
+ def bench_replace_children(self, children):
+ els = [ self.etree.Element("newchild") ]
+ for child in children:
+ child[:] = els
def bench_remove_children(self, root):
for child in root:
root.remove(child)
def bench_remove_children_reversed(self, root):
- for child in reversed(root[:]):
+ for child in reversed(root):
root.remove(child)
- def bench_set_attributes(self, root):
- for child in root:
+ @children
+ def bench_set_attributes(self, children):
+ for child in children:
child.set('a', 'bla')
@with_attributes(True)
- def bench_get_attributes(self, root):
- for child in root:
+ @children
+ def bench_get_attributes(self, children):
+ for child in children:
child.get('bla1')
child.get('{attr}test1')
- def bench_setget_attributes(self, root):
- for child in root:
+ @children
+ def bench_setget_attributes(self, children):
+ for child in children:
child.set('a', 'bla')
- for child in root:
+ for child in children:
child.get('a')
def bench_root_getchildren(self, root):
root.getchildren()
- def bench_getchildren(self, root):
- for child in root:
+ @children
+ def bench_getchildren(self, children):
+ for child in children:
child.getchildren()
- def bench_get_children_slice(self, root):
- for child in root:
+ @children
+ def bench_get_children_slice(self, children):
+ for child in children:
child[:]
- def bench_get_children_slice_2x(self, root):
- for child in root:
- children = child[:]
+ @children
+ def bench_get_children_slice_2x(self, children):
+ for child in children:
+ child[:]
child[:]
- def bench_deepcopy(self, root):
- for child in root:
+ @children
+ def bench_deepcopy(self, children):
+ for child in children:
copy.deepcopy(child)
def bench_deepcopy_all(self, root):
copy.deepcopy(root)
- def bench_tag(self, root):
- for child in root:
+ @children
+ def bench_tag(self, children):
+ for child in children:
child.tag
- def bench_tag_repeat(self, root):
- for child in root:
+ @children
+ def bench_tag_repeat(self, children):
+ for child in children:
for i in repeat(0, 100):
child.tag
@with_text(utext=True, text=True, no_text=True)
- def bench_text(self, root):
- for child in root:
+ @children
+ def bench_text(self, children):
+ for child in children:
child.text
@with_text(utext=True, text=True, no_text=True)
- def bench_text_repeat(self, root):
+ @children
+ def bench_text_repeat(self, children):
repeat = range(500)
- for child in root:
+ for child in children:
for i in repeat:
child.text
- def bench_set_text(self, root):
+ @children
+ def bench_set_text(self, children):
text = TEXT
- for child in root:
+ for child in children:
child.text = text
- def bench_set_utext(self, root):
+ @children
+ def bench_set_utext(self, children):
text = UTEXT
- for child in root:
+ for child in children:
child.text = text
@onlylib('lxe')
Modified: lxml/trunk/benchmark/benchbase.py
==============================================================================
--- lxml/trunk/benchmark/benchbase.py (original)
+++ lxml/trunk/benchmark/benchbase.py Fri Apr 20 09:54:42 2007
@@ -78,6 +78,11 @@
function.STRING = True
return function
+def children(function):
+ "Decorator for benchmarks that require a list of root children"
+ function.CHILDREN = True
+ return function
+
############################################################
# benchmark baseclass
############################################################
@@ -105,13 +110,18 @@
deepcopy = copy.deepcopy
def set_property(root, fname):
xml = self._serialize_tree(root)
- setattr(self, fname, lambda : etree.XML(xml, etree_parser))
+ if etree_parser is not None:
+ setattr(self, fname, lambda : etree.XML(xml, etree_parser))
+ else:
+ setattr(self, fname, lambda : deepcopy(root))
setattr(self, fname + '_xml', lambda : xml)
+ setattr(self, fname + '_children', lambda : root[:])
else:
def set_property(root, fname):
setattr(self, fname, self.et_make_clone_factory(root))
xml = self._serialize_tree(root)
setattr(self, fname + '_xml', lambda : xml)
+ setattr(self, fname + '_children', lambda : root[:])
attribute_list = list(izip(count(), ({}, _ATTRIBUTES)))
text_list = list(izip(count(), (None, _TEXT, _UTEXT)))
@@ -131,10 +141,12 @@
def _tree_builder_name(self, tree, tn, an):
return '_root%d_T%d_A%d' % (tree, tn, an)
- def tree_builder(self, tree, tn, an, serial):
+ def tree_builder(self, tree, tn, an, serial, children):
name = self._tree_builder_name(tree, tn, an)
if serial:
name += '_xml'
+ elif children:
+ name += '_children'
return getattr(self, name)
def _serialize_tree(self, root):
@@ -270,13 +282,14 @@
arg_count = 1
tree_tuples = self._permutations(all_trees, arg_count)
- serialized = getattr(method, 'STRING', False)
+ serialized = getattr(method, 'STRING', False)
+ children = getattr(method, 'CHILDREN', False)
for tree_tuple in tree_tuples:
for tn in sorted(getattr(method, 'TEXT', (0,))):
for an in sorted(getattr(method, 'ATTRIBUTES', (0,))):
benchmarks.append((name, method_call, tree_tuple,
- tn, an, serialized))
+ tn, an, serialized, children))
return benchmarks
@@ -315,11 +328,12 @@
return (benchmark_suites, benchmarks)
-def build_treeset_name(trees, tn, an, serialized):
+def build_treeset_name(trees, tn, an, serialized, children):
text = {0:'-', 1:'S', 2:'U'}[tn]
attr = {0:'-', 1:'A'}[an]
ser = {True:'X', False:'T'}[serialized]
- return "%s%s%s T%s" % (text, attr, ser, ',T'.join(imap(str, trees))[:6])
+ chd = {True:'C', False:'R'}[children]
+ return "%s%s%s%s T%s" % (text, attr, ser, chd, ',T'.join(imap(str, trees))[:6])
def printSetupTimes(benchmark_suites):
print "Setup times for trees in seconds:"
@@ -327,20 +341,20 @@
print "%-3s: " % b.lib_name,
for an in (0,1):
for tn in (0,1,2):
- print ' %s ' % build_treeset_name((), tn, an, False)[:2],
+ print ' %s ' % build_treeset_name((), tn, an, False, False)[:2],
print
for i, tree_times in enumerate(b.setup_times):
print " T%d:" % (i+1), ' '.join("%6.4f" % t for t in tree_times)
print
-def runBench(suite, method_name, method_call, tree_set, tn, an, serial):
+def runBench(suite, method_name, method_call, tree_set, tn, an, serial, children):
if method_call is None:
raise SkippedTest
current_time = time.time
call_repeat = range(10)
- tree_builders = [ suite.tree_builder(tree, tn, an, serial)
+ tree_builders = [ suite.tree_builder(tree, tn, an, serial, children)
for tree in tree_set ]
times = []
@@ -364,7 +378,7 @@
for bench_calls in izip(*benchmarks):
for lib, (bench, benchmark_setup) in enumerate(izip(benchmark_suites, bench_calls)):
bench_name = benchmark_setup[0]
- tree_set_name = build_treeset_name(*benchmark_setup[-4:])
+ tree_set_name = build_treeset_name(*benchmark_setup[-5:])
print "%-3s: %-28s" % (bench.lib_name, bench_name[6:34]),
print "(%-10s)" % tree_set_name,
sys.stdout.flush()
From scoder at codespeak.net Fri Apr 20 10:48:29 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 20 Apr 2007 10:48:29 +0200 (CEST)
Subject: [Lxml-checkins] r42195 - lxml/trunk/benchmark
Message-ID: <20070420084829.BD66780AD@code0.codespeak.net>
Author: scoder
Date: Fri Apr 20 10:48:29 2007
New Revision: 42195
Modified:
lxml/trunk/benchmark/benchbase.py
Log:
take minimum time instead of averaging over repeated runs
Modified: lxml/trunk/benchmark/benchbase.py
==============================================================================
--- lxml/trunk/benchmark/benchbase.py (original)
+++ lxml/trunk/benchmark/benchbase.py Fri Apr 20 10:48:29 2007
@@ -362,13 +362,16 @@
for i in range(3):
gc.collect()
gc.disable()
- t = 0
+ t = -1
for i in call_repeat:
args = [ build() for build in tree_builders ]
t_one_call = current_time()
method_call(*args)
- t += current_time() - t_one_call
- t = 1000.0 * t / len(call_repeat)
+ t_one_call = 1000.0 * (current_time() - t_one_call)
+ if t < 0:
+ t = t_one_call
+ else:
+ t = min(t, t_one_call)
times.append(t)
gc.enable()
del args
From scoder at codespeak.net Fri Apr 20 10:54:42 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 20 Apr 2007 10:54:42 +0200 (CEST)
Subject: [Lxml-checkins] r42196 - lxml/trunk/benchmark
Message-ID: <20070420085442.A7BD780AD@code0.codespeak.net>
Author: scoder
Date: Fri Apr 20 10:54:42 2007
New Revision: 42196
Modified:
lxml/trunk/benchmark/benchbase.py
Log:
cleanup
Modified: lxml/trunk/benchmark/benchbase.py
==============================================================================
--- lxml/trunk/benchmark/benchbase.py (original)
+++ lxml/trunk/benchmark/benchbase.py Fri Apr 20 10:54:42 2007
@@ -367,12 +367,12 @@
args = [ build() for build in tree_builders ]
t_one_call = current_time()
method_call(*args)
- t_one_call = 1000.0 * (current_time() - t_one_call)
+ t_one_call = current_time() - t_one_call
if t < 0:
t = t_one_call
else:
t = min(t, t_one_call)
- times.append(t)
+ times.append(1000.0 * t)
gc.enable()
del args
return times
From scoder at codespeak.net Fri Apr 20 12:42:01 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 20 Apr 2007 12:42:01 +0200 (CEST)
Subject: [Lxml-checkins] r42199 - lxml/trunk/benchmark
Message-ID: <20070420104201.CBE0A8090@code0.codespeak.net>
Author: scoder
Date: Fri Apr 20 12:42:01 2007
New Revision: 42199
Modified:
lxml/trunk/benchmark/bench_objectify.py
lxml/trunk/benchmark/bench_xpath.py
Log:
cleanup in benchmarks, use children where appropriate
Modified: lxml/trunk/benchmark/bench_objectify.py
==============================================================================
--- lxml/trunk/benchmark/bench_objectify.py (original)
+++ lxml/trunk/benchmark/bench_objectify.py Fri Apr 20 12:42:01 2007
@@ -10,6 +10,9 @@
############################################################
class BenchMark(benchbase.BenchMarkBase):
+ repeat1000 = range(1000)
+ repeat3000 = range(3000)
+
def __init__(self, lib):
from lxml import etree, objectify
self.objectify = objectify
@@ -20,37 +23,37 @@
def bench_attribute(self, root):
"1 2 4"
- for i in repeat(None, 3000):
+ for i in self.repeat3000:
root.zzzzz
def bench_attribute_cached(self, root):
"1 2 4"
cache = root.zzzzz
- for i in repeat(None, 3000):
+ for i in self.repeat3000:
root.zzzzz
def bench_attributes_deep(self, root):
"1 2 4"
- for i in repeat(None, 3000):
+ for i in self.repeat3000:
root.zzzzz['{cdefg}z00000']
def bench_attributes_deep_cached(self, root):
"1 2 4"
cache1 = root.zzzzz
cache2 = cache1['{cdefg}z00000']
- for i in repeat(None, 3000):
+ for i in self.repeat3000:
root.zzzzz['{cdefg}z00000']
def bench_objectpath(self, root):
"1 2 4"
path = self.objectify.ObjectPath(".zzzzz")
- for i in repeat(None, 3000):
+ for i in self.repeat3000:
path(root)
def bench_objectpath_deep(self, root):
"1 2 4"
path = self.objectify.ObjectPath(".zzzzz.{cdefg}z00000")
- for i in repeat(None, 3000):
+ for i in self.repeat3000:
path(root)
def bench_objectpath_deep_cached(self, root):
@@ -58,7 +61,7 @@
cache1 = root.zzzzz
cache2 = cache1['{cdefg}z00000']
path = self.objectify.ObjectPath(".zzzzz.{cdefg}z00000")
- for i in repeat(None, 3000):
+ for i in self.repeat3000:
path(root)
@with_text(text=True, utext=True, no_text=True)
@@ -72,7 +75,7 @@
def bench_type_inference(self, root):
"1 2 4"
el = root.aaaaa
- for i in repeat(None, 1000):
+ for i in self.repeat1000:
el.getchildren()
@with_text(text=True)
@@ -80,7 +83,7 @@
"1 2 4"
el = root.aaaaa
self.objectify.annotate(el)
- for i in repeat(None, 1000):
+ for i in self.repeat1000:
el.getchildren()
Modified: lxml/trunk/benchmark/bench_xpath.py
==============================================================================
--- lxml/trunk/benchmark/bench_xpath.py (original)
+++ lxml/trunk/benchmark/bench_xpath.py Fri Apr 20 12:42:01 2007
@@ -3,7 +3,7 @@
from StringIO import StringIO
import benchbase
-from benchbase import with_attributes, with_text, onlylib, serialized
+from benchbase import with_attributes, with_text, onlylib, serialized, children
############################################################
# Benchmarks
@@ -11,14 +11,16 @@
class XPathBenchMark(benchbase.BenchMarkBase):
@onlylib('lxe')
- def bench_xpath_class(self, root):
+ @children
+ def bench_xpath_class(self, children):
xpath = self.etree.XPath("./*[0]")
- for child in root:
+ for child in children:
xpath(child)
@onlylib('lxe')
- def bench_xpath_class_repeat(self, root):
- for child in root:
+ @children
+ def bench_xpath_class_repeat(self, children):
+ for child in children:
xpath = self.etree.XPath("./*[0]")
xpath(child)
@@ -29,12 +31,14 @@
xpath.evaluate("./*[0]")
@onlylib('lxe')
- def bench_xpath_method(self, root):
- for child in root:
+ @children
+ def bench_xpath_method(self, children):
+ for child in children:
child.xpath("./*[0]")
@onlylib('lxe')
- def bench_xpath_old_extensions(self, root):
+ @children
+ def bench_xpath_old_extensions(self, children):
def return_child(_, elements):
if elements:
return elements[0][0]
@@ -43,11 +47,12 @@
extensions = {("test", "child") : return_child}
xpath = self.etree.XPath("t:child(.)", namespaces={"test":"t"},
extensions=extensions)
- for child in root:
+ for child in children:
xpath(child)
@onlylib('lxe')
- def bench_xpath_extensions(self, root):
+ @children
+ def bench_xpath_extensions(self, children):
def return_child(_, elements):
if elements:
return elements[0][0]
@@ -57,7 +62,7 @@
try:
xpath = self.etree.XPath("test:t(.)", {"test":"testns"})
- for child in root:
+ for child in children:
xpath(child)
finally:
del self.etree.FunctionNamespace("testns")["t"]
From scoder at codespeak.net Fri Apr 20 12:43:22 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 20 Apr 2007 12:43:22 +0200 (CEST)
Subject: [Lxml-checkins] r42200 - lxml/trunk/doc
Message-ID: <20070420104322.449FB80AA@code0.codespeak.net>
Author: scoder
Date: Fri Apr 20 12:43:22 2007
New Revision: 42200
Modified:
lxml/trunk/doc/performance.txt
Log:
updated benchmark results for pre-1.3
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Fri Apr 20 12:43:22 2007
@@ -14,21 +14,26 @@
.. _ElementTree: http://effbot.org/zone/element-index.htm
.. _cElementTree: http://effbot.org/zone/celementtree.htm
-The statements made here are backed by the benchmark script `bench.py`_ that
-comes with the lxml source distribution. The timings cited below compare lxml
-1.0 (with libxml2 2.6.24), ElementTree 1.2.6 and cElementTree 1.0.5 under
-CPython 2.4.2 on a 1.6GHz AMD64 machine.
-
-.. _`bench.py`: http://codespeak.net/svn/lxml/branch/lxml-1.0/bench.py
-
-The ``bench.py`` script runs a number of simple tests on the different
-libraries, using different XML tree configurations: different tree sizes, with
-or without attributes (-/A) and with or without ASCII or unicode text (-/S/U).
-In the result extracts cited below, T1 refers to a 3-level tree with many
-children at the third level, T2 is swapped around to have many children at the
-root element, T3 is a deep tree with few children at each level and T4 is a
-small tree, slightly broader than deep. Most benchmarks run in a loop over
-all children of the tree root.
+The statements made here are backed by the benchmark scripts
+`bench_etree.py`_, `bench_xpath.py`_ and `bench_objectify.py`_ that come with
+the lxml source distribution. The timings cited below compare lxml 1.3 (with
+libxml2 2.6.26) to the ElementTree and cElementTree versions shipped with
+CPython 2.5 (based on ElementTree 1.2.6). They were run single-threaded on a
+1.8GHz Intel Core Duo machine.
+
+.. _`bench_etree.py`: http://codespeak.net/svn/lxml/branch/lxml-1.3/benchmark/bench_etree.py
+.. _`bench_xpath.py`: http://codespeak.net/svn/lxml/branch/lxml-1.3/benchmark/bench_xpath.py
+.. _`bench_objectify.py`: http://codespeak.net/svn/lxml/branch/lxml-1.3/benchmark/bench_objectify.py
+
+The scripts run a number of simple tests on the different libraries, using
+different XML tree configurations: different tree sizes, with or without
+attributes (-/A), with or without ASCII or unicode text (-/S/U), either
+against a tree or its serialised form (T/X), and passing either the tree
+root or a list of its children (R/C). In the result extracts cited
+below, T1 refers to a 3-level tree with many children at the third level, T2
+is swapped around to have many children at the root element, T3 is a deep tree
+with few children at each level and T4 is a small tree, slightly broader than
+deep. If repetition is involved, this usually means running the benchmark in
+a loop over all children of the tree root.
.. contents::
..
@@ -37,6 +42,7 @@
3 The ElementTree API
4 Tree traversal
5 XPath
+ 6 lxml.objectify
Bad things first
@@ -57,45 +63,57 @@
results are rather impressive. Compared to cElementTree, lxml is about 20 to
40 times faster on serialisation::
- lxe: tostring_utf16 (SA T2) 30.9846 msec/pass
- cET: tostring_utf16 (SA T2) 715.5002 msec/pass
- ET : tostring_utf16 (SA T2) 758.5271 msec/pass
-
- lxe: tostring_utf16 (U- T3) 3.0509 msec/pass
- cET: tostring_utf16 (U- T3) 72.4721 msec/pass
- ET : tostring_utf16 (U- T3) 87.0735 msec/pass
-
- lxe: tostring_utf8 (UA T2) 26.8996 msec/pass
- cET: tostring_utf8 (UA T2) 700.4889 msec/pass
- ET : tostring_utf8 (UA T2) 745.3317 msec/pass
-
- lxe: tostring_utf8 (S- T3) 2.1876 msec/pass
- cET: tostring_utf8 (S- T3) 71.1290 msec/pass
- ET : tostring_utf8 (S- T3) 87.1525 msec/pass
+ lxe: tostring_utf16 (SATR T1) 21.9206 msec/pass
+ cET: tostring_utf16 (SATR T1) 461.9428 msec/pass
+ ET : tostring_utf16 (SATR T1) 486.8946 msec/pass
+
+ lxe: tostring_utf16 (UATR T1) 22.7508 msec/pass
+ cET: tostring_utf16 (UATR T1) 526.3446 msec/pass
+ ET : tostring_utf16 (UATR T1) 496.0767 msec/pass
+
+ lxe: tostring_utf16 (S-TR T2) 23.8452 msec/pass
+ cET: tostring_utf16 (S-TR T2) 537.9200 msec/pass
+ ET : tostring_utf16 (S-TR T2) 504.4273 msec/pass
+
+ lxe: tostring_utf8 (S-TR T2) 18.2550 msec/pass
+ cET: tostring_utf8 (S-TR T2) 528.3908 msec/pass
+ ET : tostring_utf8 (S-TR T2) 549.7071 msec/pass
+
+ lxe: tostring_utf8 (U-TR T3) 2.5497 msec/pass
+ cET: tostring_utf8 (U-TR T3) 49.8495 msec/pass
+ ET : tostring_utf8 (U-TR T3) 62.6927 msec/pass
For parsing, the difference between the libraries is smaller. The (c)ET
libraries use the expat parser, which is known to be extremely fast::
- lxe: parse_stringIO (SA T2) 197.7678 msec/pass
- cET: parse_stringIO (SA T2) 38.9390 msec/pass
- ET : parse_stringIO (SA T2) 364.3468 msec/pass
-
- lxe: parse_stringIO (UA T3) 48.6735 msec/pass
- cET: parse_stringIO (UA T3) 39.7455 msec/pass
- ET : parse_stringIO (UA T3) 237.9971 msec/pass
+ lxe: parse_stringIO (SAXR T1) 150.2380 msec/pass
+ cET: parse_stringIO (SAXR T1) 25.9311 msec/pass
+ ET : parse_stringIO (SAXR T1) 222.9431 msec/pass
+
+ lxe: parse_stringIO (S-XR T3) 5.9490 msec/pass
+ cET: parse_stringIO (S-XR T3) 5.4519 msec/pass
+ ET : parse_stringIO (S-XR T3) 76.4120 msec/pass
+
+ lxe: parse_stringIO (UAXR T3) 29.3601 msec/pass
+ cET: parse_stringIO (UAXR T3) 28.9941 msec/pass
+ ET : parse_stringIO (UAXR T3) 163.5361 msec/pass
The expat parser allows cET to be up to 80% faster than lxml on plain parser
-performance. The same applies to the ``iterparse()`` function. However, if
-you take a complete serialize-parse cycle, the numbers will look similar to
-these::
-
- lxe: write_utf8_parse_stringIO (S- T1) 187.0444 msec/pass
- cET: write_utf8_parse_stringIO (S- T1) 828.4068 msec/pass
- ET : write_utf8_parse_stringIO (S- T1) 1181.0658 msec/pass
-
- lxe: write_utf8_parse_stringIO (UA T2) 213.6599 msec/pass
- cET: write_utf8_parse_stringIO (UA T2) 927.2374 msec/pass
- ET : write_utf8_parse_stringIO (UA T2) 1297.9678 msec/pass
+performance. Similar timings can be observed for the ``iterparse()``
+function. However, if you take a complete serialize-parse cycle, the numbers
+will look similar to these::
+
+ lxe: write_utf8_parse_stringIO (S-TR T1) 316.6230 msec/pass
+ cET: write_utf8_parse_stringIO (S-TR T1) 592.1209 msec/pass
+ ET : write_utf8_parse_stringIO (S-TR T1) 817.9121 msec/pass
+
+ lxe: write_utf8_parse_stringIO (UATR T3) 49.9680 msec/pass
+ cET: write_utf8_parse_stringIO (UATR T3) 434.6111 msec/pass
+ ET : write_utf8_parse_stringIO (UATR T3) 574.1441 msec/pass
+
+ lxe: write_utf8_parse_stringIO (SATR T4) 1.2789 msec/pass
+ cET: write_utf8_parse_stringIO (SATR T4) 12.2640 msec/pass
+ ET : write_utf8_parse_stringIO (SATR T4) 15.6620 msec/pass
For applications that require a high parser throughput and do little
serialization, cET is the best choice. Also for iterparse applications that
@@ -114,22 +132,20 @@
(given in seconds)::
lxe: -- S- U- -A SA UA
- T1: 0.1360 0.1214 0.1214 0.1217 0.1232 0.1226
- T2: 0.1258 0.1257 0.1250 0.1348 0.1359 0.1358
- T3: 0.0354 0.0282 0.0288 0.0850 0.0860 0.0862
- T4: 0.0006 0.0006 0.0006 0.0019 0.0018 0.0019
-
+ T1: 0.1029 0.1005 0.0998 0.1003 0.0998 0.1002
+ T2: 0.1035 0.1013 0.1015 0.1090 0.1089 0.1090
+ T3: 0.0276 0.0270 0.0273 0.0679 0.0673 0.0673
+ T4: 0.0004 0.0004 0.0004 0.0013 0.0013 0.0013
cET: -- S- U- -A SA UA
- T1: 0.0417 0.0409 0.0403 0.0410 0.0410 0.0415
- T2: 0.0413 0.0414 0.0413 0.0417 0.0411 0.0417
- T3: 0.0097 0.0100 0.0099 0.0187 0.0142 0.0146
+ T1: 0.0277 0.0273 0.0273 0.0272 0.0278 0.0275
+ T2: 0.0281 0.0347 0.0281 0.0285 0.0284 0.0284
+ T3: 0.0074 0.0074 0.0074 0.0122 0.0102 0.0101
T4: 0.0001 0.0001 0.0001 0.0001 0.0001 0.0001
-
ET : -- S- U- -A SA UA
- T1: 0.2189 0.2832 0.2210 0.2646 0.2905 0.2214
- T2: 0.3022 0.2322 0.2868 0.3192 0.2290 0.3075
- T3: 0.0519 0.0553 0.0527 0.0601 0.0572 0.0911
- T4: 0.0009 0.0008 0.0008 0.0008 0.0009 0.0009
+ T1: 0.1349 0.1962 0.2356 0.1288 0.2642 0.1351
+ T2: 0.3104 0.1344 0.3566 0.3857 0.1354 0.4677
+ T3: 0.0313 0.0325 0.0312 0.0356 0.3803 0.0364
+ T4: 0.0005 0.0005 0.0008 0.0006 0.0007 0.0006
While lxml is still faster than ET in most cases (30-60%), cET can be up to
three times faster than lxml here. One of the reasons is that lxml must
@@ -141,29 +157,29 @@
Where ET and cET can quickly create a shallow copy of their list of children,
lxml has to create a Python object for each child and collect them in a list::
- lxe: root_getchildren (-- T2 ) 6.3981 msec/pass
- cET: root_getchildren (-- T2 ) 0.0651 msec/pass
- ET : root_getchildren (-- T2 ) 0.0224 msec/pass
+ lxe: root_getchildren (--TR T2) 0.3500 msec/pass
+ cET: root_getchildren (--TR T2) 0.0150 msec/pass
+ ET : root_getchildren (--TR T2) 0.0091 msec/pass
As opposed to ET, libxml2 has a notion of documents that each element must be
in. This results in a major performance difference for creating independent
Elements that end up in independently created documents::
- lxe: create_elements (-- T2 ) 22.0083 msec/pass
- cET: create_elements (-- T2 ) 0.3920 msec/pass
- ET : create_elements (-- T2 ) 3.0865 msec/pass
+ lxe: create_elements (--TC T2) 3.7301 msec/pass
+ cET: create_elements (--TC T2) 0.1960 msec/pass
+ ET : create_elements (--TC T2) 1.4279 msec/pass
Therefore, it is always preferable to create Elements for the document they
are supposed to end up in, either as SubElements of an Element or using the
explicit ``Element.makeelement()`` call::
- lxe: makeelement (-- T2 ) 4.2658 msec/pass
- cET: makeelement (-- T2 ) 0.5658 msec/pass
- ET : makeelement (-- T2 ) 3.7136 msec/pass
-
- lxe: create_subelements (-- T2 ) 3.7640 msec/pass
- cET: create_subelements (-- T2 ) 0.5332 msec/pass
- ET : create_subelements (-- T2 ) 6.5937 msec/pass
+ lxe: makeelement (--TC T2) 2.5990 msec/pass
+ cET: makeelement (--TC T2) 0.3128 msec/pass
+ ET : makeelement (--TC T2) 1.6940 msec/pass
+
+ lxe: create_subelements (--TC T2) 2.3072 msec/pass
+ cET: create_subelements (--TC T2) 0.2370 msec/pass
+ ET : create_subelements (--TC T2) 3.2189 msec/pass
So, if the main performance bottleneck of an application is creating large XML
trees in memory through calls to Element and SubElement, cET is the best
@@ -176,13 +192,13 @@
The following benchmark appends all root children of the second tree to the
root of the first tree::
- lxe: append_from_document (-- T1,T2) 11.7905 msec/pass
- cET: append_from_document (-- T1,T2) 0.4673 msec/pass
- ET : append_from_document (-- T1,T2) 2.0460 msec/pass
-
- lxe: append_from_document (-- T3,T4) 0.1582 msec/pass
- cET: append_from_document (-- T3,T4) 0.0224 msec/pass
- ET : append_from_document (-- T3,T4) 0.1618 msec/pass
+ lxe: append_from_document (--TR T1,T2) 4.3468 msec/pass
+ cET: append_from_document (--TR T1,T2) 0.2608 msec/pass
+ ET : append_from_document (--TR T1,T2) 1.2310 msec/pass
+
+ lxe: append_from_document (--TR T3,T4) 0.0679 msec/pass
+ cET: append_from_document (--TR T3,T4) 0.0148 msec/pass
+ ET : append_from_document (--TR T3,T4) 0.0880 msec/pass
Although these are fairly small numbers compared to parsing, this easily shows
the different performance classes for lxml and (c)ET. Where the latter do not
@@ -193,26 +209,26 @@
This difference is not always as visible, but applies to most parts of the
API, like inserting newly created elements::
- lxe: insert_from_document (-- T1,T2) 16.2342 msec/pass
- cET: insert_from_document (-- T1,T2) 1.1786 msec/pass
- ET : insert_from_document (-- T1,T2) 3.6107 msec/pass
+ lxe: insert_from_document (--TR T1,T2) 6.3150 msec/pass
+ cET: insert_from_document (--TR T1,T2) 0.4039 msec/pass
+ ET : insert_from_document (--TR T1,T2) 1.4770 msec/pass
Or replacing the child slice by a new element::
- lxe: replace_children_element (-- T1 ) 9.1834 msec/pass
- cET: replace_children_element (-- T1 ) 0.9731 msec/pass
- ET : replace_children_element (-- T1 ) 14.8213 msec/pass
+ lxe: replace_children_element (--TC T1) 0.2608 msec/pass
+ cET: replace_children_element (--TC T1) 0.0238 msec/pass
+ ET : replace_children_element (--TC T1) 0.1628 msec/pass
You should keep this difference in mind when you merge very large trees. On
the other hand, deep copying a tree is fast in lxml::
- lxe: deepcopy (-- T1 ) 24.7359 msec/pass
- cET: deepcopy (-- T1 ) 450.5479 msec/pass
- ET : deepcopy (-- T1 ) 717.8308 msec/pass
-
- lxe: deepcopy (-- T3 ) 2.1182 msec/pass
- cET: deepcopy (-- T3 ) 107.2124 msec/pass
- ET : deepcopy (-- T3 ) 173.9782 msec/pass
+ lxe: deepcopy (--TC T1) 10.6010 msec/pass
+ cET: deepcopy (--TC T1) 220.2251 msec/pass
+ ET : deepcopy (--TC T1) 463.7730 msec/pass
+
+ lxe: deepcopy (--TC T3) 8.2979 msec/pass
+ cET: deepcopy (--TC T3) 53.8740 msec/pass
+ ET : deepcopy (--TC T3) 118.2799 msec/pass
So, for example, if you often need to create independent subtrees from a large
tree that you have parsed in, lxml is by far the best choice here.
@@ -226,39 +242,39 @@
especially if few elements are of interest or the element tag name is known,
lxml is a good choice::
- lxe: getiterator_all (-- T2 ) 22.5847 msec/pass
- cET: getiterator_all (-- T2 ) 36.8212 msec/pass
- ET : getiterator_all (-- T2 ) 46.2846 msec/pass
-
- lxe: getiterator_islice (-- T2 ) 2.0421 msec/pass
- cET: getiterator_islice (-- T2 ) 0.3343 msec/pass
- ET : getiterator_islice (-- T2 ) 44.5898 msec/pass
-
- lxe: getiterator_tag (-- T2 ) 1.9593 msec/pass
- cET: getiterator_tag (-- T2 ) 11.7767 msec/pass
- ET : getiterator_tag (-- T2 ) 37.5661 msec/pass
-
- lxe: getiterator_tag_all (-- T2 ) 4.5667 msec/pass
- cET: getiterator_tag_all (-- T2 ) 33.5681 msec/pass
- ET : getiterator_tag_all (-- T2 ) 37.6200 msec/pass
+ lxe: getiterator_all (--TR T2) 10.3800 msec/pass
+ cET: getiterator_all (--TR T2) 28.2831 msec/pass
+ ET : getiterator_all (--TR T2) 26.0720 msec/pass
+
+ lxe: getiterator_islice (--TR T2) 0.1140 msec/pass
+ cET: getiterator_islice (--TR T2) 0.2460 msec/pass
+ ET : getiterator_islice (--TR T2) 26.6550 msec/pass
+
+ lxe: getiterator_tag (--TR T2) 0.3879 msec/pass
+ cET: getiterator_tag (--TR T2) 9.3720 msec/pass
+ ET : getiterator_tag (--TR T2) 22.8221 msec/pass
+
+ lxe: getiterator_tag_all (--TR T2) 0.8819 msec/pass
+ cET: getiterator_tag_all (--TR T2) 27.2939 msec/pass
+ ET : getiterator_tag_all (--TR T2) 22.8271 msec/pass
This similarly shows in ``Element.findall()``::
- lxe: findall (-- T2 ) 26.9907 msec/pass
- cET: findall (-- T2 ) 39.1728 msec/pass
- ET : findall (-- T2 ) 50.9692 msec/pass
-
- lxe: findall (-- T3 ) 3.6452 msec/pass
- cET: findall (-- T3 ) 12.0210 msec/pass
- ET : findall (-- T3 ) 11.2570 msec/pass
-
- lxe: findall_tag (-- T2 ) 4.6065 msec/pass
- cET: findall_tag (-- T2 ) 34.0267 msec/pass
- ET : findall_tag (-- T2 ) 36.7813 msec/pass
-
- lxe: findall_tag (-- T3 ) 0.5884 msec/pass
- cET: findall_tag (-- T3 ) 7.6307 msec/pass
- ET : findall_tag (-- T3 ) 9.2943 msec/pass
+ lxe: findall (--TR T2) 10.9370 msec/pass
+ cET: findall (--TR T2) 28.8639 msec/pass
+ ET : findall (--TR T2) 27.1060 msec/pass
+
+ lxe: findall (--TR T3) 2.1989 msec/pass
+ cET: findall (--TR T3) 8.9881 msec/pass
+ ET : findall (--TR T3) 6.4890 msec/pass
+
+ lxe: findall_tag (--TR T2) 0.9520 msec/pass
+ cET: findall_tag (--TR T2) 27.2651 msec/pass
+ ET : findall_tag (--TR T2) 22.7208 msec/pass
+
+ lxe: findall_tag (--TR T3) 0.1700 msec/pass
+ cET: findall_tag (--TR T3) 6.4540 msec/pass
+ ET : findall_tag (--TR T3) 5.4770 msec/pass
Note that all three libraries currently use the same Python implementation for
``findall()``, except for their native tree iterator.
@@ -267,48 +283,52 @@
XPath
-----
+The following timings are based on the benchmark script `bench_xpath.py`_.
+
This part of lxml does not have an equivalent in ElementTree. However, lxml
provides more than one way of accessing it and you should take care which part
of the lxml API you use. The most straightforward way is to call the
``xpath()`` method on an Element or ElementTree::
- lxe: xpath_method (-- T1) 9.9304 msec/pass
- lxe: xpath_method (-- T2) 29.3595 msec/pass
- lxe: xpath_method (-- T3) 0.2791 msec/pass
- lxe: xpath_method (-- T4) 0.9906 msec/pass
+ lxe: xpath_method (--TC T1) 1.0180 msec/pass
+ lxe: xpath_method (--TC T2) 20.3521 msec/pass
+ lxe: xpath_method (--TC T3) 0.1259 msec/pass
+ lxe: xpath_method (--TC T4) 1.0169 msec/pass
This is well suited for testing and when the XPath expressions are as diverse
as the trees they are called on. However, if you have a single XPath
expression that you want to apply to a larger number of different elements,
the ``XPath`` class is the most efficient way to do it::
- lxe: xpath_class (-- T1) 4.7921 msec/pass
- lxe: xpath_class (-- T2) 9.6187 msec/pass
- lxe: xpath_class (-- T3) 0.2215 msec/pass
- lxe: xpath_class (-- T4) 0.2697 msec/pass
+ lxe: xpath_class (--TC T1) 0.1891 msec/pass
+ lxe: xpath_class (--TC T2) 3.0179 msec/pass
+ lxe: xpath_class (--TC T3) 0.0570 msec/pass
+ lxe: xpath_class (--TC T4) 0.1910 msec/pass
Note that this still allows you to use variables in the expression, so you can
parse it once and then adapt it through variables at call time. In other
cases, where you have a fixed Element or ElementTree and want to run different
expressions on it, you should consider the ``XPathEvaluator``::
- lxe: xpath_element (-- T1) 5.3826 msec/pass
- lxe: xpath_element (-- T2) 11.3929 msec/pass
- lxe: xpath_element (-- T3) 0.2514 msec/pass
- lxe: xpath_element (-- T4) 0.3038 msec/pass
+ lxe: xpath_element (--TR T1) 0.4089 msec/pass
+ lxe: xpath_element (--TR T2) 5.9960 msec/pass
+ lxe: xpath_element (--TR T3) 0.1230 msec/pass
+ lxe: xpath_element (--TR T4) 0.3440 msec/pass
While it looks slightly slower, creating an XPath object for each of the
expressions generates a much higher overhead here::
- lxe: xpath_class_repeat (-- T1) 6.8099 msec/pass
- lxe: xpath_class_repeat (-- T2) 26.7462 msec/pass
- lxe: xpath_class_repeat (-- T3) 0.3126 msec/pass
- lxe: xpath_class_repeat (-- T4) 1.1111 msec/pass
+ lxe: xpath_class_repeat (--TC T1) 1.0259 msec/pass
+ lxe: xpath_class_repeat (--TC T2) 20.4861 msec/pass
+ lxe: xpath_class_repeat (--TC T3) 0.1280 msec/pass
+ lxe: xpath_class_repeat (--TC T4) 1.0269 msec/pass
lxml.objectify
--------------
+The following timings are based on the benchmark script `bench_objectify.py`_.
+
Objectify is a data-binding API for XML based on lxml.etree, which was added in
version 1.1. It uses standard Python attribute access to traverse the XML
tree. It also features ObjectPath, a fast path language based on the same
@@ -325,21 +345,21 @@
tree. It avoids step-by-step Python element instantiations along the path,
which can substantially improve the access time::
- lxe: attribute (--T T1) 14.8621 msec/pass
- lxe: attribute (--T T2) 61.8820 msec/pass
- lxe: attribute (--T T4) 14.9317 msec/pass
-
- lxe: objectpath (--T T1) 13.7311 msec/pass
- lxe: objectpath (--T T2) 58.5930 msec/pass
- lxe: objectpath (--T T4) 8.0961 msec/pass
-
- lxe: attributes_deep (--T T1) 81.4488 msec/pass
- lxe: attributes_deep (--T T2) 77.0266 msec/pass
- lxe: attributes_deep (--T T4) 27.1226 msec/pass
-
- lxe: objectpath_deep (--T T1) 63.1915 msec/pass
- lxe: objectpath_deep (--T T2) 65.2469 msec/pass
- lxe: objectpath_deep (--T T4) 11.0138 msec/pass
+ lxe: attribute (--TR T1) 10.6189 msec/pass
+ lxe: attribute (--TR T2) 53.7431 msec/pass
+ lxe: attribute (--TR T4) 10.3359 msec/pass
+
+ lxe: objectpath (--TR T1) 5.8351 msec/pass
+ lxe: objectpath (--TR T2) 48.1579 msec/pass
+ lxe: objectpath (--TR T4) 5.6930 msec/pass
+
+ lxe: attributes_deep (--TR T1) 58.7430 msec/pass
+ lxe: attributes_deep (--TR T2) 63.0901 msec/pass
+ lxe: attributes_deep (--TR T4) 17.4620 msec/pass
+
+ lxe: objectpath_deep (--TR T1) 52.1719 msec/pass
+ lxe: objectpath_deep (--TR T2) 52.9201 msec/pass
+ lxe: objectpath_deep (--TR T4) 7.5650 msec/pass
Note, however, that parsing ObjectPath expressions is not free either, so
this is most effective for frequently accessing the same element.
@@ -361,13 +381,17 @@
subtrees and elements) to cache, you can trade memory usage against access
speed::
- lxe: attribute_cached (--T T1) 10.8343 msec/pass
- lxe: attribute_cached (--T T2) 55.5890 msec/pass
- lxe: attribute_cached (--T T4) 10.9514 msec/pass
-
- lxe: attributes_deep_cached (--T T1) 63.7080 msec/pass
- lxe: attributes_deep_cached (--T T2) 65.6838 msec/pass
- lxe: attributes_deep_cached (--T T4) 15.4514 msec/pass
+ lxe: attribute_cached (--TR T1) 7.9739 msec/pass
+ lxe: attribute_cached (--TR T2) 50.9331 msec/pass
+ lxe: attribute_cached (--TR T4) 7.8540 msec/pass
+
+ lxe: attributes_deep_cached (--TR T1) 51.1391 msec/pass
+ lxe: attributes_deep_cached (--TR T2) 55.7129 msec/pass
+ lxe: attributes_deep_cached (--TR T4) 10.7968 msec/pass
+
+ lxe: objectpath_deep_cached (--TR T1) 47.6151 msec/pass
+ lxe: objectpath_deep_cached (--TR T2) 48.0802 msec/pass
+ lxe: objectpath_deep_cached (--TR T4) 4.0281 msec/pass
Things to note: you cannot currently use ``weakref.WeakKeyDictionary`` objects
for this as lxml's element objects do not support weak references (which are
From scoder at codespeak.net Fri Apr 20 15:35:35 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 20 Apr 2007 15:35:35 +0200 (CEST)
Subject: [Lxml-checkins] r42202 - lxml/trunk/doc
Message-ID: <20070420133535.AD43180B2@code0.codespeak.net>
Author: scoder
Date: Fri Apr 20 15:35:34 2007
New Revision: 42202
Modified:
lxml/trunk/doc/api.txt
Log:
some cleanup, new API section on serialisation
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Fri Apr 20 15:35:34 2007
@@ -31,8 +31,9 @@
3 Trees and Documents
4 Iteration
5 Error handling on exceptions
- 6 xinclude
- 7 write_c14n on ElementTree
+ 6 Serialisation
+ 7 xinclude
+ 8 write_c14n on ElementTree
lxml.etree
@@ -62,17 +63,16 @@
While lxml.etree itself uses the ElementTree API, it is possible to replace
the Element implementation by `custom element subclasses`_. This has been
-used to implement well-known XML APIs on top of lxml. The ``lxml.elements``
-package contains examples. Currently, there is a data-binding implementation
-called `objectify`_, which is similar to the `Amara bindery`_ tool.
-
-Additionally, the `lxml.elements.classlookup`_ module provides a number of
-different schemes to customize the mapping between libxml2 nodes and the
-Element classes used by lxml.etree.
+used to implement well-known XML APIs on top of lxml. For example, lxml ships
+with a data-binding implementation called `objectify`_, which is similar to
+the `Amara bindery`_ tool.
+
+lxml.etree comes with a number of `different lookup schemes`_ to customize the
+mapping between libxml2 nodes and the Element classes used by lxml.etree.
.. _`custom element subclasses`: namespace_extensions.html
.. _`objectify`: objectify.html
-.. _`lxml.elements.classlookup`: elements.html#lxml.elements.classlookup
+.. _`different lookup schemes`: element_classes.html#setting-up-a-class-lookup-scheme
.. _`Amara bindery`: http://uche.ogbuji.net/tech/4suite/amara/
@@ -228,6 +228,31 @@
etc. which are described in their respective sections below.
+Serialisation
+-------------
+
+lxml.etree has direct support for pretty printing XML output. Functions like
+``ElementTree.write()`` and ``tostring()`` support it through a keyword
+argument::
+
+ >>> root = etree.XML("<root><test/></root>")
+ >>> print etree.tostring(root)
+ <root><test/></root>
+
+ >>> print etree.tostring(root, pretty_print=True)
+ <root>
+   <test/>
+ </root>
+
+By default, lxml (and ElementTree) output the XML declaration only if it is
+required. You can enable or disable it explicitly by passing another keyword
+argument for the serialisation::
+
+ >>> print etree.tostring(root, xml_declaration=True)
+ <?xml version='1.0' encoding='ASCII'?>
+ <root><test/></root>
+
+
xinclude
--------
From scoder at codespeak.net Fri Apr 20 15:48:28 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 20 Apr 2007 15:48:28 +0200 (CEST)
Subject: [Lxml-checkins] r42203 - lxml/trunk/doc
Message-ID: <20070420134828.3B28D80B8@code0.codespeak.net>
Author: scoder
Date: Fri Apr 20 15:48:27 2007
New Revision: 42203
Modified:
lxml/trunk/doc/api.txt
Log:
doc link from serialisation to the unicode section
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Fri Apr 20 15:48:27 2007
@@ -252,8 +252,12 @@
+Also see the general remarks on `Unicode support`_.
-xinclude
+.. _`Unicode support`: parsing.html#python-unicode-strings
+
+
+XInclude
--------
Simple XInclude support exists. You can let lxml process xinclude statements
From scoder at codespeak.net Sun Apr 22 20:43:12 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 22 Apr 2007 20:43:12 +0200 (CEST)
Subject: [Lxml-checkins] r42242 - lxml/trunk/src/lxml/tests
Message-ID: <20070422184312.4C2AB809D@code0.codespeak.net>
Author: scoder
Date: Sun Apr 22 20:43:11 2007
New Revision: 42242
Modified:
lxml/trunk/src/lxml/tests/test_xslt.py
Log:
libxslt 1.1.20 also seems to not raise an error when parameters are missing
Modified: lxml/trunk/src/lxml/tests/test_xslt.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xslt.py (original)
+++ lxml/trunk/src/lxml/tests/test_xslt.py Sun Apr 22 20:43:11 2007
@@ -223,7 +223,7 @@
def test_xslt_parameter_missing(self):
# DISABLED - NOT RELIABLE!
- if etree.LIBXSLT_VERSION >= (1,1,18) and etree.LIBXSLT_VERSION < (1,1,20):
+ if etree.LIBXSLT_VERSION >= (1,1,18):
return # no error from libxslt?
# apply() without needed parameter will lead to XSLTApplyError
tree = self.parse('BC')
From scoder at codespeak.net Sun Apr 22 20:58:16 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 22 Apr 2007 20:58:16 +0200 (CEST)
Subject: [Lxml-checkins] r42243 - lxml/trunk/doc
Message-ID: <20070422185816.D24938095@code0.codespeak.net>
Author: scoder
Date: Sun Apr 22 20:58:16 2007
New Revision: 42243
Modified:
lxml/trunk/doc/api.txt
Log:
note on ElementInclude support
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Sun Apr 22 20:58:16 2007
@@ -32,7 +32,7 @@
4 Iteration
5 Error handling on exceptions
6 Serialisation
- 7 xinclude
+ 7 XInclude and ElementInclude
8 write_c14n on ElementTree
@@ -257,11 +257,11 @@
.. _`Unicode support`: parsing.html#python-unicode-strings
-XInclude
---------
+XInclude and ElementInclude
+---------------------------
-Simple XInclude support exists. You can let lxml process xinclude statements
-in a document by calling the xinclude() method on a tree::
+You can let lxml process xinclude statements in a document by calling the
+xinclude() method on a tree::
>>> data = StringIO('''\
...
@@ -274,6 +274,14 @@
>>> etree.tostring(tree.getroot())
'\n\n\n'
+Note that the ElementTree compatible ElementInclude_ module is also supported
+as ``lxml.ElementInclude``. It has the additional advantage of supporting
+custom `URL resolvers`_ at the Python level. The normal XInclude mechanism
+cannot deploy these. If you need ElementTree compatibility or custom
+resolvers, you have to stick to the external Python module.
+
+.. _ElementInclude: http://effbot.org/zone/element-xinclude.htm
+
write_c14n on ElementTree
-------------------------
From scoder at codespeak.net Sun Apr 22 21:01:43 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 22 Apr 2007 21:01:43 +0200 (CEST)
Subject: [Lxml-checkins] r42244 - lxml/trunk/doc
Message-ID: <20070422190143.C70C98095@code0.codespeak.net>
Author: scoder
Date: Sun Apr 22 21:01:36 2007
New Revision: 42244
Modified:
lxml/trunk/doc/api.txt
Log:
small cleanup in docs
Modified: lxml/trunk/doc/api.txt
==============================================================================
--- lxml/trunk/doc/api.txt (original)
+++ lxml/trunk/doc/api.txt Sun Apr 22 21:01:36 2007
@@ -271,8 +271,11 @@
>>> tree = etree.parse(data)
>>> tree.xinclude()
- >>> etree.tostring(tree.getroot())
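- '<doc xmlns:xi="http://www.w3.org/2001/XInclude">\n<foo/>\n<a xml:base="doc/test.xml"/>\n</doc>'
+ >>> print etree.tostring(tree.getroot())
+ <doc xmlns:xi="http://www.w3.org/2001/XInclude">
+ <foo/>
+ <a xml:base="doc/test.xml"/>
+ </doc>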
Note that the ElementTree compatible ElementInclude_ module is also supported
as ``lxml.ElementInclude``. It has the additional advantage of supporting
From scoder at codespeak.net Sun Apr 22 22:04:11 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 22 Apr 2007 22:04:11 +0200 (CEST)
Subject: [Lxml-checkins] r42246 - lxml/trunk/src/lxml
Message-ID: <20070422200411.CD08F809D@code0.codespeak.net>
Author: scoder
Date: Sun Apr 22 22:04:11 2007
New Revision: 42246
Modified:
lxml/trunk/src/lxml/objectify.pyx
Log:
define XMLSchema namespace in objectify and provide default nsmap in DataElement
Modified: lxml/trunk/src/lxml/objectify.pyx
==============================================================================
--- lxml/trunk/src/lxml/objectify.pyx (original)
+++ lxml/trunk/src/lxml/objectify.pyx Sun Apr 22 22:04:11 2007
@@ -101,7 +101,12 @@
setPytypeAttributeTag()
-# namespace for XML Schema instance
+# namespaces for XML Schema
+cdef object XML_SCHEMA_NS
+XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema"
+cdef char* _XML_SCHEMA_NS
+_XML_SCHEMA_NS = _cstr(XML_SCHEMA_NS)
+
cdef object XML_SCHEMA_INSTANCE_NS
XML_SCHEMA_INSTANCE_NS = "http://www.w3.org/2001/XMLSchema-instance"
cdef char* _XML_SCHEMA_INSTANCE_NS
@@ -1694,7 +1699,9 @@
XML = fromstring
cdef object _DEFAULT_NSMAP
-_DEFAULT_NSMAP = { "py": PYTYPE_NAMESPACE, "xsi": XML_SCHEMA_INSTANCE_NS }
+_DEFAULT_NSMAP = { "py" : PYTYPE_NAMESPACE,
+ "xsi" : XML_SCHEMA_INSTANCE_NS,
+ "xsd" : XML_SCHEMA_NS}
def Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes):
"""Objectify specific version of the lxml.etree Element() factory that
@@ -1722,6 +1729,8 @@
if the type can be identified. If '_pytype' or '_xsi' are among the
keyword arguments, they will be used instead.
"""
+ if nsmap is None:
+ nsmap = _DEFAULT_NSMAP
if attrib is not None:
if python.PyDict_Size(_attributes):
attrib.update(_attributes)