[Lxml-checkins] r44230 - in lxml/branch/lxml-1.3: . doc src/lxml src/lxml/tests
scoder at codespeak.net
scoder at codespeak.net
Wed Jun 13 14:48:23 CEST 2007
Author: scoder
Date: Wed Jun 13 14:48:21 2007
New Revision: 44230
Modified:
lxml/branch/lxml-1.3/CHANGES.txt
lxml/branch/lxml-1.3/doc/objectify.txt
lxml/branch/lxml-1.3/src/lxml/objectify.pyx
lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py
Log:
Holger's objectify.deannotate() and some cleanup in objectify.pyx
Modified: lxml/branch/lxml-1.3/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-1.3/CHANGES.txt (original)
+++ lxml/branch/lxml-1.3/CHANGES.txt Wed Jun 13 14:48:21 2007
@@ -11,6 +11,9 @@
* ``Element.addnext(el)`` and ``Element.addprevious(el)`` methods to support
adding processing instructions and comments around the root node
+* Extended type annotation in objectify: cleaner annotation namespace setup
+ plus new ``xsiannotate()`` and ``deannotate()`` functions
+
* Element.attrib now has a ``pop()`` method
* Support for custom Element class instantiation in lxml.sax: passing a
Modified: lxml/branch/lxml-1.3/doc/objectify.txt
==============================================================================
--- lxml/branch/lxml-1.3/doc/objectify.txt (original)
+++ lxml/branch/lxml-1.3/doc/objectify.txt Wed Jun 13 14:48:21 2007
@@ -699,6 +699,34 @@
s = '5' [StringElement]
* xsi:type = 'string'
+The utility function ``deannotate()`` can be used to get rid of 'py:pytype'
+and/or 'xsi:type' information::
+
+ >>> root = objectify.fromstring('''\
+ ... <root xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ ... <d xsi:type="double">5</d>
+ ... <l xsi:type="long" >5</l>
+ ... <s xsi:type="string">5</s>
+ ... </root>''')
+ >>> objectify.annotate(root)
+ >>> print objectify.dump(root)
+ root = None [ObjectifiedElement]
+ d = 5.0 [FloatElement]
+ * xsi:type = 'double'
+ * py:pytype = 'float'
+ l = 5L [LongElement]
+ * xsi:type = 'long'
+ * py:pytype = 'long'
+ s = '5' [StringElement]
+ * xsi:type = 'string'
+ * py:pytype = 'str'
+ >>> objectify.deannotate(root)
+ >>> print objectify.dump(root)
+ root = None [ObjectifiedElement]
+ d = 5 [IntElement]
+ l = 5 [IntElement]
+ s = 5 [IntElement]
+
For convenience, the ``DataElement()`` factory creates an Element with a
Python value in one step. You can pass the required Python type name or the
XSI type name::
@@ -720,8 +748,8 @@
>>> root.x = objectify.DataElement(5, _xsi="integer")
>>> print objectify.dump(root)
root = None [ObjectifiedElement]
- x = 5 [IntElement]
- * py:pytype = 'int'
+ x = 5L [LongElement]
+ * py:pytype = 'long'
* xsi:type = 'integer'
There is a side effect of the type lookup. If you assign a string value using
Modified: lxml/branch/lxml-1.3/src/lxml/objectify.pyx
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/objectify.pyx (original)
+++ lxml/branch/lxml-1.3/src/lxml/objectify.pyx Wed Jun 13 14:48:21 2007
@@ -707,17 +707,21 @@
"""Boolean type base on string values: 'true' or 'false'.
"""
cdef int _boolval(self) except -1:
+ cdef char* c_str
text = textOf(self._c_node)
if text is None:
return 0
- text = text.lower()
- if text == 'false':
- return 0
- elif text == 'true':
- return 1
- else:
- raise ValueError, "Invalid boolean value: '%s'" % text
-
+ c_str = _cstr(text)
+ if c_str[0] == c'0' or c_str[0] == c'f' or c_str[0] == c'F':
+ if c_str[1] == c'\0' or text == "false" or text.lower() == "false":
+ # '0' or 'f' or 'false'
+ return 0
+ elif c_str[0] == c'1' or c_str[0] == c't' or c_str[0] == c'T':
+ if c_str[1] == c'\0' or text == "true" or text.lower() == "true":
+ # '1' or 't' or 'true'
+ return 1
+ raise ValueError, "Invalid boolean value: '%s'" % text
+
def __nonzero__(self):
if self._boolval():
return True
@@ -882,13 +886,15 @@
cdef _registerPyTypes():
pytype = PyType('int', int, IntElement)
- pytype.xmlSchemaTypes = ("integer", "positiveInteger", "negativeInteger",
- "nonNegativeInteger", "nonPositiveInteger",
- "int", "unsignedInt", "short", "unsignedShort")
+ pytype.xmlSchemaTypes = ("int", "short", "byte", "unsignedShort",
+ "unsignedByte",)
+
pytype.register()
pytype = PyType('long', long, LongElement)
- pytype.xmlSchemaTypes = ("long", "unsignedLong")
+ pytype.xmlSchemaTypes = ("integer", "nonPositiveInteger", "negativeInteger",
+ "long", "nonNegativeInteger", "unsignedLong",
+ "unsignedInt", "positiveInteger",)
pytype.register()
pytype = PyType('float', float, FloatElement)
@@ -900,7 +906,9 @@
pytype.register()
pytype = PyType('str', None, StringElement)
- pytype.xmlSchemaTypes = ("string", "normalizedString")
+ pytype.xmlSchemaTypes = ("string", "normalizedString", "token", "language",
+ "Name", "NCName", "ID", "IDREF", "ENTITY",
+ "NMTOKEN", )
pytype.register()
pytype = PyType('none', None, NoneElement)
@@ -936,12 +944,25 @@
python.PyList_Append(types, pytype)
return types
+cdef PyType _guessPyType(value, PyType defaulttype):
+ if value is None:
+ return None
+ for type_check, tested_pytype in _TYPE_CHECKS:
+ try:
+ type_check(value)
+ return <PyType>tested_pytype
+ except IGNORABLE_ERRORS:
+ # could not be parsed as the specified type => ignore
+ pass
+ return defaulttype
+
cdef object _guessElementClass(tree.xmlNode* c_node):
value = textOf(c_node)
if value is None:
return None
if value == '':
return StringElement
+
for type_check, pytype in _TYPE_CHECKS:
try:
type_check(value)
@@ -1424,11 +1445,26 @@
################################################################################
# Type annotations
+cdef PyType _check_type(tree.xmlNode* c_node, PyType pytype):
+ # no pytype was given, so there is nothing to check against the text
+ # value: pass None straight back to the caller
+ if pytype is None:
+ return pytype
+ value = textOf(c_node)
+ try:
+ pytype.type_check(value)
+ return pytype
+ except IGNORABLE_ERRORS:
+ # could not be parsed as the specified type => ignore
+ pass
+ return None
+
+
def annotate(element_or_tree, ignore_old=True):
"""Recursively annotates the elements of an XML tree with 'pytype'
attributes.
- If the 'ignore_old' keyword argument is True (the default), current
+ If the 'ignore_old' keyword argument is True (the default), current 'pytype'
attributes will be ignored and replaced. Otherwise, they will be checked
and only replaced if they no longer fit the current text value.
"""
@@ -1438,11 +1474,13 @@
cdef tree.xmlNode* c_node
cdef tree.xmlNs* c_ns
cdef python.PyObject* dict_result
+ cdef PyType pytype
element = cetree.rootNodeOrRaise(element_or_tree)
doc = element._doc
ignore = bool(ignore_old)
- StrType = _PYTYPE_DICT.get('str')
+ StrType = _PYTYPE_DICT.get('str')
+ NoneType = _PYTYPE_DICT.get('none')
c_node = element._c_node
tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
pytype = None
@@ -1452,20 +1490,19 @@
old_value = cetree.attributeValueFromNsName(
c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
if old_value is not None and old_value != TREE_PYTYPE:
- pytype = _PYTYPE_DICT.get(old_value)
- if pytype is not None:
- value = textOf(c_node)
- try:
- if not (<PyType>pytype).type_check(value):
- pytype = None
- except ValueError:
- pytype = None
+ dict_result = python.PyDict_GetItem(_PYTYPE_DICT, old_value)
+ if dict_result is not NULL:
+ pytype = <PyType>dict_result
+ if pytype is not StrType:
+ # StrType does not have a typecheck but is the default anyway,
+ # so just accept it if given as type information
+ pytype = _check_type(c_node, pytype)
if pytype is None:
- # if element is defined as xsi:nil, return NoneElement class
+ # if element is defined as xsi:nil, represent it as None
if cetree.attributeValueFromNsName(
c_node, _XML_SCHEMA_INSTANCE_NS, "nil") == "true":
- pytype = _PYTYPE_DICT.get("none")
+ pytype = NoneType
if pytype is None:
# check for XML Schema type hint
@@ -1481,18 +1518,7 @@
# try to guess type
if cetree.findChildForwards(c_node, 0) is NULL:
# element has no children => data class
- if value is None:
- value = textOf(c_node)
- if value is not None:
- for type_check, tested_pytype in _TYPE_CHECKS:
- try:
- if type_check(value) is not False:
- pytype = tested_pytype
- break
- except ValueError:
- pass
- else:
- pytype = StrType
+ pytype = _guessPyType(textOf(c_node), StrType)
if pytype is None:
# delete attribute if it exists
@@ -1505,6 +1531,38 @@
_cstr(pytype.name))
tree.END_FOR_EACH_ELEMENT_FROM(c_node)
+def deannotate(element_or_tree, pytype=True, xsi=True):
+ """Recursively de-annotate the elements of an XML tree by removing 'pytype'
+ and/or 'xsi:type' attributes.
+
+ If the 'pytype' keyword argument is True (the default), 'pytype' attributes
+ will be removed. If the 'xsi' keyword argument is True (the default),
+ 'xsi:type' attributes will be removed.
+ """
+ cdef _Element element
+ cdef tree.xmlNode* c_node
+
+ element = cetree.rootNodeOrRaise(element_or_tree)
+ c_node = element._c_node
+ if pytype and xsi:
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
+ cetree.delAttributeFromNsName(
+ c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
+ cetree.delAttributeFromNsName(
+ c_node, _XML_SCHEMA_INSTANCE_NS, "type")
+ tree.END_FOR_EACH_ELEMENT_FROM(c_node)
+ elif pytype:
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
+ cetree.delAttributeFromNsName(
+ c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
+ tree.END_FOR_EACH_ELEMENT_FROM(c_node)
+ else:
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
+ cetree.delAttributeFromNsName(
+ c_node, _XML_SCHEMA_INSTANCE_NS, "type")
+ tree.END_FOR_EACH_ELEMENT_FROM(c_node)
+
+
################################################################################
# Module level parser setup
@@ -1549,6 +1607,9 @@
XML = fromstring
+cdef object _DEFAULT_NSMAP
+_DEFAULT_NSMAP = { "py": PYTYPE_NAMESPACE, "xsi": XML_SCHEMA_INSTANCE_NS }
+
def Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes):
"""Objectify specific version of the lxml.etree Element() factory that
always creates a structural (tree) element.
@@ -1561,6 +1622,8 @@
_attributes = attrib
if _pytype is None:
_pytype = TREE_PYTYPE
+ if nsmap is None:
+ nsmap = _DEFAULT_NSMAP
_attributes[PYTYPE_ATTRIBUTE] = _pytype
return _makeElement(_tag, None, _attributes, nsmap)
@@ -1569,11 +1632,10 @@
"""Create a new element with a Python value and XML attributes taken from
keyword arguments or a dictionary passed as second argument.
- Automatically adds a 'pyval' attribute for the Python type of the value,
- if the type can be identified. If '_pyval' or '_xsi' are among the
+ Automatically adds a 'pytype' attribute for the Python type of the value,
+ if the type can be identified. If '_pytype' or '_xsi' are among the
keyword arguments, they will be used instead.
"""
- cdef _Element element
if attrib is not None:
if python.PyDict_Size(_attributes):
attrib.update(_attributes)
@@ -1581,7 +1643,10 @@
if _xsi is not None:
python.PyDict_SetItem(_attributes, XML_SCHEMA_INSTANCE_TYPE_ATTR, _xsi)
if _pytype is None:
- _pytype = _SCHEMA_TYPE_DICT[_xsi].name
+ # allow for someone using unregistered or even wrong xsi:type names
+ pytype_lookup = _SCHEMA_TYPE_DICT.get(_xsi)
+ if pytype_lookup is not None:
+ _pytype = pytype_lookup.name
if python._isString(_value):
strval = _value
Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py (original)
+++ lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py Wed Jun 13 14:48:21 2007
@@ -13,6 +13,10 @@
from lxml import objectify
+XML_SCHEMA_INSTANCE_NS = "http://www.w3.org/2001/XMLSchema-instance"
+XML_SCHEMA_INSTANCE_TYPE_ATTR = "{%s}type" % XML_SCHEMA_INSTANCE_NS
+XML_SCHEMA_NIL_ATTR = "{%s}nil" % XML_SCHEMA_INSTANCE_NS
+
xml_str = '''\
<obj:root xmlns:obj="objectified" xmlns:other="otherNS">
<obj:c1 a1="A1" a2="A2" other:a3="A3">
@@ -28,7 +32,7 @@
"""Test cases for lxml.objectify
"""
etree = etree
-
+
def XML(self, xml):
return self.etree.XML(xml, self.parser)
@@ -356,20 +360,69 @@
XML = self.XML
root = XML('''\
<root xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
- <a xsi:type="integer">5</a>
- <a xsi:type="string">5</a>
- <a xsi:type="float">5</a>
+ <b xsi:type="boolean">true</b>
+ <b xsi:type="boolean">false</b>
+ <b xsi:type="boolean">1</b>
+ <b xsi:type="boolean">0</b>
+
+ <f xsi:type="float">5</f>
+ <f xsi:type="double">5</f>
+
+ <s xsi:type="string">5</s>
+ <s xsi:type="normalizedString">5</s>
+ <s xsi:type="token">5</s>
+ <s xsi:type="language">5</s>
+ <s xsi:type="Name">5</s>
+ <s xsi:type="NCName">5</s>
+ <s xsi:type="ID">5</s>
+ <s xsi:type="IDREF">5</s>
+ <s xsi:type="ENTITY">5</s>
+ <s xsi:type="NMTOKEN">5</s>
+
+ <l xsi:type="integer">5</l>
+ <l xsi:type="nonPositiveInteger">5</l>
+ <l xsi:type="negativeInteger">5</l>
+ <l xsi:type="long">5</l>
+ <l xsi:type="nonNegativeInteger">5</l>
+ <l xsi:type="unsignedLong">5</l>
+ <l xsi:type="unsignedInt">5</l>
+ <l xsi:type="positiveInteger">5</l>
+
+ <i xsi:type="int">5</i>
+ <i xsi:type="short">5</i>
+ <i xsi:type="byte">5</i>
+ <i xsi:type="unsignedShort">5</i>
+ <i xsi:type="unsignedByte">5</i>
+
+ <n xsi:nil="true"/>
</root>
''')
- self.assert_(isinstance(root.a[0], objectify.IntElement))
- self.assertEquals(5, root.a[0])
-
- self.assert_(isinstance(root.a[1], objectify.StringElement))
- self.assertEquals("5", root.a[1])
-
- self.assert_(isinstance(root.a[2], objectify.FloatElement))
- self.assertEquals(5.0, root.a[2])
+ for b in root.b:
+ self.assert_(isinstance(b, objectify.BoolElement))
+ self.assertEquals(True, root.b[0])
+ self.assertEquals(False, root.b[1])
+ self.assertEquals(True, root.b[2])
+ self.assertEquals(False, root.b[3])
+
+ for f in root.f:
+ self.assert_(isinstance(f, objectify.FloatElement))
+ self.assertEquals(5, f)
+
+ for s in root.s:
+ self.assert_(isinstance(s, objectify.StringElement))
+ self.assertEquals("5", s)
+
+ for l in root.l:
+ self.assert_(isinstance(l, objectify.LongElement))
+ self.assertEquals(5l, l)
+
+ for i in root.i:
+ self.assert_(isinstance(i, objectify.IntElement))
+ self.assertEquals(5, i)
+
+ self.assert_(isinstance(root.n, objectify.NoneElement))
+ self.assertEquals(None, root.n)
def test_type_str_sequence(self):
XML = self.XML
@@ -444,10 +497,11 @@
root.b = False
self.assertFalse(root.b)
- def test_type_annotation(self):
+ def test_pytype_annotation(self):
XML = self.XML
root = XML(u'''\
- <a xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <a xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:py="http://codespeak.net/lxml/objectify/pytype">
<b>5</b>
<b>test</b>
<c>1.1</c>
@@ -456,6 +510,11 @@
<n xsi:nil="true" />
<n></n>
<b xsi:type="double">5</b>
+ <b xsi:type="float">5</b>
+ <s xsi:type="string">23</s>
+ <s py:pytype="str">42</s>
+ <f py:pytype="float">300</f>
+ <l py:pytype="long">2</l>
</a>
''')
objectify.annotate(root)
@@ -470,6 +529,125 @@
self.assertEquals("none", child_types[5])
self.assertEquals(None, child_types[6])
self.assertEquals("float", child_types[7])
+ self.assertEquals("float", child_types[8])
+ self.assertEquals("str", child_types[9])
+ self.assertEquals("int", child_types[10])
+ self.assertEquals("int", child_types[11])
+ self.assertEquals("int", child_types[12])
+
+ self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR))
+
+ def test_pytype_annotation_use_old(self):
+ XML = self.XML
+ root = XML(u'''\
+ <a xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:py="http://codespeak.net/lxml/objectify/pytype">
+ <b>5</b>
+ <b>test</b>
+ <c>1.1</c>
+ <c>\uF8D2</c>
+ <x>true</x>
+ <n xsi:nil="true" />
+ <n></n>
+ <b xsi:type="double">5</b>
+ <b xsi:type="float">5</b>
+ <s xsi:type="string">23</s>
+ <s py:pytype="str">42</s>
+ <f py:pytype="float">300</f>
+ <l py:pytype="long">2</l>
+ </a>
+ ''')
+ objectify.annotate(root, ignore_old=False)
+
+ child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE)
+ for c in root.iterchildren() ]
+ self.assertEquals("int", child_types[0])
+ self.assertEquals("str", child_types[1])
+ self.assertEquals("float", child_types[2])
+ self.assertEquals("str", child_types[3])
+ self.assertEquals("bool", child_types[4])
+ self.assertEquals("none", child_types[5])
+ self.assertEquals(None, child_types[6])
+ self.assertEquals("float", child_types[7])
+ self.assertEquals("float", child_types[8])
+ self.assertEquals("str", child_types[9])
+ self.assertEquals("str", child_types[10])
+ self.assertEquals("float", child_types[11])
+ self.assertEquals("long", child_types[12])
+
+ self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR))
+
+ def test_deannotate(self):
+ XML = self.XML
+ root = XML(u'''\
+ <a xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:py="http://codespeak.net/lxml/objectify/pytype">
+ <b>5</b>
+ <b>test</b>
+ <c>1.1</c>
+ <c>\uF8D2</c>
+ <x>true</x>
+ <n xsi:nil="true" />
+ <n></n>
+ <b xsi:type="double">5</b>
+ <b xsi:type="float">5</b>
+ <s xsi:type="string">23</s>
+ <s py:pytype="str">42</s>
+ <f py:pytype="float">300</f>
+ <l py:pytype="long">2</l>
+ </a>
+ ''')
+ objectify.deannotate(root)
+
+ for c in root.getiterator():
+ self.assertEquals(None, c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR))
+ self.assertEquals(None, c.get(objectify.PYTYPE_ATTRIBUTE))
+
+ self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR))
+
+ def test_xsitype_deannotate(self):
+ XML = self.XML
+ root = XML(u'''\
+ <a xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:py="http://codespeak.net/lxml/objectify/pytype">
+ <b>5</b>
+ <b>test</b>
+ <c>1.1</c>
+ <c>\uF8D2</c>
+ <x>true</x>
+ <n xsi:nil="true" />
+ <n></n>
+ <b xsi:type="double">5</b>
+ <b xsi:type="float">5</b>
+ <s xsi:type="string">23</s>
+ <s py:pytype="str">42</s>
+ <f py:pytype="float">300</f>
+ <l py:pytype="long">2</l>
+ </a>
+ ''')
+ objectify.annotate(root)
+ objectify.deannotate(root, pytype=False)
+
+ child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE)
+ for c in root.iterchildren() ]
+ self.assertEquals("int", child_types[ 0])
+ self.assertEquals("str", child_types[ 1])
+ self.assertEquals("float", child_types[ 2])
+ self.assertEquals("str", child_types[ 3])
+ self.assertEquals("bool", child_types[ 4])
+ self.assertEquals("none", child_types[ 5])
+ self.assertEquals(None, child_types[ 6])
+ self.assertEquals("float", child_types[ 7])
+ self.assertEquals("float", child_types[ 8])
+ self.assertEquals("str", child_types[ 9])
+ self.assertEquals("int", child_types[10])
+ self.assertEquals("int", child_types[11])
+ self.assertEquals("int", child_types[12])
+
+ self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR))
+
+ for c in root.getiterator():
+ self.assertEquals(None, c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR))
def test_change_pytype_attribute(self):
XML = self.XML
@@ -890,7 +1068,6 @@
etree.tostring(new_root),
etree.tostring(root))
-
def test_suite():
suite = unittest.TestSuite()
suite.addTests([unittest.makeSuite(ObjectifyTestCase)])
More information about the lxml-checkins
mailing list