Index: src/lxml/objectify.pyx =================================================================== --- src/lxml/objectify.pyx (revision 46370) +++ src/lxml/objectify.pyx (working copy) @@ -1643,8 +1643,8 @@ pass return None -def annotate(element_or_tree, ignore_old=True, ignore_xsi=False, - empty_pytype=None): +def pyannotate(element_or_tree, ignore_old=False, ignore_xsi=False, + empty_pytype=None, keep_tree=False): """Recursively annotates the elements of an XML tree with 'pytype' attributes. @@ -1659,13 +1659,17 @@ The default annotation of empty elements can be set with the ``empty_pytype`` keyword argument. The default is not to annotate empty elements. Pass 'str', for example, to make string values the default. + + py:pytype='TREE' type annotations for "structural" elements can be preserved + by setting the keyword argument ``keep_tree`` to True. By default, these are + removed. ``keep_tree`` is only effective if ``ignore_old`` is false. """ cdef _Element element element = cetree.rootNodeOrRaise(element_or_tree) _annotate(element, 0, 1, bool(ignore_xsi), bool(ignore_old), - None, empty_pytype) + None, empty_pytype, bool(keep_tree)) -def xsiannotate(element_or_tree, ignore_old=True, ignore_pytype=False, +def xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, empty_type=None): """Recursively annotates the elements of an XML tree with 'xsi:type' attributes. @@ -1690,11 +1694,52 @@ cdef _Element element element = cetree.rootNodeOrRaise(element_or_tree) _annotate(element, 1, 0, bool(ignore_old), bool(ignore_pytype), - empty_type, None) + empty_type, None, 0) +def annotate(element_or_tree, ignore_old=True, ignore_xsi=False, + empty_pytype=None, empty_type=None, annotate_xsi=0, + annotate_pytype=1, keep_tree=False): + """Recursively annotates the elements of an XML tree with 'xsi:type' + and/or 'py:pytype' attributes. 
+ + If the 'ignore_old' keyword argument is True (the default), current + 'py:pytype' attributes will be ignored for the type annotation. Set to False + if you want to reuse existing 'py:pytype' information (iff appropriate for the + element text value). + + If the 'ignore_xsi' keyword argument is False (the default), existing + 'xsi:type' attributes will be used for the type annotation, if they fit the + element text values. + + Note that the mapping from Python types to XSI types is usually ambiguous. + Currently, only the first XSI type name in the corresponding PyType + definition will be used for annotation. Thus, you should consider naming + the widest type first if you define additional types. + + The default 'py:pytype' annotation of empty elements can be set with the + ``empty_pytype`` keyword argument. Pass 'str', for example, to make + string values the default. + + The default 'xsi:type' annotation of empty elements can be set with the + ``empty_type`` keyword argument. The default is not to annotate empty + elements. Pass 'string', for example, to make string values the default. + + The keyword arguments 'annotate_xsi' (default: 0) and 'annotate_pytype' + (default: 1) control which kind(s) of annotation to use. + + py:pytype='TREE' type annotations for "structural" elements can be preserved + by setting the keyword argument ``keep_tree`` to True. By default, these are + removed. ``keep_tree`` is only effective if ``ignore_old`` is false. 
+ """ + cdef _Element element + element = cetree.rootNodeOrRaise(element_or_tree) + _annotate(element, annotate_xsi, annotate_pytype, bool(ignore_xsi), + bool(ignore_old), empty_type, empty_pytype, bool(keep_tree)) + + cdef _annotate(_Element element, int annotate_xsi, int annotate_pytype, - int ignore_xsi, int ignore_pytype, - empty_type_name, empty_pytype_name): + int ignore_xsi, int ignore_pytype, empty_type_name, + empty_pytype_name, int keep_tree): cdef _Document doc cdef tree.xmlNode* c_node cdef tree.xmlNs* c_ns @@ -1751,47 +1796,53 @@ if pytype is None and not ignore_pytype: # check that old pytype value is valid - old_value = cetree.attributeValueFromNsName( + old_pytypename = cetree.attributeValueFromNsName( c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) - if old_value is not None and old_value != TREE_PYTYPE: - if old_value == 'none': - # transition from lxml 1.x - old_value = "NoneType" - dict_result = python.PyDict_GetItem(_PYTYPE_DICT, old_value) - if dict_result is not NULL: - pytype = dict_result - if pytype is not StrType: - # StrType does not have a typecheck but is the default - # anyway, so just accept it if given as type information - pytype = _check_type(c_node, pytype) + if old_pytypename is not None: + if old_pytypename != TREE_PYTYPE: + if old_pytypename == 'none': + # transition from lxml 1.x + old_pytypename = "NoneType" + dict_result = python.PyDict_GetItem(_PYTYPE_DICT, + old_pytypename) + if dict_result is not NULL: + pytype = dict_result + if pytype is not StrType: + # StrType does not have a typecheck but is the + # default anyway, so just accept it if given as type + # information + pytype = _check_type(c_node, pytype) + elif keep_tree: + istree = 1 + + if not istree: + if pytype is None: + # try to guess type + if cetree.findChildForwards(c_node, 0) is NULL: + # element has no children => data class + pytype = _guessPyType(textOf(c_node), StrType) + else: + istree = 1 - if pytype is None: - # try to guess type - if 
cetree.findChildForwards(c_node, 0) is NULL: - # element has no children => data class - pytype = _guessPyType(textOf(c_node), StrType) - else: - istree = 1 + if pytype is None: + # use default type for empty elements + if cetree.hasText(c_node): + pytype = StrType + else: + pytype = empty_pytype + if typename is None: + typename = empty_type_name - if pytype is None: - # use default type for empty elements - if cetree.hasText(c_node): - pytype = StrType - else: - pytype = empty_pytype + if pytype is not None: if typename is None: - typename = empty_type_name + if not istree: + if python.PyList_GET_SIZE(pytype._schema_types) > 0: + # pytype->xsi:type is a 1:n mapping + # simply take the first + typename = pytype._schema_types[0] + elif typename not in pytype._schema_types: + typename = pytype._schema_types[0] - if pytype is not None: - if typename is None: - if not istree: - if python.PyList_GET_SIZE(pytype._schema_types) > 0: - # pytype->xsi:type is a 1:n mapping - # simply take the first - typename = pytype._schema_types[0] - elif typename not in pytype._schema_types: - typename = pytype._schema_types[0] - if annotate_xsi: if typename is None or istree: cetree.delAttributeFromNsName( @@ -1817,9 +1868,10 @@ if annotate_pytype: if pytype is None: - # delete attribute if it exists - cetree.delAttributeFromNsName( - c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) + if not istree or not keep_tree: + # delete attribute if it exists + cetree.delAttributeFromNsName( + c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) else: # update or create attribute c_ns = cetree.findOrBuildNodeNsPrefix( Index: src/lxml/tests/test_objectify.py =================================================================== --- src/lxml/tests/test_objectify.py (revision 46370) +++ src/lxml/tests/test_objectify.py (working copy) @@ -17,6 +17,7 @@ XML_SCHEMA_INSTANCE_NS = "http://www.w3.org/2001/XMLSchema-instance" XML_SCHEMA_INSTANCE_TYPE_ATTR = "{%s}type" % XML_SCHEMA_INSTANCE_NS 
XML_SCHEMA_NIL_ATTR = "{%s}nil" % XML_SCHEMA_INSTANCE_NS +TREE_PYTYPE = "TREE" DEFAULT_NSMAP = { "py" : PYTYPE_NAMESPACE, "xsi" : XML_SCHEMA_INSTANCE_NS, "xsd" : XML_SCHEMA_NS} @@ -1038,7 +1039,7 @@ self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR)) - def test_xsitype_annotation(self): + def test_pytype_xsitype_annotation(self): XML = self.XML root = XML(u'''\ 2 ''') - objectify.xsiannotate(root) + objectify.annotate(root, ignore_old=False, ignore_xsi=False, + annotate_xsi=1, annotate_pytype=1) + + # check py annotations + child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE) + for c in root.iterchildren() ] + self.assertEquals("int", child_types[ 0]) + self.assertEquals("str", child_types[ 1]) + self.assertEquals("float", child_types[ 2]) + self.assertEquals("str", child_types[ 3]) + self.assertEquals("bool", child_types[ 4]) + self.assertEquals("NoneType", child_types[ 5]) + self.assertEquals(None, child_types[ 6]) + self.assertEquals("float", child_types[ 7]) + self.assertEquals("float", child_types[ 8]) + self.assertEquals("str", child_types[ 9]) + self.assertEquals("str", child_types[10]) + self.assertEquals("float", child_types[11]) + self.assertEquals("long", child_types[12]) + + self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR)) + child_xsitypes = [ c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR) + for c in root.iterchildren() ] + + # check xsi annotations child_types = [ c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR) for c in root.iterchildren() ] self.assertEquals("xsd:int", child_types[ 0]) @@ -1069,6 +1094,156 @@ self.assertEquals("xsd:boolean", child_types[ 4]) self.assertEquals(None, child_types[ 5]) self.assertEquals(None, child_types[ 6]) + self.assertEquals("xsd:double", child_types[ 7]) + self.assertEquals("xsd:float", child_types[ 8]) + self.assertEquals("xsd:string", child_types[ 9]) + self.assertEquals("xsd:string", child_types[10]) + self.assertEquals("xsd:double", child_types[11]) + self.assertEquals("xsd:integer", child_types[12]) + + def 
test_pyannotate_ignore_old(self): + XML = self.XML + root = XML(u'''\ + + 5 + test + 1.1 + \uF8D2 + true + + + 5 + 5 + 23 + 42 + 300 + 2 + + ''') + objectify.pyannotate(root, ignore_old=True) + + child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE) + for c in root.iterchildren() ] + self.assertEquals("int", child_types[ 0]) + self.assertEquals("str", child_types[ 1]) + self.assertEquals("float", child_types[ 2]) + self.assertEquals("str", child_types[ 3]) + self.assertEquals("bool", child_types[ 4]) + self.assertEquals("NoneType", child_types[ 5]) + self.assertEquals(None, child_types[ 6]) + self.assertEquals("float", child_types[ 7]) + self.assertEquals("float", child_types[ 8]) + self.assertEquals("str", child_types[ 9]) + self.assertEquals("int", child_types[10]) + self.assertEquals("int", child_types[11]) + self.assertEquals("int", child_types[12]) + + self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR)) + + def test_pyannotate_empty(self): + XML = self.XML + root = XML(u'''\ + + + + ''') + objectify.pyannotate(root) + + child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE) + for c in root.iterchildren() ] + self.assertEquals(None, child_types[0]) + + objectify.annotate(root, empty_pytype="str") + + child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE) + for c in root.iterchildren() ] + self.assertEquals("str", child_types[0]) + + def test_pyannotate_use_old(self): + XML = self.XML + root = XML(u'''\ + + 5 + test + 1.1 + \uF8D2 + true + + + 5 + 5 + 23 + 42 + 300 + 2 + + ''') + objectify.pyannotate(root) + + child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE) + for c in root.iterchildren() ] + self.assertEquals("int", child_types[ 0]) + self.assertEquals("str", child_types[ 1]) + self.assertEquals("float", child_types[ 2]) + self.assertEquals("str", child_types[ 3]) + self.assertEquals("bool", child_types[ 4]) + self.assertEquals("NoneType", child_types[ 5]) + self.assertEquals(None, child_types[ 6]) + self.assertEquals("float", child_types[ 7]) + 
self.assertEquals("float", child_types[ 8]) + self.assertEquals("str", child_types[ 9]) + self.assertEquals("str", child_types[10]) + self.assertEquals("float", child_types[11]) + self.assertEquals("long", child_types[12]) + + self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR)) + + def test_pyannotate_keep_tree(self): + root = objectify.Element("root") + root.sub = objectify.Element("sub") + objectify.pyannotate(root, keep_tree=True) + self.assertEquals(root.sub.get(objectify.PYTYPE_ATTRIBUTE), TREE_PYTYPE) + + def test_pyannotate_lose_tree(self): + root = objectify.Element("root") + root.sub = objectify.Element("sub") + objectify.pyannotate(root) + self.assertEquals(root.sub.get(objectify.PYTYPE_ATTRIBUTE), None) + + def test_xsiannotate_ignore_old(self): + XML = self.XML + root = XML(u'''\ + + 5 + test + 1.1 + \uF8D2 + true + + + 5 + 5 + 23 + 42 + 300 + 2 + + ''') + objectify.xsiannotate(root, ignore_old=True) + + child_types = [ c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR) + for c in root.iterchildren() ] + self.assertEquals("xsd:int", child_types[ 0]) + self.assertEquals("xsd:string", child_types[ 1]) + self.assertEquals("xsd:double", child_types[ 2]) + self.assertEquals("xsd:string", child_types[ 3]) + self.assertEquals("xsd:boolean", child_types[ 4]) + self.assertEquals(None, child_types[ 5]) + self.assertEquals(None, child_types[ 6]) self.assertEquals("xsd:int", child_types[ 7]) self.assertEquals("xsd:int", child_types[ 8]) self.assertEquals("xsd:int", child_types[ 9]) @@ -1078,7 +1253,7 @@ self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR)) - def test_xsitype_annotation_use_old(self): + def test_xsiannotate_use_old(self): XML = self.XML root = XML(u'''\