[Lxml-checkins] r44230 - in lxml/branch/lxml-1.3: . doc src/lxml src/lxml/tests

scoder at codespeak.net scoder at codespeak.net
Wed Jun 13 14:48:23 CEST 2007


Author: scoder
Date: Wed Jun 13 14:48:21 2007
New Revision: 44230

Modified:
   lxml/branch/lxml-1.3/CHANGES.txt
   lxml/branch/lxml-1.3/doc/objectify.txt
   lxml/branch/lxml-1.3/src/lxml/objectify.pyx
   lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py
Log:
Holger's objectify.deannotate() and some cleanup in objectify.pyx

Modified: lxml/branch/lxml-1.3/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-1.3/CHANGES.txt	(original)
+++ lxml/branch/lxml-1.3/CHANGES.txt	Wed Jun 13 14:48:21 2007
@@ -11,6 +11,9 @@
 * ``Element.addnext(el)`` and ``Element.addprevious(el)`` methods to support
   adding processing instructions and comments around the root node
 
+* Extended type annotation in objectify: cleaner annotation namespace setup
+  plus new ``xsiannotate()`` and ``deannotate()`` functions
+
 * Element.attrib now has a ``pop()`` method
 
 * Support for custom Element class instantiation in lxml.sax: passing a

Modified: lxml/branch/lxml-1.3/doc/objectify.txt
==============================================================================
--- lxml/branch/lxml-1.3/doc/objectify.txt	(original)
+++ lxml/branch/lxml-1.3/doc/objectify.txt	Wed Jun 13 14:48:21 2007
@@ -699,6 +699,34 @@
         s = '5' [StringElement]
           * xsi:type = 'string'
 
+The utility function ``deannotate()`` can be used to get rid of 'py:pytype'
+and/or 'xsi:type' information::
+
+    >>> root = objectify.fromstring('''\
+    ... <root xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+    ...   <d xsi:type="double">5</d>
+    ...   <l xsi:type="long"  >5</l>
+    ...   <s xsi:type="string">5</s>
+    ... </root>''')
+    >>> objectify.annotate(root)
+    >>> print objectify.dump(root)
+    root = None [ObjectifiedElement]
+        d = 5.0 [FloatElement]
+          * xsi:type = 'double'
+          * py:pytype = 'float'
+        l = 5L [LongElement]
+          * xsi:type = 'long'
+          * py:pytype = 'long'
+        s = '5' [StringElement]
+          * xsi:type = 'string'
+          * py:pytype = 'str'
+    >>> objectify.deannotate(root)
+    >>> print objectify.dump(root)
+    root = None [ObjectifiedElement]
+        d = 5 [IntElement]
+        l = 5 [IntElement]
+        s = 5 [IntElement]
+
 For convenience, the ``DataElement()`` factory creates an Element with a
 Python value in one step.  You can pass the required Python type name or the
 XSI type name::
@@ -720,8 +748,8 @@
     >>> root.x = objectify.DataElement(5, _xsi="integer")
     >>> print objectify.dump(root)
     root = None [ObjectifiedElement]
-        x = 5 [IntElement]
-          * py:pytype = 'int'
+        x = 5L [LongElement]
+          * py:pytype = 'long'
           * xsi:type = 'integer'
 
 There is a side effect of the type lookup.  If you assign a string value using

Modified: lxml/branch/lxml-1.3/src/lxml/objectify.pyx
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/objectify.pyx	(original)
+++ lxml/branch/lxml-1.3/src/lxml/objectify.pyx	Wed Jun 13 14:48:21 2007
@@ -707,17 +707,21 @@
     """Boolean type base on string values: 'true' or 'false'.
     """
     cdef int _boolval(self) except -1:
+        cdef char* c_str
         text = textOf(self._c_node)
         if text is None:
             return 0
-        text = text.lower()
-        if text == 'false':
-            return 0
-        elif text == 'true':
-            return 1
-        else:
-            raise ValueError, "Invalid boolean value: '%s'" % text
-        
+        c_str = _cstr(text)
+        if c_str[0] == c'0' or c_str[0] == c'f' or c_str[0] == c'F':
+            if c_str[1] == c'\0' or text == "false" or text.lower() == "false":
+                # '0' or 'f' or 'false'
+                return 0
+        elif c_str[0] == c'1' or c_str[0] == c't' or c_str[0] == c'T':
+            if c_str[1] == c'\0' or text == "true" or text.lower() == "true":
+                # '1' or 't' or 'true'
+                return 1
+        raise ValueError, "Invalid boolean value: '%s'" % text
+
     def __nonzero__(self):
         if self._boolval():
             return True
@@ -882,13 +886,15 @@
 
 cdef _registerPyTypes():
     pytype = PyType('int', int, IntElement)
-    pytype.xmlSchemaTypes = ("integer", "positiveInteger", "negativeInteger",
-                             "nonNegativeInteger", "nonPositiveInteger",
-                             "int", "unsignedInt", "short", "unsignedShort")
+    pytype.xmlSchemaTypes = ("int", "short", "byte", "unsignedShort",
+                             "unsignedByte",)
+    
     pytype.register()
 
     pytype = PyType('long', long, LongElement)
-    pytype.xmlSchemaTypes = ("long", "unsignedLong")
+    pytype.xmlSchemaTypes = ("integer", "nonPositiveInteger", "negativeInteger",
+                             "long", "nonNegativeInteger", "unsignedLong",
+                             "unsignedInt", "positiveInteger",)
     pytype.register()
 
     pytype = PyType('float', float, FloatElement)
@@ -900,7 +906,9 @@
     pytype.register()
 
     pytype = PyType('str', None, StringElement)
-    pytype.xmlSchemaTypes = ("string", "normalizedString")
+    pytype.xmlSchemaTypes = ("string", "normalizedString", "token", "language",
+                             "Name", "NCName", "ID", "IDREF", "ENTITY",
+                             "NMTOKEN", )
     pytype.register()
 
     pytype = PyType('none', None, NoneElement)
@@ -936,12 +944,25 @@
             python.PyList_Append(types, pytype)
     return types
 
+cdef PyType _guessPyType(value, PyType defaulttype):
+    if value is None:
+        return None
+    for type_check, tested_pytype in _TYPE_CHECKS:
+        try:
+            type_check(value)
+            return <PyType>tested_pytype
+        except IGNORABLE_ERRORS:
+            # could not be parsed as the specified type => ignore
+            pass
+    return defaulttype
+
 cdef object _guessElementClass(tree.xmlNode* c_node):
     value = textOf(c_node)
     if value is None:
         return None
     if value == '':
         return StringElement
+    
     for type_check, pytype in _TYPE_CHECKS:
         try:
             type_check(value)
@@ -1424,11 +1445,26 @@
 ################################################################################
 # Type annotations
 
+cdef PyType _check_type(tree.xmlNode* c_node, PyType pytype):
+    # StrType does not have a typecheck but is the default anyway,
+    # so just accept it if given as type information
+    if pytype is None:
+        return pytype
+    value = textOf(c_node)
+    try:
+        pytype.type_check(value)
+        return pytype
+    except IGNORABLE_ERRORS:
+        # could not be parsed as the specified type => ignore
+        pass
+    return None
+
+
 def annotate(element_or_tree, ignore_old=True):
     """Recursively annotates the elements of an XML tree with 'pytype'
     attributes.
 
-    If the 'ignore_old' keyword argument is True (the default), current
+    If the 'ignore_old' keyword argument is True (the default), current 'pytype'
     attributes will be ignored and replaced.  Otherwise, they will be checked
     and only replaced if they no longer fit the current text value.
     """
@@ -1438,11 +1474,13 @@
     cdef tree.xmlNode* c_node
     cdef tree.xmlNs*   c_ns
     cdef python.PyObject* dict_result
+    cdef PyType pytype
     element = cetree.rootNodeOrRaise(element_or_tree)
     doc = element._doc
     ignore = bool(ignore_old)
 
-    StrType = _PYTYPE_DICT.get('str')
+    StrType  = _PYTYPE_DICT.get('str')
+    NoneType = _PYTYPE_DICT.get('none')
     c_node = element._c_node
     tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
     pytype = None
@@ -1452,20 +1490,19 @@
         old_value = cetree.attributeValueFromNsName(
             c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
         if old_value is not None and old_value != TREE_PYTYPE:
-            pytype = _PYTYPE_DICT.get(old_value)
-            if pytype is not None:
-                value = textOf(c_node)
-                try:
-                    if not (<PyType>pytype).type_check(value):
-                        pytype = None
-                except ValueError:
-                    pytype = None
+            dict_result = python.PyDict_GetItem(_PYTYPE_DICT, old_value)
+            if dict_result is not NULL:
+                pytype = <PyType>dict_result
+                if pytype is not StrType:
+                    # StrType does not have a typecheck but is the default anyway,
+                    # so just accept it if given as type information
+                    pytype = _check_type(c_node, pytype)
 
     if pytype is None:
-        # if element is defined as xsi:nil, return NoneElement class
+        # if element is defined as xsi:nil, represent it as None
         if cetree.attributeValueFromNsName(
             c_node, _XML_SCHEMA_INSTANCE_NS, "nil") == "true":
-            pytype = _PYTYPE_DICT.get("none")
+            pytype = NoneType
 
     if pytype is None:
         # check for XML Schema type hint
@@ -1481,18 +1518,7 @@
         # try to guess type
         if cetree.findChildForwards(c_node, 0) is NULL:
             # element has no children => data class
-            if value is None:
-                value = textOf(c_node)
-            if value is not None:
-                for type_check, tested_pytype in _TYPE_CHECKS:
-                    try:
-                        if type_check(value) is not False:
-                            pytype = tested_pytype
-                            break
-                    except ValueError:
-                        pass
-                else:
-                    pytype = StrType
+            pytype = _guessPyType(textOf(c_node), StrType)
 
     if pytype is None:
         # delete attribute if it exists
@@ -1505,6 +1531,38 @@
                           _cstr(pytype.name))
     tree.END_FOR_EACH_ELEMENT_FROM(c_node)
 
+def deannotate(element_or_tree, pytype=True, xsi=True):
+    """Recursively de-annotate the elements of an XML tree by removing 'pytype'
+    and/or 'xsi:type' attributes.
+
+    If the 'pytype' keyword argument is True (the default), 'pytype' attributes
+    will be removed. If the 'xsi' keyword argument is True (the default),
+    'xsi:type' attributes will be removed.
+    """
+    cdef _Element  element
+    cdef tree.xmlNode* c_node
+
+    element = cetree.rootNodeOrRaise(element_or_tree)
+    c_node = element._c_node
+    if pytype and xsi:
+        tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
+        cetree.delAttributeFromNsName(
+            c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
+        cetree.delAttributeFromNsName(
+            c_node, _XML_SCHEMA_INSTANCE_NS, "type")
+        tree.END_FOR_EACH_ELEMENT_FROM(c_node)
+    elif pytype:
+        tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
+        cetree.delAttributeFromNsName(
+            c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
+        tree.END_FOR_EACH_ELEMENT_FROM(c_node)
+    else:
+        tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
+        cetree.delAttributeFromNsName(
+            c_node, _XML_SCHEMA_INSTANCE_NS, "type")
+        tree.END_FOR_EACH_ELEMENT_FROM(c_node)
+
+
 ################################################################################
 # Module level parser setup
 
@@ -1549,6 +1607,9 @@
 
 XML = fromstring
 
+cdef object _DEFAULT_NSMAP
+_DEFAULT_NSMAP = { "py": PYTYPE_NAMESPACE, "xsi": XML_SCHEMA_INSTANCE_NS }
+
 def Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes):
     """Objectify specific version of the lxml.etree Element() factory that
     always creates a structural (tree) element.
@@ -1561,6 +1622,8 @@
         _attributes = attrib
     if _pytype is None:
         _pytype = TREE_PYTYPE
+    if nsmap is None:
+        nsmap = _DEFAULT_NSMAP
     _attributes[PYTYPE_ATTRIBUTE] = _pytype
     return _makeElement(_tag, None, _attributes, nsmap)
 
@@ -1569,11 +1632,10 @@
     """Create a new element with a Python value and XML attributes taken from
     keyword arguments or a dictionary passed as second argument.
 
-    Automatically adds a 'pyval' attribute for the Python type of the value,
-    if the type can be identified.  If '_pyval' or '_xsi' are among the
+    Automatically adds a 'pytype' attribute for the Python type of the value,
+    if the type can be identified.  If '_pytype' or '_xsi' are among the
     keyword arguments, they will be used instead.
     """
-    cdef _Element element
     if attrib is not None:
         if python.PyDict_Size(_attributes):
             attrib.update(_attributes)
@@ -1581,7 +1643,10 @@
     if _xsi is not None:
         python.PyDict_SetItem(_attributes, XML_SCHEMA_INSTANCE_TYPE_ATTR, _xsi)
         if _pytype is None:
-            _pytype = _SCHEMA_TYPE_DICT[_xsi].name
+            # allow for someone using unregistered or even wrong xsi:type names
+            pytype_lookup = _SCHEMA_TYPE_DICT.get(_xsi)
+            if pytype_lookup is not None:
+                _pytype = pytype_lookup.name
 
     if python._isString(_value):
         strval = _value

Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py	(original)
+++ lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py	Wed Jun 13 14:48:21 2007
@@ -13,6 +13,10 @@
 
 from lxml import objectify
 
+XML_SCHEMA_INSTANCE_NS = "http://www.w3.org/2001/XMLSchema-instance"
+XML_SCHEMA_INSTANCE_TYPE_ATTR = "{%s}type" % XML_SCHEMA_INSTANCE_NS
+XML_SCHEMA_NIL_ATTR = "{%s}nil" % XML_SCHEMA_INSTANCE_NS
+
 xml_str = '''\
 <obj:root xmlns:obj="objectified" xmlns:other="otherNS">
   <obj:c1 a1="A1" a2="A2" other:a3="A3">
@@ -28,7 +32,7 @@
     """Test cases for lxml.objectify
     """
     etree = etree
-
+    
     def XML(self, xml):
         return self.etree.XML(xml, self.parser)
 
@@ -356,20 +360,69 @@
         XML = self.XML
         root = XML('''\
         <root xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-          <a xsi:type="integer">5</a>
-          <a xsi:type="string">5</a>
-          <a xsi:type="float">5</a>
+          <b xsi:type="boolean">true</b>
+          <b xsi:type="boolean">false</b>
+          <b xsi:type="boolean">1</b>
+          <b xsi:type="boolean">0</b>
+
+          <f xsi:type="float">5</f>
+          <f xsi:type="double">5</f>
+        
+          <s xsi:type="string">5</s>
+          <s xsi:type="normalizedString">5</s>
+          <s xsi:type="token">5</s>
+          <s xsi:type="language">5</s>
+          <s xsi:type="Name">5</s>
+          <s xsi:type="NCName">5</s>
+          <s xsi:type="ID">5</s>
+          <s xsi:type="IDREF">5</s>
+          <s xsi:type="ENTITY">5</s>
+          <s xsi:type="NMTOKEN">5</s>
+
+          <l xsi:type="integer">5</l>
+          <l xsi:type="nonPositiveInteger">5</l>
+          <l xsi:type="negativeInteger">5</l>
+          <l xsi:type="long">5</l>
+          <l xsi:type="nonNegativeInteger">5</l>
+          <l xsi:type="unsignedLong">5</l>
+          <l xsi:type="unsignedInt">5</l>
+          <l xsi:type="positiveInteger">5</l>
+          
+          <i xsi:type="int">5</i>
+          <i xsi:type="short">5</i>
+          <i xsi:type="byte">5</i>
+          <i xsi:type="unsignedShort">5</i>
+          <i xsi:type="unsignedByte">5</i>
+
+          <n xsi:nil="true"/>
         </root>
         ''')
 
-        self.assert_(isinstance(root.a[0], objectify.IntElement))
-        self.assertEquals(5, root.a[0])
-
-        self.assert_(isinstance(root.a[1], objectify.StringElement))
-        self.assertEquals("5", root.a[1])
-
-        self.assert_(isinstance(root.a[2], objectify.FloatElement))
-        self.assertEquals(5.0, root.a[2])
+        for b in root.b:
+            self.assert_(isinstance(b, objectify.BoolElement))
+        self.assertEquals(True, root.b[0])
+        self.assertEquals(False, root.b[1])
+        self.assertEquals(True, root.b[2])
+        self.assertEquals(False, root.b[3])
+
+        for f in root.f:
+            self.assert_(isinstance(f, objectify.FloatElement))
+            self.assertEquals(5, f)
+            
+        for s in root.s:
+            self.assert_(isinstance(s, objectify.StringElement))
+            self.assertEquals("5", s)
+
+        for l in root.l:
+            self.assert_(isinstance(l, objectify.LongElement))
+            self.assertEquals(5l, l)
+
+        for i in root.i:
+            self.assert_(isinstance(i, objectify.IntElement))
+            self.assertEquals(5, i)
+            
+        self.assert_(isinstance(root.n, objectify.NoneElement))
+        self.assertEquals(None, root.n)
 
     def test_type_str_sequence(self):
         XML = self.XML
@@ -444,10 +497,11 @@
         root.b = False
         self.assertFalse(root.b)
 
-    def test_type_annotation(self):
+    def test_pytype_annotation(self):
         XML = self.XML
         root = XML(u'''\
-        <a xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+        <a xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xmlns:py="http://codespeak.net/lxml/objectify/pytype">
           <b>5</b>
           <b>test</b>
           <c>1.1</c>
@@ -456,6 +510,11 @@
           <n xsi:nil="true" />
           <n></n>
           <b xsi:type="double">5</b>
+          <b xsi:type="float">5</b>
+          <s xsi:type="string">23</s>
+          <s py:pytype="str">42</s>
+          <f py:pytype="float">300</f>
+          <l py:pytype="long">2</l>
         </a>
         ''')
         objectify.annotate(root)
@@ -470,6 +529,125 @@
         self.assertEquals("none",  child_types[5])
         self.assertEquals(None,    child_types[6])
         self.assertEquals("float", child_types[7])
+        self.assertEquals("float", child_types[8])
+        self.assertEquals("str", child_types[9])
+        self.assertEquals("int", child_types[10])
+        self.assertEquals("int", child_types[11])
+        self.assertEquals("int", child_types[12])
+        
+        self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR))
+
+    def test_pytype_annotation_use_old(self):
+        XML = self.XML
+        root = XML(u'''\
+        <a xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xmlns:py="http://codespeak.net/lxml/objectify/pytype">
+          <b>5</b>
+          <b>test</b>
+          <c>1.1</c>
+          <c>\uF8D2</c>
+          <x>true</x>
+          <n xsi:nil="true" />
+          <n></n>
+          <b xsi:type="double">5</b>
+          <b xsi:type="float">5</b>
+          <s xsi:type="string">23</s>
+          <s py:pytype="str">42</s>
+          <f py:pytype="float">300</f>
+          <l py:pytype="long">2</l>
+        </a>
+        ''')
+        objectify.annotate(root, ignore_old=False)
+
+        child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE)
+                        for c in root.iterchildren() ]
+        self.assertEquals("int",   child_types[0])
+        self.assertEquals("str",   child_types[1])
+        self.assertEquals("float", child_types[2])
+        self.assertEquals("str",   child_types[3])
+        self.assertEquals("bool",  child_types[4])
+        self.assertEquals("none",  child_types[5])
+        self.assertEquals(None,    child_types[6])
+        self.assertEquals("float", child_types[7])
+        self.assertEquals("float", child_types[8])
+        self.assertEquals("str", child_types[9])
+        self.assertEquals("str", child_types[10])
+        self.assertEquals("float", child_types[11])
+        self.assertEquals("long", child_types[12])
+        
+        self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR))
+
+    def test_deannotate(self):
+        XML = self.XML
+        root = XML(u'''\
+        <a xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xmlns:py="http://codespeak.net/lxml/objectify/pytype">
+          <b>5</b>
+          <b>test</b>
+          <c>1.1</c>
+          <c>\uF8D2</c>
+          <x>true</x>
+          <n xsi:nil="true" />
+          <n></n>
+          <b xsi:type="double">5</b>
+          <b xsi:type="float">5</b>
+          <s xsi:type="string">23</s>
+          <s py:pytype="str">42</s>
+          <f py:pytype="float">300</f>
+          <l py:pytype="long">2</l>
+        </a>
+        ''')
+        objectify.deannotate(root)
+
+        for c in root.getiterator():
+            self.assertEquals(None, c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR))
+            self.assertEquals(None, c.get(objectify.PYTYPE_ATTRIBUTE))
+
+        self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR))
+
+    def test_xsitype_deannotate(self):
+        XML = self.XML
+        root = XML(u'''\
+        <a xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xmlns:py="http://codespeak.net/lxml/objectify/pytype">
+          <b>5</b>
+          <b>test</b>
+          <c>1.1</c>
+          <c>\uF8D2</c>
+          <x>true</x>
+          <n xsi:nil="true" />
+          <n></n>
+          <b xsi:type="double">5</b>
+          <b xsi:type="float">5</b>
+          <s xsi:type="string">23</s>
+          <s py:pytype="str">42</s>
+          <f py:pytype="float">300</f>
+          <l py:pytype="long">2</l>
+        </a>
+        ''')
+        objectify.annotate(root)
+        objectify.deannotate(root, pytype=False)
+
+        child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE)
+                        for c in root.iterchildren() ]
+        self.assertEquals("int",   child_types[ 0])
+        self.assertEquals("str",   child_types[ 1])
+        self.assertEquals("float", child_types[ 2])
+        self.assertEquals("str",   child_types[ 3])
+        self.assertEquals("bool",  child_types[ 4])
+        self.assertEquals("none",  child_types[ 5])
+        self.assertEquals(None,    child_types[ 6])
+        self.assertEquals("float", child_types[ 7])
+        self.assertEquals("float", child_types[ 8])
+        self.assertEquals("str",   child_types[ 9])
+        self.assertEquals("int",   child_types[10])
+        self.assertEquals("int",   child_types[11])
+        self.assertEquals("int",   child_types[12])
+        
+        self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR))
+
+        for c in root.getiterator():
+            self.assertEquals(None, c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR))
 
     def test_change_pytype_attribute(self):
         XML = self.XML
@@ -890,7 +1068,6 @@
             etree.tostring(new_root),
             etree.tostring(root))
 
-
 def test_suite():
     suite = unittest.TestSuite()
     suite.addTests([unittest.makeSuite(ObjectifyTestCase)])


More information about the lxml-checkins mailing list