From scoder at codespeak.net Sun Jul 1 15:00:45 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 1 Jul 2007 15:00:45 +0200 (CEST) Subject: [Lxml-checkins] r44649 - lxml/trunk/src/lxml Message-ID: <20070701130045.C5AC780C6@code0.codespeak.net> Author: scoder Date: Sun Jul 1 15:00:43 2007 New Revision: 44649 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/python.pxd Log: small optimisation in _Element.nsmap Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Jul 1 15:00:43 2007 @@ -739,15 +739,16 @@ cdef xmlNs* c_ns nsmap = {} c_node = self._c_node - while c_node is not NULL and _isElement(c_node): + while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE: c_ns = c_node.nsDef while c_ns is not NULL: if c_ns.prefix is NULL: prefix = None else: prefix = funicode(c_ns.prefix) - if prefix not in nsmap: - nsmap[prefix] = funicode(c_ns.href) + if not python.PyDict_Contains(nsmap, prefix): + python.PyDict_SetItem( + nsmap, prefix, funicode(c_ns.href)) c_ns = c_ns.next c_node = c_node.parent return nsmap Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Sun Jul 1 15:00:43 2007 @@ -55,6 +55,7 @@ cdef int PyDict_DelItem(object d, object key) except -1 cdef void PyDict_Clear(object d) cdef object PyDict_Copy(object d) + cdef int PyDict_Contains(object d, object key) except -1 cdef Py_ssize_t PyDict_Size(object d) cdef object PySequence_List(object o) cdef object PySequence_Tuple(object o) From scoder at codespeak.net Sun Jul 1 15:05:00 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 1 Jul 2007 15:05:00 +0200 (CEST) Subject: [Lxml-checkins] r44650 - in lxml/trunk: . 
src/lxml src/lxml/tests Message-ID: <20070701130500.93E4380C6@code0.codespeak.net> Author: scoder Date: Sun Jul 1 15:05:00 2007 New Revision: 44650 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/objectify.pyx lxml/trunk/src/lxml/tests/test_objectify.py Log: objectify: loads of test updates by Holger (merged from 1.3 branch), fixes for passing None and Element values to DataElement(), type checking in DataElement() Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Jul 1 15:05:00 2007 @@ -8,6 +8,9 @@ Features added -------------- +* objectify.DataElement now supports setting values from existing data + elements (not just plain Python types) and reuses defined namespaces etc. + * E-factory support for lxml.objectify (``objectify.E``) * Entity support through an ``Entity`` factory and element classes. XML @@ -30,6 +33,10 @@ Bugs fixed ---------- +* objectify.DataElement didn't set up None value correctly + +* objectify.DataElement didn't check the value against the provided type hints + * Reference-counting bug in ``Element.attrib.pop()`` * The XML parser did not report undefined entities as error Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Sun Jul 1 15:05:00 2007 @@ -1918,10 +1918,34 @@ cdef python.PyObject* dict_result if nsmap is None: nsmap = _DEFAULT_NSMAP - if attrib is not None: + if attrib is not None and attrib: if python.PyDict_Size(_attributes): + attrib = dict(attrib) attrib.update(_attributes) _attributes = attrib + if isinstance(_value, ObjectifiedDataElement): + # reuse existing nsmap unless redefined in nsmap parameter + temp = _value.nsmap + if temp is not None and temp: + temp = dict(_value.nsmap) + temp.update(nsmap) + nsmap = temp + # reuse existing 
attributes unless redefined in attrib/_attributes + temp = _value.attrib + if temp is not None and temp: + temp = dict(_value.attrib) + temp.update(_attributes) + _attributes = temp + # reuse existing xsi:type or py:pytype attributes, unless provided as + # arguments + if _xsi is None and _pytype is None: + dict_result = python.PyDict_GetItem(_attributes, + XML_SCHEMA_INSTANCE_TYPE_ATTR) + if dict_result is not NULL: + _xsi = dict_result + dict_result = python.PyDict_GetItem(_attributes, PYTYPE_ATTRIBUTE) + if dict_result is not NULL: + _pytype = dict_result if _xsi is not None: if ':' in _xsi: prefix, name = _xsi.split(':', 1) @@ -1956,23 +1980,34 @@ strval = "true" else: strval = "false" + elif _value is None: + strval = None else: strval = str(_value) if _pytype is None: - for type_check, pytype in _TYPE_CHECKS: - try: - type_check(strval) - _pytype = (pytype).name - break - except IGNORABLE_ERRORS: - pass + if strval is not None: + for type_check, pytype in _TYPE_CHECKS: + try: + type_check(strval) + _pytype = (pytype).name + break + except IGNORABLE_ERRORS: + pass if _pytype is None: if _value is None: - _pytype = "none" + python.PyDict_SetItem(_attributes, XML_SCHEMA_INSTANCE_NIL_ATTR, "true") elif python._isString(_value): _pytype = "str" - if _pytype is not None: + else: + # check if type information from arguments is valid + dict_result = python.PyDict_GetItem(_PYTYPE_DICT, _pytype) + if dict_result is not NULL: + type_check = (dict_result).type_check + if type_check is not None: + type_check(strval) + + if _pytype is not None: python.PyDict_SetItem(_attributes, PYTYPE_ATTRIBUTE, _pytype) return _makeElement("value", strval, _attributes, nsmap) Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Sun Jul 1 15:05:00 2007 @@ -13,10 +13,32 @@ from lxml import objectify 
+PYTYPE_NAMESPACE = "http://codespeak.net/lxml/objectify/pytype" XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema" XML_SCHEMA_INSTANCE_NS = "http://www.w3.org/2001/XMLSchema-instance" XML_SCHEMA_INSTANCE_TYPE_ATTR = "{%s}type" % XML_SCHEMA_INSTANCE_NS XML_SCHEMA_NIL_ATTR = "{%s}nil" % XML_SCHEMA_INSTANCE_NS +DEFAULT_NSMAP = { "py" : PYTYPE_NAMESPACE, + "xsi" : XML_SCHEMA_INSTANCE_NS, + "xsd" : XML_SCHEMA_NS} + +objectclass2xsitype = { + # objectify built-in + objectify.IntElement: ("int", "short", "byte", "unsignedShort", + "unsignedByte",), + objectify.LongElement: ("integer", "nonPositiveInteger", "negativeInteger", + "long", "nonNegativeInteger", "unsignedLong", + "unsignedInt", "positiveInteger",), + objectify.FloatElement: ("float", "double"), + objectify.BoolElement: ("boolean",), + objectify.StringElement: ("string", "normalizedString", "token", "language", + "Name", "NCName", "ID", "IDREF", "ENTITY", + "NMTOKEN", ), + # None: xsi:nil="true" + } + +xsitype2objclass = dict(( (v, k) for k in objectclass2xsitype + for v in objectclass2xsitype[k] )) xml_str = '''\ @@ -52,6 +74,205 @@ self.etree.Namespace("otherNS").clear() objectify.setPytypeAttributeTag() + def test_element_nsmap_default(self): + elt = objectify.Element("test") + self.assertEquals(elt.nsmap, DEFAULT_NSMAP) + + def test_element_nsmap_empty(self): + nsmap = {} + elt = objectify.Element("test", nsmap=nsmap) + self.assertEquals(elt.nsmap.values(), [PYTYPE_NAMESPACE]) + + def test_element_nsmap_custom_prefixes(self): + nsmap = {"mypy": PYTYPE_NAMESPACE, + "myxsi": XML_SCHEMA_INSTANCE_NS, + "myxsd": XML_SCHEMA_NS} + elt = objectify.Element("test", nsmap=nsmap) + self.assertEquals(elt.nsmap, nsmap) + + def test_element_nsmap_custom(self): + nsmap = {"my": "someNS", + "myother": "someOtherNS", + "myxsd": XML_SCHEMA_NS} + elt = objectify.Element("test", nsmap=nsmap) + self.assert_(PYTYPE_NAMESPACE in elt.nsmap.values()) + for prefix, ns in nsmap.items(): + self.assert_(prefix in elt.nsmap) + 
self.assertEquals(nsmap[prefix], elt.nsmap[prefix]) + + def test_sub_element_nsmap_default(self): + root = objectify.Element("root") + root.sub = objectify.Element("test") + self.assertEquals(root.sub.nsmap, DEFAULT_NSMAP) + + def test_sub_element_nsmap_empty(self): + root = objectify.Element("root") + nsmap = {} + root.sub = objectify.Element("test", nsmap=nsmap) + self.assertEquals(root.sub.nsmap, DEFAULT_NSMAP) + + def test_sub_element_nsmap_custom_prefixes(self): + root = objectify.Element("root") + nsmap = {"mypy": PYTYPE_NAMESPACE, + "myxsi": XML_SCHEMA_INSTANCE_NS, + "myxsd": XML_SCHEMA_NS} + root.sub = objectify.Element("test", nsmap=nsmap) + self.assertEquals(root.sub.nsmap, DEFAULT_NSMAP) + + def test_sub_element_nsmap_custom(self): + root = objectify.Element("root") + nsmap = {"my": "someNS", + "myother": "someOtherNS", + "myxsd": XML_SCHEMA_NS,} + root.sub = objectify.Element("test", nsmap=nsmap) + expected = nsmap.copy() + del expected["myxsd"] + expected.update(DEFAULT_NSMAP) + self.assertEquals(root.sub.nsmap, expected) + + def test_data_element_nsmap_default(self): + value = objectify.DataElement("test this") + self.assertEquals(value.nsmap, DEFAULT_NSMAP) + + def test_data_element_nsmap_empty(self): + nsmap = {} + value = objectify.DataElement("test this", nsmap=nsmap) + self.assertEquals(value.nsmap.values(), [PYTYPE_NAMESPACE]) + + def test_data_element_nsmap_custom_prefixes(self): + nsmap = {"mypy": PYTYPE_NAMESPACE, + "myxsi": XML_SCHEMA_INSTANCE_NS, + "myxsd": XML_SCHEMA_NS} + value = objectify.DataElement("test this", nsmap=nsmap) + self.assertEquals(value.nsmap, nsmap) + + def test_data_element_nsmap_custom(self): + nsmap = {"my": "someNS", + "myother": "someOtherNS", + "myxsd": XML_SCHEMA_NS,} + value = objectify.DataElement("test", nsmap=nsmap) + self.assert_(PYTYPE_NAMESPACE in value.nsmap.values()) + for prefix, ns in nsmap.items(): + self.assert_(prefix in value.nsmap) + self.assertEquals(nsmap[prefix], value.nsmap[prefix]) + + def 
test_sub_data_element_nsmap_default(self): + root = objectify.Element("root") + root.value = objectify.DataElement("test this") + self.assertEquals(root.value.nsmap, DEFAULT_NSMAP) + + def test_sub_data_element_nsmap_empty(self): + root = objectify.Element("root") + nsmap = {} + root.value = objectify.DataElement("test this", nsmap=nsmap) + self.assertEquals(root.value.nsmap, DEFAULT_NSMAP) + + def test_sub_data_element_nsmap_custom_prefixes(self): + root = objectify.Element("root") + nsmap = {"mypy": PYTYPE_NAMESPACE, + "myxsi": XML_SCHEMA_INSTANCE_NS, + "myxsd": XML_SCHEMA_NS} + root.value = objectify.DataElement("test this", nsmap=nsmap) + self.assertEquals(root.value.nsmap, DEFAULT_NSMAP) + + def test_sub_data_element_nsmap_custom(self): + root = objectify.Element("root") + nsmap = {"my": "someNS", + "myother": "someOtherNS", + "myxsd": XML_SCHEMA_NS} + root.value = objectify.DataElement("test", nsmap=nsmap) + expected = nsmap.copy() + del expected["myxsd"] + expected.update(DEFAULT_NSMAP) + self.assertEquals(root.value.nsmap, expected) + + def test_data_element_attrib_attributes_precedence(self): + # keyword arguments override attrib entries + value = objectify.DataElement(23, _pytype="str", _xsi="foobar", + attrib={"gnu": "muh", "cat": "meeow", + "dog": "wuff"}, + bird="tchilp", dog="grrr") + self.assertEquals(value.get("gnu"), "muh") + self.assertEquals(value.get("cat"), "meeow") + self.assertEquals(value.get("dog"), "grrr") + self.assertEquals(value.get("bird"), "tchilp") + + def test_data_element_data_element_arg(self): + # Check that DataElement preserves all attributes ObjectifiedDataElement + # arguments + arg = objectify.DataElement(23, _pytype="str", _xsi="foobar", + attrib={"gnu": "muh", "cat": "meeow", + "dog": "wuff"}, + bird="tchilp", dog="grrr") + value = objectify.DataElement(arg) + self.assert_(isinstance(value, objectify.StringElement)) + for attr in arg.attrib: + self.assertEquals(value.get(attr), arg.get(attr)) + + def 
test_data_element_data_element_arg_pytype(self): + # Check that _pytype arg overrides original py:pytype of + # ObjectifiedDataElement + arg = objectify.DataElement(23, _pytype="str", _xsi="foobar", + attrib={"gnu": "muh", "cat": "meeow", + "dog": "wuff"}, + bird="tchilp", dog="grrr") + value = objectify.DataElement(arg, _pytype="int") + self.assert_(isinstance(value, objectify.IntElement)) + self.assertEquals(value.get(objectify.PYTYPE_ATTRIBUTE), "int") + for attr in arg.attrib: + if not attr == objectify.PYTYPE_ATTRIBUTE: + self.assertEquals(value.get(attr), arg.get(attr)) + + def test_data_element_data_element_arg_xsitype(self): + # Check that _xsi arg overrides original xsi:type of given + # ObjectifiedDataElement + arg = objectify.DataElement(23, _pytype="str", _xsi="foobar", + attrib={"gnu": "muh", "cat": "meeow", + "dog": "wuff"}, + bird="tchilp", dog="grrr") + value = objectify.DataElement(arg, _xsi="xsd:int") + self.assert_(isinstance(value, objectify.IntElement)) + self.assertEquals(value.get(XML_SCHEMA_INSTANCE_TYPE_ATTR), "xsd:int") + self.assertEquals(value.get(objectify.PYTYPE_ATTRIBUTE), "int") + for attr in arg.attrib: + if not attr in [objectify.PYTYPE_ATTRIBUTE, + XML_SCHEMA_INSTANCE_TYPE_ATTR]: + self.assertEquals(value.get(attr), arg.get(attr)) + + def test_data_element_data_element_arg_pytype_xsitype(self): + # Check that _pytype and _xsi args override original py:pytype and + # xsi:type attributes of given ObjectifiedDataElement + arg = objectify.DataElement(23, _pytype="str", _xsi="foobar", + attrib={"gnu": "muh", "cat": "meeow", + "dog": "wuff"}, + bird="tchilp", dog="grrr") + value = objectify.DataElement(arg, _pytype="int", _xsi="xsd:int") + self.assert_(isinstance(value, objectify.IntElement)) + self.assertEquals(value.get(objectify.PYTYPE_ATTRIBUTE), "int") + self.assertEquals(value.get(XML_SCHEMA_INSTANCE_TYPE_ATTR), "xsd:int") + for attr in arg.attrib: + if not attr in [objectify.PYTYPE_ATTRIBUTE, + XML_SCHEMA_INSTANCE_TYPE_ATTR]: + 
self.assertEquals(value.get(attr), arg.get(attr)) + + def test_data_element_invalid_pytype(self): + self.assertRaises(ValueError, objectify.DataElement, 3.1415, + _pytype="int") + + def test_data_element_invalid_xsi(self): + self.assertRaises(ValueError, objectify.DataElement, 3.1415, + _xsi="xsd:int") + + def test_data_element_data_element_arg_invalid_pytype(self): + arg = objectify.DataElement(3.1415) + self.assertRaises(ValueError, objectify.DataElement, arg, + _pytype="int") + + def test_data_element_data_element_arg_invalid_xsi(self): + arg = objectify.DataElement(3.1415) + self.assertRaises(ValueError, objectify.DataElement, arg, + _xsi="xsd:int") + def test_root(self): root = self.Element("test") self.assert_(isinstance(root, objectify.ObjectifiedElement)) @@ -268,7 +489,7 @@ Element = self.Element SubElement = self.etree.SubElement - nil_attr = "{http://www.w3.org/2001/XMLSchema-instance}nil" + nil_attr = XML_SCHEMA_NIL_ATTR root = Element("{objectified}root") SubElement(root, "{objectified}none") SubElement(root, "{objectified}none", {nil_attr : "true"}) @@ -282,13 +503,19 @@ value = objectify.DataElement(None) self.assert_(isinstance(value, objectify.NoneElement)) self.assertEquals(value, None) + self.assertEquals(value.get(XML_SCHEMA_NIL_ATTR), "true") def test_type_bool(self): Element = self.Element SubElement = self.etree.SubElement root = Element("{objectified}root") - root.none = 'true' - self.assert_(isinstance(root.none, objectify.BoolElement)) + root.bool = 'true' + self.assert_(isinstance(root.bool, objectify.BoolElement)) + self.assertEquals(root.bool, True) + + root.bool = 'false' + self.assert_(isinstance(root.bool, objectify.BoolElement)) + self.assertEquals(root.bool, False) def test_data_element_bool(self): value = objectify.DataElement(True) @@ -357,6 +584,24 @@ self.assert_(isinstance(value, objectify.FloatElement)) self.assertEquals(value, 5.5) + def test_data_element_xsitypes(self): + for xsi, objclass in xsitype2objclass.iteritems(): + 
# 1 is a valid value for all ObjectifiedDataElement classes + value = objectify.DataElement(1, _xsi=xsi) + self.assert_(isinstance(value, objclass)) + + def test_data_element_xsitypes_xsdprefixed(self): + for xsi, objclass in xsitype2objclass.iteritems(): + # 1 is a valid value for all ObjectifiedDataElement classes + value = objectify.DataElement(1, _xsi="xsd:%s" % xsi) + self.assert_(isinstance(value, objclass)) + + def test_data_element_xsitypes_prefixed(self): + for xsi, objclass in xsitype2objclass.iteritems(): + # 1 is a valid value for all ObjectifiedDataElement classes + self.assertRaises(ValueError, objectify.DataElement, 1, + _xsi="foo:%s" % xsi) + def test_schema_types(self): XML = self.XML root = XML('''\ @@ -401,9 +646,9 @@ for b in root.b: self.assert_(isinstance(b, objectify.BoolElement)) - self.assertEquals(True, root.b[0]) + self.assertEquals(True, root.b[0]) self.assertEquals(False, root.b[1]) - self.assertEquals(True, root.b[2]) + self.assertEquals(True, root.b[2]) self.assertEquals(False, root.b[3]) for f in root.f: @@ -416,7 +661,7 @@ for l in root.l: self.assert_(isinstance(l, objectify.LongElement)) - self.assertEquals(5l, l) + self.assertEquals(5L, l) for i in root.i: self.assert_(isinstance(i, objectify.IntElement)) @@ -425,6 +670,75 @@ self.assert_(isinstance(root.n, objectify.NoneElement)) self.assertEquals(None, root.n) + def test_schema_types_prefixed(self): + XML = self.XML + root = XML('''\ + + true + false + 1 + 0 + + 5 + 5 + + 5 + 5 + 5 + 5 + 5 + 5 + 5 + 5 + 5 + 5 + + 5 + 5 + 5 + 5 + 5 + 5 + 5 + 5 + + 5 + 5 + 5 + 5 + 5 + + + + ''') + + for b in root.b: + self.assert_(isinstance(b, objectify.BoolElement)) + self.assertEquals(True, root.b[0]) + self.assertEquals(False, root.b[1]) + self.assertEquals(True, root.b[2]) + self.assertEquals(False, root.b[3]) + + for f in root.f: + self.assert_(isinstance(f, objectify.FloatElement)) + self.assertEquals(5, f) + + for s in root.s: + self.assert_(isinstance(s, objectify.StringElement)) + 
self.assertEquals("5", s) + + for l in root.l: + self.assert_(isinstance(l, objectify.LongElement)) + self.assertEquals(5L, l) + + for i in root.i: + self.assert_(isinstance(i, objectify.IntElement)) + self.assertEquals(5, i) + + self.assert_(isinstance(root.n, objectify.NoneElement)) + self.assertEquals(None, root.n) + def test_type_str_sequence(self): XML = self.XML root = XML(u'whytry') @@ -539,19 +853,19 @@ child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE) for c in root.iterchildren() ] - self.assertEquals("int", child_types[0]) - self.assertEquals("str", child_types[1]) - self.assertEquals("float", child_types[2]) - self.assertEquals("str", child_types[3]) - self.assertEquals("bool", child_types[4]) - self.assertEquals("none", child_types[5]) - self.assertEquals(None, child_types[6]) - self.assertEquals("float", child_types[7]) - self.assertEquals("float", child_types[8]) - self.assertEquals("str", child_types[9]) - self.assertEquals("int", child_types[10]) - self.assertEquals("int", child_types[11]) - self.assertEquals("int", child_types[12]) + self.assertEquals("int", child_types[ 0]) + self.assertEquals("str", child_types[ 1]) + self.assertEquals("float", child_types[ 2]) + self.assertEquals("str", child_types[ 3]) + self.assertEquals("bool", child_types[ 4]) + self.assertEquals("none", child_types[ 5]) + self.assertEquals(None, child_types[ 6]) + self.assertEquals("float", child_types[ 7]) + self.assertEquals("float", child_types[ 8]) + self.assertEquals("str", child_types[ 9]) + self.assertEquals("int", child_types[10]) + self.assertEquals("int", child_types[11]) + self.assertEquals("int", child_types[12]) self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR)) @@ -771,7 +1085,8 @@ XML = self.XML root = XML(u'''\ + xmlns:py="http://codespeak.net/lxml/objectify/pytype" + xmlns:xsd="http://www.w3.org/2001/XMLSchema"> 5 test 1.1 @@ -779,9 +1094,9 @@ true - 5 - 5 - 23 + 5 + 5 + 23 42 300 2 @@ -811,6 +1126,51 @@ for c in root.getiterator(): 
self.assertEquals(None, c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR)) + def test_pytype_deannotate(self): + XML = self.XML + root = XML(u'''\ + + 5 + test + 1.1 + \uF8D2 + true + + + 5 + 5 + 23 + 42 + 300 + 2 + + ''') + objectify.annotate(root) + objectify.deannotate(root, xsi=False) + + child_types = [ c.get(XML_SCHEMA_INSTANCE_TYPE_ATTR) + for c in root.iterchildren() ] + self.assertEquals("xsd:int", child_types[ 0]) + self.assertEquals("xsd:string", child_types[ 1]) + self.assertEquals("xsd:float", child_types[ 2]) + self.assertEquals("xsd:string", child_types[ 3]) + self.assertEquals("xsd:boolean", child_types[ 4]) + self.assertEquals(None, child_types[ 5]) + self.assertEquals(None, child_types[ 6]) + self.assertEquals("xsd:double", child_types[ 7]) + self.assertEquals("xsd:float", child_types[ 8]) + self.assertEquals("xsd:string", child_types[ 9]) + self.assertEquals("xsd:string", child_types[10]) + self.assertEquals("xsd:float", child_types[11]) + self.assertEquals("xsd:long", child_types[12]) + + self.assertEquals("true", root.n.get(XML_SCHEMA_NIL_ATTR)) + + for c in root.getiterator(): + self.assertEquals(None, c.get(objectify.PYTYPE_ATTRIBUTE)) + def test_change_pytype_attribute(self): XML = self.XML From scoder at codespeak.net Sun Jul 1 15:15:15 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 1 Jul 2007 15:15:15 +0200 (CEST) Subject: [Lxml-checkins] r44651 - in lxml/branch/lxml-1.3: . 
src/lxml src/lxml/tests Message-ID: <20070701131515.35A5D80C8@code0.codespeak.net> Author: scoder Date: Sun Jul 1 15:15:14 2007 New Revision: 44651 Modified: lxml/branch/lxml-1.3/CHANGES.txt lxml/branch/lxml-1.3/src/lxml/objectify.pyx lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py Log: objectify: loads of test updates by Holger (merged from 1.3 branch), fixes for passing None and Element values to DataElement(), type checking in DataElement() Modified: lxml/branch/lxml-1.3/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.3/CHANGES.txt (original) +++ lxml/branch/lxml-1.3/CHANGES.txt Sun Jul 1 15:15:14 2007 @@ -8,9 +8,16 @@ Features added -------------- +* objectify.DataElement now supports setting values from existing data + elements (not just plain Python types) and reuses defined namespaces etc. + Bugs fixed ---------- +* objectify.DataElement didn't set up None value correctly + +* objectify.DataElement didn't check the value against the provided type hints + * Reference-counting bug in ``Element.attrib.pop()`` Modified: lxml/branch/lxml-1.3/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/objectify.pyx (original) +++ lxml/branch/lxml-1.3/src/lxml/objectify.pyx Sun Jul 1 15:15:14 2007 @@ -1667,10 +1667,34 @@ cdef python.PyObject* dict_result if nsmap is None: nsmap = _DEFAULT_NSMAP - if attrib is not None: + if attrib is not None and attrib: if python.PyDict_Size(_attributes): + attrib = dict(attrib) attrib.update(_attributes) _attributes = attrib + if isinstance(_value, ObjectifiedDataElement): + # reuse existing nsmap unless redefined in nsmap parameter + temp = _value.nsmap + if temp is not None and temp: + temp = dict(_value.nsmap) + temp.update(nsmap) + nsmap = temp + # reuse existing attributes unless redefined in attrib/_attributes + temp = _value.attrib + if temp is not None and temp: + temp = 
dict(_value.attrib) + temp.update(_attributes) + _attributes = temp + # reuse existing xsi:type or py:pytype attributes, unless provided as + # arguments + if _xsi is None and _pytype is None: + dict_result = python.PyDict_GetItem(_attributes, + XML_SCHEMA_INSTANCE_TYPE_ATTR) + if dict_result is not NULL: + _xsi = dict_result + dict_result = python.PyDict_GetItem(_attributes, PYTYPE_ATTRIBUTE) + if dict_result is not NULL: + _pytype = dict_result if _xsi is not None: if ':' in _xsi: prefix, name = _xsi.split(':', 1) @@ -1693,23 +1717,34 @@ strval = "true" else: strval = "false" + elif _value is None: + strval = None else: strval = str(_value) if _pytype is None: - for type_check, pytype in _TYPE_CHECKS: - try: - type_check(strval) - _pytype = (pytype).name - break - except IGNORABLE_ERRORS: - pass + if strval is not None: + for type_check, pytype in _TYPE_CHECKS: + try: + type_check(strval) + _pytype = (pytype).name + break + except IGNORABLE_ERRORS: + pass if _pytype is None: if _value is None: - _pytype = "none" + python.PyDict_SetItem(_attributes, XML_SCHEMA_INSTANCE_NIL_ATTR, "true") elif python._isString(_value): _pytype = "str" - if _pytype is not None: + else: + # check if type information from arguments is valid + dict_result = python.PyDict_GetItem(_PYTYPE_DICT, _pytype) + if dict_result is not NULL: + type_check = (dict_result).type_check + if type_check is not None: + type_check(strval) + + if _pytype is not None: python.PyDict_SetItem(_attributes, PYTYPE_ATTRIBUTE, _pytype) return _makeElement("value", strval, _attributes, nsmap) Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py Sun Jul 1 15:15:14 2007 @@ -74,116 +74,205 @@ self.etree.Namespace("otherNS").clear() objectify.setPytypeAttributeTag() - def test_element_nsmap(self): - # 
default nsmap - root = objectify.Element("test") - self.assertEquals(root.nsmap, DEFAULT_NSMAP) + def test_element_nsmap_default(self): + elt = objectify.Element("test") + self.assertEquals(elt.nsmap, DEFAULT_NSMAP) - # empty nsmap + def test_element_nsmap_empty(self): nsmap = {} - root = objectify.Element("test", nsmap=nsmap) - self.assertEquals(root.nsmap.values(), [PYTYPE_NAMESPACE]) + elt = objectify.Element("test", nsmap=nsmap) + self.assertEquals(elt.nsmap.values(), [PYTYPE_NAMESPACE]) - # nsmap with custom prefixes - nsmap = {"mypy" : PYTYPE_NAMESPACE, - "myxsi" : XML_SCHEMA_INSTANCE_NS, - "myxsd" : XML_SCHEMA_NS} - root = objectify.Element("test", nsmap=nsmap) - self.assertEquals(root.nsmap, nsmap) + def test_element_nsmap_custom_prefixes(self): + nsmap = {"mypy": PYTYPE_NAMESPACE, + "myxsi": XML_SCHEMA_INSTANCE_NS, + "myxsd": XML_SCHEMA_NS} + elt = objectify.Element("test", nsmap=nsmap) + self.assertEquals(elt.nsmap, nsmap) - # custom nsmap - nsmap = {"my" : "someNS", - "myother" : "someOtherNS", - } - root = objectify.Element("test", nsmap=nsmap) - self.assert_(PYTYPE_NAMESPACE in root.nsmap.values()) + def test_element_nsmap_custom(self): + nsmap = {"my": "someNS", + "myother": "someOtherNS", + "myxsd": XML_SCHEMA_NS} + elt = objectify.Element("test", nsmap=nsmap) + self.assert_(PYTYPE_NAMESPACE in elt.nsmap.values()) for prefix, ns in nsmap.items(): - self.assert_(prefix in root.nsmap) - self.assertEquals(nsmap[prefix], root.nsmap[prefix]) + self.assert_(prefix in elt.nsmap) + self.assertEquals(nsmap[prefix], elt.nsmap[prefix]) - def test_sub_element_nsmap(self): + def test_sub_element_nsmap_default(self): root = objectify.Element("root") - # default nsmap root.sub = objectify.Element("test") self.assertEquals(root.sub.nsmap, DEFAULT_NSMAP) - # empty nsmap + def test_sub_element_nsmap_empty(self): + root = objectify.Element("root") nsmap = {} root.sub = objectify.Element("test", nsmap=nsmap) self.assertEquals(root.sub.nsmap, DEFAULT_NSMAP) - # nsmap 
with custom prefixes - nsmap = {"mypy" : PYTYPE_NAMESPACE, - "myxsi" : XML_SCHEMA_INSTANCE_NS, - "myxsd" : XML_SCHEMA_NS} + def test_sub_element_nsmap_custom_prefixes(self): + root = objectify.Element("root") + nsmap = {"mypy": PYTYPE_NAMESPACE, + "myxsi": XML_SCHEMA_INSTANCE_NS, + "myxsd": XML_SCHEMA_NS} root.sub = objectify.Element("test", nsmap=nsmap) self.assertEquals(root.sub.nsmap, DEFAULT_NSMAP) - # custom nsmap - nsmap = {"my" : "someNS", - "myother" : "someOtherNS", - } + def test_sub_element_nsmap_custom(self): + root = objectify.Element("root") + nsmap = {"my": "someNS", + "myother": "someOtherNS", + "myxsd": XML_SCHEMA_NS,} root.sub = objectify.Element("test", nsmap=nsmap) expected = nsmap.copy() + del expected["myxsd"] expected.update(DEFAULT_NSMAP) self.assertEquals(root.sub.nsmap, expected) - def test_data_element_nsmap(self): - # default nsmap + def test_data_element_nsmap_default(self): value = objectify.DataElement("test this") self.assertEquals(value.nsmap, DEFAULT_NSMAP) - # empty nsmap + def test_data_element_nsmap_empty(self): nsmap = {} value = objectify.DataElement("test this", nsmap=nsmap) self.assertEquals(value.nsmap.values(), [PYTYPE_NAMESPACE]) - # nsmap with custom prefixes - nsmap = {"mypy" : PYTYPE_NAMESPACE, - "myxsi" : XML_SCHEMA_INSTANCE_NS, - "myxsd" : XML_SCHEMA_NS} - + def test_data_element_nsmap_custom_prefixes(self): + nsmap = {"mypy": PYTYPE_NAMESPACE, + "myxsi": XML_SCHEMA_INSTANCE_NS, + "myxsd": XML_SCHEMA_NS} value = objectify.DataElement("test this", nsmap=nsmap) self.assertEquals(value.nsmap, nsmap) - # custom nsmap - nsmap = {"my" : "someNS", - "myother" : "someOtherNS", - } + def test_data_element_nsmap_custom(self): + nsmap = {"my": "someNS", + "myother": "someOtherNS", + "myxsd": XML_SCHEMA_NS,} value = objectify.DataElement("test", nsmap=nsmap) self.assert_(PYTYPE_NAMESPACE in value.nsmap.values()) for prefix, ns in nsmap.items(): self.assert_(prefix in value.nsmap) self.assertEquals(nsmap[prefix], 
value.nsmap[prefix]) - def test_sub_data_element_nsmap(self): + def test_sub_data_element_nsmap_default(self): root = objectify.Element("root") - # default nsmap root.value = objectify.DataElement("test this") self.assertEquals(root.value.nsmap, DEFAULT_NSMAP) - # empty nsmap + def test_sub_data_element_nsmap_empty(self): + root = objectify.Element("root") nsmap = {} root.value = objectify.DataElement("test this", nsmap=nsmap) self.assertEquals(root.value.nsmap, DEFAULT_NSMAP) - # nsmap with custom prefixes - nsmap = {"mypy" : PYTYPE_NAMESPACE, - "myxsi" : XML_SCHEMA_INSTANCE_NS, - "myxsd" : XML_SCHEMA_NS} - + def test_sub_data_element_nsmap_custom_prefixes(self): + root = objectify.Element("root") + nsmap = {"mypy": PYTYPE_NAMESPACE, + "myxsi": XML_SCHEMA_INSTANCE_NS, + "myxsd": XML_SCHEMA_NS} root.value = objectify.DataElement("test this", nsmap=nsmap) self.assertEquals(root.value.nsmap, DEFAULT_NSMAP) - # custom nsmap - nsmap = {"my" : "someNS", - "myother" : "someOtherNS", - } + def test_sub_data_element_nsmap_custom(self): + root = objectify.Element("root") + nsmap = {"my": "someNS", + "myother": "someOtherNS", + "myxsd": XML_SCHEMA_NS} root.value = objectify.DataElement("test", nsmap=nsmap) expected = nsmap.copy() + del expected["myxsd"] expected.update(DEFAULT_NSMAP) self.assertEquals(root.value.nsmap, expected) + def test_data_element_attrib_attributes_precedence(self): + # keyword arguments override attrib entries + value = objectify.DataElement(23, _pytype="str", _xsi="foobar", + attrib={"gnu": "muh", "cat": "meeow", + "dog": "wuff"}, + bird="tchilp", dog="grrr") + self.assertEquals(value.get("gnu"), "muh") + self.assertEquals(value.get("cat"), "meeow") + self.assertEquals(value.get("dog"), "grrr") + self.assertEquals(value.get("bird"), "tchilp") + + def test_data_element_data_element_arg(self): + # Check that DataElement preserves all attributes ObjectifiedDataElement + # arguments + arg = objectify.DataElement(23, _pytype="str", _xsi="foobar", + 
attrib={"gnu": "muh", "cat": "meeow", + "dog": "wuff"}, + bird="tchilp", dog="grrr") + value = objectify.DataElement(arg) + self.assert_(isinstance(value, objectify.StringElement)) + for attr in arg.attrib: + self.assertEquals(value.get(attr), arg.get(attr)) + + def test_data_element_data_element_arg_pytype(self): + # Check that _pytype arg overrides original py:pytype of + # ObjectifiedDataElement + arg = objectify.DataElement(23, _pytype="str", _xsi="foobar", + attrib={"gnu": "muh", "cat": "meeow", + "dog": "wuff"}, + bird="tchilp", dog="grrr") + value = objectify.DataElement(arg, _pytype="int") + self.assert_(isinstance(value, objectify.IntElement)) + self.assertEquals(value.get(objectify.PYTYPE_ATTRIBUTE), "int") + for attr in arg.attrib: + if not attr == objectify.PYTYPE_ATTRIBUTE: + self.assertEquals(value.get(attr), arg.get(attr)) + + def test_data_element_data_element_arg_xsitype(self): + # Check that _xsi arg overrides original xsi:type of given + # ObjectifiedDataElement + arg = objectify.DataElement(23, _pytype="str", _xsi="foobar", + attrib={"gnu": "muh", "cat": "meeow", + "dog": "wuff"}, + bird="tchilp", dog="grrr") + value = objectify.DataElement(arg, _xsi="xsd:int") + self.assert_(isinstance(value, objectify.IntElement)) + self.assertEquals(value.get(XML_SCHEMA_INSTANCE_TYPE_ATTR), "xsd:int") + self.assertEquals(value.get(objectify.PYTYPE_ATTRIBUTE), "int") + for attr in arg.attrib: + if not attr in [objectify.PYTYPE_ATTRIBUTE, + XML_SCHEMA_INSTANCE_TYPE_ATTR]: + self.assertEquals(value.get(attr), arg.get(attr)) + + def test_data_element_data_element_arg_pytype_xsitype(self): + # Check that _pytype and _xsi args override original py:pytype and + # xsi:type attributes of given ObjectifiedDataElement + arg = objectify.DataElement(23, _pytype="str", _xsi="foobar", + attrib={"gnu": "muh", "cat": "meeow", + "dog": "wuff"}, + bird="tchilp", dog="grrr") + value = objectify.DataElement(arg, _pytype="int", _xsi="xsd:int") + self.assert_(isinstance(value, 
objectify.IntElement)) + self.assertEquals(value.get(objectify.PYTYPE_ATTRIBUTE), "int") + self.assertEquals(value.get(XML_SCHEMA_INSTANCE_TYPE_ATTR), "xsd:int") + for attr in arg.attrib: + if not attr in [objectify.PYTYPE_ATTRIBUTE, + XML_SCHEMA_INSTANCE_TYPE_ATTR]: + self.assertEquals(value.get(attr), arg.get(attr)) + + def test_data_element_invalid_pytype(self): + self.assertRaises(ValueError, objectify.DataElement, 3.1415, + _pytype="int") + + def test_data_element_invalid_xsi(self): + self.assertRaises(ValueError, objectify.DataElement, 3.1415, + _xsi="xsd:int") + + def test_data_element_data_element_arg_invalid_pytype(self): + arg = objectify.DataElement(3.1415) + self.assertRaises(ValueError, objectify.DataElement, arg, + _pytype="int") + + def test_data_element_data_element_arg_invalid_xsi(self): + arg = objectify.DataElement(3.1415) + self.assertRaises(ValueError, objectify.DataElement, arg, + _xsi="xsd:int") + def test_root(self): root = self.Element("test") self.assert_(isinstance(root, objectify.ObjectifiedElement)) @@ -400,7 +489,7 @@ Element = self.Element SubElement = self.etree.SubElement - nil_attr = "{http://www.w3.org/2001/XMLSchema-instance}nil" + nil_attr = XML_SCHEMA_NIL_ATTR root = Element("{objectified}root") SubElement(root, "{objectified}none") SubElement(root, "{objectified}none", {nil_attr : "true"}) @@ -414,6 +503,7 @@ value = objectify.DataElement(None) self.assert_(isinstance(value, objectify.NoneElement)) self.assertEquals(value, None) + self.assertEquals(value.get(XML_SCHEMA_NIL_ATTR), "true") def test_type_bool(self): Element = self.Element From scoder at codespeak.net Sun Jul 1 15:16:28 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 1 Jul 2007 15:16:28 +0200 (CEST) Subject: [Lxml-checkins] r44652 - lxml/trunk Message-ID: <20070701131628.2B2EE80C8@code0.codespeak.net> Author: scoder Date: Sun Jul 1 15:16:27 2007 New Revision: 44652 Modified: lxml/trunk/CHANGES.txt Log: changelog update Modified: 
lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Jul 1 15:16:27 2007 @@ -33,6 +33,8 @@ Bugs fixed ---------- +* Better way to prevent crashes in Element proxy cleanup code + * objectify.DataElement didn't set up None value correctly * objectify.DataElement didn't check the value against the provided type hints From scoder at codespeak.net Sun Jul 1 15:18:17 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 1 Jul 2007 15:18:17 +0200 (CEST) Subject: [Lxml-checkins] r44653 - lxml/trunk/src/lxml Message-ID: <20070701131817.C179280C6@code0.codespeak.net> Author: scoder Date: Sun Jul 1 15:18:17 2007 New Revision: 44653 Modified: lxml/trunk/src/lxml/proxy.pxi Log: cleanup Modified: lxml/trunk/src/lxml/proxy.pxi ============================================================================== --- lxml/trunk/src/lxml/proxy.pxi (original) +++ lxml/trunk/src/lxml/proxy.pxi Sun Jul 1 15:18:17 2007 @@ -173,19 +173,6 @@ tree.END_FOR_EACH_ELEMENT_FROM(c_node) return 1 -## cdef void _deallocDocument(xmlDoc* c_doc): -## """We cannot rely on Python's GC to *always* dealloc the _Document *after* -## all proxies it contains => traverse the document and mark all its proxies -## as dead by deleting their xmlNode* reference. 
-## """ -## cdef xmlNode* c_node -## c_node = c_doc.children -## tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_doc, c_node, 1) -## if c_node._private is not NULL: -## (<_Element>c_node._private)._c_node = NULL -## tree.END_FOR_EACH_ELEMENT_FROM(c_node) -## tree.xmlFreeDoc(c_doc) - ################################################################################ # fix _Document references and namespaces when a node changes documents From scoder at codespeak.net Sun Jul 1 15:19:00 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 1 Jul 2007 15:19:00 +0200 (CEST) Subject: [Lxml-checkins] r44654 - in lxml/branch/lxml-1.3: . src/lxml Message-ID: <20070701131900.1555A80A2@code0.codespeak.net> Author: scoder Date: Sun Jul 1 15:18:59 2007 New Revision: 44654 Modified: lxml/branch/lxml-1.3/CHANGES.txt lxml/branch/lxml-1.3/src/lxml/etree.pyx lxml/branch/lxml-1.3/src/lxml/proxy.pxi lxml/branch/lxml-1.3/src/lxml/python.pxd Log: merged in proxy deallocation update from trunk Modified: lxml/branch/lxml-1.3/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.3/CHANGES.txt (original) +++ lxml/branch/lxml-1.3/CHANGES.txt Sun Jul 1 15:18:59 2007 @@ -14,6 +14,8 @@ Bugs fixed ---------- +* Better way to prevent crashes in Element proxy cleanup code + * objectify.DataElement didn't set up None value correctly * objectify.DataElement didn't check the value against the provided type hints Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-1.3/src/lxml/etree.pyx Sun Jul 1 15:18:59 2007 @@ -243,8 +243,8 @@ #displayNode(self._c_doc, 0) #print self._c_doc, self._c_doc.dict is __GLOBAL_PARSER_CONTEXT._c_dict #print self._c_doc, canDeallocateChildNodes(self._c_doc) - #tree.xmlFreeDoc(c_doc) - _deallocDocument(self._c_doc) + tree.xmlFreeDoc(self._c_doc) + 
#_deallocDocument(self._c_doc) cdef getroot(self): cdef xmlNode* c_node Modified: lxml/branch/lxml-1.3/src/lxml/proxy.pxi ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/proxy.pxi (original) +++ lxml/branch/lxml-1.3/src/lxml/proxy.pxi Sun Jul 1 15:18:59 2007 @@ -27,6 +27,8 @@ #print "registering for:", proxy._c_node assert c_node._private is NULL, "double registering proxy!" c_node._private = proxy + # additional INCREF to make sure _Document is GC-ed LAST! + python.Py_INCREF(proxy._doc) cdef unregisterProxy(_Element proxy): """Unregister a proxy for the node it's proxying for. @@ -35,6 +37,7 @@ c_node = proxy._c_node assert c_node._private is proxy, "Tried to unregister unknown proxy" c_node._private = NULL + python.Py_DECREF(proxy._doc) ################################################################################ # temporarily make a node the root node of its document @@ -170,19 +173,6 @@ tree.END_FOR_EACH_ELEMENT_FROM(c_node) return 1 -cdef void _deallocDocument(xmlDoc* c_doc): - """We cannot rely on Python's GC to *always* dealloc the _Document *after* - all proxies it contains => traverse the document and mark all its proxies - as dead by deleting their xmlNode* reference. - """ - cdef xmlNode* c_node - c_node = c_doc.children - tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_doc, c_node, 1) - if c_node._private is not NULL: - (<_Element>c_node._private)._c_node = NULL - tree.END_FOR_EACH_ELEMENT_FROM(c_node) - tree.xmlFreeDoc(c_doc) - ################################################################################ # fix _Document references and namespaces when a node changes documents @@ -303,6 +293,8 @@ if c_element._private is not NULL: element = <_Element>c_element._private if element._doc is not doc: + python.Py_INCREF(doc) + python.Py_DECREF(element._doc) element._doc = doc if c_element is c_start_node: @@ -321,7 +313,11 @@ # fix _Document reference (may dealloc the original document!) 
if c_element._private is not NULL: - (<_Element>c_element._private)._doc = doc + element = <_Element>c_element._private + if element._doc is not doc: + python.Py_INCREF(doc) + python.Py_DECREF(element._doc) + element._doc = doc if c_element is c_start_node: break Modified: lxml/branch/lxml-1.3/src/lxml/python.pxd ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/python.pxd (original) +++ lxml/branch/lxml-1.3/src/lxml/python.pxd Sun Jul 1 15:18:59 2007 @@ -9,6 +9,7 @@ cdef int PY_SSIZE_T_MAX cdef void Py_INCREF(object o) + cdef void Py_DECREF(object o) cdef FILE* PyFile_AsFile(object p) cdef int PyFile_Check(object p) From scoder at codespeak.net Mon Jul 2 10:30:20 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 2 Jul 2007 10:30:20 +0200 (CEST) Subject: [Lxml-checkins] r44658 - lxml/trunk/src/lxml Message-ID: <20070702083020.B092A80B0@code0.codespeak.net> Author: scoder Date: Mon Jul 2 10:30:19 2007 New Revision: 44658 Modified: lxml/trunk/src/lxml/xpath.pxi Log: let repr(XPath()) return original path expression Modified: lxml/trunk/src/lxml/xpath.pxi ============================================================================== --- lxml/trunk/src/lxml/xpath.pxi (original) +++ lxml/trunk/src/lxml/xpath.pxi Mon Jul 2 10:30:19 2007 @@ -392,6 +392,9 @@ if self._xpath is not NULL: xpath.xmlXPathFreeCompExpr(self._xpath) + def __repr__(self): + return path + cdef object _replace_strings cdef object _find_namespaces From scoder at codespeak.net Mon Jul 2 10:31:03 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 2 Jul 2007 10:31:03 +0200 (CEST) Subject: [Lxml-checkins] r44659 - lxml/trunk/src/lxml/tests Message-ID: <20070702083103.BD3A980B3@code0.codespeak.net> Author: scoder Date: Mon Jul 2 10:31:03 2007 New Revision: 44659 Modified: lxml/trunk/src/lxml/tests/test_classlookup.py lxml/trunk/src/lxml/tests/test_dtd.py lxml/trunk/src/lxml/tests/test_elementtree.py 
lxml/trunk/src/lxml/tests/test_errors.py lxml/trunk/src/lxml/tests/test_etree.py lxml/trunk/src/lxml/tests/test_htmlparser.py lxml/trunk/src/lxml/tests/test_io.py lxml/trunk/src/lxml/tests/test_nsclasses.py lxml/trunk/src/lxml/tests/test_objectify.py lxml/trunk/src/lxml/tests/test_pyclasslookup.py lxml/trunk/src/lxml/tests/test_relaxng.py lxml/trunk/src/lxml/tests/test_sax.py lxml/trunk/src/lxml/tests/test_xmlschema.py lxml/trunk/src/lxml/tests/test_xpathevaluator.py lxml/trunk/src/lxml/tests/test_xslt.py Log: disable calling unittest scripts directly Modified: lxml/trunk/src/lxml/tests/test_classlookup.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_classlookup.py (original) +++ lxml/trunk/src/lxml/tests/test_classlookup.py Mon Jul 2 10:31:03 2007 @@ -178,4 +178,4 @@ return suite if __name__ == '__main__': - unittest.main() + print 'to test use test.py %s' % __file__ Modified: lxml/trunk/src/lxml/tests/test_dtd.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_dtd.py (original) +++ lxml/trunk/src/lxml/tests/test_dtd.py Mon Jul 2 10:31:03 2007 @@ -72,4 +72,4 @@ return suite if __name__ == '__main__': - unittest.main() + print 'to test use test.py %s' % __file__ Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Mon Jul 2 10:31:03 2007 @@ -2544,4 +2544,4 @@ return suite if __name__ == '__main__': - unittest.main() + print 'to test use test.py %s' % __file__ Modified: lxml/trunk/src/lxml/tests/test_errors.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_errors.py (original) +++ lxml/trunk/src/lxml/tests/test_errors.py Mon Jul 2 10:31:03 2007 @@ -25,4 +25,4 @@ return suite if 
__name__ == '__main__': - unittest.main() + print 'to test use test.py %s' % __file__ Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Mon Jul 2 10:31:03 2007 @@ -1704,4 +1704,4 @@ return suite if __name__ == '__main__': - unittest.main() + print 'to test use test.py %s' % __file__ Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_htmlparser.py (original) +++ lxml/trunk/src/lxml/tests/test_htmlparser.py Mon Jul 2 10:31:03 2007 @@ -115,4 +115,4 @@ return suite if __name__ == '__main__': - unittest.main() + print 'to test use test.py %s' % __file__ Modified: lxml/trunk/src/lxml/tests/test_io.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_io.py (original) +++ lxml/trunk/src/lxml/tests/test_io.py Mon Jul 2 10:31:03 2007 @@ -172,4 +172,4 @@ return suite if __name__ == '__main__': - unittest.main() + print 'to test use test.py %s' % __file__ Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_nsclasses.py (original) +++ lxml/trunk/src/lxml/tests/test_nsclasses.py Mon Jul 2 10:31:03 2007 @@ -163,4 +163,4 @@ return suite if __name__ == '__main__': - unittest.main() + print 'to test use test.py %s' % __file__ Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Mon Jul 2 10:31:03 2007 @@ -1598,4 +1598,4 @@ return suite if __name__ == '__main__': - unittest.main() + print 'to test use test.py %s' % __file__ Modified: 
lxml/trunk/src/lxml/tests/test_pyclasslookup.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_pyclasslookup.py (original) +++ lxml/trunk/src/lxml/tests/test_pyclasslookup.py Mon Jul 2 10:31:03 2007 @@ -287,4 +287,4 @@ return suite if __name__ == '__main__': - unittest.main() + print 'to test use test.py %s' % __file__ Modified: lxml/trunk/src/lxml/tests/test_relaxng.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_relaxng.py (original) +++ lxml/trunk/src/lxml/tests/test_relaxng.py Mon Jul 2 10:31:03 2007 @@ -142,4 +142,4 @@ return suite if __name__ == '__main__': - unittest.main() + print 'to test use test.py %s' % __file__ Modified: lxml/trunk/src/lxml/tests/test_sax.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_sax.py (original) +++ lxml/trunk/src/lxml/tests/test_sax.py Mon Jul 2 10:31:03 2007 @@ -222,4 +222,4 @@ return suite if __name__ == '__main__': - unittest.main() + print 'to test use test.py %s' % __file__ Modified: lxml/trunk/src/lxml/tests/test_xmlschema.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xmlschema.py (original) +++ lxml/trunk/src/lxml/tests/test_xmlschema.py Mon Jul 2 10:31:03 2007 @@ -81,4 +81,4 @@ return suite if __name__ == '__main__': - unittest.main() + print 'to test use test.py %s' % __file__ Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Mon Jul 2 10:31:03 2007 @@ -532,4 +532,4 @@ return suite if __name__ == '__main__': - unittest.main() + print 'to test use test.py %s' % __file__ Modified: lxml/trunk/src/lxml/tests/test_xslt.py 
============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Mon Jul 2 10:31:03 2007 @@ -1011,4 +1011,4 @@ return suite if __name__ == '__main__': - unittest.main() + print 'to test use test.py %s' % __file__ From scoder at codespeak.net Mon Jul 2 10:53:42 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 2 Jul 2007 10:53:42 +0200 (CEST) Subject: [Lxml-checkins] r44660 - lxml/trunk/doc Message-ID: <20070702085342.89A9080C1@code0.codespeak.net> Author: scoder Date: Mon Jul 2 10:53:42 2007 New Revision: 44660 Modified: lxml/trunk/doc/objectify.txt Log: doc: writing objectify tag languages with E-factory Modified: lxml/trunk/doc/objectify.txt ============================================================================== --- lxml/trunk/doc/objectify.txt (original) +++ lxml/trunk/doc/objectify.txt Mon Jul 2 10:53:42 2007 @@ -287,7 +287,24 @@ true how - + +This allows you to write up a specific language in tags:: + + >>> ROOT = objectify.E.root + >>> TITLE = objectify.E.title + >>> TYPE = objectify.E.type + + >>> root = ROOT( + ... TITLE("The title"), + ... TYPE(5) + ... ) + + >>> print etree.tostring(root, pretty_print=True) + + The title + 5 + + Namespace handling ------------------ From scoder at codespeak.net Mon Jul 2 10:55:24 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 2 Jul 2007 10:55:24 +0200 (CEST) Subject: [Lxml-checkins] r44661 - in lxml/branch/lxml-1.3: . 
doc src/lxml Message-ID: <20070702085524.D8D7C80D2@code0.codespeak.net> Author: scoder Date: Mon Jul 2 10:55:24 2007 New Revision: 44661 Modified: lxml/branch/lxml-1.3/CHANGES.txt lxml/branch/lxml-1.3/doc/objectify.txt lxml/branch/lxml-1.3/src/lxml/builder.py lxml/branch/lxml-1.3/src/lxml/objectify.pyx Log: objectify: merged in E-factory support from trunk Modified: lxml/branch/lxml-1.3/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.3/CHANGES.txt (original) +++ lxml/branch/lxml-1.3/CHANGES.txt Mon Jul 2 10:55:24 2007 @@ -11,6 +11,8 @@ * objectify.DataElement now supports setting values from existing data elements (not just plain Python types) and reuses defined namespaces etc. +* E-factory support for lxml.objectify (``objectify.E``) + Bugs fixed ---------- Modified: lxml/branch/lxml-1.3/doc/objectify.txt ============================================================================== --- lxml/branch/lxml-1.3/doc/objectify.txt (original) +++ lxml/branch/lxml-1.3/doc/objectify.txt Mon Jul 2 10:55:24 2007 @@ -266,6 +266,45 @@ notB +Tree generation with the E-factory +---------------------------------- + +To simplify the generation of trees even further, you can use the E-factory:: + + >>> E = objectify.E + >>> root = E.root( + ... E.a(5), + ... E.b(6.1), + ... E.c(True), + ... E.d("how") + ... ) + + >>> print etree.tostring(root, pretty_print=True) + + 5 + 6.1 + true + how + + +This allows you to write up a specific language in tags:: + + >>> ROOT = objectify.E.root + >>> TITLE = objectify.E.title + >>> TYPE = objectify.E.type + + >>> root = ROOT( + ... TITLE("The title"), + ... TYPE(5) + ... 
) + + >>> print etree.tostring(root, pretty_print=True) + + The title + 5 + + + Namespace handling ------------------ Modified: lxml/branch/lxml-1.3/src/lxml/builder.py ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/builder.py (original) +++ lxml/branch/lxml-1.3/src/lxml/builder.py Mon Jul 2 10:55:24 2007 @@ -140,7 +140,10 @@ elem[-1].tail = (elem[-1].tail or "") + item else: elem.text = (elem.text or "") + item - typemap[str] = typemap[unicode] = add_text + if str not in typemap: + typemap[str] = add_text + if unicode not in typemap: + typemap[unicode] = add_text def add_dict(elem, item): attrib = elem.attrib @@ -149,7 +152,8 @@ attrib[k] = v else: attrib[k] = typemap[type(v)](None, v) - typemap[dict] = add_dict + if dict not in typemap: + typemap[dict] = add_dict self._typemap = typemap Modified: lxml/branch/lxml-1.3/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/objectify.pyx (original) +++ lxml/branch/lxml-1.3/src/lxml/objectify.pyx Mon Jul 2 10:55:24 2007 @@ -65,6 +65,8 @@ cdef object islice from itertools import islice +cdef object _ElementMaker +from builder import ElementMaker as _ElementMaker # namespace/name for "pytype" hint attribute cdef object PYTYPE_NAMESPACE @@ -1633,6 +1635,42 @@ parser = objectify_parser return _parse(f, parser) +class ElementMaker(_ElementMaker): + def __init__(self, typemap=None): + if typemap is None: + typemap = {} + else: + typemap = typemap.copy() + + typemap[__builtin__.str] = __add_text + typemap[__builtin__.unicode] = __add_text + typemap[__builtin__.int] = __add_text + typemap[__builtin__.long] = __add_text + typemap[__builtin__.float] = __add_text + typemap[__builtin__.bool] = __add_text + + _ElementMaker.__init__(self, typemap, objectify_parser.makeelement) + +def __add_text(_Element elem not None, text): + cdef tree.xmlNode* c_child + if isinstance(text, bool): + 
text = str(text).lower() + else: + text = str(text) + c_child = cetree.findChildBackwards(elem._c_node, 0) + if c_child is not NULL: + old = cetree.tailOf(c_child) + if old is not None: + text = old + text + cetree.setTailText(c_child, text) + else: + old = cetree.textOf(elem._c_node) + if old is not None: + text = old + text + cetree.setNodeText(elem._c_node, text) + +E = ElementMaker() + cdef object _DEFAULT_NSMAP _DEFAULT_NSMAP = { "py" : PYTYPE_NAMESPACE, "xsi" : XML_SCHEMA_INSTANCE_NS, From scoder at codespeak.net Mon Jul 2 10:55:57 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 2 Jul 2007 10:55:57 +0200 (CEST) Subject: [Lxml-checkins] r44662 - lxml/branch/lxml-1.3 Message-ID: <20070702085557.B011680E3@code0.codespeak.net> Author: scoder Date: Mon Jul 2 10:55:57 2007 New Revision: 44662 Modified: lxml/branch/lxml-1.3/version.txt Log: version 1.3.1 Modified: lxml/branch/lxml-1.3/version.txt ============================================================================== --- lxml/branch/lxml-1.3/version.txt (original) +++ lxml/branch/lxml-1.3/version.txt Mon Jul 2 10:55:57 2007 @@ -1 +1 @@ -1.3 +1.3.1 From scoder at codespeak.net Mon Jul 2 11:03:35 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 2 Jul 2007 11:03:35 +0200 (CEST) Subject: [Lxml-checkins] r44663 - in lxml/branch/lxml-1.3: . 
doc Message-ID: <20070702090335.CA74880B0@code0.codespeak.net> Author: scoder Date: Mon Jul 2 11:03:35 2007 New Revision: 44663 Modified: lxml/branch/lxml-1.3/CHANGES.txt lxml/branch/lxml-1.3/doc/main.txt Log: prepare release of 1.3.1 Modified: lxml/branch/lxml-1.3/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.3/CHANGES.txt (original) +++ lxml/branch/lxml-1.3/CHANGES.txt Mon Jul 2 11:03:35 2007 @@ -2,8 +2,8 @@ lxml changelog ============== -Under development -================= +1.3.1 (2007-07-02) +================== Features added -------------- Modified: lxml/branch/lxml-1.3/doc/main.txt ============================================================================== --- lxml/branch/lxml-1.3/doc/main.txt (original) +++ lxml/branch/lxml-1.3/doc/main.txt Mon Jul 2 11:03:35 2007 @@ -129,7 +129,7 @@ .. _`lxml at the Python cheeseshop`: http://cheeseshop.python.org/pypi/lxml/ .. _`this key`: pubkey.asc -The latest version is `lxml 1.3`_, released 2007-06-24 (`changes for 1.3`_). +The latest version is `lxml 1.3.1`_, released 2007-07-02 (`changes for 1.3.1`_). `Older versions`_ are listed below. .. _`Older versions`: #old-versions @@ -179,6 +179,8 @@ Old Versions ------------ +* `lxml 1.3`_, released 2007-06-24 (`changes for 1.3`_) + * `lxml 1.2.1`_, released 2007-02-27 (`changes for 1.2.1`_) * `lxml 1.2`_, released 2007-02-20 (`changes for 1.2`_) @@ -215,6 +217,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 1.3.1`: lxml-1.3.1.tgz .. _`lxml 1.3`: lxml-1.3.tgz .. _`lxml 1.2.1`: lxml-1.2.1.tgz .. _`lxml 1.2`: lxml-1.2.tgz @@ -235,7 +238,8 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz -.. _`CHANGES for 1.3`: changes-1.3.html +.. _`changes for 1.3.1`: changes-1.3.1.html +.. _`changes for 1.3`: changes-1.3.html .. _`changes for 1.2.1`: changes-1.2.1.html .. _`changes for 1.2`: changes-1.2.html .. 
_`changes for 1.1.2`: changes-1.1.2.html From scoder at codespeak.net Mon Jul 2 11:05:48 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 2 Jul 2007 11:05:48 +0200 (CEST) Subject: [Lxml-checkins] r44664 - in lxml/trunk: . doc Message-ID: <20070702090548.3FC0E80BB@code0.codespeak.net> Author: scoder Date: Mon Jul 2 11:05:47 2007 New Revision: 44664 Modified: lxml/trunk/CHANGES.txt lxml/trunk/doc/main.txt Log: merged in release changes for 1.3.1 Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Jul 2 11:05:47 2007 @@ -8,11 +8,6 @@ Features added -------------- -* objectify.DataElement now supports setting values from existing data - elements (not just plain Python types) and reuses defined namespaces etc. - -* E-factory support for lxml.objectify (``objectify.E``) - * Entity support through an ``Entity`` factory and element classes. XML parsers now have a ``resolve_entities`` keyword argument that can be set to False to keep entities in the document. @@ -33,14 +28,6 @@ Bugs fixed ---------- -* Better way to prevent crashes in Element proxy cleanup code - -* objectify.DataElement didn't set up None value correctly - -* objectify.DataElement didn't check the value against the provided type hints - -* Reference-counting bug in ``Element.attrib.pop()`` - * The XML parser did not report undefined entities as error * The text in exceptions raised by XML parsers, validators and XPath @@ -56,6 +43,29 @@ * major refactoring in XPath/XSLT extension function code +1.3.1 (2007-07-02) +================== + +Features added +-------------- + +* objectify.DataElement now supports setting values from existing data + elements (not just plain Python types) and reuses defined namespaces etc. 
+ +* E-factory support for lxml.objectify (``objectify.E``) + +Bugs fixed +---------- + +* Better way to prevent crashes in Element proxy cleanup code + +* objectify.DataElement didn't set up None value correctly + +* objectify.DataElement didn't check the value against the provided type hints + +* Reference-counting bug in ``Element.attrib.pop()`` + + 1.3 (2007-06-24) ================ Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Mon Jul 2 11:05:47 2007 @@ -129,7 +129,7 @@ .. _`lxml at the Python cheeseshop`: http://cheeseshop.python.org/pypi/lxml/ .. _`this key`: pubkey.asc -The latest version is `lxml 1.3`_, released 2007-06-24 (`changes for 1.3`_). +The latest version is `lxml 1.3.1`_, released 2007-07-02 (`changes for 1.3.1`_). `Older versions`_ are listed below. .. _`Older versions`: #old-versions @@ -179,6 +179,8 @@ Old Versions ------------ +* `lxml 1.3`_, released 2007-06-24 (`changes for 1.3`_) + * `lxml 1.2.1`_, released 2007-02-27 (`changes for 1.2.1`_) * `lxml 1.2`_, released 2007-02-20 (`changes for 1.2`_) @@ -215,6 +217,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 1.3.1`: lxml-1.3.1.tgz .. _`lxml 1.3`: lxml-1.3.tgz .. _`lxml 1.2.1`: lxml-1.2.1.tgz .. _`lxml 1.2`: lxml-1.2.tgz @@ -235,7 +238,8 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz -.. _`CHANGES for 1.3`: changes-1.3.html +.. _`changes for 1.3.1`: changes-1.3.1.html +.. _`changes for 1.3`: changes-1.3.html .. _`changes for 1.2.1`: changes-1.2.1.html .. _`changes for 1.2`: changes-1.2.html .. 
_`changes for 1.1.2`: changes-1.1.2.html From scoder at codespeak.net Mon Jul 2 16:49:19 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 2 Jul 2007 16:49:19 +0200 (CEST) Subject: [Lxml-checkins] r44668 - lxml/trunk/src/lxml Message-ID: <20070702144919.81EA780BA@code0.codespeak.net> Author: scoder Date: Mon Jul 2 16:49:18 2007 New Revision: 44668 Modified: lxml/trunk/src/lxml/objectify.pyx lxml/trunk/src/lxml/pyclasslookup.pyx Log: provide __version__ in all Pyrex modules Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Mon Jul 2 16:49:18 2007 @@ -13,6 +13,8 @@ # initialize C-API of lxml.etree import_etree(etree) +__version__ = etree.__version__ + cdef object SubElement SubElement = etree.SubElement Modified: lxml/trunk/src/lxml/pyclasslookup.pyx ============================================================================== --- lxml/trunk/src/lxml/pyclasslookup.pyx (original) +++ lxml/trunk/src/lxml/pyclasslookup.pyx Mon Jul 2 16:49:18 2007 @@ -15,6 +15,8 @@ # initialize C-API of lxml.etree import_etree(etree) +__version__ = etree.__version__ + cdef class _ElementProxy: cdef tree.xmlNode* _c_node cdef object _source_proxy From scoder at codespeak.net Mon Jul 2 16:50:28 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 2 Jul 2007 16:50:28 +0200 (CEST) Subject: [Lxml-checkins] r44669 - lxml/trunk/src/lxml Message-ID: <20070702145028.9E8C780BC@code0.codespeak.net> Author: scoder Date: Mon Jul 2 16:50:28 2007 New Revision: 44669 Modified: lxml/trunk/src/lxml/etree.pyx Log: support dev/alpha/beta versions with number postfix Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Mon Jul 2 16:50:28 2007 @@ -131,12 +131,19 @@ 
try: item = int(item) except ValueError: - if item == 'dev': - item = -3 - elif item == 'alpha': - item = -2 - elif item == 'beta': - item = -1 + if item.startswith('dev'): + count = item[3:] + item = -30 + elif item.startswith('alpha'): + count = item[5:] + item = -20 + elif item.startswith('beta'): + count = item[4:] + item = -10 + else: + count = 0 + if count: + item = item + int(count) version_list.append(item) return tuple(version_list) From scoder at codespeak.net Mon Jul 2 16:52:54 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 2 Jul 2007 16:52:54 +0200 (CEST) Subject: [Lxml-checkins] r44670 - lxml/branch/lxml-1.3/src/lxml Message-ID: <20070702145254.EAD7D80B6@code0.codespeak.net> Author: scoder Date: Mon Jul 2 16:52:54 2007 New Revision: 44670 Modified: lxml/branch/lxml-1.3/src/lxml/objectify.pyx lxml/branch/lxml-1.3/src/lxml/pyclasslookup.pyx Log: provide __version__ in all Pyrex modules Modified: lxml/branch/lxml-1.3/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/objectify.pyx (original) +++ lxml/branch/lxml-1.3/src/lxml/objectify.pyx Mon Jul 2 16:52:54 2007 @@ -13,6 +13,8 @@ # initialize C-API of lxml.etree import_etree(etree) +__version__ = etree.__version__ + cdef object SubElement SubElement = etree.SubElement Modified: lxml/branch/lxml-1.3/src/lxml/pyclasslookup.pyx ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/pyclasslookup.pyx (original) +++ lxml/branch/lxml-1.3/src/lxml/pyclasslookup.pyx Mon Jul 2 16:52:54 2007 @@ -15,6 +15,8 @@ # initialize C-API of lxml.etree import_etree(etree) +__version__ = etree.__version__ + cdef class _ElementProxy: cdef tree.xmlNode* _c_node cdef object _source_proxy From ianb at codespeak.net Mon Jul 2 19:20:57 2007 From: ianb at codespeak.net (ianb at codespeak.net) Date: Mon, 2 Jul 2007 19:20:57 +0200 (CEST) Subject: [Lxml-checkins] r44675 - 
lxml/branch/html/src/lxml/html Message-ID: <20070702172057.73F2580BA@code0.codespeak.net> Author: ianb Date: Mon Jul 2 19:20:56 2007 New Revision: 44675 Modified: lxml/branch/html/src/lxml/html/css.py Log: Change up the namespace function a bit Modified: lxml/branch/html/src/lxml/html/css.py ============================================================================== --- lxml/branch/html/src/lxml/html/css.py (original) +++ lxml/branch/html/src/lxml/html/css.py Mon Jul 2 19:20:56 2007 @@ -155,7 +155,9 @@ def _make_lower_case(context, s): return s.lower() -etree.FunctionNamespace("css")['lower-case'] = _make_lower_case +ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/') +ns.prefix = 'css' +ns['lower-case'] = _make_lower_case class Pseudo(object): """ From scoder at codespeak.net Mon Jul 2 19:34:04 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 2 Jul 2007 19:34:04 +0200 (CEST) Subject: [Lxml-checkins] r44676 - lxml/trunk/src/lxml Message-ID: <20070702173404.056A080B2@code0.codespeak.net> Author: scoder Date: Mon Jul 2 19:34:04 2007 New Revision: 44676 Modified: lxml/trunk/src/lxml/etree.pyx Log: allow for a lot of alpha/beta versions :) Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Mon Jul 2 19:34:04 2007 @@ -133,16 +133,16 @@ except ValueError: if item.startswith('dev'): count = item[3:] - item = -30 + item = -300 elif item.startswith('alpha'): count = item[5:] - item = -20 + item = -200 elif item.startswith('beta'): count = item[4:] - item = -10 + item = -100 else: count = 0 - if count: + if count.trim(): item = item + int(count) version_list.append(item) return tuple(version_list) From scoder at codespeak.net Mon Jul 2 19:34:27 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 2 Jul 2007 19:34:27 +0200 (CEST) Subject: [Lxml-checkins] r44677 - 
lxml/trunk/src/lxml Message-ID: <20070702173427.9CC8F80BA@code0.codespeak.net> Author: scoder Date: Mon Jul 2 19:34:27 2007 New Revision: 44677 Modified: lxml/trunk/src/lxml/etree.pyx Log: small fix Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Mon Jul 2 19:34:27 2007 @@ -142,7 +142,7 @@ item = -100 else: count = 0 - if count.trim(): + if count: item = item + int(count) version_list.append(item) return tuple(version_list) From scoder at codespeak.net Mon Jul 2 20:29:52 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 2 Jul 2007 20:29:52 +0200 (CEST) Subject: [Lxml-checkins] r44678 - lxml/trunk/src/lxml/tests Message-ID: <20070702182952.9567380AB@code0.codespeak.net> Author: scoder Date: Mon Jul 2 20:29:50 2007 New Revision: 44678 Modified: lxml/trunk/src/lxml/tests/test_etree.py Log: test script cleanup Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Mon Jul 2 20:29:50 2007 @@ -14,7 +14,7 @@ from common_imports import SillyFileLike, canonicalize, doctest print -print "TESTED VERSION:" +print "TESTED VERSION:", etree.__version__ print " Python: ", sys.version_info print " lxml.etree: ", etree.LXML_VERSION print " libxml used: ", etree.LIBXML_VERSION @@ -24,7 +24,7 @@ print try: - sorted(()) + sorted except NameError: # Python 2.3 def sorted(seq): From scoder at codespeak.net Mon Jul 2 21:44:11 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 2 Jul 2007 21:44:11 +0200 (CEST) Subject: [Lxml-checkins] r44679 - lxml/trunk/doc Message-ID: <20070702194411.F1DB280AA@code0.codespeak.net> Author: scoder Date: Mon Jul 2 21:44:10 2007 New Revision: 44679 Modified: lxml/trunk/doc/tutorial.txt Log: doctest 
readability Modified: lxml/trunk/doc/tutorial.txt ============================================================================== --- lxml/trunk/doc/tutorial.txt (original) +++ lxml/trunk/doc/tutorial.txt Mon Jul 2 21:44:10 2007 @@ -426,7 +426,7 @@ ... E.body( ... E.h1("Hello!", CLASS("title")), ... E.p("This is a paragraph with ", E.b("bold"), " text in it!"), - ... E.p("This is another paragraph, with a ", + ... E.p("This is another paragraph, with a", "\n ", ... E.a("link", href="http://www.python.org"), "."), ... E.p("Here are some reserved characters: <spam&egg>."), ... etree.XML("

And finally an embedded XHTML fragment.

"), @@ -442,7 +442,8 @@

Hello!

This is a paragraph with bold text in it!

-

This is another paragraph, with a link.

+

This is another paragraph, with a + link.

Here are some reserved characters: &lt;spam&amp;egg&gt;.

And finally an embedded XHTML fragment.

From ianb at codespeak.net Tue Jul 3 00:54:34 2007 From: ianb at codespeak.net (ianb at codespeak.net) Date: Tue, 3 Jul 2007 00:54:34 +0200 (CEST) Subject: [Lxml-checkins] r44681 - in lxml/branch/html/src/lxml/html: . tests Message-ID: <20070702225434.C572980B0@code0.codespeak.net> Author: ianb Date: Tue Jul 3 00:54:33 2007 New Revision: 44681 Modified: lxml/branch/html/src/lxml/html/css.py lxml/branch/html/src/lxml/html/tests/test_css.py lxml/branch/html/src/lxml/html/tests/test_css.txt lxml/branch/html/src/lxml/html/tests/test_css_select.txt Log: Created CSSSelector, added a public __all__ for the css module; renamed the xpath() function; put in some tests for parse_series and fixed the results; added NotImplemented errors for *:something-of-type psuedoclasses, which I can't well implement with XPath; change inheritance of the exceptions Modified: lxml/branch/html/src/lxml/html/css.py ============================================================================== --- lxml/branch/html/src/lxml/html/css.py (original) +++ lxml/branch/html/src/lxml/html/css.py Tue Jul 3 00:54:33 2007 @@ -1,12 +1,31 @@ import re from lxml import etree -class SelectorSyntaxError(Exception): +__all__ = ['SelectorSyntaxError', 'ExpressionError', + 'CSSSelector'] + +class SelectorSyntaxError(SyntaxError): pass -class ExpressionError(Exception): +class ExpressionError(RuntimeError): pass +class CSSSelector(etree.XPath): + + def __init__(self, css): + path = css_to_xpath(css) + etree.XPath.__init__(self, path) + self.css = css + + def __repr__(self): + return '<%s %s for %r>' % ( + self.__class__.__name__, + hex(abs(id(self)))[2:], + self.css) + +############################## +## Token objects: + class _UniToken(unicode): def __new__(cls, contents, pos): obj = unicode.__new__(cls, contents) @@ -91,14 +110,14 @@ def _xpath_nth_child(self, xpath, expr, last=False, add_name_test=True): a, b = parse_series(expr) - if not a: + if not a and not b: # a=0 means nothing is returned... 
xpath.add_condition('false() and position() = 0') return xpath if add_name_test: xpath.add_name_test() xpath.add_star_prefix() - if a == 1: + if a == 0: if last: b = 'last() - %s' % b xpath.add_condition('position() = %s' % b) @@ -111,12 +130,17 @@ b_neg = str(-b) else: b_neg = '+%s' % (-b) - expr = '(position() %s) mod %s = 0' % (b_neg, a) + if a != 1: + expr = ['(position() %s) mod %s = 0' % (b_neg, a)] + else: + expr = [] if b >= 0: - expr += ' and position() >= %s' % b + expr.append('position() >= %s' % b) elif b < 0 and last: - expr += ' and position() < (last() %s)' % b - xpath.add_condition(expr) + expr.append('position() < (last() %s)' % b) + expr = ' and '.join(expr) + if expr: + xpath.add_condition(expr) return xpath # FIXME: handle an+b, odd, even # an+b means every-a, plus b, e.g., 2n+1 means odd @@ -130,6 +154,9 @@ return self._xpath_nth_child(xpath, expr, last=True) def _xpath_nth_of_type(self, xpath, expr): + if xpath.element == '*': + raise NotImplementedError( + "*:nth-of-type() is not implemented") return self._xpath_nth_child(xpath, expr, add_name_test=False) def _xpath_nth_last_of_type(self, xpath, expr): @@ -215,11 +242,17 @@ return xpath def _xpath_first_of_type(self, xpath): + if xpath.element == '*': + raise NotImplementedError( + "*:first-of-type is not implemented") xpath.add_star_prefix() xpath.add_condition('position() = 1') return xpath def _xpath_last_of_type(self, xpath): + if xpath.element == '*': + raise NotImplementedError( + "*:last-of-type is not implemented") xpath.add_star_prefix() xpath.add_condition('position() = last()') return xpath @@ -230,6 +263,9 @@ return xpath def _xpath_only_of_type(self, xpath): + if xpath.element == '*': + raise NotImplementedError( + "*:only-of-type is not implemented") xpath.add_condition('last() = 1') return xpath @@ -343,7 +379,7 @@ else: # FIXME: Should we lowercase here? 
el = '%s:%s' % (self.namespace, self.element) - return XPath(element=el) + return XPathExpr(element=el) class Hash(object): """ @@ -375,7 +411,7 @@ def xpath(self): paths = [item.xpath() for item in self.items] - return XPathOr(paths) + return XPathExprOr(paths) class CombinedSelector(object): @@ -435,9 +471,9 @@ return xpath ############################## -## XPath objects: +## XPathExpr objects: -def xpath(css_expr, prefix='descendant-or-self::'): +def css_to_xpath(css_expr, prefix='descendant-or-self::'): if isinstance(css_expr, basestring): css_expr = parse(css_expr) expr = css_expr.xpath() @@ -447,14 +483,7 @@ expr.add_prefix(prefix) return str(expr) -def run_xpath(doc, xpath): - return [el for el in doc.xpath(xpath) - if isinstance(el, etree.ElementBase)] - -def run_css(doc, css): - return run_xpath(doc, xpath(css)) - -class XPath(object): +class XPathExpr(object): def __init__(self, prefix=None, path=None, element='*', condition=None, star_prefix=False): @@ -529,7 +558,7 @@ self.element = other.element self.condition = other.condition -class XPathOr(XPath): +class XPathExprOr(XPathExpr): """ Represents on |'d expressions. Note that unfortunately it isn't @@ -547,7 +576,9 @@ return ' | '.join([prefix + str(i) for i in self.items]) def xpath_repr(s): - # FIXME: I don't think this is right + # FIXME: I don't think this is right, but lacking any reasonable + # specification on what XPath literals look like (which doesn't seem + # to be in the XPath specification) it is hard to do 'right' if isinstance(s, Element): # This is probably a symbol that looks like an expression... 
s = s._format_element() @@ -703,11 +734,11 @@ if isinstance(s, Element): s = s._format_element() if not s or s == '*': - # Happens when there's nothing, which CSS things of as * - return (1, 0) + # Happens when there's nothing, which the CSS parser thinks of as * + return (0, 0) if isinstance(s, int): # Happens when you just get a number - return (1, s) + return (0, s) if s == 'odd': return (2, 1) elif s == 'even': @@ -716,7 +747,7 @@ return (1, 0) if 'n' not in s: # Just a b - return int(s) + return (0, int(s)) a, b = s.split('n', 1) if not a: a = 1 Modified: lxml/branch/html/src/lxml/html/tests/test_css.py ============================================================================== --- lxml/branch/html/src/lxml/html/tests/test_css.py (original) +++ lxml/branch/html/src/lxml/html/tests/test_css.py Tue Jul 3 00:54:33 2007 @@ -69,7 +69,7 @@ body = doc.xpath('//body')[0] bad = [] selector, count = self.selectors[self.index] - xpath = css.xpath(css.parse(selector)) + xpath = css.css_to_xpath(css.parse(selector)) try: results = body.xpath(xpath) except Exception, e: Modified: lxml/branch/html/src/lxml/html/tests/test_css.txt ============================================================================== --- lxml/branch/html/src/lxml/html/tests/test_css.txt (original) +++ lxml/branch/html/src/lxml/html/tests/test_css.txt Tue Jul 3 00:54:33 2007 @@ -110,6 +110,23 @@ e/following-sibling::f >>> xpath('div#container p') div[@id = 'container']/descendant::p - >>> # FIXME: This isn't right, but I don't know what *is* right >>> xpath('p *:only-of-type') - p/descendant::*[last() = 1] + Traceback (most recent call last): + ... 
+ NotImplementedError: *:only-of-type is not implemented + +Then of parse_series: + + >>> from lxml.html.css import parse_series + >>> parse_series('1n+3') + (1, 3) + >>> parse_series('n-5') + (1, -5) + >>> parse_series('odd') + (2, 1) + >>> parse_series('3n') + (3, 0) + >>> parse_series('n') + (1, 0) + >>> parse_series('5') + (0, 5) \ No newline at end of file Modified: lxml/branch/html/src/lxml/html/tests/test_css_select.txt ============================================================================== --- lxml/branch/html/src/lxml/html/tests/test_css_select.txt (original) +++ lxml/branch/html/src/lxml/html/tests/test_css_select.txt Tue Jul 3 00:54:33 2007 @@ -1,7 +1,7 @@ This is a test of CSS selectors. We setup a document we'll use for all our selections, and a function make querying simpler: - >>> from lxml.html.css import run_css, xpath + >>> from lxml.html.css import CSSSelector >>> from lxml.html import HTML >>> doc = HTML(''' ... @@ -35,10 +35,10 @@ >>> for count, el in enumerate(doc.getiterator()): ... order[el] = count >>> def select_ids(selector): - ... items = run_css(doc, selector) + ... items = CSSSelector(selector)(doc) ... if not items: ... return 'empty' - ... items = run_css(doc, selector) + ... items = CSSSelector(selector)(doc) ... items.sort(key=lambda el: order[el]) ... return ', '.join([el.get('id', 'nil') for el in items]) >>> def pcss(main, *selectors): @@ -114,7 +114,9 @@ >>> pcss('div *:only-child') foobar-span >>> pcss('p *:only-of-type') - p-em + Traceback (most recent call last): + ... 
+ NotImplementedError: *:only-of-type is not implemented >>> pcss('p:only-of-type') paragraph >>> pcss('a:empty') From ianb at codespeak.net Tue Jul 3 01:26:02 2007 From: ianb at codespeak.net (ianb at codespeak.net) Date: Tue, 3 Jul 2007 01:26:02 +0200 (CEST) Subject: [Lxml-checkins] r44682 - lxml/branch/html/src/lxml/html Message-ID: <20070702232602.5859580BC@code0.codespeak.net> Author: ianb Date: Tue Jul 3 01:26:00 2007 New Revision: 44682 Modified: lxml/branch/html/src/lxml/html/css.py Log: Fix a problem with nth-last-child; adjust el:empty a little Modified: lxml/branch/html/src/lxml/html/css.py ============================================================================== --- lxml/branch/html/src/lxml/html/css.py (original) +++ lxml/branch/html/src/lxml/html/css.py Tue Jul 3 01:26:00 2007 @@ -110,7 +110,7 @@ def _xpath_nth_child(self, xpath, expr, last=False, add_name_test=True): a, b = parse_series(expr) - if not a and not b: + if not a and not b and not last: # a=0 means nothing is returned... xpath.add_condition('false() and position() = 0') return xpath @@ -270,7 +270,7 @@ return xpath def _xpath_empty(self, xpath): - xpath.add_condition("count(./child::*) = 0 and normalize-space(.) = ''") + xpath.add_condition("not(*) and not(normalize-space())") return xpath class Attrib(object): From ianb at codespeak.net Tue Jul 3 01:58:14 2007 From: ianb at codespeak.net (ianb at codespeak.net) Date: Tue, 3 Jul 2007 01:58:14 +0200 (CEST) Subject: [Lxml-checkins] r44683 - in lxml/branch/html/src/lxml/html: . tests Message-ID: <20070702235814.ACBA680BB@code0.codespeak.net> Author: ianb Date: Tue Jul 3 01:58:14 2007 New Revision: 44683 Modified: lxml/branch/html/src/lxml/html/css.py lxml/branch/html/src/lxml/html/tests/test_css.py lxml/branch/html/src/lxml/html/tests/test_css.txt lxml/branch/html/src/lxml/html/tests/test_css_select.txt Log: Fix :only-child; adjust some tests that were acquired, where I don't understand the numbers they used. 
Modified: lxml/branch/html/src/lxml/html/css.py ============================================================================== --- lxml/branch/html/src/lxml/html/css.py (original) +++ lxml/branch/html/src/lxml/html/css.py Tue Jul 3 01:58:14 2007 @@ -259,6 +259,7 @@ def _xpath_only_child(self, xpath): xpath.add_name_test() + xpath.add_star_prefix() xpath.add_condition('last() = 1') return xpath Modified: lxml/branch/html/src/lxml/html/tests/test_css.py ============================================================================== --- lxml/branch/html/src/lxml/html/tests/test_css.py (original) +++ lxml/branch/html/src/lxml/html/tests/test_css.py Tue Jul 3 01:58:14 2007 @@ -12,9 +12,14 @@ class CSSTestCase(unittest.TestCase): selectors = [ - ('*', 252), + ## Changed from original; probably because I'm only searching the body + #('*', 252), + ('*', 246), ('div:only-child', 22), # ? - ('div:contains(CELIA)', 243), + ## Changed from original, because the original doesn't make sense. + ## There really aren't that many occurrances of 'celia' + #('div:contains(CELIA)', 243), + ('div:contains(CELIA)', 30), ('div:nth-child(even)', 106), ('div:nth-child(2n)', 106), ('div:nth-child(odd)', 137), Modified: lxml/branch/html/src/lxml/html/tests/test_css.txt ============================================================================== --- lxml/branch/html/src/lxml/html/tests/test_css.txt (original) +++ lxml/branch/html/src/lxml/html/tests/test_css.txt Tue Jul 3 01:58:14 2007 @@ -87,11 +87,11 @@ >>> xpath('E:last-of-type') */e[position() = last()] >>> xpath('E:only-child') - *[name() = 'e' and (last() = 1)] + */*[name() = 'e' and (last() = 1)] >>> xpath('E:only-of-type') e[last() = 1] >>> xpath('E:empty') - e[count(./child::*) = 0 and normalize-space(.) 
= ''] + e[not(*) and not(normalize-space())] >>> xpath('E:contains("foo")') e[contains(css:lower-case(string(.)), 'foo')] >>> xpath('E.warning') @@ -129,4 +129,4 @@ >>> parse_series('n') (1, 0) >>> parse_series('5') - (0, 5) \ No newline at end of file + (0, 5) Modified: lxml/branch/html/src/lxml/html/tests/test_css_select.txt ============================================================================== --- lxml/branch/html/src/lxml/html/tests/test_css_select.txt (original) +++ lxml/branch/html/src/lxml/html/tests/test_css_select.txt Tue Jul 3 01:58:14 2007 @@ -148,4 +148,3 @@ nofollow-anchor >>> pcss('ol#first-ol li:last-child', 'ol#first-ol *:last-child') seventh-li - \ No newline at end of file From ianb at codespeak.net Tue Jul 3 02:10:56 2007 From: ianb at codespeak.net (ianb at codespeak.net) Date: Tue, 3 Jul 2007 02:10:56 +0200 (CEST) Subject: [Lxml-checkins] r44684 - lxml/branch/html/src/lxml/html Message-ID: <20070703001056.A86F380BB@code0.codespeak.net> Author: ianb Date: Tue Jul 3 02:10:56 2007 New Revision: 44684 Modified: lxml/branch/html/src/lxml/html/css.py Log: Add some fast translation for id, class, and plain element name matches Modified: lxml/branch/html/src/lxml/html/css.py ============================================================================== --- lxml/branch/html/src/lxml/html/css.py (original) +++ lxml/branch/html/src/lxml/html/css.py Tue Jul 3 02:10:56 2007 @@ -474,8 +474,23 @@ ############################## ## XPathExpr objects: +_el_re = re.compile(r'^\w+\s*$') +_id_re = re.compile(r'^(\w*)#(\w+)\s*$') +_class_re = re.compile(r'^(\w*)\.(\w+)\s*$') + def css_to_xpath(css_expr, prefix='descendant-or-self::'): if isinstance(css_expr, basestring): + match = _el_re.search(css_expr) + if match is not None: + return '%s%s' % (prefix, match.group(0).strip()) + match = _id_re.search(css_expr) + if match is not None: + return "%s%s[@id = '%s']" % ( + prefix, match.group(1) or '*', match.group(2)) + match = _class_re.search(css_expr) + if 
match is not None: + return "%s%s[contains(concat(' ', normalize-space(@class), ' '), ' %s ')]" % ( + prefix, match.group(1) or '*', match.group(2)) css_expr = parse(css_expr) expr = css_expr.xpath() assert expr is not None, ( From ianb at codespeak.net Tue Jul 3 03:29:50 2007 From: ianb at codespeak.net (ianb at codespeak.net) Date: Tue, 3 Jul 2007 03:29:50 +0200 (CEST) Subject: [Lxml-checkins] r44685 - in lxml/branch/html/src/lxml/html: . tests Message-ID: <20070703012950.D3C5180BE@code0.codespeak.net> Author: ianb Date: Tue Jul 3 03:29:49 2007 New Revision: 44685 Modified: lxml/branch/html/src/lxml/html/clean.py lxml/branch/html/src/lxml/html/tests/test_clean.txt Log: Moved to a class-based cleaner instead of a function. Resulting rearrangement Modified: lxml/branch/html/src/lxml/html/clean.py ============================================================================== --- lxml/branch/html/src/lxml/html/clean.py (original) +++ lxml/branch/html/src/lxml/html/clean.py Tue Jul 3 03:29:49 2007 @@ -64,66 +64,44 @@ clean(doc, **kw) return tostring(doc) -# FIXME: I really have to figure out what a sane set of defaults is -# for these keyword arguments. And is this signature out of control? -# What about if we want things like whitelisting of or other -# controls? Maybe this has to be more than a function. -def clean(doc, - scripts=True, - javascript=True, - comments=True, - style=False, - links=True, - meta=True, - page_structure=True, - processing_instructions=True, - embedded=True, - frames=True, - forms=True, - annoying_tags=True, - remove_tags=None, - allow_tags=None, - strip_tags=True, - remove_unknown_tags=True, - safe_attrs_only=True, - add_nofollow=False, - # callbacks? - ): +class Cleaner(object): """ - Cleans the document of each of the possible offending elements: + Instances cleans the document of each of the possible offending + elements. 
The cleaning is controlled by attributes; you can + override attributes in a subclass, or set them in the constructor. ``scripts``: - Any ``