From scoder at codespeak.net Tue Aug 1 07:42:36 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 1 Aug 2006 07:42:36 +0200 (CEST) Subject: [Lxml-checkins] r30827 - in lxml/trunk/src/lxml: . tests Message-ID: <20060801054236.088C410076@code0.codespeak.net> Author: scoder Date: Tue Aug 1 07:42:34 2006 New Revision: 30827 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tests/test_elementtree.py Log: make copy/deepcopy work for ElementTree objects Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Tue Aug 1 07:42:34 2006 @@ -395,6 +395,15 @@ """ return self._context_node + def __copy__(self): + return ElementTree(self._context_node) + + def __deepcopy__(self, memo): + if self._context_node is None: + return ElementTree() + else: + return ElementTree( self._context_node.__copy__() ) + property docinfo: """Information about the document provided by parser and DTD. 
This value is only defined for ElementTree objects based on the root node Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Tue Aug 1 07:42:34 2006 @@ -2106,6 +2106,20 @@ el = self.etree.parse(StringIO(isoxml)).getroot() self.assertEquals(utext, el.text) + def test_deepcopy_elementtree(self): + Element = self.etree.Element + ElementTree = self.etree.ElementTree + + a = Element('a') + a.text = "Foo" + atree = ElementTree(a) + + btree = copy.deepcopy(atree) + self.assertEqual("Foo", atree.getroot().text) + self.assertEqual("Foo", btree.getroot().text) + self.assertFalse(btree is atree) + self.assertFalse(btree.getroot() is atree.getroot()) + def test_deepcopy(self): Element = self.etree.Element @@ -2172,6 +2186,21 @@ self.assertEquals( root[0][0].get('{tns}foo'), copy.deepcopy(root[0][0]).get('{tns}foo') ) + + def test_deepcopy_append(self): + # previously caused a crash + Element = self.etree.Element + tostring = self.etree.tostring + + a = Element('a') + b = copy.deepcopy(a) + a.append( Element('C') ) + b.append( Element('X') ) + + self.assertEquals('', + tostring(a).replace(' ', '')) + self.assertEquals('', + tostring(b).replace(' ', '')) def test_shallowcopy(self): Element = self.etree.Element @@ -2186,21 +2215,19 @@ self.assertEquals('Bar', b.text) self.assertEquals('Foo', a.text) # XXX ElementTree will share nodes, but lxml.etree won't.. 
- - def test_deepcopy_append(self): - # previously caused a crash + + def test_shallowcopy_elementtree(self): Element = self.etree.Element - tostring = self.etree.tostring + ElementTree = self.etree.ElementTree a = Element('a') - b = copy.deepcopy(a) - a.append( Element('C') ) - b.append( Element('X') ) + a.text = 'Foo' + atree = ElementTree(a) - self.assertEquals('', - tostring(a).replace(' ', '')) - self.assertEquals('', - tostring(b).replace(' ', '')) + btree = copy.copy(atree) + self.assertFalse(btree is atree) + self.assert_(btree.getroot() is atree.getroot()) + self.assertEquals('Foo', atree.getroot().text) def test_element_boolean(self): etree = self.etree From scoder at codespeak.net Tue Aug 1 07:43:38 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 1 Aug 2006 07:43:38 +0200 (CEST) Subject: [Lxml-checkins] r30828 - lxml/trunk Message-ID: <20060801054338.CD31410076@code0.codespeak.net> Author: scoder Date: Tue Aug 1 07:43:36 2006 New Revision: 30828 Modified: lxml/trunk/CHANGES.txt Log: mark ET copy/deepcopy bug fixed Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Aug 1 07:43:36 2006 @@ -13,6 +13,8 @@ Bugs fixed ---------- +* Copying/deepcopying did not work for ElementTree objects + * The EXSLT ``regexp:match`` function now works as defined (except for some differences in the regular expression syntax) From scoder at codespeak.net Tue Aug 1 07:44:55 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 1 Aug 2006 07:44:55 +0200 (CEST) Subject: [Lxml-checkins] r30829 - in lxml/branch/lxml-1.0: . 
src/lxml src/lxml/tests Message-ID: <20060801054455.556E410076@code0.codespeak.net> Author: scoder Date: Tue Aug 1 07:44:51 2006 New Revision: 30829 Modified: lxml/branch/lxml-1.0/CHANGES.txt lxml/branch/lxml-1.0/src/lxml/etree.pyx lxml/branch/lxml-1.0/src/lxml/tests/test_elementtree.py Log: make copy/deepcopy work for ElementTree objects Modified: lxml/branch/lxml-1.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.0/CHANGES.txt (original) +++ lxml/branch/lxml-1.0/CHANGES.txt Tue Aug 1 07:44:51 2006 @@ -14,6 +14,8 @@ Bugs fixed ---------- +* Copying/deepcopying did not work for ElementTree objects + * Setting an attribute to a non-string value did not raise an exception * Element.remove() deleted the tail text from the removed Element Modified: lxml/branch/lxml-1.0/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-1.0/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-1.0/src/lxml/etree.pyx Tue Aug 1 07:44:51 2006 @@ -377,6 +377,15 @@ """ return self._context_node + def __copy__(self): + return ElementTree(self._context_node) + + def __deepcopy__(self, memo): + if self._context_node is None: + return ElementTree() + else: + return ElementTree( self._context_node.__copy__() ) + property docinfo: """Information about the document provided by parser and DTD. 
This value is only defined for ElementTree objects based on the root node Modified: lxml/branch/lxml-1.0/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/lxml-1.0/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/lxml-1.0/src/lxml/tests/test_elementtree.py Tue Aug 1 07:44:51 2006 @@ -1980,6 +1980,20 @@ el = self.etree.parse(StringIO(isoxml)).getroot() self.assertEquals(utext, el.text) + def test_deepcopy_elementtree(self): + Element = self.etree.Element + ElementTree = self.etree.ElementTree + + a = Element('a') + a.text = "Foo" + atree = ElementTree(a) + + btree = copy.deepcopy(atree) + self.assertEqual("Foo", atree.getroot().text) + self.assertEqual("Foo", btree.getroot().text) + self.assertFalse(btree is atree) + self.assertFalse(btree.getroot() is atree.getroot()) + def test_deepcopy(self): Element = self.etree.Element @@ -2046,6 +2060,21 @@ self.assertEquals( root[0][0].get('{tns}foo'), copy.deepcopy(root[0][0]).get('{tns}foo') ) + + def test_deepcopy_append(self): + # previously caused a crash + Element = self.etree.Element + tostring = self.etree.tostring + + a = Element('a') + b = copy.deepcopy(a) + a.append( Element('C') ) + b.append( Element('X') ) + + self.assertEquals('', + tostring(a).replace(' ', '')) + self.assertEquals('', + tostring(b).replace(' ', '')) def test_shallowcopy(self): Element = self.etree.Element @@ -2060,21 +2089,19 @@ self.assertEquals('Bar', b.text) self.assertEquals('Foo', a.text) # XXX ElementTree will share nodes, but lxml.etree won't.. 
- - def test_deepcopy_append(self): - # previously caused a crash + + def test_shallowcopy_elementtree(self): Element = self.etree.Element - tostring = self.etree.tostring + ElementTree = self.etree.ElementTree a = Element('a') - b = copy.deepcopy(a) - a.append( Element('C') ) - b.append( Element('X') ) + a.text = 'Foo' + atree = ElementTree(a) - self.assertEquals('', - tostring(a).replace(' ', '')) - self.assertEquals('', - tostring(b).replace(' ', '')) + btree = copy.copy(atree) + self.assertFalse(btree is atree) + self.assert_(btree.getroot() is atree.getroot()) + self.assertEquals('Foo', atree.getroot().text) def test_element_boolean(self): etree = self.etree From scoder at codespeak.net Tue Aug 1 12:49:44 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 1 Aug 2006 12:49:44 +0200 (CEST) Subject: [Lxml-checkins] r30835 - in lxml/branch/capi/src/lxml: . tests Message-ID: <20060801104944.81AF31007A@code0.codespeak.net> Author: scoder Date: Tue Aug 1 12:49:43 2006 New Revision: 30835 Modified: lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: forgot to rename function calls after renaming function Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Aug 1 12:49:43 2006 @@ -80,7 +80,7 @@ PYTYPE_ATTRIBUTE = cetree.namespacedNameFromNsName( _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) -setPytypeAttribute() +setPytypeAttributeTag() # namespace for XML Schema instance Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Tue Aug 1 12:49:43 2006 @@ -36,7 +36,7 @@ def tearDown(self): self.etree.Namespace("otherNs").clear() - 
objectify.setPytypeAttribute() + objectify.setPytypeAttributeTag() objectify.unregister() def test_str(self): @@ -317,7 +317,7 @@ ''' pytype_ns, pytype_name = objectify.PYTYPE_ATTRIBUTE[1:].split('}') - objectify.setPytypeAttribute("{TEST}test") + objectify.setPytypeAttributeTag("{TEST}test") root = XML(xml) objectify.annotate(root) @@ -327,7 +327,7 @@ attribs = root.xpath("//@py:test", {"py" : "TEST"}) self.assertEquals(7, len(attribs)) - objectify.setPytypeAttribute() + objectify.setPytypeAttributeTag() pytype_ns, pytype_name = objectify.PYTYPE_ATTRIBUTE[1:].split('}') self.assertNotEqual("test", pytype_ns.lower()) From scoder at codespeak.net Tue Aug 1 13:18:27 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 1 Aug 2006 13:18:27 +0200 (CEST) Subject: [Lxml-checkins] r30836 - lxml/branch/capi/src/lxml Message-ID: <20060801111827.008AD1007D@code0.codespeak.net> Author: scoder Date: Tue Aug 1 13:18:26 2006 New Revision: 30836 Modified: lxml/branch/capi/src/lxml/classlookup.pyx lxml/branch/capi/src/lxml/etree.pyx lxml/branch/capi/src/lxml/nsclasses.pxi Log: moved ElementDefaultClassLookup and ElementNamespaceClassLookup from lxml.elements.classlookup into lxml.etree to let namespace lookup support fallback mechanisms, some additional restructuring Modified: lxml/branch/capi/src/lxml/classlookup.pyx ============================================================================== --- lxml/branch/capi/src/lxml/classlookup.pyx (original) +++ lxml/branch/capi/src/lxml/classlookup.pyx Tue Aug 1 13:18:26 2006 @@ -17,19 +17,8 @@ # initialize C-API of lxml.etree cetree.import_etree(etree) - -cdef class ElementNamespaceClassLookup(ElementClassLookup): - """Looks up Element class in the Namespace registry. - """ - # uses default lookup - - -cdef class ElementDefaultClassLookup(ElementClassLookup): - """Always returns the default Element class. 
- """ - def __init__(self): - self._lookup_function = cetree.lookupDefaultElementClass - +ElementNamespaceClassLookup = etree.ElementNamespaceClassLookup +ElementDefaultClassLookup = etree.ElementDefaultClassLookup cdef class AttributeBasedElementClassLookup(FallbackElementClassLookup): """Checks an attribute of an Element and looks up the value in a class @@ -40,7 +29,8 @@ * class mapping (Python dict mapping attribute values to Element classes) * fallback (optional fallback lookup mechanism) - A None key in the class mapping will be checked if the attribute is missing. + A None key in the class mapping will be checked if the attribute is + missing. """ cdef object _class_mapping cdef object _pytag Modified: lxml/branch/capi/src/lxml/etree.pyx ============================================================================== --- lxml/branch/capi/src/lxml/etree.pyx (original) +++ lxml/branch/capi/src/lxml/etree.pyx Tue Aug 1 13:18:26 2006 @@ -1770,10 +1770,36 @@ self.fallback = lookup self._fallback_function = lookup._lookup_function - cdef object _callFallback(self, doc, tree.xmlNode* c_node): + cdef object _callFallback(self, doc, xmlNode* c_node): return self._fallback_function(self.fallback, doc, c_node) -# default: Namespace classes +cdef class ElementDefaultClassLookup(ElementClassLookup): + """Element class lookup scheme that always returns the default Element + class. + """ + def __init__(self): + self._lookup_function = _lookupDefaultElementClass + +cdef object _lookupDefaultElementClass(_state, _Document _doc, xmlNode* c_node): + "Trivial class lookup function that always returns the default class." 
+ if c_node.type == tree.XML_ELEMENT_NODE: + return __DEFAULT_ELEMENT_CLASS + elif c_node.type == tree.XML_COMMENT_NODE: + return __DEFAULT_COMMENT_CLASS + elif c_node.type == tree.XML_PI_NODE: + return __DEFAULT_PI_CLASS + else: + assert 0, "Unknown node type: %s" % c_node.type + +cdef class ElementNamespaceClassLookup(FallbackElementClassLookup): + """Element class lookup scheme that searches the Element class in the + Namespace registry. + """ + def __init__(self, ElementClassLookup fallback=None): + FallbackElementClassLookup.__init__(self, fallback) + self._lookup_function = _find_nselement_class + +# default lookup: Namespace classes cdef _element_class_lookup_function DEFAULT_ELEMENT_CLASS_LOOKUP DEFAULT_ELEMENT_CLASS_LOOKUP = _find_nselement_class @@ -1781,6 +1807,7 @@ LOOKUP_ELEMENT_CLASS = DEFAULT_ELEMENT_CLASS_LOOKUP cdef object ELEMENT_CLASS_LOOKUP_STATE +ELEMENT_CLASS_LOOKUP_STATE = None cdef void _setElementClassLookupFunction( _element_class_lookup_function function, object state): @@ -1799,6 +1826,41 @@ _setElementClassLookupFunction(lookup._lookup_function, lookup) + +################################################################################ +# Custom Element classes + +cdef public class ElementBase(_Element) [ type LxmlElementBaseType, + object LxmlElementBase ]: + """All custom Element classes must inherit from this one. + + Note that subclasses *must not* override __init__ or __new__ as it is + absolutely undefined when these objects will be created or destroyed. All + persistent state of elements must be stored in the underlying XML. If you + really need to initialize the object after creation, you can implement an + ``_init(self)`` method that will be called after object creation. 
+ """ + +def setDefaultElementClass(cls=None): + global __DEFAULT_ELEMENT_CLASS + if cls is None: + __DEFAULT_ELEMENT_CLASS = _Element + elif not python.PyType_Check(cls) or not issubclass(cls, ElementBase): + raise LxmlRegistryError, \ + "Registered element classes must be subtypes of ElementBase" + else: + __DEFAULT_ELEMENT_CLASS = cls + +cdef object __DEFAULT_ELEMENT_CLASS +__DEFAULT_ELEMENT_CLASS = _Element + +cdef object __DEFAULT_COMMENT_CLASS +__DEFAULT_COMMENT_CLASS = _Comment + +cdef object __DEFAULT_PI_CLASS +__DEFAULT_PI_CLASS = _ProcessingInstruction + + ################################################################################ # Include submodules Modified: lxml/branch/capi/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/capi/src/lxml/nsclasses.pxi (original) +++ lxml/branch/capi/src/lxml/nsclasses.pxi Tue Aug 1 13:18:26 2006 @@ -6,48 +6,6 @@ class NamespaceRegistryError(LxmlRegistryError): pass -cdef public class ElementBase(_Element) [ type LxmlElementBaseType, - object LxmlElementBase ]: - """All custom Element classes must inherit from this one. - - Note that subclasses *must not* override __init__ or __new__ as it is - absolutely undefined when these objects will be created or destroyed. All - persistent state of elements must be stored in the underlying XML. If you - really need to initialize the object after creation, you can implement an - ``_init(self)`` method that will be called after object creation. 
- """ - -def setDefaultElementClass(cls=None): - global __DEFAULT_ELEMENT_CLASS - if cls is None: - __DEFAULT_ELEMENT_CLASS = _Element - elif not python.PyType_Check(cls) or not issubclass(cls, ElementBase): - raise LxmlRegistryError, \ - "Registered element classes must be subtypes of ElementBase" - else: - __DEFAULT_ELEMENT_CLASS = cls - -cdef object _lookupDefaultElementClass(_state, _doc, xmlNode* c_node): - "Trivial class lookup function that always returns the default class." - if c_node.type == tree.XML_ELEMENT_NODE: - return __DEFAULT_ELEMENT_CLASS - elif c_node.type == tree.XML_COMMENT_NODE: - return __DEFAULT_COMMENT_CLASS - elif c_node.type == tree.XML_PI_NODE: - return __DEFAULT_PI_CLASS - else: - assert 0, "Unknown node type: %s" % c_node.type - - -cdef object __DEFAULT_ELEMENT_CLASS -__DEFAULT_ELEMENT_CLASS = _Element - -cdef object __DEFAULT_COMMENT_CLASS -__DEFAULT_COMMENT_CLASS = _Comment - -cdef object __DEFAULT_PI_CLASS -__DEFAULT_PI_CLASS = _ProcessingInstruction - cdef object __NAMESPACE_REGISTRIES __NAMESPACE_REGISTRIES = {} @@ -217,7 +175,9 @@ cdef _NamespaceRegistry registry cdef char* c_namespace_utf if c_node.type != tree.XML_ELEMENT_NODE: - return _lookupDefaultElementClass(state, doc, c_node) + if state is None: + return _lookupDefaultElementClass(None, doc, c_node) + return (state)._callFallback(doc, c_node) c_namespace_utf = _getNs(c_node) if c_namespace_utf is not NULL: dict_result = python.PyDict_GetItemString( @@ -225,22 +185,22 @@ else: dict_result = python.PyDict_GetItem( __NAMESPACE_REGISTRIES, None) - if dict_result is NULL: - return __DEFAULT_ELEMENT_CLASS + if dict_result is not NULL: + registry = <_NamespaceRegistry>dict_result + classes = registry._entries - registry = <_NamespaceRegistry>dict_result - classes = registry._entries + if c_node.name is not NULL: + dict_result = python.PyDict_GetItemString( + classes, c_node.name) + else: + dict_result = NULL - if c_node.name is not NULL: - dict_result = 
python.PyDict_GetItemString( - classes, c_node.name) - else: - dict_result = NULL + if dict_result is NULL: + dict_result = python.PyDict_GetItem(classes, None) - if dict_result is NULL: - dict_result = python.PyDict_GetItem(classes, None) + if dict_result is not NULL: + return dict_result - if dict_result is not NULL: - return dict_result - else: + if state is None: return __DEFAULT_ELEMENT_CLASS + return (state)._callFallback(doc, c_node) From scoder at codespeak.net Tue Aug 1 13:28:17 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 1 Aug 2006 13:28:17 +0200 (CEST) Subject: [Lxml-checkins] r30837 - lxml/branch/capi/src/lxml Message-ID: <20060801112817.62B4910078@code0.codespeak.net> Author: scoder Date: Tue Aug 1 13:28:16 2006 New Revision: 30837 Modified: lxml/branch/capi/src/lxml/classlookup.pyx Log: doc updates Modified: lxml/branch/capi/src/lxml/classlookup.pyx ============================================================================== --- lxml/branch/capi/src/lxml/classlookup.pyx (original) +++ lxml/branch/capi/src/lxml/classlookup.pyx Tue Aug 1 13:28:16 2006 @@ -1,8 +1,25 @@ # Configurable Element class lookup -__doc__ = """Configurable Element class lookup. +__doc__ = """Configurable Element class lookup mechanisms. +This module contains a number of different lookup implementations for Element +classes. +* ElementDefaultClassLookup: always use the default classes. This class is + copied from the lxml.etree module. + +* ElementNamespaceClassLookup: find the class in the Namespace registry or use + a fallback lookup mechanism. This class is copied from the lxml.etree + module. + +* AttributeBasedElementClassLookup: lookup the class based on the value of a + specific attribute of the element. + +* ParserBasedElementClassLookup: global lookup scheme that delegates to the + parser specific class lookup mechanism. + +* CustomElementClassLookup: customizable lookup scheme that delegates to a + callback method. 
""" from python cimport isinstance, getattr, _cstr, Py_ssize_t @@ -17,8 +34,8 @@ # initialize C-API of lxml.etree cetree.import_etree(etree) -ElementNamespaceClassLookup = etree.ElementNamespaceClassLookup ElementDefaultClassLookup = etree.ElementDefaultClassLookup +ElementNamespaceClassLookup = etree.ElementNamespaceClassLookup cdef class AttributeBasedElementClassLookup(FallbackElementClassLookup): """Checks an attribute of an Element and looks up the value in a class From scoder at codespeak.net Tue Aug 1 14:02:35 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 1 Aug 2006 14:02:35 +0200 (CEST) Subject: [Lxml-checkins] r30842 - in lxml/trunk: . doc src/lxml src/lxml/tests Message-ID: <20060801120235.2841E1007A@code0.codespeak.net> Author: scoder Date: Tue Aug 1 14:02:33 2006 New Revision: 30842 Modified: lxml/trunk/CHANGES.txt lxml/trunk/doc/api.txt lxml/trunk/src/lxml/extensions.pxi lxml/trunk/src/lxml/tests/test_xpathevaluator.py Log: let xpath('/') return the root element instead of raising an exception Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Aug 1 14:02:33 2006 @@ -13,6 +13,9 @@ Bugs fixed ---------- +* The XPath expression "/" now returns the root element instead of raising an + exception + * Copying/deepcopying did not work for ElementTree objects * The EXSLT ``regexp:match`` function now works as defined (except for some Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Tue Aug 1 14:02:33 2006 @@ -546,6 +546,10 @@ are also returned as strings, enclosed by the usual ```` markers. +There is one special case for the XPath expression '/'. 
Here, lxml.etree +returns the root element of the document instead of the document root (which +does not have a representation in ElementTree). + A related convenience method of ElementTree objects is ``getpath(element)``, which returns a structural, absolute XPath expression to find that element:: Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Tue Aug 1 14:02:33 2006 @@ -305,7 +305,11 @@ return result for i from 0 <= i < xpathObj.nodesetval.nodeNr: c_node = xpathObj.nodesetval.nodeTab[i] - if _isElement(c_node): + if c_node.type == tree.XML_DOCUMENT_NODE: + c_node = _findChildForwards(c_node, 0) + if c_node is NULL: + value = None + elif _isElement(c_node): if c_node.doc != doc._c_doc: # XXX: works, but maybe not always the right thing to do? # XPath: only runs when extensions create or copy trees @@ -320,8 +324,8 @@ value = funicode(s) tree.xmlFree(s) else: - print "Not yet implemented result node type:", c_node.type - raise NotImplementedError + raise NotImplementedError, \ + "Not yet implemented result node type: %d" % c_node.type python.PyList_Append(result, value) return result Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Tue Aug 1 14:02:33 2006 @@ -32,6 +32,11 @@ self.assertEquals('Foo', tree.xpath('string(/a/text())')) + def test_xpath_document_root(self): + tree = self.parse('') + self.assertEquals([tree.getroot()], + tree.xpath('/')) + def test_xpath_list_elements(self): tree = self.parse('FooBar') root = tree.getroot() From scoder at codespeak.net Tue Aug 1 15:47:30 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 1 Aug 2006 15:47:30 +0200 (CEST) Subject: 
[Lxml-checkins] r30846 - in lxml/trunk: doc src/lxml src/lxml/tests Message-ID: <20060801134730.246471007F@code0.codespeak.net> Author: scoder Date: Tue Aug 1 15:47:28 2006 New Revision: 30846 Modified: lxml/trunk/doc/api.txt lxml/trunk/src/lxml/extensions.pxi lxml/trunk/src/lxml/tests/test_xpathevaluator.py Log: ignore document nodes in XPath results, return namespace declarations as (prefix, URI) tuple Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Tue Aug 1 15:47:28 2006 @@ -541,14 +541,11 @@ * a (unicode) string, when the XPath expression has a string result. * a list of items, when the XPath expression has a list as result. The items - may include elements and strings. Text nodes and attributes in the result - are returned as strings (the text node content or attribute value). Comments - are also returned as strings, enclosed by the usual ```` - markers. - -There is one special case for the XPath expression '/'. Here, lxml.etree -returns the root element of the document instead of the document root (which -does not have a representation in ElementTree). + may include elements, strings and tuples. Text nodes and attributes in the + result are returned as strings (the text node content or attribute value). + Comments are also returned as strings, enclosed by the usual ```` markers. Namespace declarations are returned as tuples of strings: + ``(prefix, URI)``. 
A related convenience method of ElementTree objects is ``getpath(element)``, which returns a structural, absolute XPath expression to find that element:: Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Tue Aug 1 15:47:28 2006 @@ -305,11 +305,7 @@ return result for i from 0 <= i < xpathObj.nodesetval.nodeNr: c_node = xpathObj.nodesetval.nodeTab[i] - if c_node.type == tree.XML_DOCUMENT_NODE: - c_node = _findChildForwards(c_node, 0) - if c_node is NULL: - value = None - elif _isElement(c_node): + if _isElement(c_node): if c_node.doc != doc._c_doc: # XXX: works, but maybe not always the right thing to do? # XPath: only runs when extensions create or copy trees @@ -323,6 +319,23 @@ s = tree.xmlNodeGetContent(c_node) value = funicode(s) tree.xmlFree(s) + elif c_node.type == tree.XML_NAMESPACE_DECL: + s = (c_node).href + if s is NULL: + href = None + else: + href = s + s = (c_node).prefix + if s is NULL: + prefix = None + else: + prefix = s + value = (prefix, href) + elif c_node.type == tree.XML_DOCUMENT_NODE or \ + c_node.type == tree.XML_HTML_DOCUMENT_NODE or \ + c_node.type == tree.XML_XINCLUDE_START or \ + c_node.type == tree.XML_XINCLUDE_END: + continue else: raise NotImplementedError, \ "Not yet implemented result node type: %d" % c_node.type Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Tue Aug 1 15:47:28 2006 @@ -33,10 +33,20 @@ tree.xpath('string(/a/text())')) def test_xpath_document_root(self): - tree = self.parse('') - self.assertEquals([tree.getroot()], + tree = self.parse('') + self.assertEquals([], tree.xpath('/')) + def test_xpath_namespace(self): + tree = self.parse('') + 
self.assert_((None, "test") in tree.xpath('namespace::*')) + self.assert_(('p', 'myURI') in tree.xpath('namespace::*')) + + def test_xpath_namespace_empty(self): + tree = self.parse('') + self.assertEquals([('xml', 'http://www.w3.org/XML/1998/namespace')], + tree.xpath('namespace::*')) + def test_xpath_list_elements(self): tree = self.parse('FooBar') root = tree.getroot() From scoder at codespeak.net Tue Aug 1 16:18:32 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 1 Aug 2006 16:18:32 +0200 (CEST) Subject: [Lxml-checkins] r30848 - lxml/trunk Message-ID: <20060801141832.9DC0710084@code0.codespeak.net> Author: scoder Date: Tue Aug 1 16:18:31 2006 New Revision: 30848 Modified: lxml/trunk/CHANGES.txt Log: note on XPath return value fixes Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Aug 1 16:18:31 2006 @@ -13,7 +13,10 @@ Bugs fixed ---------- -* The XPath expression "/" now returns the root element instead of raising an +* The ``namespace`` axis is supported in XPath and returns (prefix, URI) + tuples + +* The XPath expression "/" now returns an empty list instead of raising an exception * Copying/deepcopying did not work for ElementTree objects From faassen at codespeak.net Tue Aug 1 16:46:49 2006 From: faassen at codespeak.net (faassen at codespeak.net) Date: Tue, 1 Aug 2006 16:46:49 +0200 (CEST) Subject: [Lxml-checkins] r30851 - lxml/trunk/doc Message-ID: <20060801144649.F19A210080@code0.codespeak.net> Author: faassen Date: Tue Aug 1 16:46:48 2006 New Revision: 30851 Modified: lxml/trunk/doc/build.txt Log: Clarify build instructions; Pyrex is not necessary if you're trying to build a release version of lxml. 
Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Tue Aug 1 16:46:48 2006 @@ -1,14 +1,21 @@ How to build lxml from source ============================= -To build lxml from source, you need libxml2 and libxslt properly installed. +To build lxml from source, you need libxml2 and libxslt properly +installed, include header files (possibly shipped in -dev packages). Pyrex ----- -The lxml.etree module is written in Pyrex_. To build lxml from source, you -therefore need a working Pyrex installation. Pyrex now supports EasyInstall, -so you can install it by running the following command as super-user:: +The lxml.etree module is written in Pyrex_. Since we ship the +Pyrex-generated .c file with lxml releases however, you should not +need Pyrex to build lxml. + +If you're interested in building from a svn checkout of lxml or want +to be a lxml developer, you do need a working Pyrex installation. + +Pyrex now supports EasyInstall, so you can install it +by running the following command as super-user:: easy_install Pyrex From faassen at codespeak.net Tue Aug 1 18:54:46 2006 From: faassen at codespeak.net (faassen at codespeak.net) Date: Tue, 1 Aug 2006 18:54:46 +0200 (CEST) Subject: [Lxml-checkins] r30861 - lxml/branch/lxml-xpathroot Message-ID: <20060801165446.92A1610080@code0.codespeak.net> Author: faassen Date: Tue Aug 1 18:54:45 2006 New Revision: 30861 Added: lxml/branch/lxml-xpathroot/ - copied from r30860, lxml/trunk/ Log: Create branch for xpath root experiment. From faassen at codespeak.net Tue Aug 1 18:57:07 2006 From: faassen at codespeak.net (faassen at codespeak.net) Date: Tue, 1 Aug 2006 18:57:07 +0200 (CEST) Subject: [Lxml-checkins] r30862 - in lxml/branch/lxml-xpathroot/src/lxml: . 
tests Message-ID: <20060801165707.7F0D910080@code0.codespeak.net> Author: faassen Date: Tue Aug 1 18:57:06 2006 New Revision: 30862 Modified: lxml/branch/lxml-xpathroot/src/lxml/etree.pyx lxml/branch/lxml-xpathroot/src/lxml/extensions.pxi lxml/branch/lxml-xpathroot/src/lxml/tests/test_xpathevaluator.py Log: Check in experiments with a special XPath Root object. The functionality appears to work, but it affects memory management, possibly because we don't check whether the document root node has a proxy in _private yet, so causes crashes. Modified: lxml/branch/lxml-xpathroot/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-xpathroot/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-xpathroot/src/lxml/etree.pyx Tue Aug 1 18:57:06 2006 @@ -1192,6 +1192,8 @@ element_class = _find_element_class(c_ns_href, c_node.name) elif c_node.type == tree.XML_COMMENT_NODE: element_class = _Comment + elif c_node.type == tree.XML_DOCUMENT_NODE: + element_class = _Root else: assert 0, "Unknown node type: %s" % c_node.type result = element_class() @@ -1201,6 +1203,26 @@ result._init() return result +# a special node only used in XPath representing the root of the tree +cdef class _Root(_Element): + def set(self, key, value): + pass + + property tag: + def __get__(self): + return None + + property attrib: + def __get__(self): + return {} + + property text: + def __get__(self): + return '' + + def __repr__(self): + return "" + cdef class _Comment(_Element): def set(self, key, value): pass @@ -1251,7 +1273,7 @@ def items(self): return [] - + cdef _Comment _commentFactory(_Document doc, xmlNode* c_node): cdef _Comment result result = getProxy(c_node) Modified: lxml/branch/lxml-xpathroot/src/lxml/extensions.pxi ============================================================================== --- lxml/branch/lxml-xpathroot/src/lxml/extensions.pxi (original) +++ lxml/branch/lxml-xpathroot/src/lxml/extensions.pxi Tue Aug 1 
18:57:06 2006 @@ -331,8 +331,9 @@ else: prefix = s value = (prefix, href) - elif c_node.type == tree.XML_DOCUMENT_NODE or \ - c_node.type == tree.XML_HTML_DOCUMENT_NODE or \ + elif c_node.type == tree.XML_DOCUMENT_NODE: + value = _elementFactory(doc, c_node) + elif c_node.type == tree.XML_HTML_DOCUMENT_NODE or \ c_node.type == tree.XML_XINCLUDE_START or \ c_node.type == tree.XML_XINCLUDE_END: continue Modified: lxml/branch/lxml-xpathroot/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/branch/lxml-xpathroot/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/branch/lxml-xpathroot/src/lxml/tests/test_xpathevaluator.py Tue Aug 1 18:57:06 2006 @@ -31,12 +31,19 @@ tree = self.parse('Foo') self.assertEquals('Foo', tree.xpath('string(/a/text())')) - + +## def test_xpath_document_root(self): +## tree = self.parse('') +## self.assertEquals([], +## tree.xpath('/')) + def test_xpath_document_root(self): - tree = self.parse('') - self.assertEquals([], - tree.xpath('/')) - + tree = self.parse('') + result = tree.xpath('/') + # XXX test not done yet, and causes crashing elsewhere.. 
+ print result + print result.getchildren() + def test_xpath_namespace(self): tree = self.parse('') self.assert_((None, "test") in tree.xpath('namespace::*')) @@ -377,7 +384,7 @@ def test_xpath_elementtree_error(self): self.assertRaises(ValueError, etree.XPath('*'), etree.ElementTree()) - + class ETreeETXPathClassTestCase(HelperTestCase): "Tests for the ETXPath class" def test_xpath_compile_ns(self): From scoder at codespeak.net Tue Aug 1 19:39:22 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 1 Aug 2006 19:39:22 +0200 (CEST) Subject: [Lxml-checkins] r30864 - lxml/branch/capi/src/lxml/tests Message-ID: <20060801173922.C4C2B1007F@code0.codespeak.net> Author: scoder Date: Tue Aug 1 19:39:21 2006 New Revision: 30864 Modified: lxml/branch/capi/src/lxml/tests/test_classlookup.py Log: test case for fallback in NS classes Modified: lxml/branch/capi/src/lxml/tests/test_classlookup.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_classlookup.py (original) +++ lxml/branch/capi/src/lxml/tests/test_classlookup.py Tue Aug 1 19:39:21 2006 @@ -30,9 +30,10 @@ etree = etree def tearDown(self): + etree.setDefaultElementClass() etree.setElementClassLookup() - ns = etree.Namespace("myNS") - ns.clear() + etree.Namespace("myNS").clear() + etree.Namespace("otherNS").clear() def test_namespace_lookup(self): class TestElement(etree.ElementBase): @@ -98,6 +99,32 @@ TestElement.FIND_ME) self.assertFalse(hasattr(root[0][1], 'FIND_ME')) + def test_custom_lookup_ns_fallback(self): + class TestElement1(etree.ElementBase): + FIND_ME = "custom" + + class TestElement2(etree.ElementBase): + FIND_ME = "nsclasses" + + class MyLookup(classlookup.CustomElementClassLookup): + def lookup(self, t, d, ns, name): + if name == 'c1': + return TestElement1 + + ns = etree.Namespace("otherNS") + ns[None] = TestElement2 + + lookup = classlookup.ElementNamespaceClassLookup( MyLookup() ) + 
etree.setElementClassLookup(lookup) + + root = etree.XML(xml_str) + self.assertFalse(hasattr(root, 'FIND_ME')) + self.assertEquals(root[0].FIND_ME, + TestElement1.FIND_ME) + self.assertFalse(hasattr(root[0][1], 'FIND_ME')) + self.assertEquals(root[0][-1].FIND_ME, + TestElement2.FIND_ME) + def test_parser_based_lookup(self): class TestElement(etree.ElementBase): FIND_ME = "parser_based" From scoder at codespeak.net Tue Aug 1 19:40:19 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 1 Aug 2006 19:40:19 +0200 (CEST) Subject: [Lxml-checkins] r30865 - lxml/branch/capi/src/lxml Message-ID: <20060801174019.6E0E61007F@code0.codespeak.net> Author: scoder Date: Tue Aug 1 19:40:18 2006 New Revision: 30865 Modified: lxml/branch/capi/src/lxml/python.pxd Log: some cleanup in python API function declarations Modified: lxml/branch/capi/src/lxml/python.pxd ============================================================================== --- lxml/branch/capi/src/lxml/python.pxd (original) +++ lxml/branch/capi/src/lxml/python.pxd Tue Aug 1 19:40:18 2006 @@ -32,11 +32,16 @@ cdef object PyBool_FromLong(long value) cdef object PyNumber_Int(object value) + cdef Py_ssize_t PyTuple_GET_SIZE(object t) + cdef object PyTuple_GET_ITEM(object o, Py_ssize_t pos) + cdef Py_ssize_t PyList_GET_SIZE(object l) + cdef object PyList_GET_ITEM(object l, Py_ssize_t index) cdef int PyList_Append(object l, object obj) except -1 cdef int PyList_Reverse(object l) except -1 cdef int PyList_Insert(object l, Py_ssize_t index, object o) except -1 - cdef object PyList_GET_ITEM(object l, Py_ssize_t index) + cdef object PyList_AsTuple(object o) + cdef int PyDict_SetItemString(object d, char* key, object value) except -1 cdef int PyDict_SetItem(object d, object key, object value) except -1 cdef PyObject* PyDict_GetItemString(object d, char* key) @@ -46,10 +51,8 @@ cdef object PyDict_Copy(object d) cdef Py_ssize_t PyDict_Size(object d) cdef int PyDict_Contains(object d, object key) - cdef object 
PyList_AsTuple(object o) cdef object PySequence_List(object o) cdef object PySequence_Tuple(object o) - cdef object PyTuple_GET_ITEM(object o, Py_ssize_t pos) cdef int PyDict_Check(object instance) cdef int PyList_Check(object instance) From scoder at codespeak.net Tue Aug 1 20:08:08 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 1 Aug 2006 20:08:08 +0200 (CEST) Subject: [Lxml-checkins] r30866 - in lxml/branch/capi: doc src/lxml src/lxml/tests Message-ID: <20060801180808.1972310080@code0.codespeak.net> Author: scoder Date: Tue Aug 1 20:08:06 2006 New Revision: 30866 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: ObjectPath, revised NS class lookup integration through standard fallback mechanism Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Tue Aug 1 20:08:06 2006 @@ -22,8 +22,10 @@ Note that `namespace specific classes`_ can override this default. If ``objectify`` is in use, it is therefore advisable to let other custom element -classes inherit from the ``ObjectifiedElement`` class to make sure that all -element classes provide the same API. +classes inherit from the ``ObjectifiedElement`` class (or a subclass) to make +sure that all element classes provide the same API. You can prevent the +lookup of namespace registered classes by passing False for the +``prefer_nsclasses`` keyword argument of the ``register()`` function. .. _`namespace specific classes`: namespace_extensions.html @@ -39,9 +41,16 @@ .. 
_`lxml.elements.classlookup`: elements.html -Since this API is meant for data-centered XML (as opposed to document XML with -mixed content), it might be worthwhile in this context to change the default -parser:: +To simulate the default behaviour of looking up namespace registered classes +first and then falling back to the ObjectifiedElement class, you can build a +lookup fallback chain like the following:: + + >>> lookup = etree.ElementNamespaceClassLookup( + ... objectify.ObjectifyElementClassLookup() ) + +Since the objectify API is meant for data-centered XML (as opposed to document +XML with mixed content), it might be worthwhile in this context to change the +default parser:: >>> etree.setDefaultParser( etree.XMLParser(remove_blank_text=True) ) @@ -381,7 +390,7 @@ None Note that you can change the name and namespace used for this attribute -through the ``setPytypeAttribute(tag)`` module function, in case your +through the ``setPytypeAttributeTag(tag)`` module function, in case your application ever needs to. There is also a utility function ``annotate()`` that recursively generates this attribute for the elements of a tree:: @@ -421,12 +430,6 @@ Defining additional data classes -------------------------------- -The objectify module support the standard `namespace classes API`_ of -lxml.etree. Note, however, that it is advisable to let custom element classes -inherit from ``ObjectifiedElement`` to inherit its API. - -.. _`namespace classes API`: namespace_extensions.html - Data classes can either inherit from ``ObjectifiedElement`` directly or from one of the specialised classes like ``NumberElement`` or ``BoolElement``. 
The numeric types require an initial call to ``self._setValueParser(function)`` to Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Aug 1 20:08:06 2006 @@ -877,28 +877,12 @@ cdef class ObjectifyElementClassLookup(ElementClassLookup): """Element class lookup method that uses the objectify classes. - - The constructor accepts a keyword argument 'default_to_nsclasses'. You can - set it to False to divert from the default behaviour of looking up - namespace registered classes before trying to determine the right - objectify type class. """ - cdef int _default_to_nsclasses - def __init__(self, default_to_nsclasses=True): + def __init__(self): self._lookup_function = _lookupElementClass - self._default_to_nsclasses = bool(default_to_nsclasses) cdef object _lookupElementClass(state, _Document doc, tree.xmlNode* c_node): cdef python.PyObject* dict_result - if state is None or \ - not isinstance(state, ObjectifyElementClassLookup) or \ - (state)._default_to_nsclasses: - # default to namespace specific classes - nsclass = cetree.lookupNamespaceElementClass(state, doc, c_node) - default = cetree.lookupDefaultElementClass(state, doc, c_node) - if nsclass is not default: - return nsclass - # if element has children => no data class if cetree.findChildForwards(c_node, 0): return ObjectifiedElement @@ -935,6 +919,111 @@ # default to string element class if type attribute is not exploitable return _StringElement + +################################################################################ +# ObjectPath + +cdef class ObjectPath: + cdef object _path + cdef object _path_str + cdef object _path_segments + cdef char* _path_cstr + def __init__(self, path): + if python._isString(path): + path = path.split('.') + self._path = tuple(path) + self._path_str = '.'.join(self._path) + self._path_segments = 
self._path_str + '\0\0' + _splitPathInplace(self._path_segments) + self._path_cstr = _cstr(self._path_segments) + + def find(self, *args): + """Follow the attribute path in the object structure and return the + target attribute value. + + If it it not found, either returns a default value (if one was passed + as second argument) or raise AttributeError. + """ + cdef _Element root + cdef tree.xmlNode* c_node + cdef tree.xmlNode* c_child + cdef char* c_path + cdef char* c_href + cdef Py_ssize_t arg_count + cdef int use_default + arg_count = python.PyTuple_GET_SIZE(args) + if arg_count == 2: + default = python.PyTuple_GET_ITEM(args, 1) + python.Py_INCREF(default) + use_default = 1 + elif arg_count == 1: + use_default = 0 + else: + raise TypeError, "invalid number of arguments: needs one or two" + + root = python.PyTuple_GET_ITEM(args, 0) + python.Py_INCREF(root) + return _findObjectPath(root, self._path_cstr, default, use_default) + + def __call__(self, *args): + return self.find(*args) + + +def getFieldFromPath(_Element root not None, path): + path = path + '\0\0' + _splitPathInplace(path) + return _findObjectPath(root, _cstr(path), None, 0) + +def getOptionalFieldFromPath(_Element root not None, path, default): + path = path + '\0\0' + _splitPathInplace(path) + return _findObjectPath(root, _cstr(path), default, 1) + +def hasFieldPath(_Element root not None, path): + path = path + '\0\0' + _splitPathInplace(path) + return _findObjectPath(root, _cstr(path), None, 1) is not None + +cdef void _splitPathInplace(path): + cdef char* s + s = _cstr(path) + while s[0] != c'\0': + if s[0] == c'.': + s[0] = c'\0' + s = s + 1 + +cdef _findObjectPath(_Element root, char* c_path_segments, + default_value, int use_default): + cdef tree.xmlNode* c_node + cdef tree.xmlNode* c_child + cdef char* c_href + cdef char* c_path + c_node = root._c_node + c_href = tree._getNs(c_node) + c_path = c_path_segments + if cetree.tagMatches(c_node, c_href, c_path): + c_path = c_path + 
cstd.strlen(c_path) + 1 + while c_path[0] != c'\0': + c_child = cetree.findChildForwards(c_node, 0) + while c_child is not NULL and \ + not cetree.tagMatches(c_child, c_href, c_path): + c_child = cetree.nextElement(c_child) + c_node = c_child + if c_node is NULL: + break + c_path = c_path + cstd.strlen(c_path) + 1 + if c_node is not NULL and c_path[0] == c'\0': + return cetree.elementFactory(root._doc, c_node) + + if use_default: + return default_value + else: + raise AttributeError, "no such attribute: %s" % c_path + + +################################################################################ +# Type annotations + def annotate(element_or_tree, ignore_old=True): """Recursively annotates the elements of an XML tree with 'pytype' attributes. @@ -1017,13 +1106,23 @@ _cstr(pytype.name)) tree.END_FOR_EACH_ELEMENT_FROM(c_node) + ################################################################################ # Module setup -def register(): +def register(prefer_nsclasses=True): + """Globally register the objectify element class lookup mechanism. + + By default, namespace specific element classes override this lookup. + Passing False for the ``prefer_nsclasses`` keyword argument will prevent + the namespace lookup. 
+ """ #etree.setDefaultElementClass(ObjectifiedElement) - cetree.setElementClassLookupFunction(_lookupElementClass, None) + lookup = ObjectifyElementClassLookup() + if prefer_nsclasses: + lookup = etree.ElementNamespaceClassLookup(lookup) + etree.setElementClassLookup(lookup) def unregister(): #etree.setDefaultElementClass() - cetree.setElementClassLookupFunction(NULL, None) + etree.setElementClassLookup() Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Tue Aug 1 20:08:06 2006 @@ -31,11 +31,11 @@ def setUp(self): objectify.register() - ns = self.etree.Namespace("otherNs") + ns = self.etree.Namespace("otherNS") ns[None] = self.etree.ElementBase def tearDown(self): - self.etree.Namespace("otherNs").clear() + self.etree.Namespace("otherNS").clear() objectify.setPytypeAttributeTag() objectify.unregister() @@ -78,6 +78,12 @@ self.assertEquals([root.c1.c2[0], root.c1.c2[1]], list(iter((root.c1.c2)))) + def test_class_lookup(self): + root = self.etree.XML(xml_str) + self.assert_(isinstance(root.c1.c2, objectify.ObjectifiedElement)) + self.assertFalse(isinstance(getattr(root.c1, "{otherNS}c2"), + objectify.ObjectifiedElement)) + def test_dir(self): root = self.etree.XML(xml_str) dir_c1 = dir(objectify.ObjectifiedElement) + ['c1'] @@ -377,6 +383,18 @@ for pytype in orig_types: pytype.register() + def test_object_path(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( "root.c1.c2" ) + self.assertEquals(root.c1.c2, path.find(root)) + self.assertEquals(root.c1.c2, path(root)) + + def test_object_path_fail(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( "root.c1.c99" ) + self.assertRaises(AttributeError, path, root) + self.assertEquals(None, path(root, None)) + def test_suite(): suite = unittest.TestSuite() From scoder at 
codespeak.net Tue Aug 1 20:24:30 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 1 Aug 2006 20:24:30 +0200 (CEST) Subject: [Lxml-checkins] r30867 - in lxml/branch/capi: doc src/lxml Message-ID: <20060801182430.53DC810080@code0.codespeak.net> Author: scoder Date: Tue Aug 1 20:24:28 2006 New Revision: 30867 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: abbreviate XSI/pytype namespaces to xsi: and py: in string output Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Tue Aug 1 20:24:28 2006 @@ -281,6 +281,7 @@ b = True [BoolElement] c = 'what?' [StringElement] d = None [NoneElement] + * xsi:nil = 'true' You can freely switch between different types for the same child:: @@ -405,9 +406,9 @@ >>> print objectify.dump(root) root = None [ObjectifiedElement] a = 'test' [StringElement] - * {http://codespeak.net/lxml/objectify/pytype}pytype = 'str' + * py:pytype = 'str' b = 5 [IntElement] - * {http://codespeak.net/lxml/objectify/pytype}pytype = 'int' + * py:pytype = 'int' A second way of specifying data type information uses XML Schema types as element annotations. Objectify knows those that can be mapped to normal @@ -423,8 +424,11 @@ >>> print objectify.dump(root) root = None [ObjectifiedElement] d = 5.0 [FloatElement] + * xsi:type = 'double' l = 5L [LongElement] + * xsi:type = 'long' s = '5' [StringElement] + * xsi:type = 'string' Defining additional data classes @@ -534,6 +538,7 @@ b = True [BoolElement] c = 'what?' 
[StringElement] d = None [NoneElement] + * xsi:nil = 'true' This behaviour can be switched off in the same way:: Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Aug 1 20:24:28 2006 @@ -857,10 +857,10 @@ value = None result = "%s%s = %r [%s]\n" % (indentstr, element.tag, value, type(element).__name__) - xsi_ns = "{%s}" % XML_SCHEMA_INSTANCE_NS + xsi_ns = "{%s}" % XML_SCHEMA_INSTANCE_NS + pytype_ns = "{%s}" % _PYTYPE_NAMESPACE for name, value in element.items(): - if name.startswith(xsi_ns): - continue + name = name.replace(xsi_ns, 'xsi:').replace(pytype_ns, 'py:') result = result + "%s * %s = %r\n" % (indentstr, name, value) indent = indent + 1 From scoder at codespeak.net Tue Aug 1 22:27:18 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 1 Aug 2006 22:27:18 +0200 (CEST) Subject: [Lxml-checkins] r30868 - lxml/branch/capi/doc Message-ID: <20060801202718.03DF310082@code0.codespeak.net> Author: scoder Date: Tue Aug 1 22:27:17 2006 New Revision: 30868 Modified: lxml/branch/capi/doc/elements.txt Log: doc update: describe fallback mechanism of Namespace class lookup Modified: lxml/branch/capi/doc/elements.txt ============================================================================== --- lxml/branch/capi/doc/elements.txt (original) +++ lxml/branch/capi/doc/elements.txt Tue Aug 1 22:27:17 2006 @@ -42,6 +42,11 @@ >>> lookup = ElementNamespaceClassLookup() >>> etree.setElementClassLookup(lookup) +Note that this class supports a fallback mechanism that is used in the case +where the namespace is not found or no class was registered for the element +name. Normally, the default class lookup is used here. To change it, pass +the desired fallback lookup scheme to the constructor. + Default class lookup .................... 
From scoder at codespeak.net Tue Aug 1 23:04:24 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 1 Aug 2006 23:04:24 +0200 (CEST) Subject: [Lxml-checkins] r30869 - lxml/branch/capi/src/lxml Message-ID: <20060801210424.3783B10082@code0.codespeak.net> Author: scoder Date: Tue Aug 1 23:04:23 2006 New Revision: 30869 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: cleanup and fixes in ObjectPath and related utility functions Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Aug 1 23:04:23 2006 @@ -925,62 +925,53 @@ cdef class ObjectPath: cdef object _path - cdef object _path_str - cdef object _path_segments cdef char* _path_cstr def __init__(self, path): - if python._isString(path): - path = path.split('.') - self._path = tuple(path) - self._path_str = '.'.join(self._path) - self._path_segments = self._path_str + '\0\0' - _splitPathInplace(self._path_segments) - self._path_cstr = _cstr(self._path_segments) + if not python._isString(path): + path = '.'.join(path) + self._path = cetree.utf8(path) + '\0\0' + _splitPathInplace(self._path) + self._path_cstr = _cstr(self._path) - def find(self, *args): + def find(self, _Element root not None, *default): """Follow the attribute path in the object structure and return the target attribute value. If it it not found, either returns a default value (if one was passed - as second argument) or raise AttributeError. + as second argument) or raises AttributeError. 
""" - cdef _Element root - cdef tree.xmlNode* c_node - cdef tree.xmlNode* c_child - cdef char* c_path - cdef char* c_href - cdef Py_ssize_t arg_count - cdef int use_default - arg_count = python.PyTuple_GET_SIZE(args) - if arg_count == 2: - default = python.PyTuple_GET_ITEM(args, 1) + cdef Py_ssize_t use_default + use_default = python.PyTuple_GET_SIZE(default) + if use_default == 1: + default = python.PyTuple_GET_ITEM(default, 0) python.Py_INCREF(default) use_default = 1 - elif arg_count == 1: - use_default = 0 - else: + elif use_default > 1: raise TypeError, "invalid number of arguments: needs one or two" - - root = python.PyTuple_GET_ITEM(args, 0) - python.Py_INCREF(root) return _findObjectPath(root, self._path_cstr, default, use_default) def __call__(self, *args): return self.find(*args) -def getFieldFromPath(_Element root not None, path): - path = path + '\0\0' - _splitPathInplace(path) - return _findObjectPath(root, _cstr(path), None, 0) - -def getOptionalFieldFromPath(_Element root not None, path, default): - path = path + '\0\0' +def getFieldFromPath(_Element root not None, path, *default): + cdef Py_ssize_t use_default + if not python._isString(path): + path = '.'.join(path) + path = cetree.utf8(path) + '\0\0' _splitPathInplace(path) - return _findObjectPath(root, _cstr(path), default, 1) + use_default = python.PyTuple_GET_SIZE(default) + if use_default == 1: + default = python.PyTuple_GET_ITEM(default, 1) + python.Py_INCREF(default) + elif use_default > 1: + raise TypeError, "invalid number of arguments: needs one or two" + return _findObjectPath(root, _cstr(path), default, use_default) def hasFieldPath(_Element root not None, path): - path = path + '\0\0' + if not python._isString(path): + path = '.'.join(path) + path = cetree.utf8(path) + '\0\0' _splitPathInplace(path) return _findObjectPath(root, _cstr(path), None, 1) is not None From scoder at codespeak.net Tue Aug 1 23:19:14 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 1 Aug 
2006 23:19:14 +0200 (CEST) Subject: [Lxml-checkins] r30870 - lxml/branch/capi/src/lxml Message-ID: <20060801211914.29ABC10082@code0.codespeak.net> Author: scoder Date: Tue Aug 1 23:19:13 2006 New Revision: 30870 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: small C-ification: use loop instead of function Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Aug 1 23:19:13 2006 @@ -1002,7 +1002,9 @@ c_node = c_child if c_node is NULL: break - c_path = c_path + cstd.strlen(c_path) + 1 + while c_path[0] != c'\0': + c_path = c_path + 1 + c_path = c_path + 1 if c_node is not NULL and c_path[0] == c'\0': return cetree.elementFactory(root._doc, c_node) From scoder at codespeak.net Wed Aug 2 07:31:47 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 2 Aug 2006 07:31:47 +0200 (CEST) Subject: [Lxml-checkins] r30871 - lxml/trunk/doc Message-ID: <20060802053147.4B40D10081@code0.codespeak.net> Author: scoder Date: Wed Aug 2 07:31:45 2006 New Revision: 30871 Modified: lxml/trunk/doc/resolvers.txt Log: fixed resolver doctests: must not rely on the execution order of registered resolvers Modified: lxml/trunk/doc/resolvers.txt ============================================================================== --- lxml/trunk/doc/resolvers.txt (original) +++ lxml/trunk/doc/resolvers.txt Wed Aug 2 07:31:45 2006 @@ -70,11 +70,9 @@ ... ... ''' % prefix ... def resolve(self, url, pubid, context): - ... print "Resolving url %s as prefix %s ..." % (url, self.prefix), ... if url.startswith(self.prefix): - ... print "done" + ... print "Resolved url %s as prefix %s" % (url, self.prefix) ... return self.resolve_string(self.result_xml, context) - ... 
print "failed" We demonstrate this in XSLT and use the following stylesheet as an example:: @@ -126,7 +124,7 @@ document, everything works fine:: >>> transform = etree.XSLT(honk_doc) - Resolving url honk:test as prefix honk ... done + Resolved url honk:test as prefix honk Running the transform accesses the same parser context again, but since it now needs to resolve the ``hoi`` URI in the call to the document function, its @@ -151,8 +149,7 @@ >>> honk_parser.resolvers.add( PrefixResolver("hoi") ) >>> result = transform(honk_doc) - Resolving url hoi:test as prefix honk ... failed - Resolving url hoi:test as prefix hoi ... done + Resolved url hoi:test as prefix hoi >>> print str(result), hoi-TEST @@ -163,8 +160,7 @@ is resolved from within the stylesheet context:: >>> result = transform(normal_doc) - Resolving url hoi:test as prefix honk ... failed - Resolving url hoi:test as prefix hoi ... done + Resolved url hoi:test as prefix hoi >>> print str(result), hoi-TEST @@ -190,14 +186,13 @@ operations:: >>> transform = etree.XSLT(honk_doc) - Resolving url honk:test as prefix honk ... done + Resolved url honk:test as prefix honk >>> result = transform(normal_doc) - Resolving url hoi:test as prefix honk ... failed - Resolving url hoi:test as prefix hoi ... done + Resolved url hoi:test as prefix hoi >>> ac = etree.XSLTAccessControl(read_network=False) >>> transform = etree.XSLT(honk_doc, access_control=ac) - Resolving url honk:test as prefix honk ... done + Resolved url honk:test as prefix honk >>> result = transform(normal_doc) Traceback (most recent call last): [...] From scoder at codespeak.net Wed Aug 2 12:14:19 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 2 Aug 2006 12:14:19 +0200 (CEST) Subject: [Lxml-checkins] r30885 - in lxml/branch/capi/src/lxml: . 
tests Message-ID: <20060802101419.7B30810082@code0.codespeak.net> Author: scoder Date: Wed Aug 2 12:14:17 2006 New Revision: 30885 Modified: lxml/branch/capi/src/lxml/etree.pyx lxml/branch/capi/src/lxml/tests/test_etree.py Log: let _Comment and _PI raise TypeError on mutation Modified: lxml/branch/capi/src/lxml/etree.pyx ============================================================================== --- lxml/branch/capi/src/lxml/etree.pyx (original) +++ lxml/branch/capi/src/lxml/etree.pyx Wed Aug 2 12:14:17 2006 @@ -1164,12 +1164,24 @@ return result cdef class __ContentOnlyElement(_Element): + cdef int _raiseImmutable(self) except -1: + raise TypeError, "this element does not have children or attributes" + def set(self, key, value): - pass - - def append(self, _Element element): - pass - + self._raiseImmutable() + + def append(self, value): + self._raiseImmutable() + + def insert(self, index, value): + self._raiseImmutable() + + def __setitem__(self, index, value): + self._raiseImmutable() + + def __setslice__(self, start, end, value): + self._raiseImmutable() + property attrib: def __get__(self): return {} Modified: lxml/branch/capi/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_etree.py (original) +++ lxml/branch/capi/src/lxml/tests/test_etree.py Wed Aug 2 12:14:17 2006 @@ -371,6 +371,18 @@ '', tostring(a)) + # does not raise an exception in ElementTree + def test_comment_immutable(self): + Element = self.etree.Element + Comment = self.etree.Comment + + c = Comment() + el = Element('myel') + + self.assertRaises(TypeError, c.append, el) + self.assertRaises(TypeError, c.insert, 0, el) + self.assertRaises(TypeError, c.set, "myattr", "test") + # test weird dictionary interaction leading to segfault previously def test_weird_dict_interaction(self): root = self.etree.Element('root') From scoder at codespeak.net Wed Aug 2 15:58:58 2006 From: scoder at codespeak.net 
(scoder at codespeak.net) Date: Wed, 2 Aug 2006 15:58:58 +0200 (CEST) Subject: [Lxml-checkins] r30897 - in lxml/branch/capi/src/lxml: . tests Message-ID: <20060802135858.DE6AF10089@code0.codespeak.net> Author: scoder Date: Wed Aug 2 15:58:57 2006 New Revision: 30897 Modified: lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: rewrite of ObjectPath/_findObjectPath to support {namespaces} Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Wed Aug 2 15:58:57 2006 @@ -15,6 +15,8 @@ cdef object SubElement SubElement = etree.SubElement +cdef object re +import re cdef object __builtin__ import __builtin__ cdef object int @@ -927,10 +929,7 @@ cdef object _path cdef char* _path_cstr def __init__(self, path): - if not python._isString(path): - path = '.'.join(path) - self._path = cetree.utf8(path) + '\0\0' - _splitPathInplace(self._path) + self._path = _parseObjectPath(path) self._path_cstr = _cstr(self._path) def find(self, _Element root not None, *default): @@ -953,60 +952,53 @@ def __call__(self, *args): return self.find(*args) +cdef object __SPLIT_PATH +__SPLIT_PATH = re.compile(r"(\.?)(?:\{([^}]*)\})?([^.]+)").findall -def getFieldFromPath(_Element root not None, path, *default): - cdef Py_ssize_t use_default +cdef _parseObjectPath(path): + new_path = [] if not python._isString(path): - path = '.'.join(path) - path = cetree.utf8(path) + '\0\0' - _splitPathInplace(path) - use_default = python.PyTuple_GET_SIZE(default) - if use_default == 1: - default = python.PyTuple_GET_ITEM(default, 1) - python.Py_INCREF(default) - elif use_default > 1: - raise TypeError, "invalid number of arguments: needs one or two" - return _findObjectPath(root, _cstr(path), default, use_default) - -def hasFieldPath(_Element root not None, path): - if not python._isString(path): - 
path = '.'.join(path) - path = cetree.utf8(path) + '\0\0' - _splitPathInplace(path) - return _findObjectPath(root, _cstr(path), None, 1) is not None - -cdef void _splitPathInplace(path): - cdef char* s - s = _cstr(path) - while s[0] != c'\0': - if s[0] == c'.': - s[0] = c'\0' - s = s + 1 + for item in path: + ns, name = cetree.getNsTag(item) + if ns is None: + entry = "\0%s\0" % name + else: + entry = "%s\0%s\0" % (ns, name) + python.PyList_Append(new_path, entry) + else: + for dot, ns, name in __SPLIT_PATH(cetree.utf8(path)): + if _cstr(dot)[0] != c'.' and python.PyList_GET_SIZE(new_path) > 0: + raise ValueError, "invalid path" + python.PyList_Append(new_path, "%s\0%s\0" % (ns, name)) + python.PyList_Append(new_path, "\0\0") + return ''.join(new_path) cdef _findObjectPath(_Element root, char* c_path_segments, default_value, int use_default): cdef tree.xmlNode* c_node - cdef tree.xmlNode* c_child cdef char* c_href cdef char* c_path c_node = root._c_node - c_href = tree._getNs(c_node) c_path = c_path_segments + if c_path[0] != c'\0': + c_href = c_path + c_path = c_path + cstd.strlen(c_href) + else: + c_href = tree._getNs(c_node) + c_path = c_path + 1 if cetree.tagMatches(c_node, c_href, c_path): - c_path = c_path + cstd.strlen(c_path) + 1 - while c_path[0] != c'\0': - c_child = cetree.findChildForwards(c_node, 0) - while c_child is not NULL and \ - not cetree.tagMatches(c_child, c_href, c_path): - c_child = cetree.nextElement(c_child) - c_node = c_child - if c_node is NULL: - break - while c_path[0] != c'\0': - c_path = c_path + 1 + while c_node is not NULL: + c_path = c_path + cstd.strlen(c_path) + 1 + if c_path[0] != c'\0': + c_href = c_path + c_path = c_path + cstd.strlen(c_href) + elif c_path[1] == c'\0': + return cetree.elementFactory(root._doc, c_node) c_path = c_path + 1 - if c_node is not NULL and c_path[0] == c'\0': - return cetree.elementFactory(root._doc, c_node) + c_node = cetree.findChildForwards(c_node, 0) + while c_node is not NULL and \ + not 
cetree.tagMatches(c_node, c_href, c_path): + c_node = cetree.nextElement(c_node) if use_default: return default_value Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Wed Aug 2 15:58:57 2006 @@ -389,6 +389,19 @@ self.assertEquals(root.c1.c2, path.find(root)) self.assertEquals(root.c1.c2, path(root)) + def test_object_path_ns(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( "{objectified}root.c1.c2" ) + self.assertEquals(root.c1.c2, path.find(root)) + path = objectify.ObjectPath( "{objectified}root.{objectified}c1.c2" ) + self.assertEquals(root.c1.c2, path.find(root)) + path = objectify.ObjectPath( "root.{objectified}c1.{objectified}c2" ) + self.assertEquals(root.c1.c2, path.find(root)) + path = objectify.ObjectPath( "root.c1.{objectified}c2" ) + self.assertEquals(root.c1.c2, path.find(root)) + path = objectify.ObjectPath( "root.c1.{otherNS}c2" ) + self.assertEquals(getattr(root.c1, '{otherNS}c2'), path.find(root)) + def test_object_path_fail(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath( "root.c1.c99" ) From scoder at codespeak.net Wed Aug 2 16:04:47 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 2 Aug 2006 16:04:47 +0200 (CEST) Subject: [Lxml-checkins] r30900 - lxml/branch/capi/src/lxml Message-ID: <20060802140447.1EC8A10089@code0.codespeak.net> Author: scoder Date: Wed Aug 2 16:04:46 2006 New Revision: 30900 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: avoid calling strlen() in inner loop Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Wed Aug 2 16:04:46 2006 @@ -982,16 +982,20 @@ c_path = c_path_segments 
if c_path[0] != c'\0': c_href = c_path - c_path = c_path + cstd.strlen(c_href) + while c_path[0] != c'\0': + c_path = c_path + 1 else: c_href = tree._getNs(c_node) c_path = c_path + 1 if cetree.tagMatches(c_node, c_href, c_path): while c_node is not NULL: - c_path = c_path + cstd.strlen(c_path) + 1 + while c_path[0] != c'\0': + c_path = c_path + 1 + c_path = c_path + 1 if c_path[0] != c'\0': c_href = c_path - c_path = c_path + cstd.strlen(c_href) + while c_path[0] != c'\0': + c_path = c_path + 1 elif c_path[1] == c'\0': return cetree.elementFactory(root._doc, c_node) c_path = c_path + 1 From scoder at codespeak.net Wed Aug 2 18:41:44 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 2 Aug 2006 18:41:44 +0200 (CEST) Subject: [Lxml-checkins] r30912 - in lxml/branch/capi/src/lxml: . tests Message-ID: <20060802164144.82DBD10087@code0.codespeak.net> Author: scoder Date: Wed Aug 2 18:41:42 2006 New Revision: 30912 Modified: lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: ObjectifiedElement.descendentpaths() to return a list of all attribute access paths for the entire subtree Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Wed Aug 2 18:41:42 2006 @@ -353,6 +353,12 @@ else: return default + def descendentpaths(self, prefix=None): + """Returns a list of object path expressions for all descendents. + """ + if prefix is not None and not python._isString(prefix): + prefix = '.'.join(prefix) + return _buildDescendentPaths(self._c_node, prefix) cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node, char* href, char* name, @@ -1009,6 +1015,48 @@ else: raise AttributeError, "no such attribute: %s" % c_path +cdef _buildDescendentPaths(tree.xmlNode* c_node, prefix_string): + """Returns a list of all descendent paths. 
+ """ + tag = cetree.namespacedName(c_node) + if prefix_string: + if not prefix_string.endswith('.'): + prefix_string = prefix_string + '.' + prefix_string = prefix_string + tag + else: + prefix_string = tag + path = [prefix_string] + path_list = [] + _recursiveBuildDescendentPaths(c_node, path, path_list) + return path_list + +cdef _recursiveBuildDescendentPaths(tree.xmlNode* c_node, path, path_list): + """Fills the list 'path_list' with all descendent paths, initial prefix + being in the list 'path'. + """ + cdef python.PyObject* dict_result + cdef tree.xmlNode* c_child + cdef char* c_href + python.PyList_Append(path_list, '.'.join(path)) + tags = {} + c_href = tree._getNs(c_node) + c_child = cetree.findChildForwards(c_node, 0) + while c_child is not NULL: + if c_href is tree._getNs(c_child): + tag = c_child.name + else: + tag = cetree.namespacedName(c_child) + dict_result = python.PyDict_GetItem(tags, tag) + if dict_result is NULL: + count = 0 + else: + count = (dict_result) + 1 + tag = tag + '[%d]' % count + python.PyDict_SetItem(tags, tag, count) + python.PyList_Append(path, tag) + _recursiveBuildDescendentPaths(c_child, path, path_list) + del path[-1] + c_child = cetree.nextElement(c_child) ################################################################################ # Type annotations Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Wed Aug 2 18:41:42 2006 @@ -408,6 +408,28 @@ self.assertRaises(AttributeError, path, root) self.assertEquals(None, path(root, None)) + def test_descendent_paths(self): + root = self.etree.XML(xml_str) + self.assertEquals( + ['{objectified}root', '{objectified}root.c1', + '{objectified}root.c1.c2', '{objectified}root.c1.c2[1]', + '{objectified}root.c1.{otherNS}c2'], + root.descendentpaths()) + + def 
test_descendent_paths_child(self): + root = self.etree.XML(xml_str) + self.assertEquals( + ['{objectified}c1', '{objectified}c1.c2', '{objectified}c1.c2[1]', + '{objectified}c1.{otherNS}c2'], + root.c1.descendentpaths()) + + def test_descendent_paths_prefix(self): + root = self.etree.XML(xml_str) + self.assertEquals( + ['root.{objectified}c1', 'root.{objectified}c1.c2', + 'root.{objectified}c1.c2[1]', 'root.{objectified}c1.{otherNS}c2'], + root.c1.descendentpaths('root')) + def test_suite(): suite = unittest.TestSuite() From scoder at codespeak.net Wed Aug 2 20:20:12 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 2 Aug 2006 20:20:12 +0200 (CEST) Subject: [Lxml-checkins] r30915 - lxml/branch/capi/src/lxml Message-ID: <20060802182012.6EDF310083@code0.codespeak.net> Author: scoder Date: Wed Aug 2 20:20:11 2006 New Revision: 30915 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: doc strings Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Wed Aug 2 20:20:11 2006 @@ -962,6 +962,8 @@ __SPLIT_PATH = re.compile(r"(\.?)(?:\{([^}]*)\})?([^.]+)").findall cdef _parseObjectPath(path): + """Parse object path into a 'hrefOnameOhrefOnameOOO' char sequence. + """ new_path = [] if not python._isString(path): for item in path: @@ -981,6 +983,8 @@ cdef _findObjectPath(_Element root, char* c_path_segments, default_value, int use_default): + """Follow the path to find the target element. 
+ """ cdef tree.xmlNode* c_node cdef char* c_href cdef char* c_path @@ -1003,6 +1007,7 @@ while c_path[0] != c'\0': c_path = c_path + 1 elif c_path[1] == c'\0': + # '\0\0' found, all done return cetree.elementFactory(root._doc, c_node) c_path = c_path + 1 c_node = cetree.findChildForwards(c_node, 0) From scoder at codespeak.net Thu Aug 3 11:32:39 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 3 Aug 2006 11:32:39 +0200 (CEST) Subject: [Lxml-checkins] r30925 - in lxml/branch/capi/src/lxml: . tests Message-ID: <20060803093239.679A91007C@code0.codespeak.net> Author: scoder Date: Thu Aug 3 11:32:37 2006 New Revision: 30925 Modified: lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/python.pxd lxml/branch/capi/src/lxml/tests/test_objectify.py Log: support for indexes in ObjectPath expressions Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Aug 3 11:32:37 2006 @@ -932,10 +932,14 @@ # ObjectPath cdef class ObjectPath: + cdef object _indexes cdef object _path cdef char* _path_cstr def __init__(self, path): - self._path = _parseObjectPath(path) + if python._isString(path): + self._path, self._indexes = _parseObjectPathString(path) + else: + self._path, self._indexes = _parseObjectPathList(path) self._path_cstr = _cstr(self._path) def find(self, _Element root not None, *default): @@ -953,41 +957,95 @@ use_default = 1 elif use_default > 1: raise TypeError, "invalid number of arguments: needs one or two" - return _findObjectPath(root, self._path_cstr, default, use_default) + return _findObjectPath(root, self._path_cstr, self._indexes, + default, use_default) def __call__(self, *args): return self.find(*args) cdef object __SPLIT_PATH -__SPLIT_PATH = re.compile(r"(\.?)(?:\{([^}]*)\})?([^.]+)").findall +__SPLIT_PATH = re.compile( + 
r"(\.?)(?:\{([^}]*)\})?([^.\[]+)\w*(?:\[\w*([0-9]+)\w*\])?").findall -cdef _parseObjectPath(path): - """Parse object path into a 'hrefOnameOhrefOnameOOO' char sequence. +cdef _parseObjectPathString(path): + """Parse object path string into a 'hrefOnameOhrefOnameOOO' string and an + index list. The index list is None if no index was used in the path. """ + cdef int has_index new_path = [] - if not python._isString(path): - for item in path: - ns, name = cetree.getNsTag(item) - if ns is None: - entry = "\0%s\0" % name - else: - entry = "%s\0%s\0" % (ns, name) - python.PyList_Append(new_path, entry) - else: - for dot, ns, name in __SPLIT_PATH(cetree.utf8(path)): - if _cstr(dot)[0] != c'.' and python.PyList_GET_SIZE(new_path) > 0: - raise ValueError, "invalid path" - python.PyList_Append(new_path, "%s\0%s\0" % (ns, name)) + indexes = [] + has_index = 0 + for dot, ns, name, index in __SPLIT_PATH(cetree.utf8(path)): + if index is not None and _cstr(index)[0] == c'\0': + index = None + if python.PyList_GET_SIZE(new_path) == 0: + if index is not None: + raise ValueError, "index not allowed on root node" + elif _cstr(dot)[0] != c'.': + raise ValueError, "invalid path" + python.PyList_Append(new_path, "%s\0%s\0" % (ns, name)) + if index is not None: + index = python.PyNumber_Int(index) + if index < 0: + raise ValueError, "index must be >= 0" + has_index = 1 + python.PyList_Append(indexes, index) + python.PyList_Append(new_path, "\0\0") + if not has_index: + indexes = None + return ''.join(new_path), indexes + +cdef _parseObjectPathList(path): + """Parse object path sequence into a 'hrefOnameOhrefOnameOOO' string and + an index list. The index list is None if no index was used in the path. 
+ """ + cdef char* index_pos + cdef char* index_end + cdef char* c_name + cdef int has_index + new_path = [] + indexes = [] + has_index = 0 + for item in path: + ns, name = cetree.getNsTag(item.strip()) + c_name = _cstr(name) + index_pos = cstd.strchr(c_name, c'[') + if index_pos is NULL: + index = None + else: + if python.PyList_GET_SIZE(new_path) == 0: + raise ValueError, "index not allowed on root node" + name = python.PyString_FromStringAndSize( + c_name, (index_pos - c_name)) + index_pos = index_pos + 1 + index_end = cstd.strchr(index_pos, c']') + if index_end is NULL: + raise ValueError, "index must be enclosed in []" + index = python.PyNumber_Int( + python.PyString_FromStringAndSize( + index_pos, (index_end - index_pos))) + if index < 0: + raise ValueError, "index must be >= 0" + has_index = 1 + python.PyList_Append(indexes, index) + if ns is None: + entry = "\0%s\0" % name + else: + entry = "%s\0%s\0" % (ns, name) + python.PyList_Append(new_path, entry) python.PyList_Append(new_path, "\0\0") - return ''.join(new_path) + if not has_index: + indexes = None + return ''.join(new_path), indexes cdef _findObjectPath(_Element root, char* c_path_segments, - default_value, int use_default): + index_list, default_value, int use_default): """Follow the path to find the target element. 
""" cdef tree.xmlNode* c_node cdef char* c_href cdef char* c_path + cdef Py_ssize_t c_index_pos, c_index c_node = root._c_node c_path = c_path_segments if c_path[0] != c'\0': @@ -997,6 +1055,8 @@ else: c_href = tree._getNs(c_node) c_path = c_path + 1 + if index_list is not None: + c_index_pos = 1 if cetree.tagMatches(c_node, c_href, c_path): while c_node is not NULL: while c_path[0] != c'\0': @@ -1010,9 +1070,22 @@ # '\0\0' found, all done return cetree.elementFactory(root._doc, c_node) c_path = c_path + 1 + if index_list is not None: + index = python.PyList_GET_ITEM(index_list, c_index_pos) + python.Py_INCREF(index) + c_index_pos = c_index_pos + 1 + c_index = python.PyInt_AsSsize_t(index) + else: + c_index = 0 + c_node = cetree.findChildForwards(c_node, 0) - while c_node is not NULL and \ - not cetree.tagMatches(c_node, c_href, c_path): + while c_node is not NULL: + while c_node is not NULL and \ + not cetree.tagMatches(c_node, c_href, c_path): + c_node = cetree.nextElement(c_node) + c_index = c_index - 1 + if c_index < 0: + break c_node = cetree.nextElement(c_node) if use_default: Modified: lxml/branch/capi/src/lxml/python.pxd ============================================================================== --- lxml/branch/capi/src/lxml/python.pxd (original) +++ lxml/branch/capi/src/lxml/python.pxd Thu Aug 3 11:32:37 2006 @@ -29,8 +29,10 @@ cdef object PyString_FromString(char* s) cdef object PyString_FromFormat(char* format, ...) 
cdef Py_ssize_t PyString_GET_SIZE(object s) + cdef object PyBool_FromLong(long value) cdef object PyNumber_Int(object value) + cdef Py_ssize_t PyInt_AsSsize_t(object value) cdef Py_ssize_t PyTuple_GET_SIZE(object t) cdef object PyTuple_GET_ITEM(object o, Py_ssize_t pos) Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Thu Aug 3 11:32:37 2006 @@ -389,6 +389,38 @@ self.assertEquals(root.c1.c2, path.find(root)) self.assertEquals(root.c1.c2, path(root)) + def test_object_path_list(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( ['root', 'c1', 'c2'] ) + self.assertEquals(root.c1.c2, path.find(root)) + self.assertEquals(root.c1.c2, path(root)) + + def test_object_path_index(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( "root.c1[0].c2[0]" ) + self.assertEquals(root.c1.c2, path.find(root)) + self.assertEquals(root.c1.c2, path(root)) + + path = objectify.ObjectPath( "root.c1[0].c2[1]" ) + self.assertEquals(root.c1.c2[1], path.find(root)) + self.assertEquals(root.c1.c2[1], path(root)) + + def test_object_path_index_list(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( ['root', 'c1[0]', 'c2[0]'] ) + self.assertEquals(root.c1.c2, path.find(root)) + self.assertEquals(root.c1.c2, path(root)) + + path = objectify.ObjectPath( ['root', 'c1[0]', 'c2[1]'] ) + self.assertEquals(root.c1.c2[1], path.find(root)) + self.assertEquals(root.c1.c2[1], path(root)) + + def test_object_path_index_fail(self): + self.assertRaises(ValueError, objectify.ObjectPath, + "root.c1[0].c2[-1]") + self.assertRaises(ValueError, objectify.ObjectPath, + ['root', 'c1[0]', 'c2[-1]']) + def test_object_path_ns(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath( "{objectified}root.c1.c2" ) @@ -402,6 +434,19 @@ path = 
objectify.ObjectPath( "root.c1.{otherNS}c2" ) self.assertEquals(getattr(root.c1, '{otherNS}c2'), path.find(root)) + def test_object_path_ns_list(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( ['{objectified}root', 'c1', 'c2'] ) + self.assertEquals(root.c1.c2, path.find(root)) + path = objectify.ObjectPath( ['{objectified}root', '{objectified}c1', 'c2'] ) + self.assertEquals(root.c1.c2, path.find(root)) + path = objectify.ObjectPath( ['root', '{objectified}c1', '{objectified}c2'] ) + self.assertEquals(root.c1.c2, path.find(root)) + path = objectify.ObjectPath( ['root', 'c1', '{objectified}c2'] ) + self.assertEquals(root.c1.c2, path.find(root)) + path = objectify.ObjectPath( ['root', 'c1', '{otherNS}c2'] ) + self.assertEquals(getattr(root.c1, '{otherNS}c2'), path.find(root)) + def test_object_path_fail(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath( "root.c1.c99" ) From scoder at codespeak.net Thu Aug 3 12:35:58 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 3 Aug 2006 12:35:58 +0200 (CEST) Subject: [Lxml-checkins] r30929 - in lxml/branch/capi/src/lxml: . 
tests Message-ID: <20060803103558.935501007C@code0.codespeak.net> Author: scoder Date: Thu Aug 3 12:35:56 2006 New Revision: 30929 Modified: lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: code cleanup, better test cases, fix: handle path expressions where only some of the attributes have indexes Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Aug 3 12:35:56 2006 @@ -965,7 +965,8 @@ cdef object __SPLIT_PATH __SPLIT_PATH = re.compile( - r"(\.?)(?:\{([^}]*)\})?([^.\[]+)\w*(?:\[\w*([0-9]+)\w*\])?").findall + r"(\.?)\s*(?:\{([^}]*)\})?\s*(\w+)\s*(?:\[\s*([0-9]+)\s*\])?", + re.U).findall cdef _parseObjectPathString(path): """Parse object path string into a 'hrefOnameOhrefOnameOOO' string and an @@ -976,20 +977,21 @@ indexes = [] has_index = 0 for dot, ns, name, index in __SPLIT_PATH(cetree.utf8(path)): - if index is not None and _cstr(index)[0] == c'\0': - index = None + if index is not None: + if python.PyString_GET_SIZE(index) == 0: + index = None + else: + index = python.PyNumber_Int(index) + has_index = 1 + if index < 0: + raise ValueError, "index must be >= 0" + python.PyList_Append(indexes, index) if python.PyList_GET_SIZE(new_path) == 0: if index is not None: raise ValueError, "index not allowed on root node" elif _cstr(dot)[0] != c'.': raise ValueError, "invalid path" python.PyList_Append(new_path, "%s\0%s\0" % (ns, name)) - if index is not None: - index = python.PyNumber_Int(index) - if index < 0: - raise ValueError, "index must be >= 0" - has_index = 1 - python.PyList_Append(indexes, index) python.PyList_Append(new_path, "\0\0") if not has_index: indexes = None @@ -1070,13 +1072,14 @@ # '\0\0' found, all done return cetree.elementFactory(root._doc, c_node) c_path = c_path + 1 + + c_index = 0 if index_list is not None: index = 
python.PyList_GET_ITEM(index_list, c_index_pos) python.Py_INCREF(index) c_index_pos = c_index_pos + 1 - c_index = python.PyInt_AsSsize_t(index) - else: - c_index = 0 + if index is not None: + c_index = python.PyInt_AsSsize_t(index) c_node = cetree.findChildForwards(c_node, 0) while c_node is not NULL: Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Thu Aug 3 12:35:56 2006 @@ -386,34 +386,54 @@ def test_object_path(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath( "root.c1.c2" ) - self.assertEquals(root.c1.c2, path.find(root)) - self.assertEquals(root.c1.c2, path(root)) + self.assertEquals(root.c1.c2.text, path.find(root).text) + self.assertEquals(root.c1.c2.text, path(root).text) def test_object_path_list(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath( ['root', 'c1', 'c2'] ) - self.assertEquals(root.c1.c2, path.find(root)) - self.assertEquals(root.c1.c2, path(root)) + self.assertEquals(root.c1.c2.text, path.find(root).text) + self.assertEquals(root.c1.c2.text, path(root).text) + + def test_object_path_syntax(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath("root . {objectified}c1. 
c2") + self.assertEquals(root.c1.c2.text, path(root).text) + + path = objectify.ObjectPath(" root.{objectified} c1.c2 [ 0 ] ") + self.assertEquals(root.c1.c2.text, path(root).text) def test_object_path_index(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath( "root.c1[0].c2[0]" ) - self.assertEquals(root.c1.c2, path.find(root)) - self.assertEquals(root.c1.c2, path(root)) + self.assertEquals(root.c1.c2.text, path.find(root).text) + self.assertEquals(root.c1.c2.text, path(root).text) + + path = objectify.ObjectPath( "root.c1[0].c2" ) + self.assertEquals(root.c1.c2.text, path.find(root).text) + self.assertEquals(root.c1.c2.text, path(root).text) path = objectify.ObjectPath( "root.c1[0].c2[1]" ) - self.assertEquals(root.c1.c2[1], path.find(root)) - self.assertEquals(root.c1.c2[1], path(root)) + self.assertEquals(root.c1.c2[1].text, path.find(root).text) + self.assertEquals(root.c1.c2[1].text, path(root).text) + + path = objectify.ObjectPath( "root.c1.c2[1]" ) + self.assertEquals(root.c1.c2[1].text, path.find(root).text) + self.assertEquals(root.c1.c2[1].text, path(root).text) def test_object_path_index_list(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath( ['root', 'c1[0]', 'c2[0]'] ) - self.assertEquals(root.c1.c2, path.find(root)) - self.assertEquals(root.c1.c2, path(root)) + self.assertEquals(root.c1.c2.text, path.find(root).text) + self.assertEquals(root.c1.c2.text, path(root).text) path = objectify.ObjectPath( ['root', 'c1[0]', 'c2[1]'] ) - self.assertEquals(root.c1.c2[1], path.find(root)) - self.assertEquals(root.c1.c2[1], path(root)) + self.assertEquals(root.c1.c2[1].text, path.find(root).text) + self.assertEquals(root.c1.c2[1].text, path(root).text) + + path = objectify.ObjectPath( ['root', 'c1', 'c2[1]'] ) + self.assertEquals(root.c1.c2[1].text, path.find(root).text) + self.assertEquals(root.c1.c2[1].text, path(root).text) def test_object_path_index_fail(self): self.assertRaises(ValueError, objectify.ObjectPath, @@ -424,28 
+444,30 @@ def test_object_path_ns(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath( "{objectified}root.c1.c2" ) - self.assertEquals(root.c1.c2, path.find(root)) + self.assertEquals(root.c1.c2.text, path.find(root).text) path = objectify.ObjectPath( "{objectified}root.{objectified}c1.c2" ) - self.assertEquals(root.c1.c2, path.find(root)) + self.assertEquals(root.c1.c2.text, path.find(root).text) path = objectify.ObjectPath( "root.{objectified}c1.{objectified}c2" ) - self.assertEquals(root.c1.c2, path.find(root)) + self.assertEquals(root.c1.c2.text, path.find(root).text) path = objectify.ObjectPath( "root.c1.{objectified}c2" ) - self.assertEquals(root.c1.c2, path.find(root)) + self.assertEquals(root.c1.c2.text, path.find(root).text) path = objectify.ObjectPath( "root.c1.{otherNS}c2" ) - self.assertEquals(getattr(root.c1, '{otherNS}c2'), path.find(root)) + self.assertEquals(getattr(root.c1, '{otherNS}c2').text, + path.find(root).text) def test_object_path_ns_list(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath( ['{objectified}root', 'c1', 'c2'] ) - self.assertEquals(root.c1.c2, path.find(root)) + self.assertEquals(root.c1.c2.text, path.find(root).text) path = objectify.ObjectPath( ['{objectified}root', '{objectified}c1', 'c2'] ) - self.assertEquals(root.c1.c2, path.find(root)) + self.assertEquals(root.c1.c2.text, path.find(root).text) path = objectify.ObjectPath( ['root', '{objectified}c1', '{objectified}c2'] ) - self.assertEquals(root.c1.c2, path.find(root)) + self.assertEquals(root.c1.c2.text, path.find(root).text) path = objectify.ObjectPath( ['root', 'c1', '{objectified}c2'] ) - self.assertEquals(root.c1.c2, path.find(root)) + self.assertEquals(root.c1.c2.text, path.find(root).text) path = objectify.ObjectPath( ['root', 'c1', '{otherNS}c2'] ) - self.assertEquals(getattr(root.c1, '{otherNS}c2'), path.find(root)) + self.assertEquals(getattr(root.c1, '{otherNS}c2').text, + path.find(root).text) def test_object_path_fail(self): root 
= self.etree.XML(xml_str) From scoder at codespeak.net Thu Aug 3 13:00:24 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 3 Aug 2006 13:00:24 +0200 (CEST) Subject: [Lxml-checkins] r30932 - in lxml/branch/capi/src/lxml: . tests Message-ID: <20060803110024.58CA81007C@code0.codespeak.net> Author: scoder Date: Thu Aug 3 13:00:22 2006 New Revision: 30932 Modified: lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: support '.' at the beginning of an object path to ignore the root element, some cleanup Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Aug 3 13:00:22 2006 @@ -932,6 +932,7 @@ # ObjectPath cdef class ObjectPath: + cdef readonly object find cdef object _indexes cdef object _path cdef char* _path_cstr @@ -941,8 +942,9 @@ else: self._path, self._indexes = _parseObjectPathList(path) self._path_cstr = _cstr(self._path) + self.find = self.__call__ - def find(self, _Element root not None, *default): + def __call__(self, _Element root not None, *default): """Follow the attribute path in the object structure and return the target attribute value. 
@@ -960,9 +962,6 @@ return _findObjectPath(root, self._path_cstr, self._indexes, default, use_default) - def __call__(self, *args): - return self.find(*args) - cdef object __SPLIT_PATH __SPLIT_PATH = re.compile( r"(\.?)\s*(?:\{([^}]*)\})?\s*(\w+)\s*(?:\[\s*([0-9]+)\s*\])?", @@ -976,7 +975,8 @@ new_path = [] indexes = [] has_index = 0 - for dot, ns, name, index in __SPLIT_PATH(cetree.utf8(path)): + path = cetree.utf8(path.strip()) + for dot, ns, name, index in __SPLIT_PATH(path): if index is not None: if python.PyString_GET_SIZE(index) == 0: index = None @@ -985,13 +985,16 @@ has_index = 1 if index < 0: raise ValueError, "index must be >= 0" - python.PyList_Append(indexes, index) if python.PyList_GET_SIZE(new_path) == 0: if index is not None: raise ValueError, "index not allowed on root node" + if _cstr(dot)[0] == c'.': + python.PyList_Append(new_path, "\0\0") + python.PyList_Append(indexes, None) elif _cstr(dot)[0] != c'.': raise ValueError, "invalid path" python.PyList_Append(new_path, "%s\0%s\0" % (ns, name)) + python.PyList_Append(indexes, index) python.PyList_Append(new_path, "\0\0") if not has_index: indexes = None @@ -1009,32 +1012,37 @@ indexes = [] has_index = 0 for item in path: - ns, name = cetree.getNsTag(item.strip()) - c_name = _cstr(name) - index_pos = cstd.strchr(c_name, c'[') - if index_pos is NULL: + item = item.strip() + if python.PyList_GET_SIZE(new_path) == 0 and item == '': + entry = "\0\0" # == path '.child' => ignore root index = None else: - if python.PyList_GET_SIZE(new_path) == 0: - raise ValueError, "index not allowed on root node" - name = python.PyString_FromStringAndSize( - c_name, (index_pos - c_name)) - index_pos = index_pos + 1 - index_end = cstd.strchr(index_pos, c']') - if index_end is NULL: - raise ValueError, "index must be enclosed in []" - index = python.PyNumber_Int( - python.PyString_FromStringAndSize( - index_pos, (index_end - index_pos))) - if index < 0: - raise ValueError, "index must be >= 0" - has_index = 1 - 
python.PyList_Append(indexes, index) - if ns is None: - entry = "\0%s\0" % name - else: - entry = "%s\0%s\0" % (ns, name) + ns, name = cetree.getNsTag(item) + c_name = _cstr(name) + index_pos = cstd.strchr(c_name, c'[') + if index_pos is NULL: + index = None + else: + if python.PyList_GET_SIZE(new_path) == 0: + raise ValueError, "index not allowed on root node" + name = python.PyString_FromStringAndSize( + c_name, (index_pos - c_name)) + index_pos = index_pos + 1 + index_end = cstd.strchr(index_pos, c']') + if index_end is NULL: + raise ValueError, "index must be enclosed in []" + index = python.PyNumber_Int( + python.PyString_FromStringAndSize( + index_pos, (index_end - index_pos))) + if index < 0: + raise ValueError, "index must be >= 0" + has_index = 1 + if ns is None: + entry = "\0%s\0" % name + else: + entry = "%s\0%s\0" % (ns, name) python.PyList_Append(new_path, entry) + python.PyList_Append(indexes, index) python.PyList_Append(new_path, "\0\0") if not has_index: indexes = None @@ -1059,7 +1067,7 @@ c_path = c_path + 1 if index_list is not None: c_index_pos = 1 - if cetree.tagMatches(c_node, c_href, c_path): + if c_path[0] == c'\0' or cetree.tagMatches(c_node, c_href, c_path): while c_node is not NULL: while c_path[0] != c'\0': c_path = c_path + 1 Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Thu Aug 3 13:00:22 2006 @@ -403,36 +403,39 @@ path = objectify.ObjectPath(" root.{objectified} c1.c2 [ 0 ] ") self.assertEquals(root.c1.c2.text, path(root).text) + def test_object_path_dot_root(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( ".c1.c2" ) + self.assertEquals(root.c1.c2.text, path(root).text) + + def test_object_path_dot_root_list(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( ['', 'c1', 'c2'] ) + 
self.assertEquals(root.c1.c2.text, path(root).text) + def test_object_path_index(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath( "root.c1[0].c2[0]" ) - self.assertEquals(root.c1.c2.text, path.find(root).text) self.assertEquals(root.c1.c2.text, path(root).text) path = objectify.ObjectPath( "root.c1[0].c2" ) - self.assertEquals(root.c1.c2.text, path.find(root).text) self.assertEquals(root.c1.c2.text, path(root).text) path = objectify.ObjectPath( "root.c1[0].c2[1]" ) - self.assertEquals(root.c1.c2[1].text, path.find(root).text) self.assertEquals(root.c1.c2[1].text, path(root).text) path = objectify.ObjectPath( "root.c1.c2[1]" ) - self.assertEquals(root.c1.c2[1].text, path.find(root).text) self.assertEquals(root.c1.c2[1].text, path(root).text) def test_object_path_index_list(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath( ['root', 'c1[0]', 'c2[0]'] ) - self.assertEquals(root.c1.c2.text, path.find(root).text) self.assertEquals(root.c1.c2.text, path(root).text) path = objectify.ObjectPath( ['root', 'c1[0]', 'c2[1]'] ) - self.assertEquals(root.c1.c2[1].text, path.find(root).text) self.assertEquals(root.c1.c2[1].text, path(root).text) path = objectify.ObjectPath( ['root', 'c1', 'c2[1]'] ) - self.assertEquals(root.c1.c2[1].text, path.find(root).text) self.assertEquals(root.c1.c2[1].text, path(root).text) def test_object_path_index_fail(self): From scoder at codespeak.net Thu Aug 3 13:02:40 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 3 Aug 2006 13:02:40 +0200 (CEST) Subject: [Lxml-checkins] r30933 - lxml/branch/capi/src/lxml Message-ID: <20060803110240.2D50610081@code0.codespeak.net> Author: scoder Date: Thu Aug 3 13:02:39 2006 New Revision: 30933 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: show complete tag in AttributeError raised by ObjectPath Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- 
lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Aug 3 13:02:39 2006 @@ -1102,7 +1102,8 @@ if use_default: return default_value else: - raise AttributeError, "no such attribute: %s" % c_path + tag = cetree.namespacedNameFromNsName(c_href, c_path) + raise AttributeError, "no such attribute: %s" % tag cdef _buildDescendentPaths(tree.xmlNode* c_node, prefix_string): """Returns a list of all descendent paths. From scoder at codespeak.net Thu Aug 3 13:04:33 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 3 Aug 2006 13:04:33 +0200 (CEST) Subject: [Lxml-checkins] r30934 - lxml/branch/capi/src/lxml Message-ID: <20060803110433.47FE010081@code0.codespeak.net> Author: scoder Date: Thu Aug 3 13:04:32 2006 New Revision: 30934 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: adapt ObjectPath error message to normal attribute access error message Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Aug 3 13:04:32 2006 @@ -385,7 +385,7 @@ c_href = _cstr(ns) c_result = _findFollowingSibling(c_node.children, c_href, c_tag, 0) if c_result is NULL: - raise AttributeError, "no such child: %s" % \ + raise AttributeError, "no such child: " + \ cetree.namespacedNameFromNsName(c_href, c_tag) return elementFactory(parent._doc, c_result) @@ -1103,7 +1103,7 @@ return default_value else: tag = cetree.namespacedNameFromNsName(c_href, c_path) - raise AttributeError, "no such attribute: %s" % tag + raise AttributeError, "no such child: " + tag cdef _buildDescendentPaths(tree.xmlNode* c_node, prefix_string): """Returns a list of all descendent paths. 
From scoder at codespeak.net Thu Aug 3 13:37:51 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 3 Aug 2006 13:37:51 +0200 (CEST) Subject: [Lxml-checkins] r30937 - lxml/branch/capi/src/lxml Message-ID: <20060803113751.F28281007C@code0.codespeak.net> Author: scoder Date: Thu Aug 3 13:37:50 2006 New Revision: 30937 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: code cleanup, fix for segfault if index exceeds the number of children Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Aug 3 13:37:50 2006 @@ -972,26 +972,29 @@ index list. The index list is None if no index was used in the path. """ cdef int has_index + cdef int has_dot new_path = [] indexes = [] has_index = 0 path = cetree.utf8(path.strip()) for dot, ns, name, index in __SPLIT_PATH(path): if index is not None: - if python.PyString_GET_SIZE(index) == 0: + if python.PyString_GET_SIZE(index) == 0 or index == '0': index = None else: index = python.PyNumber_Int(index) has_index = 1 if index < 0: raise ValueError, "index must be >= 0" + has_dot = _cstr(dot)[0] == c'.' 
if python.PyList_GET_SIZE(new_path) == 0: - if index is not None: - raise ValueError, "index not allowed on root node" - if _cstr(dot)[0] == c'.': + if has_dot: + # == path '.child' => ignore root python.PyList_Append(new_path, "\0\0") python.PyList_Append(indexes, None) - elif _cstr(dot)[0] != c'.': + elif index is not None: + raise ValueError, "index not allowed on root node" + elif not has_dot: raise ValueError, "invalid path" python.PyList_Append(new_path, "%s\0%s\0" % (ns, name)) python.PyList_Append(indexes, index) @@ -1034,9 +1037,12 @@ index = python.PyNumber_Int( python.PyString_FromStringAndSize( index_pos, (index_end - index_pos))) - if index < 0: - raise ValueError, "index must be >= 0" - has_index = 1 + if index == 0: + index = None + else: + if index < 0: + raise ValueError, "index must be >= 0" + has_index = 1 if ns is None: entry = "\0%s\0" % name else: @@ -1091,12 +1097,10 @@ c_node = cetree.findChildForwards(c_node, 0) while c_node is not NULL: - while c_node is not NULL and \ - not cetree.tagMatches(c_node, c_href, c_path): - c_node = cetree.nextElement(c_node) - c_index = c_index - 1 - if c_index < 0: - break + if cetree.tagMatches(c_node, c_href, c_path): + if c_index <= 0: + break + c_index = c_index - 1 c_node = cetree.nextElement(c_node) if use_default: From scoder at codespeak.net Thu Aug 3 13:41:22 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 3 Aug 2006 13:41:22 +0200 (CEST) Subject: [Lxml-checkins] r30939 - lxml/branch/capi/doc Message-ID: <20060803114122.7CCC01007C@code0.codespeak.net> Author: scoder Date: Thu Aug 3 13:41:21 2006 New Revision: 30939 Modified: lxml/branch/capi/doc/objectify.txt Log: ObjectPath doctests Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Aug 3 13:41:21 2006 @@ -222,6 +222,66 @@ {other}c +ObjectPath +---------- + +For 
both convenience and speed, objectify supports its own path language, +represented by the ``ObjectPath`` class:: + + >>> root = etree.Element("{ns}root") + >>> b1 = etree.SubElement(root, "{ns}b") + >>> c = etree.SubElement(b1, "{ns}c") + >>> b2 = etree.SubElement(root, "{ns}b") + >>> d = etree.SubElement(root, "{other}d") + + >>> find = objectify.ObjectPath("root.b.c") + >>> print find(root).tag + {ns}c + + >>> find = objectify.ObjectPath("root.{other}d") + >>> print find(root).tag + {other}d + + >>> find = objectify.ObjectPath("root.{not}there") + >>> print find(root).tag + Traceback (most recent call last): + ... + AttributeError: no such child: {not}there + + >>> find = objectify.ObjectPath("{not}there") + >>> print find(root).tag + Traceback (most recent call last): + ... + AttributeError: no such child: {not}there + + >>> find = objectify.ObjectPath("root.b[1]") + >>> print find(root).tag + {ns}b + + >>> find = objectify.ObjectPath("root.{ns}b[1]") + >>> print find(root).tag + {ns}b + +You can also use relative paths starting with a '.' that ignore the actual +root element and only inherit its namespace:: + + >>> find = objectify.ObjectPath(".b[1]") + >>> print find(root).tag + {ns}b + + >>> find = objectify.ObjectPath(".unknown[1]") + >>> print find(root).tag + Traceback (most recent call last): + ... + AttributeError: no such child: {ns}unknown + + >>> find = objectify.ObjectPath(".{other}unknown[1]") + >>> print find(root).tag + Traceback (most recent call last): + ... 
+ AttributeError: no such child: {other}unknown + + Python data types ----------------- From scoder at codespeak.net Thu Aug 3 13:42:04 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 3 Aug 2006 13:42:04 +0200 (CEST) Subject: [Lxml-checkins] r30940 - lxml/branch/capi/src/lxml/tests Message-ID: <20060803114204.9325C1007C@code0.codespeak.net> Author: scoder Date: Thu Aug 3 13:42:03 2006 New Revision: 30940 Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py Log: test cases for failing index lookup in ObjectPath Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Thu Aug 3 13:42:03 2006 @@ -438,12 +438,28 @@ path = objectify.ObjectPath( ['root', 'c1', 'c2[1]'] ) self.assertEquals(root.c1.c2[1].text, path(root).text) - def test_object_path_index_fail(self): + def test_object_path_index_fail_parse(self): self.assertRaises(ValueError, objectify.ObjectPath, "root.c1[0].c2[-1]") self.assertRaises(ValueError, objectify.ObjectPath, ['root', 'c1[0]', 'c2[-1]']) + self.assertRaises(ValueError, objectify.ObjectPath, + "root.c1[-1].c2") + self.assertRaises(ValueError, objectify.ObjectPath, + ['root', 'c1[-1]', 'c2']) + + def test_object_path_index_fail_lookup(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath("root.c1[9999].c2") + self.assertRaises(AttributeError, path, root) + + path = objectify.ObjectPath("root.c1[0].c2[9999]") + self.assertRaises(AttributeError, path, root) + + path = objectify.ObjectPath(".c1[9999].c2[0]") + self.assertRaises(AttributeError, path, root) + def test_object_path_ns(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath( "{objectified}root.c1.c2" ) From scoder at codespeak.net Thu Aug 3 13:59:08 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 3 Aug 2006 
13:59:08 +0200 (CEST) Subject: [Lxml-checkins] r30942 - lxml/branch/capi/doc Message-ID: <20060803115908.BD9511007C@code0.codespeak.net> Author: scoder Date: Thu Aug 3 13:59:06 2006 New Revision: 30942 Modified: lxml/branch/capi/doc/objectify.txt Log: doctests for ObjectPaths in list form Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Aug 3 13:59:06 2006 @@ -262,6 +262,16 @@ >>> print find(root).tag {ns}b +Apart from strings, ObjectPath also accepts lists of path segments: + + >>> find = objectify.ObjectPath(['root', 'b', 'c']) + >>> print find(root).tag + {ns}c + + >>> find = objectify.ObjectPath(['root', '{ns}b[1]']) + >>> print find(root).tag + {ns}b + You can also use relative paths starting with a '.' that ignore the actual root element and only inherit its namespace:: @@ -269,6 +279,10 @@ >>> print find(root).tag {ns}b + >>> find = objectify.ObjectPath(['', 'b[1]']) + >>> print find(root).tag + {ns}b + >>> find = objectify.ObjectPath(".unknown[1]") >>> print find(root).tag Traceback (most recent call last): From scoder at codespeak.net Thu Aug 3 15:48:57 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 3 Aug 2006 15:48:57 +0200 (CEST) Subject: [Lxml-checkins] r30951 - in lxml/branch/capi: doc src/lxml src/lxml/tests Message-ID: <20060803134857.882D410078@code0.codespeak.net> Author: scoder Date: Thu Aug 3 15:48:55 2006 New Revision: 30951 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: ObjectPath.set(root, value) to create a path and set the target value Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Aug 3 
15:48:55 2006 @@ -63,11 +63,12 @@ .. 1 Element access through object attributes 2 Namespace handling - 3 Python data types - 4 Defining additional data classes - 5 Recursive string representation of elements - 6 What is different from ElementTree? - 7 Resetting the API + 3 ObjectPath + 4 Python data types + 5 Defining additional data classes + 6 Recursive string representation of elements + 7 What is different from ElementTree? + 8 Resetting the API Element access through object attributes @@ -234,6 +235,10 @@ >>> b2 = etree.SubElement(root, "{ns}b") >>> d = etree.SubElement(root, "{other}d") + >>> path = objectify.ObjectPath("root.b.c") + >>> print path.find(root).tag + {ns}c + >>> find = objectify.ObjectPath("root.b.c") >>> print find(root).tag {ns}c @@ -295,6 +300,30 @@ ... AttributeError: no such child: {other}unknown +ObjectPath objects can be used to manipulate trees:: + + >>> root = etree.Element("{ns}root") + + >>> path = objectify.ObjectPath(".some.child.{other}unknown") + >>> path.find(root) + Traceback (most recent call last): + ... + AttributeError: no such child: {ns}some + + >>> path.set(root, "my value") # create children as necessary + >>> print path.find(root).text + my value + >>> print root.some.child["{other}unknown"].text + my value + +Note, however, that indexing is not supported in this context:: + + >>> path = objectify.ObjectPath(".some[1].child.{other}unknown") + >>> path.set(root, "my value") + Traceback (most recent call last): + ... 
+ TypeError: this operation does not support indexed paths + Python data types ----------------- Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Aug 3 15:48:55 2006 @@ -962,6 +962,16 @@ return _findObjectPath(root, self._path_cstr, self._indexes, default, use_default) + def set(self, _Element root not None, value): + """Set the value of the target element in a subtree. + + If any of the children on the path does not exist, it is created. + Note that paths containing indexed attributes are not supported. + """ + if self._indexes is not None: + raise TypeError, "this operation does not support indexed paths" + _createObjectPath(root, self._path_cstr, value) + cdef object __SPLIT_PATH __SPLIT_PATH = re.compile( r"(\.?)\s*(?:\{([^}]*)\})?\s*(\w+)\s*(?:\[\s*([0-9]+)\s*\])?", @@ -998,6 +1008,8 @@ raise ValueError, "invalid path" python.PyList_Append(new_path, "%s\0%s\0" % (ns, name)) python.PyList_Append(indexes, index) + if python.PyList_GET_SIZE(new_path) == 0: + raise ValueError, "invalid path" python.PyList_Append(new_path, "\0\0") if not has_index: indexes = None @@ -1049,21 +1061,22 @@ entry = "%s\0%s\0" % (ns, name) python.PyList_Append(new_path, entry) python.PyList_Append(indexes, index) + if python.PyList_GET_SIZE(new_path) == 0 or \ + (python.PyList_GET_SIZE(new_path) == 1 and new_path[0] == '\0\0'): + raise ValueError, "invalid path" python.PyList_Append(new_path, "\0\0") if not has_index: indexes = None return ''.join(new_path), indexes -cdef _findObjectPath(_Element root, char* c_path_segments, - index_list, default_value, int use_default): +cdef _findObjectPath(_Element root, char* c_path, index_list, + default_value, int use_default): """Follow the path to find the target element. 
""" cdef tree.xmlNode* c_node cdef char* c_href - cdef char* c_path cdef Py_ssize_t c_index_pos, c_index c_node = root._c_node - c_path = c_path_segments if c_path[0] != c'\0': c_href = c_path while c_path[0] != c'\0': @@ -1109,6 +1122,53 @@ tag = cetree.namespacedNameFromNsName(c_href, c_path) raise AttributeError, "no such child: " + tag +cdef _createObjectPath(_Element root, char* c_path, value): + """Follow the path to find the target element, build the missing children + as needed and replace the target element by 'value'. + """ + cdef _Element child + cdef tree.xmlNode* c_node + cdef tree.xmlNode* c_child + cdef char* c_href + c_node = root._c_node + if c_path[0] != c'\0': + c_href = c_path + while c_path[0] != c'\0': + c_path = c_path + 1 + else: + c_href = tree._getNs(c_node) + c_path = c_path + 1 + if c_path[0] != c'\0' and not cetree.tagMatches(c_node, c_href, c_path): + raise ValueError, "root element does not match: need %s, got %s" % \ + (cetree.namespacedNameFromNsName(c_href, c_path), root.tag) + + while c_node is not NULL: + while c_path[0] != c'\0': + c_path = c_path + 1 + c_path = c_path + 1 + if c_path[0] != c'\0': + c_href = c_path + while c_path[0] != c'\0': + c_path = c_path + 1 + elif c_path[1] == c'\0': + # '\0\0' found => done, all children were there + element = cetree.elementFactory(root._doc, c_node) + _replaceElement(element, value) + return + c_path = c_path + 1 + + c_child = cetree.findChildForwards(c_node, 0) + while c_child is not NULL and \ + not cetree.tagMatches(c_child, c_href, c_path): + c_child = cetree.nextElement(c_child) + if c_child is not NULL: + c_node = c_child + else: + child = SubElement( + cetree.elementFactory(root._doc, c_node), + cetree.namespacedNameFromNsName(c_href, c_path)) + c_node = child._c_node + cdef _buildDescendentPaths(tree.xmlNode* c_node, prefix_string): """Returns a list of all descendent paths. 
""" Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Thu Aug 3 15:48:55 2006 @@ -395,6 +395,12 @@ self.assertEquals(root.c1.c2.text, path.find(root).text) self.assertEquals(root.c1.c2.text, path(root).text) + def test_object_path_fail(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( "root.c1.c99" ) + self.assertRaises(AttributeError, path, root) + self.assertEquals(None, path(root, None)) + def test_object_path_syntax(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath("root . {objectified}c1. c2") @@ -449,6 +455,13 @@ self.assertRaises(ValueError, objectify.ObjectPath, ['root', 'c1[-1]', 'c2']) + self.assertRaises(ValueError, objectify.ObjectPath, + ".") + self.assertRaises(ValueError, objectify.ObjectPath, + ['']) + self.assertRaises(ValueError, objectify.ObjectPath, + ['', '', '']) + def test_object_path_index_fail_lookup(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath("root.c1[9999].c2") @@ -488,11 +501,27 @@ self.assertEquals(getattr(root.c1, '{otherNS}c2').text, path.find(root).text) - def test_object_path_fail(self): + def test_object_path_set(self): root = self.etree.XML(xml_str) - path = objectify.ObjectPath( "root.c1.c99" ) - self.assertRaises(AttributeError, path, root) - self.assertEquals(None, path(root, None)) + path = objectify.ObjectPath( "root.c1.c2" ) + self.assertEquals(root.c1.c2.text, path.find(root).text) + + new_value = "my new value" + path.set(root, new_value) + + self.assertEquals(new_value, root.c1.c2.text) + self.assertEquals(new_value, path(root).text) + + def test_object_path_set_create(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( "root.c1.c99.c126.honk" ) + self.assertRaises(AttributeError, path.find, root) + + new_value = "my new value" + 
path.set(root, new_value) + + self.assertEquals(new_value, root.c1.c99.c126.honk) + self.assertEquals(new_value, path(root).text) def test_descendent_paths(self): root = self.etree.XML(xml_str) From scoder at codespeak.net Thu Aug 3 15:54:55 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 3 Aug 2006 15:54:55 +0200 (CEST) Subject: [Lxml-checkins] r30952 - in lxml/branch/capi: doc src/lxml Message-ID: <20060803135455.BE9ED10053@code0.codespeak.net> Author: scoder Date: Thu Aug 3 15:54:54 2006 New Revision: 30952 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: str(ObjectPath) -> path Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Aug 3 15:54:54 2006 @@ -236,6 +236,8 @@ >>> d = etree.SubElement(root, "{other}d") >>> path = objectify.ObjectPath("root.b.c") + >>> print path + root.b.c >>> print path.find(root).tag {ns}c Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Aug 3 15:54:54 2006 @@ -932,18 +932,28 @@ # ObjectPath cdef class ObjectPath: + """Immutable object that represents a compiled object path. 
+ + Example for a path: 'root.child[1].{other}child[25]' + """ cdef readonly object find cdef object _indexes cdef object _path + cdef object _path_str cdef char* _path_cstr def __init__(self, path): if python._isString(path): self._path, self._indexes = _parseObjectPathString(path) + self._path_str = path else: self._path, self._indexes = _parseObjectPathList(path) + self._path_str = '.'.join(path) self._path_cstr = _cstr(self._path) self.find = self.__call__ + def __str__(self): + return self._path_str + def __call__(self, _Element root not None, *default): """Follow the attribute path in the object structure and return the target attribute value. From scoder at codespeak.net Thu Aug 3 16:24:34 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 3 Aug 2006 16:24:34 +0200 (CEST) Subject: [Lxml-checkins] r30956 - in lxml/branch/capi: doc src/lxml src/lxml/tests Message-ID: <20060803142434.33F471005A@code0.codespeak.net> Author: scoder Date: Thu Aug 3 16:24:31 2006 New Revision: 30956 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: some code cleanup, let ObjectPath.find() raise ValueError on wrong root node instead of AttributeError, renamed OP.set() -> setattr(), new method hasattr() Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Aug 3 16:24:31 2006 @@ -238,6 +238,8 @@ >>> path = objectify.ObjectPath("root.b.c") >>> print path root.b.c + >>> path.hasattr(root) + True >>> print path.find(root).tag {ns}c @@ -259,7 +261,7 @@ >>> print find(root).tag Traceback (most recent call last): ... 
- AttributeError: no such child: {not}there + ValueError: root element does not match: need {not}there, got {ns}root >>> find = objectify.ObjectPath("root.b[1]") >>> print find(root).tag @@ -307,12 +309,16 @@ >>> root = etree.Element("{ns}root") >>> path = objectify.ObjectPath(".some.child.{other}unknown") + >>> path.hasattr(root) + False >>> path.find(root) Traceback (most recent call last): ... AttributeError: no such child: {ns}some - >>> path.set(root, "my value") # create children as necessary + >>> path.setattr(root, "my value") # create children as necessary + >>> path.hasattr(root) + True >>> print path.find(root).text my value >>> print root.some.child["{other}unknown"].text @@ -321,7 +327,7 @@ Note, however, that indexing is not supported in this context:: >>> path = objectify.ObjectPath(".some[1].child.{other}unknown") - >>> path.set(root, "my value") + >>> path.setattr(root, "my value") Traceback (most recent call last): ... TypeError: this operation does not support indexed paths Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Aug 3 16:24:31 2006 @@ -972,7 +972,14 @@ return _findObjectPath(root, self._path_cstr, self._indexes, default, use_default) - def set(self, _Element root not None, value): + def hasattr(self, _Element root not None): + try: + _findObjectPath(root, self._path_cstr, self._indexes, None, 0) + except AttributeError: + return False + return True + + def setattr(self, _Element root not None, value): """Set the value of the target element in a subtree. If any of the children on the path does not exist, it is created. 
@@ -1094,37 +1101,40 @@ else: c_href = tree._getNs(c_node) c_path = c_path + 1 + if c_path[0] != c'\0' and not cetree.tagMatches(c_node, c_href, c_path): + raise ValueError, "root element does not match: need %s, got %s" % \ + (cetree.namespacedNameFromNsName(c_href, c_path), root.tag) + if index_list is not None: c_index_pos = 1 - if c_path[0] == c'\0' or cetree.tagMatches(c_node, c_href, c_path): - while c_node is not NULL: + while c_node is not NULL: + while c_path[0] != c'\0': + c_path = c_path + 1 + c_path = c_path + 1 + if c_path[0] != c'\0': + c_href = c_path while c_path[0] != c'\0': c_path = c_path + 1 - c_path = c_path + 1 - if c_path[0] != c'\0': - c_href = c_path - while c_path[0] != c'\0': - c_path = c_path + 1 - elif c_path[1] == c'\0': - # '\0\0' found, all done - return cetree.elementFactory(root._doc, c_node) - c_path = c_path + 1 + elif c_path[1] == c'\0': + # '\0\0' found, all done + return cetree.elementFactory(root._doc, c_node) + c_path = c_path + 1 - c_index = 0 - if index_list is not None: - index = python.PyList_GET_ITEM(index_list, c_index_pos) - python.Py_INCREF(index) - c_index_pos = c_index_pos + 1 - if index is not None: - c_index = python.PyInt_AsSsize_t(index) - - c_node = cetree.findChildForwards(c_node, 0) - while c_node is not NULL: - if cetree.tagMatches(c_node, c_href, c_path): - if c_index <= 0: - break - c_index = c_index - 1 - c_node = cetree.nextElement(c_node) + c_index = 0 + if index_list is not None: + index = python.PyList_GET_ITEM(index_list, c_index_pos) + python.Py_INCREF(index) + c_index_pos = c_index_pos + 1 + if index is not None: + c_index = python.PyInt_AsSsize_t(index) + + c_node = cetree.findChildForwards(c_node, 0) + while c_node is not NULL: + if cetree.tagMatches(c_node, c_href, c_path): + if c_index <= 0: + break + c_index = c_index - 1 + c_node = cetree.nextElement(c_node) if use_default: return default_value Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py 
============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Thu Aug 3 16:24:31 2006 @@ -409,6 +409,23 @@ path = objectify.ObjectPath(" root.{objectified} c1.c2 [ 0 ] ") self.assertEquals(root.c1.c2.text, path(root).text) + def test_object_path_hasattr(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( "root" ) + self.assert_(path.hasattr(root)) + path = objectify.ObjectPath( "root.c1" ) + self.assert_(path.hasattr(root)) + path = objectify.ObjectPath( "root.c1.c2" ) + self.assert_(path.hasattr(root)) + path = objectify.ObjectPath( "root.c1.{otherNS}c2" ) + self.assert_(path.hasattr(root)) + path = objectify.ObjectPath( "root.c1.c2[1]" ) + self.assert_(path.hasattr(root)) + path = objectify.ObjectPath( "root.c1.c2[2]" ) + self.assertFalse(path.hasattr(root)) + path = objectify.ObjectPath( "root.c1[1].c2" ) + self.assertFalse(path.hasattr(root)) + def test_object_path_dot_root(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath( ".c1.c2" ) @@ -507,7 +524,7 @@ self.assertEquals(root.c1.c2.text, path.find(root).text) new_value = "my new value" - path.set(root, new_value) + path.setattr(root, new_value) self.assertEquals(new_value, root.c1.c2.text) self.assertEquals(new_value, path(root).text) @@ -518,7 +535,7 @@ self.assertRaises(AttributeError, path.find, root) new_value = "my new value" - path.set(root, new_value) + path.setattr(root, new_value) self.assertEquals(new_value, root.c1.c99.c126.honk) self.assertEquals(new_value, path(root).text) From scoder at codespeak.net Thu Aug 3 16:49:54 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 3 Aug 2006 16:49:54 +0200 (CEST) Subject: [Lxml-checkins] r30959 - lxml/branch/capi/doc Message-ID: <20060803144954.A3B131005A@code0.codespeak.net> Author: scoder Date: Thu Aug 3 16:49:52 2006 New Revision: 30959 Modified: 
lxml/branch/capi/doc/objectify.txt Log: doc updates: note that ObjectPath does not depend on objectify Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Aug 3 16:49:52 2006 @@ -316,7 +316,7 @@ ... AttributeError: no such child: {ns}some - >>> path.setattr(root, "my value") # create children as necessary + >>> path.setattr(root, "my value") # creates children as necessary >>> path.hasattr(root) True >>> print path.find(root).text @@ -332,6 +332,10 @@ ... TypeError: this operation does not support indexed paths +It is worth noting that ObjectPath does not depend on the ``objectify`` module +or the ObjectifiedElement implementation. It can also be used in combination +with Elements from the normal lxml.etree API. + Python data types ----------------- From scoder at codespeak.net Fri Aug 4 13:13:42 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 4 Aug 2006 13:13:42 +0200 (CEST) Subject: [Lxml-checkins] r30985 - in lxml/trunk: . 
src/lxml Message-ID: <20060804111342.3861910072@code0.codespeak.net> Author: scoder Date: Fri Aug 4 13:13:40 2006 New Revision: 30985 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/xslt.pxd lxml/trunk/src/lxml/xslt.pxi Log: fix for crash when mixing XSLT result elements into other trees Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Aug 4 13:13:40 2006 @@ -13,6 +13,8 @@ Bugs fixed ---------- +* Crash when mixing elements from XSLT results into other trees + * The ``namespace`` axis is supported in XPath and returns (prefix, URI) tuples Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Fri Aug 4 13:13:40 2006 @@ -89,7 +89,7 @@ context._c_dict = xmlparser.xmlDictCreateSub(self._c_dict) return context._c_dict - cdef void _initThreadDictRef(self, xmlDict** c_dict_ref): + cdef void initThreadDictRef(self, xmlDict** c_dict_ref): cdef xmlDict* c_dict cdef xmlDict* c_thread_dict c_dict = c_dict_ref[0] @@ -103,18 +103,18 @@ cdef void initParserDict(self, xmlParserCtxt* pctxt): "Assure we always use the same string dictionary." - self._initThreadDictRef(&pctxt.dict) + self.initThreadDictRef(&pctxt.dict) cdef void initXPathParserDict(self, xpath.xmlXPathContext* pctxt): "Assure we always use the same string dictionary." - self._initThreadDictRef(&pctxt.dict) + self.initThreadDictRef(&pctxt.dict) cdef void initDocDict(self, xmlDoc* result): "Store dict of last object parsed if no shared dict yet" # XXX We also free the result dict here if there already was one. 
# This case should only occur for new documents with empty dicts, # otherwise we'd free data that's in use => segfault - self._initThreadDictRef(&result.dict) + self.initThreadDictRef(&result.dict) cdef _ParserContext __GLOBAL_PARSER_CONTEXT __GLOBAL_PARSER_CONTEXT = _ParserContext() Modified: lxml/trunk/src/lxml/xslt.pxd ============================================================================== --- lxml/trunk/src/lxml/xslt.pxd (original) +++ lxml/trunk/src/lxml/xslt.pxd Fri Aug 4 13:13:40 2006 @@ -20,6 +20,7 @@ xmlXPathContext* xpathCtxt xsltDocument* document void* _private + xmlDict* dict cdef xsltStylesheet* xsltParseStylesheetDoc(xmlDoc* doc) cdef void xsltFreeStylesheet(xsltStylesheet* sheet) Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Fri Aug 4 13:13:40 2006 @@ -366,6 +366,8 @@ _destroyFakeDoc(input_doc._c_doc, c_doc) raise XSLTApplyError, "Error preparing stylesheet run" + initTransformDict(transform_ctxt) + self._error_log.connect() xslt.xsltSetTransformErrorFunc(transform_ctxt, self._error_log, _receiveXSLTError) @@ -511,6 +513,10 @@ else: return result +cdef void initTransformDict(xslt.xsltTransformContext* transform_ctxt): + __GLOBAL_PARSER_CONTEXT.initThreadDictRef(&transform_ctxt.dict) + + ################################################################################ # EXSLT regexp implementation From scoder at codespeak.net Fri Aug 4 15:10:14 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 4 Aug 2006 15:10:14 +0200 (CEST) Subject: [Lxml-checkins] r30997 - lxml/trunk/src/lxml/tests Message-ID: <20060804131014.5E2C81006E@code0.codespeak.net> Author: scoder Date: Fri Aug 4 15:10:12 2006 New Revision: 30997 Modified: lxml/trunk/src/lxml/tests/common_imports.py Log: test cases: import ET from stdlib under Python 2.5 Modified: lxml/trunk/src/lxml/tests/common_imports.py 
============================================================================== --- lxml/trunk/src/lxml/tests/common_imports.py (original) +++ lxml/trunk/src/lxml/tests/common_imports.py Fri Aug 4 15:10:12 2006 @@ -6,9 +6,12 @@ from lxml import etree try: - from elementtree import ElementTree + from xml.etree import ElementTree # Python 2.5 except ImportError: - ElementTree = None + try: + from elementtree import ElementTree # standard ET + except ImportError: + ElementTree = None class HelperTestCase(unittest.TestCase): def parse(self, text): From scoder at codespeak.net Fri Aug 4 16:00:39 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 4 Aug 2006 16:00:39 +0200 (CEST) Subject: [Lxml-checkins] r30998 - lxml/branch/capi/benchmark Message-ID: <20060804140039.095FD1006C@code0.codespeak.net> Author: scoder Date: Fri Aug 4 16:00:37 2006 New Revision: 30998 Modified: lxml/branch/capi/benchmark/bench_etree.py lxml/branch/capi/benchmark/benchbase.py Log: benchmark fixes for Py2.5 Modified: lxml/branch/capi/benchmark/bench_etree.py ============================================================================== --- lxml/branch/capi/benchmark/bench_etree.py (original) +++ lxml/branch/capi/benchmark/bench_etree.py Fri Aug 4 16:00:37 2006 @@ -212,12 +212,12 @@ child.text def bench_set_text(self, root): - text = _TEXT + text = TEXT for child in root: child.text = text def bench_set_utext(self, root): - text = _UTEXT + text = UTEXT for child in root: child.text = text Modified: lxml/branch/capi/benchmark/benchbase.py ============================================================================== --- lxml/branch/capi/benchmark/benchbase.py (original) +++ lxml/branch/capi/benchmark/benchbase.py Fri Aug 4 16:00:37 2006 @@ -6,8 +6,8 @@ TREE_FACTOR = 1 # increase tree size with '-l / '-L' cmd option -_TEXT = "some ASCII text" * TREE_FACTOR -_UTEXT = u"some klingon: \F8D2" * TREE_FACTOR +TEXT = "some ASCII text" * TREE_FACTOR +UTEXT = u"some klingon: \F8D2" * 
TREE_FACTOR _ATTRIBUTES = { '{attr}test1' : _TEXT, '{attr}test2' : _TEXT, @@ -435,18 +435,34 @@ if '-a' in sys.argv or '-c' in sys.argv: # 'all' or 'C-implementations' ? try: - import cElementTree as cET + sys.argv.remove('-c') + except ValueError: + pass + try: + import xml.etree.cElementTree as cET _etrees.append(cET) except ImportError: - pass + try: + import cElementTree as cET + _etrees.append(cET) + except ImportError: + pass try: # 'all' ? sys.argv.remove('-a') - from elementtree import ElementTree as ET - _etrees.append(ET) - except (ValueError, ImportError): + except ValueError: pass + else: + try: + from xml.etree import ElementTree as ET + _etrees.append(ET) + except ImportError: + try: + from elementtree import ElementTree as ET + _etrees.append(ET) + except ImportError: + pass if not _etrees: print "No library to test. Exiting." From scoder at codespeak.net Fri Aug 4 16:17:05 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 4 Aug 2006 16:17:05 +0200 (CEST) Subject: [Lxml-checkins] r31000 - lxml/trunk/src Message-ID: <20060804141705.2C98A1006E@code0.codespeak.net> Author: scoder Date: Fri Aug 4 16:17:04 2006 New Revision: 31000 Removed: lxml/trunk/src/doctest.py Log: dropped src/doctest.py Deleted: /lxml/trunk/src/doctest.py ============================================================================== --- /lxml/trunk/src/doctest.py Fri Aug 4 16:17:04 2006 +++ (empty file) @@ -1,2704 +0,0 @@ -# Module doctest. -# Released to the public domain 16-Jan-2001, by Tim Peters (tim at python.org). -# Major enhancements and refactoring by: -# Jim Fulton -# Edward Loper - -# Provided as-is; use at your own risk; no warranty; no promises; enjoy! - -r"""Module doctest -- a framework for running examples in docstrings. 
- -In simplest use, end each module M to be tested with: - -def _test(): - import doctest - doctest.testmod() - -if __name__ == "__main__": - _test() - -Then running the module as a script will cause the examples in the -docstrings to get executed and verified: - -python M.py - -This won't display anything unless an example fails, in which case the -failing example(s) and the cause(s) of the failure(s) are printed to stdout -(why not stderr? because stderr is a lame hack <0.2 wink>), and the final -line of output is "Test failed.". - -Run it with the -v switch instead: - -python M.py -v - -and a detailed report of all examples tried is printed to stdout, along -with assorted summaries at the end. - -You can force verbose mode by passing "verbose=True" to testmod, or prohibit -it by passing "verbose=False". In either of those cases, sys.argv is not -examined by testmod. - -There are a variety of other ways to run doctests, including integration -with the unittest framework, and support for running non-Python text -files containing doctests. There are also many ways to override parts -of doctest's default behaviors. See the Library Reference Manual for -details. -""" - -__docformat__ = 'reStructuredText en' - -__all__ = [ - # 0, Option Flags - 'register_optionflag', - 'DONT_ACCEPT_TRUE_FOR_1', - 'DONT_ACCEPT_BLANKLINE', - 'NORMALIZE_WHITESPACE', - 'ELLIPSIS', - 'IGNORE_EXCEPTION_DETAIL', - 'COMPARISON_FLAGS', - 'REPORT_UDIFF', - 'REPORT_CDIFF', - 'REPORT_NDIFF', - 'REPORT_ONLY_FIRST_FAILURE', - 'REPORTING_FLAGS', - # 1. Utility Functions - 'is_private', - # 2. Example & DocTest - 'Example', - 'DocTest', - # 3. Doctest Parser - 'DocTestParser', - # 4. Doctest Finder - 'DocTestFinder', - # 5. Doctest Runner - 'DocTestRunner', - 'OutputChecker', - 'DocTestFailure', - 'UnexpectedException', - 'DebugRunner', - # 6. Test Functions - 'testmod', - 'testfile', - 'run_docstring_examples', - # 7. Tester - 'Tester', - # 8. 
Unittest Support - 'DocTestSuite', - 'DocFileSuite', - 'set_unittest_reportflags', - # 9. Debugging Support - 'script_from_examples', - 'testsource', - 'debug_src', - 'debug', -] - -import __future__ - -import sys, traceback, inspect, linecache, os, re, types -import unittest, difflib, pdb, tempfile -import warnings -from StringIO import StringIO - -# Don't whine about the deprecated is_private function in this -# module's tests. -warnings.filterwarnings("ignore", "is_private", DeprecationWarning, - __name__, 0) - -real_pdb_set_trace = pdb.set_trace - -# There are 4 basic classes: -# - Example: a pair, plus an intra-docstring line number. -# - DocTest: a collection of examples, parsed from a docstring, plus -# info about where the docstring came from (name, filename, lineno). -# - DocTestFinder: extracts DocTests from a given object's docstring and -# its contained objects' docstrings. -# - DocTestRunner: runs DocTest cases, and accumulates statistics. -# -# So the basic picture is: -# -# list of: -# +------+ +---------+ +-------+ -# |object| --DocTestFinder-> | DocTest | --DocTestRunner-> |results| -# +------+ +---------+ +-------+ -# | Example | -# | ... | -# | Example | -# +---------+ - -# Option constants. 
- -OPTIONFLAGS_BY_NAME = {} -def register_optionflag(name): - flag = 1 << len(OPTIONFLAGS_BY_NAME) - OPTIONFLAGS_BY_NAME[name] = flag - return flag - -DONT_ACCEPT_TRUE_FOR_1 = register_optionflag('DONT_ACCEPT_TRUE_FOR_1') -DONT_ACCEPT_BLANKLINE = register_optionflag('DONT_ACCEPT_BLANKLINE') -NORMALIZE_WHITESPACE = register_optionflag('NORMALIZE_WHITESPACE') -ELLIPSIS = register_optionflag('ELLIPSIS') -IGNORE_EXCEPTION_DETAIL = register_optionflag('IGNORE_EXCEPTION_DETAIL') - -COMPARISON_FLAGS = (DONT_ACCEPT_TRUE_FOR_1 | - DONT_ACCEPT_BLANKLINE | - NORMALIZE_WHITESPACE | - ELLIPSIS | - IGNORE_EXCEPTION_DETAIL) - -REPORT_UDIFF = register_optionflag('REPORT_UDIFF') -REPORT_CDIFF = register_optionflag('REPORT_CDIFF') -REPORT_NDIFF = register_optionflag('REPORT_NDIFF') -REPORT_ONLY_FIRST_FAILURE = register_optionflag('REPORT_ONLY_FIRST_FAILURE') - -REPORTING_FLAGS = (REPORT_UDIFF | - REPORT_CDIFF | - REPORT_NDIFF | - REPORT_ONLY_FIRST_FAILURE) - -# Special string markers for use in `want` strings: -BLANKLINE_MARKER = '<BLANKLINE>' -ELLIPSIS_MARKER = '...' - -###################################################################### -## Table of Contents -###################################################################### -# 1. Utility Functions -# 2. Example & DocTest -- store test cases -# 3. DocTest Parser -- extracts examples from strings -# 4. DocTest Finder -- extracts test cases from objects -# 5. DocTest Runner -- runs test cases -# 6. Test Functions -- convenient wrappers for testing -# 7. Tester Class -- for backwards compatibility -# 8. Unittest Support -# 9. Debugging Support -# 10. Example Usage - -###################################################################### -## 1. Utility Functions -###################################################################### - -def is_private(prefix, base): - """prefix, base -> true iff name prefix + "." + base is "private". - - Prefix may be an empty string, and base does not contain a period. 
- Prefix is ignored (although functions you write conforming to this - protocol may make use of it). - Return true iff base begins with an (at least one) underscore, but - does not both begin and end with (at least) two underscores. - - >>> is_private("a.b", "my_func") - False - >>> is_private("____", "_my_func") - True - >>> is_private("someclass", "__init__") - False - >>> is_private("sometypo", "__init_") - True - >>> is_private("x.y.z", "_") - True - >>> is_private("_x.y.z", "__") - False - >>> is_private("", "") # senseless but consistent - False - """ - warnings.warn("is_private is deprecated; it wasn't useful; " - "examine DocTestFinder.find() lists instead", - DeprecationWarning, stacklevel=2) - return base[:1] == "_" and not base[:2] == "__" == base[-2:] - -def _extract_future_flags(globs): - """ - Return the compiler-flags associated with the future features that - have been imported into the given namespace (globs). - """ - flags = 0 - for fname in __future__.all_feature_names: - feature = globs.get(fname, None) - if feature is getattr(__future__, fname): - flags |= feature.compiler_flag - return flags - -def _normalize_module(module, depth=2): - """ - Return the module specified by `module`. In particular: - - If `module` is a module, then return module. - - If `module` is a string, then import and return the - module with that name. - - If `module` is None, then return the calling module. - The calling module is assumed to be the module of - the stack frame at the given depth in the call stack. - """ - if inspect.ismodule(module): - return module - elif isinstance(module, (str, unicode)): - return __import__(module, globals(), locals(), ["*"]) - elif module is None: - return sys.modules[sys._getframe(depth).f_globals['__name__']] - else: - raise TypeError("Expected a module, string, or None") - -def _indent(s, indent=4): - """ - Add the given number of space characters to the beginning every - non-blank line in `s`, and return the result. 
- """ - # This regexp matches the start of non-blank lines: - return re.sub('(?m)^(?!$)', indent*' ', s) - -def _exception_traceback(exc_info): - """ - Return a string containing a traceback message for the given - exc_info tuple (as returned by sys.exc_info()). - """ - # Get a traceback message. - excout = StringIO() - exc_type, exc_val, exc_tb = exc_info - traceback.print_exception(exc_type, exc_val, exc_tb, file=excout) - return excout.getvalue() - -# Override some StringIO methods. -class _SpoofOut(StringIO): - def getvalue(self): - result = StringIO.getvalue(self) - # If anything at all was written, make sure there's a trailing - # newline. There's no way for the expected output to indicate - # that a trailing newline is missing. - if result and not result.endswith("\n"): - result += "\n" - # Prevent softspace from screwing up the next test case, in - # case they used print with a trailing comma in an example. - if hasattr(self, "softspace"): - del self.softspace - return result - - def truncate(self, size=None): - StringIO.truncate(self, size) - if hasattr(self, "softspace"): - del self.softspace - -# Worst-case linear-time ellipsis matching. -def _ellipsis_match(want, got): - """ - Essentially the only subtle case: - >>> _ellipsis_match('aa...aa', 'aaa') - False - """ - if ELLIPSIS_MARKER not in want: - return want == got - - # Find "the real" strings. - ws = want.split(ELLIPSIS_MARKER) - assert len(ws) >= 2 - - # Deal with exact matches possibly needed at one or both ends. 
- startpos, endpos = 0, len(got) - w = ws[0] - if w: # starts with exact match - if got.startswith(w): - startpos = len(w) - del ws[0] - else: - return False - w = ws[-1] - if w: # ends with exact match - if got.endswith(w): - endpos -= len(w) - del ws[-1] - else: - return False - - if startpos > endpos: - # Exact end matches required more characters than we have, as in - # _ellipsis_match('aa...aa', 'aaa') - return False - - # For the rest, we only need to find the leftmost non-overlapping - # match for each piece. If there's no overall match that way alone, - # there's no overall match period. - for w in ws: - # w may be '' at times, if there are consecutive ellipses, or - # due to an ellipsis at the start or end of `want`. That's OK. - # Search for an empty string succeeds, and doesn't change startpos. - startpos = got.find(w, startpos, endpos) - if startpos < 0: - return False - startpos += len(w) - - return True - -def _comment_line(line): - "Return a commented form of the given line" - line = line.rstrip() - if line: - return '# '+line - else: - return '#' - -class _OutputRedirectingPdb(pdb.Pdb): - """ - A specialized version of the python debugger that redirects stdout - to a given stream when interacting with the user. Stdout is *not* - redirected when traced code is executed. - """ - def __init__(self, out): - self.__out = out - self.__debugger_used = False - pdb.Pdb.__init__(self) - - def set_trace(self): - self.__debugger_used = True - pdb.Pdb.set_trace(self) - - def set_continue(self): - # Calling set_continue unconditionally would break unit test coverage - # reporting, as Bdb.set_continue calls sys.settrace(None). - if self.__debugger_used: - pdb.Pdb.set_continue(self) - - def trace_dispatch(self, *args): - # Redirect stdout to the given stream. - save_stdout = sys.stdout - sys.stdout = self.__out - # Call Pdb's trace dispatch method. - result = pdb.Pdb.trace_dispatch(self, *args) - # Restore stdout. 
- sys.stdout = save_stdout - return result - -# [XX] Normalize with respect to os.path.pardir? -def _module_relative_path(module, path): - if not inspect.ismodule(module): - raise TypeError, 'Expected a module: %r' % module - if path.startswith('/'): - raise ValueError, 'Module-relative files may not have absolute paths' - - # Find the base directory for the path. - if hasattr(module, '__file__'): - # A normal module/package - basedir = os.path.split(module.__file__)[0] - elif module.__name__ == '__main__': - # An interactive session. - if len(sys.argv)>0 and sys.argv[0] != '': - basedir = os.path.split(sys.argv[0])[0] - else: - basedir = os.curdir - else: - # A module w/o __file__ (this includes builtins) - raise ValueError("Can't resolve paths relative to the module " + - module + " (it has no __file__)") - - # Combine the base directory and the path. - return os.path.join(basedir, *(path.split('/'))) - -###################################################################### -## 2. Example & DocTest -###################################################################### -## - An "example" is a pair, where "source" is a -## fragment of source code, and "want" is the expected output for -## "source." The Example class also includes information about -## where the example was extracted from. -## -## - A "doctest" is a collection of examples, typically extracted from -## a string (such as an object's docstring). The DocTest class also -## includes information about where the string was extracted from. - -class Example: - """ - A single doctest example, consisting of source code and expected - output. `Example` defines the following attributes: - - - source: A single Python statement, always ending with a newline. - The constructor adds a newline if needed. - - - want: The expected output from running the source code (either - from stdout, or a traceback in case of exception). `want` ends - with a newline unless it's empty, in which case it's an empty - string. 
The constructor adds a newline if needed. - - - exc_msg: The exception message generated by the example, if - the example is expected to generate an exception; or `None` if - it is not expected to generate an exception. This exception - message is compared against the return value of - `traceback.format_exception_only()`. `exc_msg` ends with a - newline unless it's `None`. The constructor adds a newline - if needed. - - - lineno: The line number within the DocTest string containing - this Example where the Example begins. This line number is - zero-based, with respect to the beginning of the DocTest. - - - indent: The example's indentation in the DocTest string. - I.e., the number of space characters that preceed the - example's first prompt. - - - options: A dictionary mapping from option flags to True or - False, which is used to override default options for this - example. Any option flags not contained in this dictionary - are left at their default value (as specified by the - DocTestRunner's optionflags). By default, no options are set. - """ - def __init__(self, source, want, exc_msg=None, lineno=0, indent=0, - options=None): - # Normalize inputs. - if not source.endswith('\n'): - source += '\n' - if want and not want.endswith('\n'): - want += '\n' - if exc_msg is not None and not exc_msg.endswith('\n'): - exc_msg += '\n' - # Store properties. - self.source = source - self.want = want - self.lineno = lineno - self.indent = indent - if options is None: options = {} - self.options = options - self.exc_msg = exc_msg - -class DocTest: - """ - A collection of doctest examples that should be run in a single - namespace. Each `DocTest` defines the following attributes: - - - examples: the list of examples. - - - globs: The namespace (aka globals) that the examples should - be run in. - - - name: A name identifying the DocTest (typically, the name of - the object whose docstring this DocTest was extracted from). 
- - - filename: The name of the file that this DocTest was extracted - from, or `None` if the filename is unknown. - - - lineno: The line number within filename where this DocTest - begins, or `None` if the line number is unavailable. This - line number is zero-based, with respect to the beginning of - the file. - - - docstring: The string that the examples were extracted from, - or `None` if the string is unavailable. - """ - def __init__(self, examples, globs, name, filename, lineno, docstring): - """ - Create a new DocTest containing the given examples. The - DocTest's globals are initialized with a copy of `globs`. - """ - assert not isinstance(examples, basestring), \ - "DocTest no longer accepts str; use DocTestParser instead" - self.examples = examples - self.docstring = docstring - self.globs = globs.copy() - self.name = name - self.filename = filename - self.lineno = lineno - - def __repr__(self): - if len(self.examples) == 0: - examples = 'no examples' - elif len(self.examples) == 1: - examples = '1 example' - else: - examples = '%d examples' % len(self.examples) - return ('<DocTest %s from %s:%s (%s)>' % - (self.name, self.filename, self.lineno, examples)) - - - # This lets us sort tests by name: - def __cmp__(self, other): - if not isinstance(other, DocTest): - return -1 - return cmp((self.name, self.filename, self.lineno, id(self)), - (other.name, other.filename, other.lineno, id(other))) - -###################################################################### -## 3. DocTestParser -###################################################################### - -class DocTestParser: - """ - A class used to parse strings containing doctest examples. - """ - # This regular expression is used to find doctest examples in a - # string. It defines three groups: `source` is the source code - # (including leading indentation and prompts); `indent` is the - # indentation of the first (PS1) line of the source code; and - # `want` is the expected output (including leading indentation). 
- _EXAMPLE_RE = re.compile(r''' - # Source consists of a PS1 line followed by zero or more PS2 lines. - (?P<source> - (?:^(?P<indent> [ ]*) >>> .*) # PS1 line - (?:\n [ ]* \.\.\. .*)*) # PS2 lines - \n? - # Want consists of any non-blank lines that do not start with PS1. - (?P<want> (?:(?![ ]*$) # Not a blank line - (?![ ]*>>>) # Not a line starting with PS1 - .*$\n? # But any other line - )*) - ''', re.MULTILINE | re.VERBOSE) - - # A regular expression for handling `want` strings that contain - # expected exceptions. It divides `want` into three pieces: - # - the traceback header line (`hdr`) - # - the traceback stack (`stack`) - # - the exception message (`msg`), as generated by - # traceback.format_exception_only() - # `msg` may have multiple lines. We assume/require that the - # exception message is the first non-indented line starting with a word - # character following the traceback header line. - _EXCEPTION_RE = re.compile(r""" - # Grab the traceback header. Different versions of Python have - # said different things on the first traceback line. - ^(?P<hdr> Traceback\ \( - (?: most\ recent\ call\ last - | innermost\ last - ) \) : - ) - \s* $ # toss trailing whitespace on the header. - (?P<stack> .*?) # don't blink: absorb stuff until... - ^ (?P<msg> \w+ .*) # a line *starts* with alphanum. - """, re.VERBOSE | re.MULTILINE | re.DOTALL) - - # A callable returning a true value iff its argument is a blank line - # or contains a single comment. - _IS_BLANK_OR_COMMENT = re.compile(r'^[ ]*(#.*)?$').match - - def parse(self, string, name='<string>'): - """ - Divide the given string into examples and intervening text, - and return them as a list of alternating Examples and strings. - Line numbers for the Examples are 0-based. The optional - argument `name` is a name identifying this string, and is only - used for error messages. - """ - string = string.expandtabs() - # If all lines begin with the same indentation, then strip it. 
- min_indent = self._min_indent(string) - if min_indent > 0: - string = '\n'.join([l[min_indent:] for l in string.split('\n')]) - - output = [] - charno, lineno = 0, 0 - # Find all doctest examples in the string: - for m in self._EXAMPLE_RE.finditer(string): - # Add the pre-example text to `output`. - output.append(string[charno:m.start()]) - # Update lineno (lines before this example) - lineno += string.count('\n', charno, m.start()) - # Extract info from the regexp match. - (source, options, want, exc_msg) = \ - self._parse_example(m, name, lineno) - # Create an Example, and add it to the list. - if not self._IS_BLANK_OR_COMMENT(source): - output.append( Example(source, want, exc_msg, - lineno=lineno, - indent=min_indent+len(m.group('indent')), - options=options) ) - # Update lineno (lines inside this example) - lineno += string.count('\n', m.start(), m.end()) - # Update charno. - charno = m.end() - # Add any remaining post-example text to `output`. - output.append(string[charno:]) - return output - - def get_doctest(self, string, globs, name, filename, lineno): - """ - Extract all doctest examples from the given string, and - collect them into a `DocTest` object. - - `globs`, `name`, `filename`, and `lineno` are attributes for - the new `DocTest` object. See the documentation for `DocTest` - for more information. - """ - return DocTest(self.get_examples(string, name), globs, - name, filename, lineno, string) - - def get_examples(self, string, name=''): - """ - Extract all doctest examples from the given string, and return - them as a list of `Example` objects. Line numbers are - 0-based, because it's most common in doctests that nothing - interesting appears on the same line as opening triple-quote, - and so the first interesting line is called \"line 1\" then. - - The optional argument `name` is a name identifying this - string, and is only used for error messages. 
- """ - return [x for x in self.parse(string, name) - if isinstance(x, Example)] - - def _parse_example(self, m, name, lineno): - """ - Given a regular expression match from `_EXAMPLE_RE` (`m`), - return a pair `(source, want)`, where `source` is the matched - example's source code (with prompts and indentation stripped); - and `want` is the example's expected output (with indentation - stripped). - - `name` is the string's name, and `lineno` is the line number - where the example starts; both are used for error messages. - """ - # Get the example's indentation level. - indent = len(m.group('indent')) - - # Divide source into lines; check that they're properly - # indented; and then strip their indentation & prompts. - source_lines = m.group('source').split('\n') - self._check_prompt_blank(source_lines, indent, name, lineno) - self._check_prefix(source_lines[1:], ' '*indent + '.', name, lineno) - source = '\n'.join([sl[indent+4:] for sl in source_lines]) - - # Divide want into lines; check that it's properly indented; and - # then strip the indentation. Spaces before the last newline should - # be preserved, so plain rstrip() isn't good enough. - want = m.group('want') - want_lines = want.split('\n') - if len(want_lines) > 1 and re.match(r' *$', want_lines[-1]): - del want_lines[-1] # forget final newline & spaces after it - self._check_prefix(want_lines, ' '*indent, name, - lineno + len(source_lines)) - want = '\n'.join([wl[indent:] for wl in want_lines]) - - # If `want` contains a traceback message, then extract it. - m = self._EXCEPTION_RE.match(want) - if m: - exc_msg = m.group('msg') - else: - exc_msg = None - - # Extract options from the source. - options = self._find_options(source, name, lineno) - - return source, options, want, exc_msg - - # This regular expression looks for option directives in the - # source code of an example. Option directives are comments - # starting with "doctest:". 
Warning: this may give false - # positives for string-literals that contain the string - # "#doctest:". Eliminating these false positives would require - # actually parsing the string; but we limit them by ignoring any - # line containing "#doctest:" that is *followed* by a quote mark. - _OPTION_DIRECTIVE_RE = re.compile(r'#\s*doctest:\s*([^\n\'"]*)$', - re.MULTILINE) - - def _find_options(self, source, name, lineno): - """ - Return a dictionary containing option overrides extracted from - option directives in the given source string. - - `name` is the string's name, and `lineno` is the line number - where the example starts; both are used for error messages. - """ - options = {} - # (note: with the current regexp, this will match at most once:) - for m in self._OPTION_DIRECTIVE_RE.finditer(source): - option_strings = m.group(1).replace(',', ' ').split() - for option in option_strings: - if (option[0] not in '+-' or - option[1:] not in OPTIONFLAGS_BY_NAME): - raise ValueError('line %r of the doctest for %s ' - 'has an invalid option: %r' % - (lineno+1, name, option)) - flag = OPTIONFLAGS_BY_NAME[option[1:]] - options[flag] = (option[0] == '+') - if options and self._IS_BLANK_OR_COMMENT(source): - raise ValueError('line %r of the doctest for %s has an option ' - 'directive on a line with no example: %r' % - (lineno, name, source)) - return options - - # This regular expression finds the indentation of every non-blank - # line in a string. - _INDENT_RE = re.compile('^([ ]*)(?=\S)', re.MULTILINE) - - def _min_indent(self, s): - "Return the minimum indentation of any non-blank line in `s`" - indents = [len(indent) for indent in self._INDENT_RE.findall(s)] - if len(indents) > 0: - return min(indents) - else: - return 0 - - def _check_prompt_blank(self, lines, indent, name, lineno): - """ - Given the lines of a source string (including prompts and - leading indentation), check to make sure that every prompt is - followed by a space character. 
If any line is not followed by - a space character, then raise ValueError. - """ - for i, line in enumerate(lines): - if len(line) >= indent+4 and line[indent+3] != ' ': - raise ValueError('line %r of the docstring for %s ' - 'lacks blank after %s: %r' % - (lineno+i+1, name, - line[indent:indent+3], line)) - - def _check_prefix(self, lines, prefix, name, lineno): - """ - Check that every line in the given list starts with the given - prefix; if any line does not, then raise a ValueError. - """ - for i, line in enumerate(lines): - if line and not line.startswith(prefix): - raise ValueError('line %r of the docstring for %s has ' - 'inconsistent leading whitespace: %r' % - (lineno+i+1, name, line)) - - -###################################################################### -## 4. DocTest Finder -###################################################################### - -class DocTestFinder: - """ - A class used to extract the DocTests that are relevant to a given - object, from its docstring and the docstrings of its contained - objects. Doctests can currently be extracted from the following - object types: modules, functions, classes, methods, staticmethods, - classmethods, and properties. - """ - - def __init__(self, verbose=False, parser=DocTestParser(), - recurse=True, _namefilter=None, exclude_empty=True): - """ - Create a new doctest finder. - - The optional argument `parser` specifies a class or - function that should be used to create new DocTest objects (or - objects that implement the same interface as DocTest). The - signature for this factory function should match the signature - of the DocTest constructor. - - If the optional argument `recurse` is false, then `find` will - only examine the given object, and not any contained objects. - - If the optional argument `exclude_empty` is false, then `find` - will include tests for objects with empty docstrings. 
- """ - self._parser = parser - self._verbose = verbose - self._recurse = recurse - self._exclude_empty = exclude_empty - # _namefilter is undocumented, and exists only for temporary backward- - # compatibility support of testmod's deprecated isprivate mess. - self._namefilter = _namefilter - - def find(self, obj, name=None, module=None, globs=None, - extraglobs=None): - """ - Return a list of the DocTests that are defined by the given - object's docstring, or by any of its contained objects' - docstrings. - - The optional parameter `module` is the module that contains - the given object. If the module is not specified or is None, then - the test finder will attempt to automatically determine the - correct module. The object's module is used: - - - As a default namespace, if `globs` is not specified. - - To prevent the DocTestFinder from extracting DocTests - from objects that are imported from other modules. - - To find the name of the file containing the object. - - To help find the line number of the object within its - file. - - Contained objects whose module does not match `module` are ignored. - - If `module` is False, no attempt to find the module will be made. - This is obscure, of use mostly in tests: if `module` is False, or - is None but cannot be found automatically, then all objects are - considered to belong to the (non-existent) module, so all contained - objects will (recursively) be searched for doctests. - - The globals for each DocTest is formed by combining `globs` - and `extraglobs` (bindings in `extraglobs` override bindings - in `globs`). A new copy of the globals dictionary is created - for each DocTest. If `globs` is not specified, then it - defaults to the module's `__dict__`, if specified, or {} - otherwise. If `extraglobs` is not specified, then it defaults - to {}. - - """ - # If name was not specified, then extract it from the object. 
- if name is None: - name = getattr(obj, '__name__', None) - if name is None: - raise ValueError("DocTestFinder.find: name must be given " - "when obj.__name__ doesn't exist: %r" % - (type(obj),)) - - # Find the module that contains the given object (if obj is - # a module, then module=obj.). Note: this may fail, in which - # case module will be None. - if module is False: - module = None - elif module is None: - module = inspect.getmodule(obj) - - # Read the module's source code. This is used by - # DocTestFinder._find_lineno to find the line number for a - # given object's docstring. - try: - file = inspect.getsourcefile(obj) or inspect.getfile(obj) - source_lines = linecache.getlines(file) - if not source_lines: - source_lines = None - except TypeError: - source_lines = None - - # Initialize globals, and merge in extraglobs. - if globs is None: - if module is None: - globs = {} - else: - globs = module.__dict__.copy() - else: - globs = globs.copy() - if extraglobs is not None: - globs.update(extraglobs) - - # Recursively expore `obj`, extracting DocTests. - tests = [] - self._find(tests, obj, name, module, source_lines, globs, {}) - return tests - - def _filter(self, obj, prefix, base): - """ - Return true if the given object should not be examined. - """ - return (self._namefilter is not None and - self._namefilter(prefix, base)) - - def _from_module(self, module, object): - """ - Return true if the given object is defined in the given - module. - """ - if module is None: - return True - elif inspect.isfunction(object): - return module.__dict__ is object.func_globals - elif inspect.isclass(object): - return module.__name__ == object.__module__ - elif inspect.getmodule(object) is not None: - return module is inspect.getmodule(object) - elif hasattr(object, '__module__'): - return module.__name__ == object.__module__ - elif isinstance(object, property): - return True # [XX] no way not be sure. 
- else: - raise ValueError("object must be a class or function") - - def _find(self, tests, obj, name, module, source_lines, globs, seen): - """ - Find tests for the given object and any contained objects, and - add them to `tests`. - """ - if self._verbose: - print 'Finding tests in %s' % name - - # If we've already processed this object, then ignore it. - if id(obj) in seen: - return - seen[id(obj)] = 1 - - # Find a test for this object, and add it to the list of tests. - test = self._get_test(obj, name, module, globs, source_lines) - if test is not None: - tests.append(test) - - # Look for tests in a module's contained objects. - if inspect.ismodule(obj) and self._recurse: - for valname, val in obj.__dict__.items(): - # Check if this contained object should be ignored. - if self._filter(val, name, valname): - continue - valname = '%s.%s' % (name, valname) - # Recurse to functions & classes. - if ((inspect.isfunction(val) or inspect.isclass(val)) and - self._from_module(module, val)): - self._find(tests, val, valname, module, source_lines, - globs, seen) - - # Look for tests in a module's __test__ dictionary. - if inspect.ismodule(obj) and self._recurse: - for valname, val in getattr(obj, '__test__', {}).items(): - if not isinstance(valname, basestring): - raise ValueError("DocTestFinder.find: __test__ keys " - "must be strings: %r" % - (type(valname),)) - if not (inspect.isfunction(val) or inspect.isclass(val) or - inspect.ismethod(val) or inspect.ismodule(val) or - isinstance(val, basestring)): - raise ValueError("DocTestFinder.find: __test__ values " - "must be strings, functions, methods, " - "classes, or modules: %r" % - (type(val),)) - valname = '%s.__test__.%s' % (name, valname) - self._find(tests, val, valname, module, source_lines, - globs, seen) - - # Look for tests in a class's contained objects. - if inspect.isclass(obj) and self._recurse: - for valname, val in obj.__dict__.items(): - # Check if this contained object should be ignored. 
- if self._filter(val, name, valname): - continue - # Special handling for staticmethod/classmethod. - if isinstance(val, staticmethod): - val = getattr(obj, valname) - if isinstance(val, classmethod): - val = getattr(obj, valname).im_func - - # Recurse to methods, properties, and nested classes. - if ((inspect.isfunction(val) or inspect.isclass(val) or - isinstance(val, property)) and - self._from_module(module, val)): - valname = '%s.%s' % (name, valname) - self._find(tests, val, valname, module, source_lines, - globs, seen) - - def _get_test(self, obj, name, module, globs, source_lines): - """ - Return a DocTest for the given object, if it defines a docstring; - otherwise, return None. - """ - # Extract the object's docstring. If it doesn't have one, - # then return None (no test for this object). - if isinstance(obj, basestring): - docstring = obj - else: - try: - if obj.__doc__ is None: - docstring = '' - else: - docstring = obj.__doc__ - if not isinstance(docstring, basestring): - docstring = str(docstring) - except (TypeError, AttributeError): - docstring = '' - - # Find the docstring's location in the file. - lineno = self._find_lineno(obj, source_lines) - - # Don't bother if the docstring is empty. - if self._exclude_empty and not docstring: - return None - - # Return a DocTest for this object. - if module is None: - filename = None - else: - filename = getattr(module, '__file__', module.__name__) - if filename[-4:] in (".pyc", ".pyo"): - filename = filename[:-1] - return self._parser.get_doctest(docstring, globs, name, - filename, lineno) - - def _find_lineno(self, obj, source_lines): - """ - Return a line number of the given object's docstring. Note: - this method assumes that the object has a docstring. - """ - lineno = None - - # Find the line number for modules. - if inspect.ismodule(obj): - lineno = 0 - - # Find the line number for classes. - # Note: this could be fooled if a class is defined multiple - # times in a single file. 
- if inspect.isclass(obj): - if source_lines is None: - return None - pat = re.compile(r'^\s*class\s*%s\b' % - getattr(obj, '__name__', '-')) - for i, line in enumerate(source_lines): - if pat.match(line): - lineno = i - break - - # Find the line number for functions & methods. - if inspect.ismethod(obj): obj = obj.im_func - if inspect.isfunction(obj): obj = obj.func_code - if inspect.istraceback(obj): obj = obj.tb_frame - if inspect.isframe(obj): obj = obj.f_code - if inspect.iscode(obj): - lineno = getattr(obj, 'co_firstlineno', None)-1 - - # Find the line number where the docstring starts. Assume - # that it's the first line that begins with a quote mark. - # Note: this could be fooled by a multiline function - # signature, where a continuation line begins with a quote - # mark. - if lineno is not None: - if source_lines is None: - return lineno+1 - pat = re.compile('(^|.*:)\s*\w*("|\')') - for lineno in range(lineno, len(source_lines)): - if pat.match(source_lines[lineno]): - return lineno - - # We couldn't find the line number. - return None - -###################################################################### -## 5. DocTest Runner -###################################################################### - -class DocTestRunner: - """ - A class used to run DocTest test cases, and accumulate statistics. - The `run` method is used to process a single DocTest case. It - returns a tuple `(f, t)`, where `t` is the number of test cases - tried, and `f` is the number of test cases that failed. - - >>> tests = DocTestFinder().find(_TestClass) - >>> runner = DocTestRunner(verbose=False) - >>> for test in tests: - ... 
print runner.run(test) - (0, 2) - (0, 1) - (0, 2) - (0, 2) - - The `summarize` method prints a summary of all the test cases that - have been run by the runner, and returns an aggregated `(f, t)` - tuple: - - >>> runner.summarize(verbose=1) - 4 items passed all tests: - 2 tests in _TestClass - 2 tests in _TestClass.__init__ - 2 tests in _TestClass.get - 1 tests in _TestClass.square - 7 tests in 4 items. - 7 passed and 0 failed. - Test passed. - (0, 7) - - The aggregated number of tried examples and failed examples is - also available via the `tries` and `failures` attributes: - - >>> runner.tries - 7 - >>> runner.failures - 0 - - The comparison between expected outputs and actual outputs is done - by an `OutputChecker`. This comparison may be customized with a - number of option flags; see the documentation for `testmod` for - more information. If the option flags are insufficient, then the - comparison may also be customized by passing a subclass of - `OutputChecker` to the constructor. - - The test runner's display output can be controlled in two ways. - First, an output function (`out) can be passed to - `TestRunner.run`; this function will be called with strings that - should be displayed. It defaults to `sys.stdout.write`. If - capturing the output is not sufficient, then the display output - can be also customized by subclassing DocTestRunner, and - overriding the methods `report_start`, `report_success`, - `report_unexpected_exception`, and `report_failure`. - """ - # This divider string is used to separate failure messages, and to - # separate sections of the summary. - DIVIDER = "*" * 70 - - def __init__(self, checker=None, verbose=None, optionflags=0): - """ - Create a new test runner. - - Optional keyword arg `checker` is the `OutputChecker` that - should be used to compare the expected outputs and actual - outputs of doctest examples. 
- - Optional keyword arg 'verbose' prints lots of stuff if true, - only failures if false; by default, it's true iff '-v' is in - sys.argv. - - Optional argument `optionflags` can be used to control how the - test runner compares expected output to actual output, and how - it displays failures. See the documentation for `testmod` for - more information. - """ - self._checker = checker or OutputChecker() - if verbose is None: - verbose = '-v' in sys.argv - self._verbose = verbose - self.optionflags = optionflags - self.original_optionflags = optionflags - - # Keep track of the examples we've run. - self.tries = 0 - self.failures = 0 - self._name2ft = {} - - # Create a fake output target for capturing doctest output. - self._fakeout = _SpoofOut() - - #///////////////////////////////////////////////////////////////// - # Reporting methods - #///////////////////////////////////////////////////////////////// - - def report_start(self, out, test, example): - """ - Report that the test runner is about to process the given - example. (Only displays a message if verbose=True) - """ - if self._verbose: - if example.want: - out('Trying:\n' + _indent(example.source) + - 'Expecting:\n' + _indent(example.want)) - else: - out('Trying:\n' + _indent(example.source) + - 'Expecting nothing\n') - - def report_success(self, out, test, example, got): - """ - Report that the given example ran successfully. (Only - displays a message if verbose=True) - """ - if self._verbose: - out("ok\n") - - def report_failure(self, out, test, example, got): - """ - Report that the given example failed. - """ - out(self._failure_header(test, example) + - self._checker.output_difference(example, got, self.optionflags)) - - def report_unexpected_exception(self, out, test, example, exc_info): - """ - Report that the given example raised an unexpected exception. 
- """ - out(self._failure_header(test, example) + - 'Exception raised:\n' + _indent(_exception_traceback(exc_info))) - - def _failure_header(self, test, example): - out = [self.DIVIDER] - if test.filename: - if test.lineno is not None and example.lineno is not None: - lineno = test.lineno + example.lineno + 1 - else: - lineno = '?' - out.append('File "%s", line %s, in %s' % - (test.filename, lineno, test.name)) - else: - out.append('Line %s, in %s' % (example.lineno+1, test.name)) - out.append('Failed example:') - source = example.source - out.append(_indent(source)) - return '\n'.join(out) - - #///////////////////////////////////////////////////////////////// - # DocTest Running - #///////////////////////////////////////////////////////////////// - - def __run(self, test, compileflags, out): - """ - Run the examples in `test`. Write the outcome of each example - with one of the `DocTestRunner.report_*` methods, using the - writer function `out`. `compileflags` is the set of compiler - flags that should be used to execute examples. Return a tuple - `(f, t)`, where `t` is the number of examples tried, and `f` - is the number of examples that failed. The examples are run - in the namespace `test.globs`. - """ - # Keep track of the number of failures and tries. - failures = tries = 0 - - # Save the option flags (since option directives can be used - # to modify them). - original_optionflags = self.optionflags - - SUCCESS, FAILURE, BOOM = range(3) # `outcome` state - - check = self._checker.check_output - - # Process each example. - for examplenum, example in enumerate(test.examples): - - # If REPORT_ONLY_FIRST_FAILURE is set, then supress - # reporting after the first failure. - quiet = (self.optionflags & REPORT_ONLY_FIRST_FAILURE and - failures > 0) - - # Merge in the example's options. 
- self.optionflags = original_optionflags - if example.options: - for (optionflag, val) in example.options.items(): - if val: - self.optionflags |= optionflag - else: - self.optionflags &= ~optionflag - - # Record that we started this example. - tries += 1 - if not quiet: - self.report_start(out, test, example) - - # Use a special filename for compile(), so we can retrieve - # the source code during interactive debugging (see - # __patched_linecache_getlines). - filename = '' % (test.name, examplenum) - - # Run the example in the given context (globs), and record - # any exception that gets raised. (But don't intercept - # keyboard interrupts.) - try: - # Don't blink! This is where the user's code gets run. - exec compile(example.source, filename, "single", - compileflags, 1) in test.globs - self.debugger.set_continue() # ==== Example Finished ==== - exception = None - except KeyboardInterrupt: - raise - except: - exception = sys.exc_info() - self.debugger.set_continue() # ==== Example Finished ==== - - got = self._fakeout.getvalue() # the actual output - self._fakeout.truncate(0) - outcome = FAILURE # guilty until proved innocent or insane - - # If the example executed without raising any exceptions, - # verify its output. - if exception is None: - if check(example.want, got, self.optionflags): - outcome = SUCCESS - - # The example raised an exception: check if it was expected. - else: - exc_info = sys.exc_info() - exc_msg = traceback.format_exception_only(*exc_info[:2])[-1] - if not quiet: - got += _exception_traceback(exc_info) - - # If `example.exc_msg` is None, then we weren't expecting - # an exception. - if example.exc_msg is None: - outcome = BOOM - - # We expected an exception: see whether it matches. - elif check(example.exc_msg, exc_msg, self.optionflags): - outcome = SUCCESS - - # Another chance if they didn't care about the detail. 
- elif self.optionflags & IGNORE_EXCEPTION_DETAIL: - m1 = re.match(r'[^:]*:', example.exc_msg) - m2 = re.match(r'[^:]*:', exc_msg) - if m1 and m2 and check(m1.group(0), m2.group(0), - self.optionflags): - outcome = SUCCESS - - # Report the outcome. - if outcome is SUCCESS: - if not quiet: - self.report_success(out, test, example, got) - elif outcome is FAILURE: - if not quiet: - self.report_failure(out, test, example, got) - failures += 1 - elif outcome is BOOM: - if not quiet: - self.report_unexpected_exception(out, test, example, - exc_info) - failures += 1 - else: - assert False, ("unknown outcome", outcome) - - # Restore the option flags (in case they were modified) - self.optionflags = original_optionflags - - # Record and return the number of failures and tries. - self.__record_outcome(test, failures, tries) - return failures, tries - - def __record_outcome(self, test, f, t): - """ - Record the fact that the given DocTest (`test`) generated `f` - failures out of `t` tried examples. - """ - f2, t2 = self._name2ft.get(test.name, (0,0)) - self._name2ft[test.name] = (f+f2, t+t2) - self.failures += f - self.tries += t - - __LINECACHE_FILENAME_RE = re.compile(r'[\w\.]+)' - r'\[(?P\d+)\]>$') - def __patched_linecache_getlines(self, filename): - m = self.__LINECACHE_FILENAME_RE.match(filename) - if m and m.group('name') == self.test.name: - example = self.test.examples[int(m.group('examplenum'))] - return example.source.splitlines(True) - else: - return self.save_linecache_getlines(filename) - - def run(self, test, compileflags=None, out=None, clear_globs=True): - """ - Run the examples in `test`, and display the results using the - writer function `out`. - - The examples are run in the namespace `test.globs`. If - `clear_globs` is true (the default), then this namespace will - be cleared after the test runs, to help with garbage - collection. If you would like to examine the namespace after - the test completes, then use `clear_globs=False`. 
- - `compileflags` gives the set of flags that should be used by - the Python compiler when running the examples. If not - specified, then it will default to the set of future-import - flags that apply to `globs`. - - The output of each example is checked using - `DocTestRunner.check_output`, and the results are formatted by - the `DocTestRunner.report_*` methods. - """ - self.test = test - - if compileflags is None: - compileflags = _extract_future_flags(test.globs) - - save_stdout = sys.stdout - if out is None: - out = save_stdout.write - sys.stdout = self._fakeout - - # Patch pdb.set_trace to restore sys.stdout during interactive - # debugging (so it's not still redirected to self._fakeout). - # Note that the interactive output will go to *our* - # save_stdout, even if that's not the real sys.stdout; this - # allows us to write test cases for the set_trace behavior. - save_set_trace = pdb.set_trace - self.debugger = _OutputRedirectingPdb(save_stdout) - self.debugger.reset() - pdb.set_trace = self.debugger.set_trace - - # Patch linecache.getlines, so we can see the example's source - # when we're inside the debugger. - self.save_linecache_getlines = linecache.getlines - linecache.getlines = self.__patched_linecache_getlines - - try: - return self.__run(test, compileflags, out) - finally: - sys.stdout = save_stdout - pdb.set_trace = save_set_trace - linecache.getlines = self.save_linecache_getlines - if clear_globs: - test.globs.clear() - - #///////////////////////////////////////////////////////////////// - # Summarization - #///////////////////////////////////////////////////////////////// - def summarize(self, verbose=None): - """ - Print a summary of all the test cases that have been run by - this DocTestRunner, and return a tuple `(f, t)`, where `f` is - the total number of failed examples, and `t` is the total - number of tried examples. - - The optional `verbose` argument controls how detailed the - summary is. 
If the verbosity is not specified, then the - DocTestRunner's verbosity is used. - """ - if verbose is None: - verbose = self._verbose - notests = [] - passed = [] - failed = [] - totalt = totalf = 0 - for x in self._name2ft.items(): - name, (f, t) = x - assert f <= t - totalt += t - totalf += f - if t == 0: - notests.append(name) - elif f == 0: - passed.append( (name, t) ) - else: - failed.append(x) - if verbose: - if notests: - print len(notests), "items had no tests:" - notests.sort() - for thing in notests: - print " ", thing - if passed: - print len(passed), "items passed all tests:" - passed.sort() - for thing, count in passed: - print " %3d tests in %s" % (count, thing) - if failed: - print self.DIVIDER - print len(failed), "items had failures:" - failed.sort() - for thing, (f, t) in failed: - print " %3d of %3d in %s" % (f, t, thing) - if verbose: - print totalt, "tests in", len(self._name2ft), "items." - print totalt - totalf, "passed and", totalf, "failed." - if totalf: - print "***Test Failed***", totalf, "failures." - elif verbose: - print "Test passed." - return totalf, totalt - - #///////////////////////////////////////////////////////////////// - # Backward compatibility cruft to maintain doctest.master. - #///////////////////////////////////////////////////////////////// - def merge(self, other): - d = self._name2ft - for name, (f, t) in other._name2ft.items(): - if name in d: - print "*** DocTestRunner.merge: '" + name + "' in both" \ - " testers; summing outcomes." - f2, t2 = d[name] - f = f + f2 - t = t + t2 - d[name] = f, t - -class OutputChecker: - """ - A class used to check the whether the actual output from a doctest - example matches the expected output. `OutputChecker` defines two - methods: `check_output`, which compares a given pair of outputs, - and returns true if they match; and `output_difference`, which - returns a string describing the differences between two outputs. 
- """ - def check_output(self, want, got, optionflags): - """ - Return True iff the actual output from an example (`got`) - matches the expected output (`want`). These strings are - always considered to match if they are identical; but - depending on what option flags the test runner is using, - several non-exact match types are also possible. See the - documentation for `TestRunner` for more information about - option flags. - """ - # Handle the common case first, for efficiency: - # if they're string-identical, always return true. - if got == want: - return True - - # The values True and False replaced 1 and 0 as the return - # value for boolean comparisons in Python 2.3. - if not (optionflags & DONT_ACCEPT_TRUE_FOR_1): - if (got,want) == ("True\n", "1\n"): - return True - if (got,want) == ("False\n", "0\n"): - return True - - # can be used as a special sequence to signify a - # blank line, unless the DONT_ACCEPT_BLANKLINE flag is used. - if not (optionflags & DONT_ACCEPT_BLANKLINE): - # Replace in want with a blank line. - want = re.sub('(?m)^%s\s*?$' % re.escape(BLANKLINE_MARKER), - '', want) - # If a line in got contains only spaces, then remove the - # spaces. - got = re.sub('(?m)^\s*?$', '', got) - if got == want: - return True - - # This flag causes doctest to ignore any differences in the - # contents of whitespace strings. Note that this can be used - # in conjunction with the ELLIPSIS flag. - if optionflags & NORMALIZE_WHITESPACE: - got = ' '.join(got.split()) - want = ' '.join(want.split()) - if got == want: - return True - - # The ELLIPSIS flag says to let the sequence "..." in `want` - # match any substring in `got`. - if optionflags & ELLIPSIS: - if _ellipsis_match(want, got): - return True - - # We didn't find any match; return false. - return False - - # Should we do a fancy diff? - def _do_a_fancy_diff(self, want, got, optionflags): - # Not unless they asked for a fancy diff. 
- if not optionflags & (REPORT_UDIFF | - REPORT_CDIFF | - REPORT_NDIFF): - return False - - # If expected output uses ellipsis, a meaningful fancy diff is - # too hard ... or maybe not. In two real-life failures Tim saw, - # a diff was a major help anyway, so this is commented out. - # [todo] _ellipsis_match() knows which pieces do and don't match, - # and could be the basis for a kick-ass diff in this case. - ##if optionflags & ELLIPSIS and ELLIPSIS_MARKER in want: - ## return False - - # ndiff does intraline difference marking, so can be useful even - # for 1-line differences. - if optionflags & REPORT_NDIFF: - return True - - # The other diff types need at least a few lines to be helpful. - return want.count('\n') > 2 and got.count('\n') > 2 - - def output_difference(self, example, got, optionflags): - """ - Return a string describing the differences between the - expected output for a given example (`example`) and the actual - output (`got`). `optionflags` is the set of option flags used - to compare `want` and `got`. - """ - want = example.want - # If s are being used, then replace blank lines - # with in the actual output string. - if not (optionflags & DONT_ACCEPT_BLANKLINE): - got = re.sub('(?m)^[ ]*(?=\n)', BLANKLINE_MARKER, got) - - # Check if we should use diff. - if self._do_a_fancy_diff(want, got, optionflags): - # Split want & got into lines. - want_lines = want.splitlines(True) # True == keep line ends - got_lines = got.splitlines(True) - # Use difflib to find their differences. 
- if optionflags & REPORT_UDIFF: - diff = difflib.unified_diff(want_lines, got_lines, n=2) - diff = list(diff)[2:] # strip the diff header - kind = 'unified diff with -expected +actual' - elif optionflags & REPORT_CDIFF: - diff = difflib.context_diff(want_lines, got_lines, n=2) - diff = list(diff)[2:] # strip the diff header - kind = 'context diff with expected followed by actual' - elif optionflags & REPORT_NDIFF: - engine = difflib.Differ(charjunk=difflib.IS_CHARACTER_JUNK) - diff = list(engine.compare(want_lines, got_lines)) - kind = 'ndiff with -expected +actual' - else: - assert 0, 'Bad diff option' - # Remove trailing whitespace on diff output. - diff = [line.rstrip() + '\n' for line in diff] - return 'Differences (%s):\n' % kind + _indent(''.join(diff)) - - # If we're not using diff, then simply list the expected - # output followed by the actual output. - if want and got: - return 'Expected:\n%sGot:\n%s' % (_indent(want), _indent(got)) - elif want: - return 'Expected:\n%sGot nothing\n' % _indent(want) - elif got: - return 'Expected nothing\nGot:\n%s' % _indent(got) - else: - return 'Expected nothing\nGot nothing\n' - -class DocTestFailure(Exception): - """A DocTest example has failed in debugging mode. 
- - The exception instance has variables: - - - test: the DocTest object being run - - - excample: the Example object that failed - - - got: the actual output - """ - def __init__(self, test, example, got): - self.test = test - self.example = example - self.got = got - - def __str__(self): - return str(self.test) - -class UnexpectedException(Exception): - """A DocTest example has encountered an unexpected exception - - The exception instance has variables: - - - test: the DocTest object being run - - - excample: the Example object that failed - - - exc_info: the exception info - """ - def __init__(self, test, example, exc_info): - self.test = test - self.example = example - self.exc_info = exc_info - - def __str__(self): - return str(self.test) - -class DebugRunner(DocTestRunner): - r"""Run doc tests but raise an exception as soon as there is a failure. - - If an unexpected exception occurs, an UnexpectedException is raised. - It contains the test, the example, and the original exception: - - >>> runner = DebugRunner(verbose=False) - >>> test = DocTestParser().get_doctest('>>> raise KeyError\n42', - ... {}, 'foo', 'foo.py', 0) - >>> try: - ... runner.run(test) - ... except UnexpectedException, failure: - ... pass - - >>> failure.test is test - True - - >>> failure.example.want - '42\n' - - >>> exc_info = failure.exc_info - >>> raise exc_info[0], exc_info[1], exc_info[2] - Traceback (most recent call last): - ... - KeyError - - We wrap the original exception to give the calling application - access to the test and example information. - - If the output doesn't match, then a DocTestFailure is raised: - - >>> test = DocTestParser().get_doctest(''' - ... >>> x = 1 - ... >>> x - ... 2 - ... ''', {}, 'foo', 'foo.py', 0) - - >>> try: - ... runner.run(test) - ... except DocTestFailure, failure: - ... 
pass - - DocTestFailure objects provide access to the test: - - >>> failure.test is test - True - - As well as to the example: - - >>> failure.example.want - '2\n' - - and the actual output: - - >>> failure.got - '1\n' - - If a failure or error occurs, the globals are left intact: - - >>> del test.globs['__builtins__'] - >>> test.globs - {'x': 1} - - >>> test = DocTestParser().get_doctest(''' - ... >>> x = 2 - ... >>> raise KeyError - ... ''', {}, 'foo', 'foo.py', 0) - - >>> runner.run(test) - Traceback (most recent call last): - ... - UnexpectedException: - - >>> del test.globs['__builtins__'] - >>> test.globs - {'x': 2} - - But the globals are cleared if there is no error: - - >>> test = DocTestParser().get_doctest(''' - ... >>> x = 2 - ... ''', {}, 'foo', 'foo.py', 0) - - >>> runner.run(test) - (0, 1) - - >>> test.globs - {} - - """ - - def run(self, test, compileflags=None, out=None, clear_globs=True): - r = DocTestRunner.run(self, test, compileflags, out, False) - if clear_globs: - test.globs.clear() - return r - - def report_unexpected_exception(self, out, test, example, exc_info): - raise UnexpectedException(test, example, exc_info) - - def report_failure(self, out, test, example, got): - raise DocTestFailure(test, example, got) - -###################################################################### -## 6. Test Functions -###################################################################### -# These should be backwards compatible. - -# For backward compatibility, a global instance of a DocTestRunner -# class, updated by testmod. 
-master = None - -def testmod(m=None, name=None, globs=None, verbose=None, isprivate=None, - report=True, optionflags=0, extraglobs=None, - raise_on_error=False, exclude_empty=False): - """m=None, name=None, globs=None, verbose=None, isprivate=None, - report=True, optionflags=0, extraglobs=None, raise_on_error=False, - exclude_empty=False - - Test examples in docstrings in functions and classes reachable - from module m (or the current module if m is not supplied), starting - with m.__doc__. Unless isprivate is specified, private names - are not skipped. - - Also test examples reachable from dict m.__test__ if it exists and is - not None. m.__test__ maps names to functions, classes and strings; - function and class docstrings are tested even if the name is private; - strings are tested directly, as if they were docstrings. - - Return (#failures, #tests). - - See doctest.__doc__ for an overview. - - Optional keyword arg "name" gives the name of the module; by default - use m.__name__. - - Optional keyword arg "globs" gives a dict to be used as the globals - when executing examples; by default, use m.__dict__. A copy of this - dict is actually used for each docstring, so that each docstring's - examples start with a clean slate. - - Optional keyword arg "extraglobs" gives a dictionary that should be - merged into the globals that are used to execute examples. By - default, no extra globals are used. This is new in 2.4. - - Optional keyword arg "verbose" prints lots of stuff if true, prints - only failures if false; by default, it's true iff "-v" is in sys.argv. - - Optional keyword arg "report" prints a summary at the end when true, - else prints nothing at the end. In verbose mode, the summary is - detailed, else very brief (in fact, empty if all tests passed). - - Optional keyword arg "optionflags" or's together module constants, - and defaults to 0. This is new in 2.3. 
Possible values (see the - docs for details): - - DONT_ACCEPT_TRUE_FOR_1 - DONT_ACCEPT_BLANKLINE - NORMALIZE_WHITESPACE - ELLIPSIS - IGNORE_EXCEPTION_DETAIL - REPORT_UDIFF - REPORT_CDIFF - REPORT_NDIFF - REPORT_ONLY_FIRST_FAILURE - - Optional keyword arg "raise_on_error" raises an exception on the - first unexpected exception or failure. This allows failures to be - post-mortem debugged. - - Deprecated in Python 2.4: - Optional keyword arg "isprivate" specifies a function used to - determine whether a name is private. The default function is - treat all functions as public. Optionally, "isprivate" can be - set to doctest.is_private to skip over functions marked as private - using the underscore naming convention; see its docs for details. - - Advanced tomfoolery: testmod runs methods of a local instance of - class doctest.Tester, then merges the results into (or creates) - global Tester instance doctest.master. Methods of doctest.master - can be called directly too, if you want to do something unusual. - Passing report=0 to testmod is especially useful then, to delay - displaying a summary. Invoke doctest.master.summarize(verbose) - when you're done fiddling. - """ - global master - - if isprivate is not None: - warnings.warn("the isprivate argument is deprecated; " - "examine DocTestFinder.find() lists instead", - DeprecationWarning) - - # If no module was given, then use __main__. - if m is None: - # DWA - m will still be None if this wasn't invoked from the command - # line, in which case the following TypeError is about as good an error - # as we should expect - m = sys.modules.get('__main__') - - # Check that we were actually given a module. - if not inspect.ismodule(m): - raise TypeError("testmod: module required; %r" % (m,)) - - # If no name was given, then use the module's name. - if name is None: - name = m.__name__ - - # Find, parse, and run all tests in the given module. 
- finder = DocTestFinder(_namefilter=isprivate, exclude_empty=exclude_empty) - - if raise_on_error: - runner = DebugRunner(verbose=verbose, optionflags=optionflags) - else: - runner = DocTestRunner(verbose=verbose, optionflags=optionflags) - - for test in finder.find(m, name, globs=globs, extraglobs=extraglobs): - runner.run(test) - - if report: - runner.summarize() - - if master is None: - master = runner - else: - master.merge(runner) - - return runner.failures, runner.tries - -def testfile(filename, module_relative=True, name=None, package=None, - globs=None, verbose=None, report=True, optionflags=0, - extraglobs=None, raise_on_error=False, parser=DocTestParser()): - """ - Test examples in the given file. Return (#failures, #tests). - - Optional keyword arg "module_relative" specifies how filenames - should be interpreted: - - - If "module_relative" is True (the default), then "filename" - specifies a module-relative path. By default, this path is - relative to the calling module's directory; but if the - "package" argument is specified, then it is relative to that - package. To ensure os-independence, "filename" should use - "/" characters to separate path segments, and should not - be an absolute path (i.e., it may not begin with "/"). - - - If "module_relative" is False, then "filename" specifies an - os-specific path. The path may be absolute or relative (to - the current working directory). - - Optional keyword arg "name" gives the name of the test; by default - use the file's basename. - - Optional keyword argument "package" is a Python package or the - name of a Python package whose directory should be used as the - base directory for a module relative filename. If no package is - specified, then the calling module's directory is used as the base - directory for module relative filenames. It is an error to - specify "package" if "module_relative" is False. 
- - Optional keyword arg "globs" gives a dict to be used as the globals - when executing examples; by default, use {}. A copy of this dict - is actually used for each docstring, so that each docstring's - examples start with a clean slate. - - Optional keyword arg "extraglobs" gives a dictionary that should be - merged into the globals that are used to execute examples. By - default, no extra globals are used. - - Optional keyword arg "verbose" prints lots of stuff if true, prints - only failures if false; by default, it's true iff "-v" is in sys.argv. - - Optional keyword arg "report" prints a summary at the end when true, - else prints nothing at the end. In verbose mode, the summary is - detailed, else very brief (in fact, empty if all tests passed). - - Optional keyword arg "optionflags" or's together module constants, - and defaults to 0. Possible values (see the docs for details): - - DONT_ACCEPT_TRUE_FOR_1 - DONT_ACCEPT_BLANKLINE - NORMALIZE_WHITESPACE - ELLIPSIS - IGNORE_EXCEPTION_DETAIL - REPORT_UDIFF - REPORT_CDIFF - REPORT_NDIFF - REPORT_ONLY_FIRST_FAILURE - - Optional keyword arg "raise_on_error" raises an exception on the - first unexpected exception or failure. This allows failures to be - post-mortem debugged. - - Optional keyword arg "parser" specifies a DocTestParser (or - subclass) that should be used to extract tests from the files. - - Advanced tomfoolery: testmod runs methods of a local instance of - class doctest.Tester, then merges the results into (or creates) - global Tester instance doctest.master. Methods of doctest.master - can be called directly too, if you want to do something unusual. - Passing report=0 to testmod is especially useful then, to delay - displaying a summary. Invoke doctest.master.summarize(verbose) - when you're done fiddling. 
- """ - global master - - if package and not module_relative: - raise ValueError("Package may only be specified for module-" - "relative paths.") - - # Relativize the path - if module_relative: - package = _normalize_module(package) - filename = _module_relative_path(package, filename) - - # If no name was given, then use the file's name. - if name is None: - name = os.path.basename(filename) - - # Assemble the globals. - if globs is None: - globs = {} - else: - globs = globs.copy() - if extraglobs is not None: - globs.update(extraglobs) - - if raise_on_error: - runner = DebugRunner(verbose=verbose, optionflags=optionflags) - else: - runner = DocTestRunner(verbose=verbose, optionflags=optionflags) - - # Read the file, convert it to a test, and run it. - s = open(filename).read() - test = parser.get_doctest(s, globs, name, filename, 0) - runner.run(test) - - if report: - runner.summarize() - - if master is None: - master = runner - else: - master.merge(runner) - - return runner.failures, runner.tries - -def run_docstring_examples(f, globs, verbose=False, name="NoName", - compileflags=None, optionflags=0): - """ - Test examples in the given object's docstring (`f`), using `globs` - as globals. Optional argument `name` is used in failure messages. - If the optional argument `verbose` is true, then generate output - even if there are no failures. - - `compileflags` gives the set of flags that should be used by the - Python compiler when running the examples. If not specified, then - it will default to the set of future-import flags that apply to - `globs`. - - Optional keyword arg `optionflags` specifies options for the - testing and output. See the documentation for `testmod` for more - information. - """ - # Find, parse, and run all tests in the given module. 
- finder = DocTestFinder(verbose=verbose, recurse=False) - runner = DocTestRunner(verbose=verbose, optionflags=optionflags) - for test in finder.find(f, name, globs=globs): - runner.run(test, compileflags=compileflags) - -###################################################################### -## 7. Tester -###################################################################### -# This is provided only for backwards compatibility. It's not -# actually used in any way. - -class Tester: - def __init__(self, mod=None, globs=None, verbose=None, - isprivate=None, optionflags=0): - - warnings.warn("class Tester is deprecated; " - "use class doctest.DocTestRunner instead", - DeprecationWarning, stacklevel=2) - if mod is None and globs is None: - raise TypeError("Tester.__init__: must specify mod or globs") - if mod is not None and not inspect.ismodule(mod): - raise TypeError("Tester.__init__: mod must be a module; %r" % - (mod,)) - if globs is None: - globs = mod.__dict__ - self.globs = globs - - self.verbose = verbose - self.isprivate = isprivate - self.optionflags = optionflags - self.testfinder = DocTestFinder(_namefilter=isprivate) - self.testrunner = DocTestRunner(verbose=verbose, - optionflags=optionflags) - - def runstring(self, s, name): - test = DocTestParser().get_doctest(s, self.globs, name, None, None) - if self.verbose: - print "Running string", name - (f,t) = self.testrunner.run(test) - if self.verbose: - print f, "of", t, "examples failed in string", name - return (f,t) - - def rundoc(self, object, name=None, module=None): - f = t = 0 - tests = self.testfinder.find(object, name, module=module, - globs=self.globs) - for test in tests: - (f2, t2) = self.testrunner.run(test) - (f,t) = (f+f2, t+t2) - return (f,t) - - def rundict(self, d, name, module=None): - import new - m = new.module(name) - m.__dict__.update(d) - if module is None: - module = False - return self.rundoc(m, name, module) - - def run__test__(self, d, name): - import new - m = new.module(name) - 
m.__test__ = d - return self.rundoc(m, name) - - def summarize(self, verbose=None): - return self.testrunner.summarize(verbose) - - def merge(self, other): - self.testrunner.merge(other.testrunner) - -###################################################################### -## 8. Unittest Support -###################################################################### - -_unittest_reportflags = 0 - -def set_unittest_reportflags(flags): - """Sets the unittest option flags. - - The old flag is returned so that a runner could restore the old - value if it wished to: - - >>> old = _unittest_reportflags - >>> set_unittest_reportflags(REPORT_NDIFF | - ... REPORT_ONLY_FIRST_FAILURE) == old - True - - >>> import doctest - >>> doctest._unittest_reportflags == (REPORT_NDIFF | - ... REPORT_ONLY_FIRST_FAILURE) - True - - Only reporting flags can be set: - - >>> set_unittest_reportflags(ELLIPSIS) - Traceback (most recent call last): - ... - ValueError: ('Only reporting flags allowed', 8) - - >>> set_unittest_reportflags(old) == (REPORT_NDIFF | - ... 
REPORT_ONLY_FIRST_FAILURE) - True - """ - global _unittest_reportflags - - if (flags & REPORTING_FLAGS) != flags: - raise ValueError("Only reporting flags allowed", flags) - old = _unittest_reportflags - _unittest_reportflags = flags - return old - -_para_re = re.compile('\s*\n\s*\n\s*') -def _unittest_count(docstring): - words = 0 - count = 0 - for p in _para_re.split(docstring): - p = p.strip() - if not p: - continue - if p.startswith('>>> '): - if words: - count += 1 - words = 0 - else: - words = 1 - - return count or 1 - - -class DocTestCase(unittest.TestCase): - - def __init__(self, test, optionflags=0, setUp=None, tearDown=None, - checker=None): - - unittest.TestCase.__init__(self) - self._dt_optionflags = optionflags - self._dt_checker = checker - self._dt_test = test - self._dt_setUp = setUp - self._dt_tearDown = tearDown - - self._dt_count = _unittest_count(test.docstring) - - def countTestCases(self): - return self._dt_count - - def setUp(self): - test = self._dt_test - - if self._dt_setUp is not None: - self._dt_setUp(test) - - def tearDown(self): - test = self._dt_test - - if self._dt_tearDown is not None: - self._dt_tearDown(test) - - test.globs.clear() - - def runTest(self): - test = self._dt_test - old = sys.stdout - new = StringIO() - optionflags = self._dt_optionflags - - if not (optionflags & REPORTING_FLAGS): - # The option flags don't include any reporting flags, - # so add the default reporting flags - optionflags |= _unittest_reportflags - - runner = DocTestRunner(optionflags=optionflags, - checker=self._dt_checker, verbose=False) - - try: - runner.DIVIDER = "-"*70 - failures, tries = runner.run( - test, out=new.write, clear_globs=False) - finally: - sys.stdout = old - - if failures: - raise self.failureException(self.format_failure(new.getvalue())) - - def format_failure(self, err): - test = self._dt_test - if test.lineno is None: - lineno = 'unknown line number' - else: - lineno = '%s' % test.lineno - lname = 
'.'.join(test.name.split('.')[-1:]) - return ('Failed doctest test for %s\n' - ' File "%s", line %s, in %s\n\n%s' - % (test.name, test.filename, lineno, lname, err) - ) - - def debug(self): - r"""Run the test case without results and without catching exceptions - - The unit test framework includes a debug method on test cases - and test suites to support post-mortem debugging. The test code - is run in such a way that errors are not caught. This way a - caller can catch the errors and initiate post-mortem debugging. - - The DocTestCase provides a debug method that raises - UnexpectedException errors if there is an unexepcted - exception: - - >>> test = DocTestParser().get_doctest('>>> raise KeyError\n42', - ... {}, 'foo', 'foo.py', 0) - >>> case = DocTestCase(test) - >>> try: - ... case.debug() - ... except UnexpectedException, failure: - ... pass - - The UnexpectedException contains the test, the example, and - the original exception: - - >>> failure.test is test - True - - >>> failure.example.want - '42\n' - - >>> exc_info = failure.exc_info - >>> raise exc_info[0], exc_info[1], exc_info[2] - Traceback (most recent call last): - ... - KeyError - - If the output doesn't match, then a DocTestFailure is raised: - - >>> test = DocTestParser().get_doctest(''' - ... >>> x = 1 - ... >>> x - ... 2 - ... ''', {}, 'foo', 'foo.py', 0) - >>> case = DocTestCase(test) - - >>> try: - ... case.debug() - ... except DocTestFailure, failure: - ... 
pass - - DocTestFailure objects provide access to the test: - - >>> failure.test is test - True - - As well as to the example: - - >>> failure.example.want - '2\n' - - and the actual output: - - >>> failure.got - '1\n' - - """ - - self.setUp() - runner = DebugRunner(optionflags=self._dt_optionflags, - checker=self._dt_checker, verbose=False) - runner.run(self._dt_test) - self.tearDown() - - def id(self): - return self._dt_test.name - - def __repr__(self): - name = self._dt_test.name.split('.') - return "%s (%s)" % (name[-1], '.'.join(name[:-1])) - - __str__ = __repr__ - - def shortDescription(self): - return "Doctest: " + self._dt_test.name - -def DocTestSuite(module=None, globs=None, extraglobs=None, test_finder=None, - **options): - """ - Convert doctest tests for a module to a unittest test suite. - - This converts each documentation string in a module that - contains doctest tests to a unittest test case. If any of the - tests in a doc string fail, then the test case fails. An exception - is raised showing the name of the file containing the test and a - (sometimes approximate) line number. - - The `module` argument provides the module to be tested. The argument - can be either a module or a module name. - - If no argument is given, the calling module is used. - - A number of options may be provided as keyword arguments: - - setUp - A set-up function. This is called before running the - tests in each file. The setUp function will be passed a DocTest - object. The setUp function can access the test globals as the - globs attribute of the test passed. - - tearDown - A tear-down function. This is called after running the - tests in each file. The tearDown function will be passed a DocTest - object. The tearDown function can access the test globals as the - globs attribute of the test passed. - - globs - A dictionary containing initial global variables for the tests. - - optionflags - A set of doctest option flags expressed as an integer. 
- """ - - if test_finder is None: - test_finder = DocTestFinder() - - module = _normalize_module(module) - tests = test_finder.find(module, globs=globs, extraglobs=extraglobs) - if globs is None: - globs = module.__dict__ - if not tests: - # Why do we want to do this? Because it reveals a bug that might - # otherwise be hidden. - raise ValueError(module, "has no tests") - - tests.sort() - suite = unittest.TestSuite() - for test in tests: - if len(test.examples) == 0: - continue - if not test.filename: - filename = module.__file__ - if filename[-4:] in (".pyc", ".pyo"): - filename = filename[:-1] - test.filename = filename - suite.addTest(DocTestCase(test, **options)) - - return suite - -class DocFileCase(DocTestCase): - - def id(self): - return '_'.join(self._dt_test.name.split('.')) - - def __repr__(self): - return self._dt_test.filename - __str__ = __repr__ - - def format_failure(self, err): - return ('Failed doctest test for %s\n File "%s", line 0\n\n%s' - % (self._dt_test.name, self._dt_test.filename, err) - ) - -def DocFileTest(path, module_relative=True, package=None, - globs=None, parser=DocTestParser(), **options): - if globs is None: - globs = {} - else: - globs = globs.copy() - - if package and not module_relative: - raise ValueError("Package may only be specified for module-" - "relative paths.") - - # Relativize the path. - if module_relative: - package = _normalize_module(package) - path = _module_relative_path(package, path) - if "__file__" not in globs: - globs["__file__"] = path - - # Find the file and read it. - name = os.path.basename(path) - doc = open(path).read() - - # Convert it to a test, and wrap it in a DocFileCase. - test = parser.get_doctest(doc, globs, name, path, 0) - return DocFileCase(test, **options) - -def DocFileSuite(*paths, **kw): - """A unittest suite for one or more doctest files. - - The path to each doctest file is given as a string; the - interpretation of that string depends on the keyword argument - "module_relative". 
- - A number of options may be provided as keyword arguments: - - module_relative - If "module_relative" is True, then the given file paths are - interpreted as os-independent module-relative paths. By - default, these paths are relative to the calling module's - directory; but if the "package" argument is specified, then - they are relative to that package. To ensure os-independence, - "filename" should use "/" characters to separate path - segments, and may not be an absolute path (i.e., it may not - begin with "/"). - - If "module_relative" is False, then the given file paths are - interpreted as os-specific paths. These paths may be absolute - or relative (to the current working directory). - - package - A Python package or the name of a Python package whose directory - should be used as the base directory for module relative paths. - If "package" is not specified, then the calling module's - directory is used as the base directory for module relative - filenames. It is an error to specify "package" if - "module_relative" is False. - - setUp - A set-up function. This is called before running the - tests in each file. The setUp function will be passed a DocTest - object. The setUp function can access the test globals as the - globs attribute of the test passed. - - tearDown - A tear-down function. This is called after running the - tests in each file. The tearDown function will be passed a DocTest - object. The tearDown function can access the test globals as the - globs attribute of the test passed. - - globs - A dictionary containing initial global variables for the tests. - - optionflags - A set of doctest option flags expressed as an integer. - - parser - A DocTestParser (or subclass) that should be used to extract - tests from the files. - """ - suite = unittest.TestSuite() - - # We do this here so that _normalize_module is called at the right - # level. 
If it were called in DocFileTest, then this function - # would be the caller and we might guess the package incorrectly. - if kw.get('module_relative', True): - kw['package'] = _normalize_module(kw.get('package')) - - for path in paths: - suite.addTest(DocFileTest(path, **kw)) - - return suite - -###################################################################### -## 9. Debugging Support -###################################################################### - -def script_from_examples(s): - r"""Extract script from text with examples. - - Converts text with examples to a Python script. Example input is - converted to regular code. Example output and all other words - are converted to comments: - - >>> text = ''' - ... Here are examples of simple math. - ... - ... Python has super accurate integer addition - ... - ... >>> 2 + 2 - ... 5 - ... - ... And very friendly error messages: - ... - ... >>> 1/0 - ... To Infinity - ... And - ... Beyond - ... - ... You can use logic if you want: - ... - ... >>> if 0: - ... ... blah - ... ... blah - ... ... - ... - ... Ho hum - ... ''' - - >>> print script_from_examples(text) - # Here are examples of simple math. - # - # Python has super accurate integer addition - # - 2 + 2 - # Expected: - ## 5 - # - # And very friendly error messages: - # - 1/0 - # Expected: - ## To Infinity - ## And - ## Beyond - # - # You can use logic if you want: - # - if 0: - blah - blah - # - # Ho hum - """ - output = [] - for piece in DocTestParser().parse(s): - if isinstance(piece, Example): - # Add the example's source code (strip trailing NL) - output.append(piece.source[:-1]) - # Add the expected output: - want = piece.want - if want: - output.append('# Expected:') - output += ['## '+l for l in want.split('\n')[:-1]] - else: - # Add non-example text. - output += [_comment_line(l) - for l in piece.split('\n')[:-1]] - - # Trim junk on both ends. 
- while output and output[-1] == '#': - output.pop() - while output and output[0] == '#': - output.pop(0) - # Combine the output, and return it. - return '\n'.join(output) - -def testsource(module, name): - """Extract the test sources from a doctest docstring as a script. - - Provide the module (or dotted name of the module) containing the - test to be debugged and the name (within the module) of the object - with the doc string with tests to be debugged. - """ - module = _normalize_module(module) - tests = DocTestFinder().find(module) - test = [t for t in tests if t.name == name] - if not test: - raise ValueError(name, "not found in tests") - test = test[0] - testsrc = script_from_examples(test.docstring) - return testsrc - -def debug_src(src, pm=False, globs=None): - """Debug a single doctest docstring, in argument `src`'""" - testsrc = script_from_examples(src) - debug_script(testsrc, pm, globs) - -def debug_script(src, pm=False, globs=None): - "Debug a test script. `src` is the script, as a string." - import pdb - - # Note that tempfile.NameTemporaryFile() cannot be used. As the - # docs say, a file so created cannot be opened by name a second time - # on modern Windows boxes, and execfile() needs to open it. - srcfilename = tempfile.mktemp(".py", "doctestdebug") - f = open(srcfilename, 'w') - f.write(src) - f.close() - - try: - if globs: - globs = globs.copy() - else: - globs = {} - - if pm: - try: - execfile(srcfilename, globs, globs) - except: - print sys.exc_info()[1] - pdb.post_mortem(sys.exc_info()[2]) - else: - # Note that %r is vital here. '%s' instead can, e.g., cause - # backslashes to get treated as metacharacters on Windows. - pdb.run("execfile(%r)" % srcfilename, globs, globs) - - finally: - os.remove(srcfilename) - -def debug(module, name, pm=False): - """Debug a single doctest docstring. 
- - Provide the module (or dotted name of the module) containing the - test to be debugged and the name (within the module) of the object - with the docstring with tests to be debugged. - """ - module = _normalize_module(module) - testsrc = testsource(module, name) - debug_script(testsrc, pm, module.__dict__) - -###################################################################### -## 10. Example Usage -###################################################################### -class _TestClass: - """ - A pointless class, for sanity-checking of docstring testing. - - Methods: - square() - get() - - >>> _TestClass(13).get() + _TestClass(-12).get() - 1 - >>> hex(_TestClass(13).square().get()) - '0xa9' - """ - - def __init__(self, val): - """val -> _TestClass object with associated value val. - - >>> t = _TestClass(123) - >>> print t.get() - 123 - """ - - self.val = val - - def square(self): - """square() -> square TestClass's associated value - - >>> _TestClass(13).square().get() - 169 - """ - - self.val = self.val ** 2 - return self - - def get(self): - """get() -> return TestClass's associated value. - - >>> x = _TestClass(-42) - >>> print x.get() - -42 - """ - - return self.val - -__test__ = {"_TestClass": _TestClass, - "string": r""" - Example of a string object, searched as-is. - >>> x = 1; y = 2 - >>> x + y, x * y - (3, 2) - """, - - "bool-int equivalence": r""" - In 2.2, boolean expressions displayed - 0 or 1. By default, we still accept - them. This can be disabled by passing - DONT_ACCEPT_TRUE_FOR_1 to the new - optionflags argument. - >>> 4 == 4 - 1 - >>> 4 == 4 - True - >>> 4 > 4 - 0 - >>> 4 > 4 - False - """, - - "blank lines": r""" - Blank lines can be marked with <BLANKLINE>: - >>> print 'foo\n\nbar\n' - foo - <BLANKLINE> - bar - <BLANKLINE> - """, - - "ellipsis": r""" - If the ellipsis flag is used, then '...'
can be used to - elide substrings in the desired output: - >>> print range(1000) #doctest: +ELLIPSIS - [0, 1, 2, ..., 999] - """, - - "whitespace normalization": r""" - If the whitespace normalization flag is used, then - differences in whitespace are ignored. - >>> print range(30) #doctest: +NORMALIZE_WHITESPACE - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, - 27, 28, 29] - """, - } - -def _test(): - r = unittest.TextTestRunner() - r.run(DocTestSuite()) - -if __name__ == "__main__": - _test() From scoder at codespeak.net Fri Aug 4 16:17:34 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 4 Aug 2006 16:17:34 +0200 (CEST) Subject: [Lxml-checkins] r31001 - lxml/branch/capi/src Message-ID: <20060804141734.3AC7910072@code0.codespeak.net> Author: scoder Date: Fri Aug 4 16:17:33 2006 New Revision: 31001 Removed: lxml/branch/capi/src/doctest.py Log: dropped src/doctest.py Deleted: /lxml/branch/capi/src/doctest.py ============================================================================== --- /lxml/branch/capi/src/doctest.py Fri Aug 4 16:17:33 2006 +++ (empty file) @@ -1,2704 +0,0 @@ -# Module doctest. -# Released to the public domain 16-Jan-2001, by Tim Peters (tim at python.org). -# Major enhancements and refactoring by: -# Jim Fulton -# Edward Loper - -# Provided as-is; use at your own risk; no warranty; no promises; enjoy! - -r"""Module doctest -- a framework for running examples in docstrings. - -In simplest use, end each module M to be tested with: - -def _test(): - import doctest - doctest.testmod() - -if __name__ == "__main__": - _test() - -Then running the module as a script will cause the examples in the -docstrings to get executed and verified: - -python M.py - -This won't display anything unless an example fails, in which case the -failing example(s) and the cause(s) of the failure(s) are printed to stdout -(why not stderr? 
because stderr is a lame hack <0.2 wink>), and the final -line of output is "Test failed.". - -Run it with the -v switch instead: - -python M.py -v - -and a detailed report of all examples tried is printed to stdout, along -with assorted summaries at the end. - -You can force verbose mode by passing "verbose=True" to testmod, or prohibit -it by passing "verbose=False". In either of those cases, sys.argv is not -examined by testmod. - -There are a variety of other ways to run doctests, including integration -with the unittest framework, and support for running non-Python text -files containing doctests. There are also many ways to override parts -of doctest's default behaviors. See the Library Reference Manual for -details. -""" - -__docformat__ = 'reStructuredText en' - -__all__ = [ - # 0, Option Flags - 'register_optionflag', - 'DONT_ACCEPT_TRUE_FOR_1', - 'DONT_ACCEPT_BLANKLINE', - 'NORMALIZE_WHITESPACE', - 'ELLIPSIS', - 'IGNORE_EXCEPTION_DETAIL', - 'COMPARISON_FLAGS', - 'REPORT_UDIFF', - 'REPORT_CDIFF', - 'REPORT_NDIFF', - 'REPORT_ONLY_FIRST_FAILURE', - 'REPORTING_FLAGS', - # 1. Utility Functions - 'is_private', - # 2. Example & DocTest - 'Example', - 'DocTest', - # 3. Doctest Parser - 'DocTestParser', - # 4. Doctest Finder - 'DocTestFinder', - # 5. Doctest Runner - 'DocTestRunner', - 'OutputChecker', - 'DocTestFailure', - 'UnexpectedException', - 'DebugRunner', - # 6. Test Functions - 'testmod', - 'testfile', - 'run_docstring_examples', - # 7. Tester - 'Tester', - # 8. Unittest Support - 'DocTestSuite', - 'DocFileSuite', - 'set_unittest_reportflags', - # 9. Debugging Support - 'script_from_examples', - 'testsource', - 'debug_src', - 'debug', -] - -import __future__ - -import sys, traceback, inspect, linecache, os, re, types -import unittest, difflib, pdb, tempfile -import warnings -from StringIO import StringIO - -# Don't whine about the deprecated is_private function in this -# module's tests. 
-warnings.filterwarnings("ignore", "is_private", DeprecationWarning, - __name__, 0) - -real_pdb_set_trace = pdb.set_trace - -# There are 4 basic classes: -# - Example: a <source, want> pair, plus an intra-docstring line number. -# - DocTest: a collection of examples, parsed from a docstring, plus -# info about where the docstring came from (name, filename, lineno). -# - DocTestFinder: extracts DocTests from a given object's docstring and -# its contained objects' docstrings. -# - DocTestRunner: runs DocTest cases, and accumulates statistics. -# -# So the basic picture is: -# -# list of: -# +------+ +---------+ +-------+ -# |object| --DocTestFinder-> | DocTest | --DocTestRunner-> |results| -# +------+ +---------+ +-------+ -# | Example | -# | ... | -# | Example | -# +---------+ - -# Option constants. - -OPTIONFLAGS_BY_NAME = {} -def register_optionflag(name): - flag = 1 << len(OPTIONFLAGS_BY_NAME) - OPTIONFLAGS_BY_NAME[name] = flag - return flag - -DONT_ACCEPT_TRUE_FOR_1 = register_optionflag('DONT_ACCEPT_TRUE_FOR_1') -DONT_ACCEPT_BLANKLINE = register_optionflag('DONT_ACCEPT_BLANKLINE') -NORMALIZE_WHITESPACE = register_optionflag('NORMALIZE_WHITESPACE') -ELLIPSIS = register_optionflag('ELLIPSIS') -IGNORE_EXCEPTION_DETAIL = register_optionflag('IGNORE_EXCEPTION_DETAIL') - -COMPARISON_FLAGS = (DONT_ACCEPT_TRUE_FOR_1 | - DONT_ACCEPT_BLANKLINE | - NORMALIZE_WHITESPACE | - ELLIPSIS | - IGNORE_EXCEPTION_DETAIL) - -REPORT_UDIFF = register_optionflag('REPORT_UDIFF') -REPORT_CDIFF = register_optionflag('REPORT_CDIFF') -REPORT_NDIFF = register_optionflag('REPORT_NDIFF') -REPORT_ONLY_FIRST_FAILURE = register_optionflag('REPORT_ONLY_FIRST_FAILURE') - -REPORTING_FLAGS = (REPORT_UDIFF | - REPORT_CDIFF | - REPORT_NDIFF | - REPORT_ONLY_FIRST_FAILURE) - -# Special string markers for use in `want` strings: -BLANKLINE_MARKER = '<BLANKLINE>' -ELLIPSIS_MARKER = '...'
- -###################################################################### -## Table of Contents -###################################################################### -# 1. Utility Functions -# 2. Example & DocTest -- store test cases -# 3. DocTest Parser -- extracts examples from strings -# 4. DocTest Finder -- extracts test cases from objects -# 5. DocTest Runner -- runs test cases -# 6. Test Functions -- convenient wrappers for testing -# 7. Tester Class -- for backwards compatibility -# 8. Unittest Support -# 9. Debugging Support -# 10. Example Usage - -###################################################################### -## 1. Utility Functions -###################################################################### - -def is_private(prefix, base): - """prefix, base -> true iff name prefix + "." + base is "private". - - Prefix may be an empty string, and base does not contain a period. - Prefix is ignored (although functions you write conforming to this - protocol may make use of it). - Return true iff base begins with an (at least one) underscore, but - does not both begin and end with (at least) two underscores. - - >>> is_private("a.b", "my_func") - False - >>> is_private("____", "_my_func") - True - >>> is_private("someclass", "__init__") - False - >>> is_private("sometypo", "__init_") - True - >>> is_private("x.y.z", "_") - True - >>> is_private("_x.y.z", "__") - False - >>> is_private("", "") # senseless but consistent - False - """ - warnings.warn("is_private is deprecated; it wasn't useful; " - "examine DocTestFinder.find() lists instead", - DeprecationWarning, stacklevel=2) - return base[:1] == "_" and not base[:2] == "__" == base[-2:] - -def _extract_future_flags(globs): - """ - Return the compiler-flags associated with the future features that - have been imported into the given namespace (globs). 
- """ - flags = 0 - for fname in __future__.all_feature_names: - feature = globs.get(fname, None) - if feature is getattr(__future__, fname): - flags |= feature.compiler_flag - return flags - -def _normalize_module(module, depth=2): - """ - Return the module specified by `module`. In particular: - - If `module` is a module, then return module. - - If `module` is a string, then import and return the - module with that name. - - If `module` is None, then return the calling module. - The calling module is assumed to be the module of - the stack frame at the given depth in the call stack. - """ - if inspect.ismodule(module): - return module - elif isinstance(module, (str, unicode)): - return __import__(module, globals(), locals(), ["*"]) - elif module is None: - return sys.modules[sys._getframe(depth).f_globals['__name__']] - else: - raise TypeError("Expected a module, string, or None") - -def _indent(s, indent=4): - """ - Add the given number of space characters to the beginning every - non-blank line in `s`, and return the result. - """ - # This regexp matches the start of non-blank lines: - return re.sub('(?m)^(?!$)', indent*' ', s) - -def _exception_traceback(exc_info): - """ - Return a string containing a traceback message for the given - exc_info tuple (as returned by sys.exc_info()). - """ - # Get a traceback message. - excout = StringIO() - exc_type, exc_val, exc_tb = exc_info - traceback.print_exception(exc_type, exc_val, exc_tb, file=excout) - return excout.getvalue() - -# Override some StringIO methods. -class _SpoofOut(StringIO): - def getvalue(self): - result = StringIO.getvalue(self) - # If anything at all was written, make sure there's a trailing - # newline. There's no way for the expected output to indicate - # that a trailing newline is missing. - if result and not result.endswith("\n"): - result += "\n" - # Prevent softspace from screwing up the next test case, in - # case they used print with a trailing comma in an example. 
- if hasattr(self, "softspace"): - del self.softspace - return result - - def truncate(self, size=None): - StringIO.truncate(self, size) - if hasattr(self, "softspace"): - del self.softspace - -# Worst-case linear-time ellipsis matching. -def _ellipsis_match(want, got): - """ - Essentially the only subtle case: - >>> _ellipsis_match('aa...aa', 'aaa') - False - """ - if ELLIPSIS_MARKER not in want: - return want == got - - # Find "the real" strings. - ws = want.split(ELLIPSIS_MARKER) - assert len(ws) >= 2 - - # Deal with exact matches possibly needed at one or both ends. - startpos, endpos = 0, len(got) - w = ws[0] - if w: # starts with exact match - if got.startswith(w): - startpos = len(w) - del ws[0] - else: - return False - w = ws[-1] - if w: # ends with exact match - if got.endswith(w): - endpos -= len(w) - del ws[-1] - else: - return False - - if startpos > endpos: - # Exact end matches required more characters than we have, as in - # _ellipsis_match('aa...aa', 'aaa') - return False - - # For the rest, we only need to find the leftmost non-overlapping - # match for each piece. If there's no overall match that way alone, - # there's no overall match period. - for w in ws: - # w may be '' at times, if there are consecutive ellipses, or - # due to an ellipsis at the start or end of `want`. That's OK. - # Search for an empty string succeeds, and doesn't change startpos. - startpos = got.find(w, startpos, endpos) - if startpos < 0: - return False - startpos += len(w) - - return True - -def _comment_line(line): - "Return a commented form of the given line" - line = line.rstrip() - if line: - return '# '+line - else: - return '#' - -class _OutputRedirectingPdb(pdb.Pdb): - """ - A specialized version of the python debugger that redirects stdout - to a given stream when interacting with the user. Stdout is *not* - redirected when traced code is executed. 
- """ - def __init__(self, out): - self.__out = out - self.__debugger_used = False - pdb.Pdb.__init__(self) - - def set_trace(self): - self.__debugger_used = True - pdb.Pdb.set_trace(self) - - def set_continue(self): - # Calling set_continue unconditionally would break unit test coverage - # reporting, as Bdb.set_continue calls sys.settrace(None). - if self.__debugger_used: - pdb.Pdb.set_continue(self) - - def trace_dispatch(self, *args): - # Redirect stdout to the given stream. - save_stdout = sys.stdout - sys.stdout = self.__out - # Call Pdb's trace dispatch method. - result = pdb.Pdb.trace_dispatch(self, *args) - # Restore stdout. - sys.stdout = save_stdout - return result - -# [XX] Normalize with respect to os.path.pardir? -def _module_relative_path(module, path): - if not inspect.ismodule(module): - raise TypeError, 'Expected a module: %r' % module - if path.startswith('/'): - raise ValueError, 'Module-relative files may not have absolute paths' - - # Find the base directory for the path. - if hasattr(module, '__file__'): - # A normal module/package - basedir = os.path.split(module.__file__)[0] - elif module.__name__ == '__main__': - # An interactive session. - if len(sys.argv)>0 and sys.argv[0] != '': - basedir = os.path.split(sys.argv[0])[0] - else: - basedir = os.curdir - else: - # A module w/o __file__ (this includes builtins) - raise ValueError("Can't resolve paths relative to the module " + - module + " (it has no __file__)") - - # Combine the base directory and the path. - return os.path.join(basedir, *(path.split('/'))) - -###################################################################### -## 2. Example & DocTest -###################################################################### -## - An "example" is a <source, want> pair, where "source" is a -## fragment of source code, and "want" is the expected output for -## "source." The Example class also includes information about -## where the example was extracted from.
-## -## - A "doctest" is a collection of examples, typically extracted from -## a string (such as an object's docstring). The DocTest class also -## includes information about where the string was extracted from. - -class Example: - """ - A single doctest example, consisting of source code and expected - output. `Example` defines the following attributes: - - - source: A single Python statement, always ending with a newline. - The constructor adds a newline if needed. - - - want: The expected output from running the source code (either - from stdout, or a traceback in case of exception). `want` ends - with a newline unless it's empty, in which case it's an empty - string. The constructor adds a newline if needed. - - - exc_msg: The exception message generated by the example, if - the example is expected to generate an exception; or `None` if - it is not expected to generate an exception. This exception - message is compared against the return value of - `traceback.format_exception_only()`. `exc_msg` ends with a - newline unless it's `None`. The constructor adds a newline - if needed. - - - lineno: The line number within the DocTest string containing - this Example where the Example begins. This line number is - zero-based, with respect to the beginning of the DocTest. - - - indent: The example's indentation in the DocTest string. - I.e., the number of space characters that preceed the - example's first prompt. - - - options: A dictionary mapping from option flags to True or - False, which is used to override default options for this - example. Any option flags not contained in this dictionary - are left at their default value (as specified by the - DocTestRunner's optionflags). By default, no options are set. - """ - def __init__(self, source, want, exc_msg=None, lineno=0, indent=0, - options=None): - # Normalize inputs. 
- if not source.endswith('\n'): - source += '\n' - if want and not want.endswith('\n'): - want += '\n' - if exc_msg is not None and not exc_msg.endswith('\n'): - exc_msg += '\n' - # Store properties. - self.source = source - self.want = want - self.lineno = lineno - self.indent = indent - if options is None: options = {} - self.options = options - self.exc_msg = exc_msg - -class DocTest: - """ - A collection of doctest examples that should be run in a single - namespace. Each `DocTest` defines the following attributes: - - - examples: the list of examples. - - - globs: The namespace (aka globals) that the examples should - be run in. - - - name: A name identifying the DocTest (typically, the name of - the object whose docstring this DocTest was extracted from). - - - filename: The name of the file that this DocTest was extracted - from, or `None` if the filename is unknown. - - - lineno: The line number within filename where this DocTest - begins, or `None` if the line number is unavailable. This - line number is zero-based, with respect to the beginning of - the file. - - - docstring: The string that the examples were extracted from, - or `None` if the string is unavailable. - """ - def __init__(self, examples, globs, name, filename, lineno, docstring): - """ - Create a new DocTest containing the given examples. The - DocTest's globals are initialized with a copy of `globs`. 
- """ - assert not isinstance(examples, basestring), \ - "DocTest no longer accepts str; use DocTestParser instead" - self.examples = examples - self.docstring = docstring - self.globs = globs.copy() - self.name = name - self.filename = filename - self.lineno = lineno - - def __repr__(self): - if len(self.examples) == 0: - examples = 'no examples' - elif len(self.examples) == 1: - examples = '1 example' - else: - examples = '%d examples' % len(self.examples) - return ('' % - (self.name, self.filename, self.lineno, examples)) - - - # This lets us sort tests by name: - def __cmp__(self, other): - if not isinstance(other, DocTest): - return -1 - return cmp((self.name, self.filename, self.lineno, id(self)), - (other.name, other.filename, other.lineno, id(other))) - -###################################################################### -## 3. DocTestParser -###################################################################### - -class DocTestParser: - """ - A class used to parse strings containing doctest examples. - """ - # This regular expression is used to find doctest examples in a - # string. It defines three groups: `source` is the source code - # (including leading indentation and prompts); `indent` is the - # indentation of the first (PS1) line of the source code; and - # `want` is the expected output (including leading indentation). - _EXAMPLE_RE = re.compile(r''' - # Source consists of a PS1 line followed by zero or more PS2 lines. - (?P - (?:^(?P [ ]*) >>> .*) # PS1 line - (?:\n [ ]* \.\.\. .*)*) # PS2 lines - \n? - # Want consists of any non-blank lines that do not start with PS1. - (?P (?:(?![ ]*$) # Not a blank line - (?![ ]*>>>) # Not a line starting with PS1 - .*$\n? # But any other line - )*) - ''', re.MULTILINE | re.VERBOSE) - - # A regular expression for handling `want` strings that contain - # expected exceptions. 
It divides `want` into three pieces: - # - the traceback header line (`hdr`) - # - the traceback stack (`stack`) - # - the exception message (`msg`), as generated by - # traceback.format_exception_only() - # `msg` may have multiple lines. We assume/require that the - # exception message is the first non-indented line starting with a word - # character following the traceback header line. - _EXCEPTION_RE = re.compile(r""" - # Grab the traceback header. Different versions of Python have - # said different things on the first traceback line. - ^(?P Traceback\ \( - (?: most\ recent\ call\ last - | innermost\ last - ) \) : - ) - \s* $ # toss trailing whitespace on the header. - (?P .*?) # don't blink: absorb stuff until... - ^ (?P \w+ .*) # a line *starts* with alphanum. - """, re.VERBOSE | re.MULTILINE | re.DOTALL) - - # A callable returning a true value iff its argument is a blank line - # or contains a single comment. - _IS_BLANK_OR_COMMENT = re.compile(r'^[ ]*(#.*)?$').match - - def parse(self, string, name=''): - """ - Divide the given string into examples and intervening text, - and return them as a list of alternating Examples and strings. - Line numbers for the Examples are 0-based. The optional - argument `name` is a name identifying this string, and is only - used for error messages. - """ - string = string.expandtabs() - # If all lines begin with the same indentation, then strip it. - min_indent = self._min_indent(string) - if min_indent > 0: - string = '\n'.join([l[min_indent:] for l in string.split('\n')]) - - output = [] - charno, lineno = 0, 0 - # Find all doctest examples in the string: - for m in self._EXAMPLE_RE.finditer(string): - # Add the pre-example text to `output`. - output.append(string[charno:m.start()]) - # Update lineno (lines before this example) - lineno += string.count('\n', charno, m.start()) - # Extract info from the regexp match. 
- (source, options, want, exc_msg) = \ - self._parse_example(m, name, lineno) - # Create an Example, and add it to the list. - if not self._IS_BLANK_OR_COMMENT(source): - output.append( Example(source, want, exc_msg, - lineno=lineno, - indent=min_indent+len(m.group('indent')), - options=options) ) - # Update lineno (lines inside this example) - lineno += string.count('\n', m.start(), m.end()) - # Update charno. - charno = m.end() - # Add any remaining post-example text to `output`. - output.append(string[charno:]) - return output - - def get_doctest(self, string, globs, name, filename, lineno): - """ - Extract all doctest examples from the given string, and - collect them into a `DocTest` object. - - `globs`, `name`, `filename`, and `lineno` are attributes for - the new `DocTest` object. See the documentation for `DocTest` - for more information. - """ - return DocTest(self.get_examples(string, name), globs, - name, filename, lineno, string) - - def get_examples(self, string, name=''): - """ - Extract all doctest examples from the given string, and return - them as a list of `Example` objects. Line numbers are - 0-based, because it's most common in doctests that nothing - interesting appears on the same line as opening triple-quote, - and so the first interesting line is called \"line 1\" then. - - The optional argument `name` is a name identifying this - string, and is only used for error messages. - """ - return [x for x in self.parse(string, name) - if isinstance(x, Example)] - - def _parse_example(self, m, name, lineno): - """ - Given a regular expression match from `_EXAMPLE_RE` (`m`), - return a pair `(source, want)`, where `source` is the matched - example's source code (with prompts and indentation stripped); - and `want` is the example's expected output (with indentation - stripped). - - `name` is the string's name, and `lineno` is the line number - where the example starts; both are used for error messages. - """ - # Get the example's indentation level. 
- indent = len(m.group('indent')) - - # Divide source into lines; check that they're properly - # indented; and then strip their indentation & prompts. - source_lines = m.group('source').split('\n') - self._check_prompt_blank(source_lines, indent, name, lineno) - self._check_prefix(source_lines[1:], ' '*indent + '.', name, lineno) - source = '\n'.join([sl[indent+4:] for sl in source_lines]) - - # Divide want into lines; check that it's properly indented; and - # then strip the indentation. Spaces before the last newline should - # be preserved, so plain rstrip() isn't good enough. - want = m.group('want') - want_lines = want.split('\n') - if len(want_lines) > 1 and re.match(r' *$', want_lines[-1]): - del want_lines[-1] # forget final newline & spaces after it - self._check_prefix(want_lines, ' '*indent, name, - lineno + len(source_lines)) - want = '\n'.join([wl[indent:] for wl in want_lines]) - - # If `want` contains a traceback message, then extract it. - m = self._EXCEPTION_RE.match(want) - if m: - exc_msg = m.group('msg') - else: - exc_msg = None - - # Extract options from the source. - options = self._find_options(source, name, lineno) - - return source, options, want, exc_msg - - # This regular expression looks for option directives in the - # source code of an example. Option directives are comments - # starting with "doctest:". Warning: this may give false - # positives for string-literals that contain the string - # "#doctest:". Eliminating these false positives would require - # actually parsing the string; but we limit them by ignoring any - # line containing "#doctest:" that is *followed* by a quote mark. - _OPTION_DIRECTIVE_RE = re.compile(r'#\s*doctest:\s*([^\n\'"]*)$', - re.MULTILINE) - - def _find_options(self, source, name, lineno): - """ - Return a dictionary containing option overrides extracted from - option directives in the given source string. 
- - `name` is the string's name, and `lineno` is the line number - where the example starts; both are used for error messages. - """ - options = {} - # (note: with the current regexp, this will match at most once:) - for m in self._OPTION_DIRECTIVE_RE.finditer(source): - option_strings = m.group(1).replace(',', ' ').split() - for option in option_strings: - if (option[0] not in '+-' or - option[1:] not in OPTIONFLAGS_BY_NAME): - raise ValueError('line %r of the doctest for %s ' - 'has an invalid option: %r' % - (lineno+1, name, option)) - flag = OPTIONFLAGS_BY_NAME[option[1:]] - options[flag] = (option[0] == '+') - if options and self._IS_BLANK_OR_COMMENT(source): - raise ValueError('line %r of the doctest for %s has an option ' - 'directive on a line with no example: %r' % - (lineno, name, source)) - return options - - # This regular expression finds the indentation of every non-blank - # line in a string. - _INDENT_RE = re.compile('^([ ]*)(?=\S)', re.MULTILINE) - - def _min_indent(self, s): - "Return the minimum indentation of any non-blank line in `s`" - indents = [len(indent) for indent in self._INDENT_RE.findall(s)] - if len(indents) > 0: - return min(indents) - else: - return 0 - - def _check_prompt_blank(self, lines, indent, name, lineno): - """ - Given the lines of a source string (including prompts and - leading indentation), check to make sure that every prompt is - followed by a space character. If any line is not followed by - a space character, then raise ValueError. - """ - for i, line in enumerate(lines): - if len(line) >= indent+4 and line[indent+3] != ' ': - raise ValueError('line %r of the docstring for %s ' - 'lacks blank after %s: %r' % - (lineno+i+1, name, - line[indent:indent+3], line)) - - def _check_prefix(self, lines, prefix, name, lineno): - """ - Check that every line in the given list starts with the given - prefix; if any line does not, then raise a ValueError. 
- """ - for i, line in enumerate(lines): - if line and not line.startswith(prefix): - raise ValueError('line %r of the docstring for %s has ' - 'inconsistent leading whitespace: %r' % - (lineno+i+1, name, line)) - - -###################################################################### -## 4. DocTest Finder -###################################################################### - -class DocTestFinder: - """ - A class used to extract the DocTests that are relevant to a given - object, from its docstring and the docstrings of its contained - objects. Doctests can currently be extracted from the following - object types: modules, functions, classes, methods, staticmethods, - classmethods, and properties. - """ - - def __init__(self, verbose=False, parser=DocTestParser(), - recurse=True, _namefilter=None, exclude_empty=True): - """ - Create a new doctest finder. - - The optional argument `parser` specifies a class or - function that should be used to create new DocTest objects (or - objects that implement the same interface as DocTest). The - signature for this factory function should match the signature - of the DocTest constructor. - - If the optional argument `recurse` is false, then `find` will - only examine the given object, and not any contained objects. - - If the optional argument `exclude_empty` is false, then `find` - will include tests for objects with empty docstrings. - """ - self._parser = parser - self._verbose = verbose - self._recurse = recurse - self._exclude_empty = exclude_empty - # _namefilter is undocumented, and exists only for temporary backward- - # compatibility support of testmod's deprecated isprivate mess. - self._namefilter = _namefilter - - def find(self, obj, name=None, module=None, globs=None, - extraglobs=None): - """ - Return a list of the DocTests that are defined by the given - object's docstring, or by any of its contained objects' - docstrings. - - The optional parameter `module` is the module that contains - the given object. 
If the module is not specified or is None, then - the test finder will attempt to automatically determine the - correct module. The object's module is used: - - - As a default namespace, if `globs` is not specified. - - To prevent the DocTestFinder from extracting DocTests - from objects that are imported from other modules. - - To find the name of the file containing the object. - - To help find the line number of the object within its - file. - - Contained objects whose module does not match `module` are ignored. - - If `module` is False, no attempt to find the module will be made. - This is obscure, of use mostly in tests: if `module` is False, or - is None but cannot be found automatically, then all objects are - considered to belong to the (non-existent) module, so all contained - objects will (recursively) be searched for doctests. - - The globals for each DocTest is formed by combining `globs` - and `extraglobs` (bindings in `extraglobs` override bindings - in `globs`). A new copy of the globals dictionary is created - for each DocTest. If `globs` is not specified, then it - defaults to the module's `__dict__`, if specified, or {} - otherwise. If `extraglobs` is not specified, then it defaults - to {}. - - """ - # If name was not specified, then extract it from the object. - if name is None: - name = getattr(obj, '__name__', None) - if name is None: - raise ValueError("DocTestFinder.find: name must be given " - "when obj.__name__ doesn't exist: %r" % - (type(obj),)) - - # Find the module that contains the given object (if obj is - # a module, then module=obj.). Note: this may fail, in which - # case module will be None. - if module is False: - module = None - elif module is None: - module = inspect.getmodule(obj) - - # Read the module's source code. This is used by - # DocTestFinder._find_lineno to find the line number for a - # given object's docstring. 
- try: - file = inspect.getsourcefile(obj) or inspect.getfile(obj) - source_lines = linecache.getlines(file) - if not source_lines: - source_lines = None - except TypeError: - source_lines = None - - # Initialize globals, and merge in extraglobs. - if globs is None: - if module is None: - globs = {} - else: - globs = module.__dict__.copy() - else: - globs = globs.copy() - if extraglobs is not None: - globs.update(extraglobs) - - # Recursively expore `obj`, extracting DocTests. - tests = [] - self._find(tests, obj, name, module, source_lines, globs, {}) - return tests - - def _filter(self, obj, prefix, base): - """ - Return true if the given object should not be examined. - """ - return (self._namefilter is not None and - self._namefilter(prefix, base)) - - def _from_module(self, module, object): - """ - Return true if the given object is defined in the given - module. - """ - if module is None: - return True - elif inspect.isfunction(object): - return module.__dict__ is object.func_globals - elif inspect.isclass(object): - return module.__name__ == object.__module__ - elif inspect.getmodule(object) is not None: - return module is inspect.getmodule(object) - elif hasattr(object, '__module__'): - return module.__name__ == object.__module__ - elif isinstance(object, property): - return True # [XX] no way not be sure. - else: - raise ValueError("object must be a class or function") - - def _find(self, tests, obj, name, module, source_lines, globs, seen): - """ - Find tests for the given object and any contained objects, and - add them to `tests`. - """ - if self._verbose: - print 'Finding tests in %s' % name - - # If we've already processed this object, then ignore it. - if id(obj) in seen: - return - seen[id(obj)] = 1 - - # Find a test for this object, and add it to the list of tests. - test = self._get_test(obj, name, module, globs, source_lines) - if test is not None: - tests.append(test) - - # Look for tests in a module's contained objects. 
- if inspect.ismodule(obj) and self._recurse: - for valname, val in obj.__dict__.items(): - # Check if this contained object should be ignored. - if self._filter(val, name, valname): - continue - valname = '%s.%s' % (name, valname) - # Recurse to functions & classes. - if ((inspect.isfunction(val) or inspect.isclass(val)) and - self._from_module(module, val)): - self._find(tests, val, valname, module, source_lines, - globs, seen) - - # Look for tests in a module's __test__ dictionary. - if inspect.ismodule(obj) and self._recurse: - for valname, val in getattr(obj, '__test__', {}).items(): - if not isinstance(valname, basestring): - raise ValueError("DocTestFinder.find: __test__ keys " - "must be strings: %r" % - (type(valname),)) - if not (inspect.isfunction(val) or inspect.isclass(val) or - inspect.ismethod(val) or inspect.ismodule(val) or - isinstance(val, basestring)): - raise ValueError("DocTestFinder.find: __test__ values " - "must be strings, functions, methods, " - "classes, or modules: %r" % - (type(val),)) - valname = '%s.__test__.%s' % (name, valname) - self._find(tests, val, valname, module, source_lines, - globs, seen) - - # Look for tests in a class's contained objects. - if inspect.isclass(obj) and self._recurse: - for valname, val in obj.__dict__.items(): - # Check if this contained object should be ignored. - if self._filter(val, name, valname): - continue - # Special handling for staticmethod/classmethod. - if isinstance(val, staticmethod): - val = getattr(obj, valname) - if isinstance(val, classmethod): - val = getattr(obj, valname).im_func - - # Recurse to methods, properties, and nested classes. 
- if ((inspect.isfunction(val) or inspect.isclass(val) or - isinstance(val, property)) and - self._from_module(module, val)): - valname = '%s.%s' % (name, valname) - self._find(tests, val, valname, module, source_lines, - globs, seen) - - def _get_test(self, obj, name, module, globs, source_lines): - """ - Return a DocTest for the given object, if it defines a docstring; - otherwise, return None. - """ - # Extract the object's docstring. If it doesn't have one, - # then return None (no test for this object). - if isinstance(obj, basestring): - docstring = obj - else: - try: - if obj.__doc__ is None: - docstring = '' - else: - docstring = obj.__doc__ - if not isinstance(docstring, basestring): - docstring = str(docstring) - except (TypeError, AttributeError): - docstring = '' - - # Find the docstring's location in the file. - lineno = self._find_lineno(obj, source_lines) - - # Don't bother if the docstring is empty. - if self._exclude_empty and not docstring: - return None - - # Return a DocTest for this object. - if module is None: - filename = None - else: - filename = getattr(module, '__file__', module.__name__) - if filename[-4:] in (".pyc", ".pyo"): - filename = filename[:-1] - return self._parser.get_doctest(docstring, globs, name, - filename, lineno) - - def _find_lineno(self, obj, source_lines): - """ - Return a line number of the given object's docstring. Note: - this method assumes that the object has a docstring. - """ - lineno = None - - # Find the line number for modules. - if inspect.ismodule(obj): - lineno = 0 - - # Find the line number for classes. - # Note: this could be fooled if a class is defined multiple - # times in a single file. - if inspect.isclass(obj): - if source_lines is None: - return None - pat = re.compile(r'^\s*class\s*%s\b' % - getattr(obj, '__name__', '-')) - for i, line in enumerate(source_lines): - if pat.match(line): - lineno = i - break - - # Find the line number for functions & methods. 
- if inspect.ismethod(obj): obj = obj.im_func - if inspect.isfunction(obj): obj = obj.func_code - if inspect.istraceback(obj): obj = obj.tb_frame - if inspect.isframe(obj): obj = obj.f_code - if inspect.iscode(obj): - lineno = getattr(obj, 'co_firstlineno', None)-1 - - # Find the line number where the docstring starts. Assume - # that it's the first line that begins with a quote mark. - # Note: this could be fooled by a multiline function - # signature, where a continuation line begins with a quote - # mark. - if lineno is not None: - if source_lines is None: - return lineno+1 - pat = re.compile('(^|.*:)\s*\w*("|\')') - for lineno in range(lineno, len(source_lines)): - if pat.match(source_lines[lineno]): - return lineno - - # We couldn't find the line number. - return None - -###################################################################### -## 5. DocTest Runner -###################################################################### - -class DocTestRunner: - """ - A class used to run DocTest test cases, and accumulate statistics. - The `run` method is used to process a single DocTest case. It - returns a tuple `(f, t)`, where `t` is the number of test cases - tried, and `f` is the number of test cases that failed. - - >>> tests = DocTestFinder().find(_TestClass) - >>> runner = DocTestRunner(verbose=False) - >>> for test in tests: - ... print runner.run(test) - (0, 2) - (0, 1) - (0, 2) - (0, 2) - - The `summarize` method prints a summary of all the test cases that - have been run by the runner, and returns an aggregated `(f, t)` - tuple: - - >>> runner.summarize(verbose=1) - 4 items passed all tests: - 2 tests in _TestClass - 2 tests in _TestClass.__init__ - 2 tests in _TestClass.get - 1 tests in _TestClass.square - 7 tests in 4 items. - 7 passed and 0 failed. - Test passed. 
- (0, 7) - - The aggregated number of tried examples and failed examples is - also available via the `tries` and `failures` attributes: - - >>> runner.tries - 7 - >>> runner.failures - 0 - - The comparison between expected outputs and actual outputs is done - by an `OutputChecker`. This comparison may be customized with a - number of option flags; see the documentation for `testmod` for - more information. If the option flags are insufficient, then the - comparison may also be customized by passing a subclass of - `OutputChecker` to the constructor. - - The test runner's display output can be controlled in two ways. - First, an output function (`out) can be passed to - `TestRunner.run`; this function will be called with strings that - should be displayed. It defaults to `sys.stdout.write`. If - capturing the output is not sufficient, then the display output - can be also customized by subclassing DocTestRunner, and - overriding the methods `report_start`, `report_success`, - `report_unexpected_exception`, and `report_failure`. - """ - # This divider string is used to separate failure messages, and to - # separate sections of the summary. - DIVIDER = "*" * 70 - - def __init__(self, checker=None, verbose=None, optionflags=0): - """ - Create a new test runner. - - Optional keyword arg `checker` is the `OutputChecker` that - should be used to compare the expected outputs and actual - outputs of doctest examples. - - Optional keyword arg 'verbose' prints lots of stuff if true, - only failures if false; by default, it's true iff '-v' is in - sys.argv. - - Optional argument `optionflags` can be used to control how the - test runner compares expected output to actual output, and how - it displays failures. See the documentation for `testmod` for - more information. 
- """ - self._checker = checker or OutputChecker() - if verbose is None: - verbose = '-v' in sys.argv - self._verbose = verbose - self.optionflags = optionflags - self.original_optionflags = optionflags - - # Keep track of the examples we've run. - self.tries = 0 - self.failures = 0 - self._name2ft = {} - - # Create a fake output target for capturing doctest output. - self._fakeout = _SpoofOut() - - #///////////////////////////////////////////////////////////////// - # Reporting methods - #///////////////////////////////////////////////////////////////// - - def report_start(self, out, test, example): - """ - Report that the test runner is about to process the given - example. (Only displays a message if verbose=True) - """ - if self._verbose: - if example.want: - out('Trying:\n' + _indent(example.source) + - 'Expecting:\n' + _indent(example.want)) - else: - out('Trying:\n' + _indent(example.source) + - 'Expecting nothing\n') - - def report_success(self, out, test, example, got): - """ - Report that the given example ran successfully. (Only - displays a message if verbose=True) - """ - if self._verbose: - out("ok\n") - - def report_failure(self, out, test, example, got): - """ - Report that the given example failed. - """ - out(self._failure_header(test, example) + - self._checker.output_difference(example, got, self.optionflags)) - - def report_unexpected_exception(self, out, test, example, exc_info): - """ - Report that the given example raised an unexpected exception. - """ - out(self._failure_header(test, example) + - 'Exception raised:\n' + _indent(_exception_traceback(exc_info))) - - def _failure_header(self, test, example): - out = [self.DIVIDER] - if test.filename: - if test.lineno is not None and example.lineno is not None: - lineno = test.lineno + example.lineno + 1 - else: - lineno = '?' 
- out.append('File "%s", line %s, in %s' % - (test.filename, lineno, test.name)) - else: - out.append('Line %s, in %s' % (example.lineno+1, test.name)) - out.append('Failed example:') - source = example.source - out.append(_indent(source)) - return '\n'.join(out) - - #///////////////////////////////////////////////////////////////// - # DocTest Running - #///////////////////////////////////////////////////////////////// - - def __run(self, test, compileflags, out): - """ - Run the examples in `test`. Write the outcome of each example - with one of the `DocTestRunner.report_*` methods, using the - writer function `out`. `compileflags` is the set of compiler - flags that should be used to execute examples. Return a tuple - `(f, t)`, where `t` is the number of examples tried, and `f` - is the number of examples that failed. The examples are run - in the namespace `test.globs`. - """ - # Keep track of the number of failures and tries. - failures = tries = 0 - - # Save the option flags (since option directives can be used - # to modify them). - original_optionflags = self.optionflags - - SUCCESS, FAILURE, BOOM = range(3) # `outcome` state - - check = self._checker.check_output - - # Process each example. - for examplenum, example in enumerate(test.examples): - - # If REPORT_ONLY_FIRST_FAILURE is set, then supress - # reporting after the first failure. - quiet = (self.optionflags & REPORT_ONLY_FIRST_FAILURE and - failures > 0) - - # Merge in the example's options. - self.optionflags = original_optionflags - if example.options: - for (optionflag, val) in example.options.items(): - if val: - self.optionflags |= optionflag - else: - self.optionflags &= ~optionflag - - # Record that we started this example. - tries += 1 - if not quiet: - self.report_start(out, test, example) - - # Use a special filename for compile(), so we can retrieve - # the source code during interactive debugging (see - # __patched_linecache_getlines). 
- filename = '' % (test.name, examplenum) - - # Run the example in the given context (globs), and record - # any exception that gets raised. (But don't intercept - # keyboard interrupts.) - try: - # Don't blink! This is where the user's code gets run. - exec compile(example.source, filename, "single", - compileflags, 1) in test.globs - self.debugger.set_continue() # ==== Example Finished ==== - exception = None - except KeyboardInterrupt: - raise - except: - exception = sys.exc_info() - self.debugger.set_continue() # ==== Example Finished ==== - - got = self._fakeout.getvalue() # the actual output - self._fakeout.truncate(0) - outcome = FAILURE # guilty until proved innocent or insane - - # If the example executed without raising any exceptions, - # verify its output. - if exception is None: - if check(example.want, got, self.optionflags): - outcome = SUCCESS - - # The example raised an exception: check if it was expected. - else: - exc_info = sys.exc_info() - exc_msg = traceback.format_exception_only(*exc_info[:2])[-1] - if not quiet: - got += _exception_traceback(exc_info) - - # If `example.exc_msg` is None, then we weren't expecting - # an exception. - if example.exc_msg is None: - outcome = BOOM - - # We expected an exception: see whether it matches. - elif check(example.exc_msg, exc_msg, self.optionflags): - outcome = SUCCESS - - # Another chance if they didn't care about the detail. - elif self.optionflags & IGNORE_EXCEPTION_DETAIL: - m1 = re.match(r'[^:]*:', example.exc_msg) - m2 = re.match(r'[^:]*:', exc_msg) - if m1 and m2 and check(m1.group(0), m2.group(0), - self.optionflags): - outcome = SUCCESS - - # Report the outcome. 
- if outcome is SUCCESS: - if not quiet: - self.report_success(out, test, example, got) - elif outcome is FAILURE: - if not quiet: - self.report_failure(out, test, example, got) - failures += 1 - elif outcome is BOOM: - if not quiet: - self.report_unexpected_exception(out, test, example, - exc_info) - failures += 1 - else: - assert False, ("unknown outcome", outcome) - - # Restore the option flags (in case they were modified) - self.optionflags = original_optionflags - - # Record and return the number of failures and tries. - self.__record_outcome(test, failures, tries) - return failures, tries - - def __record_outcome(self, test, f, t): - """ - Record the fact that the given DocTest (`test`) generated `f` - failures out of `t` tried examples. - """ - f2, t2 = self._name2ft.get(test.name, (0,0)) - self._name2ft[test.name] = (f+f2, t+t2) - self.failures += f - self.tries += t - - __LINECACHE_FILENAME_RE = re.compile(r'[\w\.]+)' - r'\[(?P\d+)\]>$') - def __patched_linecache_getlines(self, filename): - m = self.__LINECACHE_FILENAME_RE.match(filename) - if m and m.group('name') == self.test.name: - example = self.test.examples[int(m.group('examplenum'))] - return example.source.splitlines(True) - else: - return self.save_linecache_getlines(filename) - - def run(self, test, compileflags=None, out=None, clear_globs=True): - """ - Run the examples in `test`, and display the results using the - writer function `out`. - - The examples are run in the namespace `test.globs`. If - `clear_globs` is true (the default), then this namespace will - be cleared after the test runs, to help with garbage - collection. If you would like to examine the namespace after - the test completes, then use `clear_globs=False`. - - `compileflags` gives the set of flags that should be used by - the Python compiler when running the examples. If not - specified, then it will default to the set of future-import - flags that apply to `globs`. 
- - The output of each example is checked using - `DocTestRunner.check_output`, and the results are formatted by - the `DocTestRunner.report_*` methods. - """ - self.test = test - - if compileflags is None: - compileflags = _extract_future_flags(test.globs) - - save_stdout = sys.stdout - if out is None: - out = save_stdout.write - sys.stdout = self._fakeout - - # Patch pdb.set_trace to restore sys.stdout during interactive - # debugging (so it's not still redirected to self._fakeout). - # Note that the interactive output will go to *our* - # save_stdout, even if that's not the real sys.stdout; this - # allows us to write test cases for the set_trace behavior. - save_set_trace = pdb.set_trace - self.debugger = _OutputRedirectingPdb(save_stdout) - self.debugger.reset() - pdb.set_trace = self.debugger.set_trace - - # Patch linecache.getlines, so we can see the example's source - # when we're inside the debugger. - self.save_linecache_getlines = linecache.getlines - linecache.getlines = self.__patched_linecache_getlines - - try: - return self.__run(test, compileflags, out) - finally: - sys.stdout = save_stdout - pdb.set_trace = save_set_trace - linecache.getlines = self.save_linecache_getlines - if clear_globs: - test.globs.clear() - - #///////////////////////////////////////////////////////////////// - # Summarization - #///////////////////////////////////////////////////////////////// - def summarize(self, verbose=None): - """ - Print a summary of all the test cases that have been run by - this DocTestRunner, and return a tuple `(f, t)`, where `f` is - the total number of failed examples, and `t` is the total - number of tried examples. - - The optional `verbose` argument controls how detailed the - summary is. If the verbosity is not specified, then the - DocTestRunner's verbosity is used. 
- """ - if verbose is None: - verbose = self._verbose - notests = [] - passed = [] - failed = [] - totalt = totalf = 0 - for x in self._name2ft.items(): - name, (f, t) = x - assert f <= t - totalt += t - totalf += f - if t == 0: - notests.append(name) - elif f == 0: - passed.append( (name, t) ) - else: - failed.append(x) - if verbose: - if notests: - print len(notests), "items had no tests:" - notests.sort() - for thing in notests: - print " ", thing - if passed: - print len(passed), "items passed all tests:" - passed.sort() - for thing, count in passed: - print " %3d tests in %s" % (count, thing) - if failed: - print self.DIVIDER - print len(failed), "items had failures:" - failed.sort() - for thing, (f, t) in failed: - print " %3d of %3d in %s" % (f, t, thing) - if verbose: - print totalt, "tests in", len(self._name2ft), "items." - print totalt - totalf, "passed and", totalf, "failed." - if totalf: - print "***Test Failed***", totalf, "failures." - elif verbose: - print "Test passed." - return totalf, totalt - - #///////////////////////////////////////////////////////////////// - # Backward compatibility cruft to maintain doctest.master. - #///////////////////////////////////////////////////////////////// - def merge(self, other): - d = self._name2ft - for name, (f, t) in other._name2ft.items(): - if name in d: - print "*** DocTestRunner.merge: '" + name + "' in both" \ - " testers; summing outcomes." - f2, t2 = d[name] - f = f + f2 - t = t + t2 - d[name] = f, t - -class OutputChecker: - """ - A class used to check the whether the actual output from a doctest - example matches the expected output. `OutputChecker` defines two - methods: `check_output`, which compares a given pair of outputs, - and returns true if they match; and `output_difference`, which - returns a string describing the differences between two outputs. 
- """ - def check_output(self, want, got, optionflags): - """ - Return True iff the actual output from an example (`got`) - matches the expected output (`want`). These strings are - always considered to match if they are identical; but - depending on what option flags the test runner is using, - several non-exact match types are also possible. See the - documentation for `TestRunner` for more information about - option flags. - """ - # Handle the common case first, for efficiency: - # if they're string-identical, always return true. - if got == want: - return True - - # The values True and False replaced 1 and 0 as the return - # value for boolean comparisons in Python 2.3. - if not (optionflags & DONT_ACCEPT_TRUE_FOR_1): - if (got,want) == ("True\n", "1\n"): - return True - if (got,want) == ("False\n", "0\n"): - return True - - # <BLANKLINE> can be used as a special sequence to signify a - # blank line, unless the DONT_ACCEPT_BLANKLINE flag is used. - if not (optionflags & DONT_ACCEPT_BLANKLINE): - # Replace <BLANKLINE> in want with a blank line. - want = re.sub('(?m)^%s\s*?$' % re.escape(BLANKLINE_MARKER), - '', want) - # If a line in got contains only spaces, then remove the - # spaces. - got = re.sub('(?m)^\s*?$', '', got) - if got == want: - return True - - # This flag causes doctest to ignore any differences in the - # contents of whitespace strings. Note that this can be used - # in conjunction with the ELLIPSIS flag. - if optionflags & NORMALIZE_WHITESPACE: - got = ' '.join(got.split()) - want = ' '.join(want.split()) - if got == want: - return True - - # The ELLIPSIS flag says to let the sequence "..." in `want` - # match any substring in `got`. - if optionflags & ELLIPSIS: - if _ellipsis_match(want, got): - return True - - # We didn't find any match; return false. - return False - - # Should we do a fancy diff? - def _do_a_fancy_diff(self, want, got, optionflags): - # Not unless they asked for a fancy diff. 
- if not optionflags & (REPORT_UDIFF | - REPORT_CDIFF | - REPORT_NDIFF): - return False - - # If expected output uses ellipsis, a meaningful fancy diff is - # too hard ... or maybe not. In two real-life failures Tim saw, - # a diff was a major help anyway, so this is commented out. - # [todo] _ellipsis_match() knows which pieces do and don't match, - # and could be the basis for a kick-ass diff in this case. - ##if optionflags & ELLIPSIS and ELLIPSIS_MARKER in want: - ## return False - - # ndiff does intraline difference marking, so can be useful even - # for 1-line differences. - if optionflags & REPORT_NDIFF: - return True - - # The other diff types need at least a few lines to be helpful. - return want.count('\n') > 2 and got.count('\n') > 2 - - def output_difference(self, example, got, optionflags): - """ - Return a string describing the differences between the - expected output for a given example (`example`) and the actual - output (`got`). `optionflags` is the set of option flags used - to compare `want` and `got`. - """ - want = example.want - # If <BLANKLINE>s are being used, then replace blank lines - # with <BLANKLINE> in the actual output string. - if not (optionflags & DONT_ACCEPT_BLANKLINE): - got = re.sub('(?m)^[ ]*(?=\n)', BLANKLINE_MARKER, got) - - # Check if we should use diff. - if self._do_a_fancy_diff(want, got, optionflags): - # Split want & got into lines. - want_lines = want.splitlines(True) # True == keep line ends - got_lines = got.splitlines(True) - # Use difflib to find their differences. 
- if optionflags & REPORT_UDIFF: - diff = difflib.unified_diff(want_lines, got_lines, n=2) - diff = list(diff)[2:] # strip the diff header - kind = 'unified diff with -expected +actual' - elif optionflags & REPORT_CDIFF: - diff = difflib.context_diff(want_lines, got_lines, n=2) - diff = list(diff)[2:] # strip the diff header - kind = 'context diff with expected followed by actual' - elif optionflags & REPORT_NDIFF: - engine = difflib.Differ(charjunk=difflib.IS_CHARACTER_JUNK) - diff = list(engine.compare(want_lines, got_lines)) - kind = 'ndiff with -expected +actual' - else: - assert 0, 'Bad diff option' - # Remove trailing whitespace on diff output. - diff = [line.rstrip() + '\n' for line in diff] - return 'Differences (%s):\n' % kind + _indent(''.join(diff)) - - # If we're not using diff, then simply list the expected - # output followed by the actual output. - if want and got: - return 'Expected:\n%sGot:\n%s' % (_indent(want), _indent(got)) - elif want: - return 'Expected:\n%sGot nothing\n' % _indent(want) - elif got: - return 'Expected nothing\nGot:\n%s' % _indent(got) - else: - return 'Expected nothing\nGot nothing\n' - -class DocTestFailure(Exception): - """A DocTest example has failed in debugging mode. 
- - The exception instance has variables: - - - test: the DocTest object being run - - - excample: the Example object that failed - - - got: the actual output - """ - def __init__(self, test, example, got): - self.test = test - self.example = example - self.got = got - - def __str__(self): - return str(self.test) - -class UnexpectedException(Exception): - """A DocTest example has encountered an unexpected exception - - The exception instance has variables: - - - test: the DocTest object being run - - - excample: the Example object that failed - - - exc_info: the exception info - """ - def __init__(self, test, example, exc_info): - self.test = test - self.example = example - self.exc_info = exc_info - - def __str__(self): - return str(self.test) - -class DebugRunner(DocTestRunner): - r"""Run doc tests but raise an exception as soon as there is a failure. - - If an unexpected exception occurs, an UnexpectedException is raised. - It contains the test, the example, and the original exception: - - >>> runner = DebugRunner(verbose=False) - >>> test = DocTestParser().get_doctest('>>> raise KeyError\n42', - ... {}, 'foo', 'foo.py', 0) - >>> try: - ... runner.run(test) - ... except UnexpectedException, failure: - ... pass - - >>> failure.test is test - True - - >>> failure.example.want - '42\n' - - >>> exc_info = failure.exc_info - >>> raise exc_info[0], exc_info[1], exc_info[2] - Traceback (most recent call last): - ... - KeyError - - We wrap the original exception to give the calling application - access to the test and example information. - - If the output doesn't match, then a DocTestFailure is raised: - - >>> test = DocTestParser().get_doctest(''' - ... >>> x = 1 - ... >>> x - ... 2 - ... ''', {}, 'foo', 'foo.py', 0) - - >>> try: - ... runner.run(test) - ... except DocTestFailure, failure: - ... 
pass - - DocTestFailure objects provide access to the test: - - >>> failure.test is test - True - - As well as to the example: - - >>> failure.example.want - '2\n' - - and the actual output: - - >>> failure.got - '1\n' - - If a failure or error occurs, the globals are left intact: - - >>> del test.globs['__builtins__'] - >>> test.globs - {'x': 1} - - >>> test = DocTestParser().get_doctest(''' - ... >>> x = 2 - ... >>> raise KeyError - ... ''', {}, 'foo', 'foo.py', 0) - - >>> runner.run(test) - Traceback (most recent call last): - ... - UnexpectedException: <DocTest foo from foo.py:0 (2 examples)> - - >>> del test.globs['__builtins__'] - >>> test.globs - {'x': 2} - - But the globals are cleared if there is no error: - - >>> test = DocTestParser().get_doctest(''' - ... >>> x = 2 - ... ''', {}, 'foo', 'foo.py', 0) - - >>> runner.run(test) - (0, 1) - - >>> test.globs - {} - - """ - - def run(self, test, compileflags=None, out=None, clear_globs=True): - r = DocTestRunner.run(self, test, compileflags, out, False) - if clear_globs: - test.globs.clear() - return r - - def report_unexpected_exception(self, out, test, example, exc_info): - raise UnexpectedException(test, example, exc_info) - - def report_failure(self, out, test, example, got): - raise DocTestFailure(test, example, got) - -###################################################################### -## 6. Test Functions -###################################################################### -# These should be backwards compatible. - -# For backward compatibility, a global instance of a DocTestRunner -# class, updated by testmod. 
-master = None - -def testmod(m=None, name=None, globs=None, verbose=None, isprivate=None, - report=True, optionflags=0, extraglobs=None, - raise_on_error=False, exclude_empty=False): - """m=None, name=None, globs=None, verbose=None, isprivate=None, - report=True, optionflags=0, extraglobs=None, raise_on_error=False, - exclude_empty=False - - Test examples in docstrings in functions and classes reachable - from module m (or the current module if m is not supplied), starting - with m.__doc__. Unless isprivate is specified, private names - are not skipped. - - Also test examples reachable from dict m.__test__ if it exists and is - not None. m.__test__ maps names to functions, classes and strings; - function and class docstrings are tested even if the name is private; - strings are tested directly, as if they were docstrings. - - Return (#failures, #tests). - - See doctest.__doc__ for an overview. - - Optional keyword arg "name" gives the name of the module; by default - use m.__name__. - - Optional keyword arg "globs" gives a dict to be used as the globals - when executing examples; by default, use m.__dict__. A copy of this - dict is actually used for each docstring, so that each docstring's - examples start with a clean slate. - - Optional keyword arg "extraglobs" gives a dictionary that should be - merged into the globals that are used to execute examples. By - default, no extra globals are used. This is new in 2.4. - - Optional keyword arg "verbose" prints lots of stuff if true, prints - only failures if false; by default, it's true iff "-v" is in sys.argv. - - Optional keyword arg "report" prints a summary at the end when true, - else prints nothing at the end. In verbose mode, the summary is - detailed, else very brief (in fact, empty if all tests passed). - - Optional keyword arg "optionflags" or's together module constants, - and defaults to 0. This is new in 2.3. 
Possible values (see the - docs for details): - - DONT_ACCEPT_TRUE_FOR_1 - DONT_ACCEPT_BLANKLINE - NORMALIZE_WHITESPACE - ELLIPSIS - IGNORE_EXCEPTION_DETAIL - REPORT_UDIFF - REPORT_CDIFF - REPORT_NDIFF - REPORT_ONLY_FIRST_FAILURE - - Optional keyword arg "raise_on_error" raises an exception on the - first unexpected exception or failure. This allows failures to be - post-mortem debugged. - - Deprecated in Python 2.4: - Optional keyword arg "isprivate" specifies a function used to - determine whether a name is private. The default function is - treat all functions as public. Optionally, "isprivate" can be - set to doctest.is_private to skip over functions marked as private - using the underscore naming convention; see its docs for details. - - Advanced tomfoolery: testmod runs methods of a local instance of - class doctest.Tester, then merges the results into (or creates) - global Tester instance doctest.master. Methods of doctest.master - can be called directly too, if you want to do something unusual. - Passing report=0 to testmod is especially useful then, to delay - displaying a summary. Invoke doctest.master.summarize(verbose) - when you're done fiddling. - """ - global master - - if isprivate is not None: - warnings.warn("the isprivate argument is deprecated; " - "examine DocTestFinder.find() lists instead", - DeprecationWarning) - - # If no module was given, then use __main__. - if m is None: - # DWA - m will still be None if this wasn't invoked from the command - # line, in which case the following TypeError is about as good an error - # as we should expect - m = sys.modules.get('__main__') - - # Check that we were actually given a module. - if not inspect.ismodule(m): - raise TypeError("testmod: module required; %r" % (m,)) - - # If no name was given, then use the module's name. - if name is None: - name = m.__name__ - - # Find, parse, and run all tests in the given module. 
- finder = DocTestFinder(_namefilter=isprivate, exclude_empty=exclude_empty) - - if raise_on_error: - runner = DebugRunner(verbose=verbose, optionflags=optionflags) - else: - runner = DocTestRunner(verbose=verbose, optionflags=optionflags) - - for test in finder.find(m, name, globs=globs, extraglobs=extraglobs): - runner.run(test) - - if report: - runner.summarize() - - if master is None: - master = runner - else: - master.merge(runner) - - return runner.failures, runner.tries - -def testfile(filename, module_relative=True, name=None, package=None, - globs=None, verbose=None, report=True, optionflags=0, - extraglobs=None, raise_on_error=False, parser=DocTestParser()): - """ - Test examples in the given file. Return (#failures, #tests). - - Optional keyword arg "module_relative" specifies how filenames - should be interpreted: - - - If "module_relative" is True (the default), then "filename" - specifies a module-relative path. By default, this path is - relative to the calling module's directory; but if the - "package" argument is specified, then it is relative to that - package. To ensure os-independence, "filename" should use - "/" characters to separate path segments, and should not - be an absolute path (i.e., it may not begin with "/"). - - - If "module_relative" is False, then "filename" specifies an - os-specific path. The path may be absolute or relative (to - the current working directory). - - Optional keyword arg "name" gives the name of the test; by default - use the file's basename. - - Optional keyword argument "package" is a Python package or the - name of a Python package whose directory should be used as the - base directory for a module relative filename. If no package is - specified, then the calling module's directory is used as the base - directory for module relative filenames. It is an error to - specify "package" if "module_relative" is False. 
- - Optional keyword arg "globs" gives a dict to be used as the globals - when executing examples; by default, use {}. A copy of this dict - is actually used for each docstring, so that each docstring's - examples start with a clean slate. - - Optional keyword arg "extraglobs" gives a dictionary that should be - merged into the globals that are used to execute examples. By - default, no extra globals are used. - - Optional keyword arg "verbose" prints lots of stuff if true, prints - only failures if false; by default, it's true iff "-v" is in sys.argv. - - Optional keyword arg "report" prints a summary at the end when true, - else prints nothing at the end. In verbose mode, the summary is - detailed, else very brief (in fact, empty if all tests passed). - - Optional keyword arg "optionflags" or's together module constants, - and defaults to 0. Possible values (see the docs for details): - - DONT_ACCEPT_TRUE_FOR_1 - DONT_ACCEPT_BLANKLINE - NORMALIZE_WHITESPACE - ELLIPSIS - IGNORE_EXCEPTION_DETAIL - REPORT_UDIFF - REPORT_CDIFF - REPORT_NDIFF - REPORT_ONLY_FIRST_FAILURE - - Optional keyword arg "raise_on_error" raises an exception on the - first unexpected exception or failure. This allows failures to be - post-mortem debugged. - - Optional keyword arg "parser" specifies a DocTestParser (or - subclass) that should be used to extract tests from the files. - - Advanced tomfoolery: testmod runs methods of a local instance of - class doctest.Tester, then merges the results into (or creates) - global Tester instance doctest.master. Methods of doctest.master - can be called directly too, if you want to do something unusual. - Passing report=0 to testmod is especially useful then, to delay - displaying a summary. Invoke doctest.master.summarize(verbose) - when you're done fiddling. 
- """ - global master - - if package and not module_relative: - raise ValueError("Package may only be specified for module-" - "relative paths.") - - # Relativize the path - if module_relative: - package = _normalize_module(package) - filename = _module_relative_path(package, filename) - - # If no name was given, then use the file's name. - if name is None: - name = os.path.basename(filename) - - # Assemble the globals. - if globs is None: - globs = {} - else: - globs = globs.copy() - if extraglobs is not None: - globs.update(extraglobs) - - if raise_on_error: - runner = DebugRunner(verbose=verbose, optionflags=optionflags) - else: - runner = DocTestRunner(verbose=verbose, optionflags=optionflags) - - # Read the file, convert it to a test, and run it. - s = open(filename).read() - test = parser.get_doctest(s, globs, name, filename, 0) - runner.run(test) - - if report: - runner.summarize() - - if master is None: - master = runner - else: - master.merge(runner) - - return runner.failures, runner.tries - -def run_docstring_examples(f, globs, verbose=False, name="NoName", - compileflags=None, optionflags=0): - """ - Test examples in the given object's docstring (`f`), using `globs` - as globals. Optional argument `name` is used in failure messages. - If the optional argument `verbose` is true, then generate output - even if there are no failures. - - `compileflags` gives the set of flags that should be used by the - Python compiler when running the examples. If not specified, then - it will default to the set of future-import flags that apply to - `globs`. - - Optional keyword arg `optionflags` specifies options for the - testing and output. See the documentation for `testmod` for more - information. - """ - # Find, parse, and run all tests in the given module. 
- finder = DocTestFinder(verbose=verbose, recurse=False) - runner = DocTestRunner(verbose=verbose, optionflags=optionflags) - for test in finder.find(f, name, globs=globs): - runner.run(test, compileflags=compileflags) - -###################################################################### -## 7. Tester -###################################################################### -# This is provided only for backwards compatibility. It's not -# actually used in any way. - -class Tester: - def __init__(self, mod=None, globs=None, verbose=None, - isprivate=None, optionflags=0): - - warnings.warn("class Tester is deprecated; " - "use class doctest.DocTestRunner instead", - DeprecationWarning, stacklevel=2) - if mod is None and globs is None: - raise TypeError("Tester.__init__: must specify mod or globs") - if mod is not None and not inspect.ismodule(mod): - raise TypeError("Tester.__init__: mod must be a module; %r" % - (mod,)) - if globs is None: - globs = mod.__dict__ - self.globs = globs - - self.verbose = verbose - self.isprivate = isprivate - self.optionflags = optionflags - self.testfinder = DocTestFinder(_namefilter=isprivate) - self.testrunner = DocTestRunner(verbose=verbose, - optionflags=optionflags) - - def runstring(self, s, name): - test = DocTestParser().get_doctest(s, self.globs, name, None, None) - if self.verbose: - print "Running string", name - (f,t) = self.testrunner.run(test) - if self.verbose: - print f, "of", t, "examples failed in string", name - return (f,t) - - def rundoc(self, object, name=None, module=None): - f = t = 0 - tests = self.testfinder.find(object, name, module=module, - globs=self.globs) - for test in tests: - (f2, t2) = self.testrunner.run(test) - (f,t) = (f+f2, t+t2) - return (f,t) - - def rundict(self, d, name, module=None): - import new - m = new.module(name) - m.__dict__.update(d) - if module is None: - module = False - return self.rundoc(m, name, module) - - def run__test__(self, d, name): - import new - m = new.module(name) - 
m.__test__ = d - return self.rundoc(m, name) - - def summarize(self, verbose=None): - return self.testrunner.summarize(verbose) - - def merge(self, other): - self.testrunner.merge(other.testrunner) - -###################################################################### -## 8. Unittest Support -###################################################################### - -_unittest_reportflags = 0 - -def set_unittest_reportflags(flags): - """Sets the unittest option flags. - - The old flag is returned so that a runner could restore the old - value if it wished to: - - >>> old = _unittest_reportflags - >>> set_unittest_reportflags(REPORT_NDIFF | - ... REPORT_ONLY_FIRST_FAILURE) == old - True - - >>> import doctest - >>> doctest._unittest_reportflags == (REPORT_NDIFF | - ... REPORT_ONLY_FIRST_FAILURE) - True - - Only reporting flags can be set: - - >>> set_unittest_reportflags(ELLIPSIS) - Traceback (most recent call last): - ... - ValueError: ('Only reporting flags allowed', 8) - - >>> set_unittest_reportflags(old) == (REPORT_NDIFF | - ... 
REPORT_ONLY_FIRST_FAILURE) - True - """ - global _unittest_reportflags - - if (flags & REPORTING_FLAGS) != flags: - raise ValueError("Only reporting flags allowed", flags) - old = _unittest_reportflags - _unittest_reportflags = flags - return old - -_para_re = re.compile('\s*\n\s*\n\s*') -def _unittest_count(docstring): - words = 0 - count = 0 - for p in _para_re.split(docstring): - p = p.strip() - if not p: - continue - if p.startswith('>>> '): - if words: - count += 1 - words = 0 - else: - words = 1 - - return count or 1 - - -class DocTestCase(unittest.TestCase): - - def __init__(self, test, optionflags=0, setUp=None, tearDown=None, - checker=None): - - unittest.TestCase.__init__(self) - self._dt_optionflags = optionflags - self._dt_checker = checker - self._dt_test = test - self._dt_setUp = setUp - self._dt_tearDown = tearDown - - self._dt_count = _unittest_count(test.docstring) - - def countTestCases(self): - return self._dt_count - - def setUp(self): - test = self._dt_test - - if self._dt_setUp is not None: - self._dt_setUp(test) - - def tearDown(self): - test = self._dt_test - - if self._dt_tearDown is not None: - self._dt_tearDown(test) - - test.globs.clear() - - def runTest(self): - test = self._dt_test - old = sys.stdout - new = StringIO() - optionflags = self._dt_optionflags - - if not (optionflags & REPORTING_FLAGS): - # The option flags don't include any reporting flags, - # so add the default reporting flags - optionflags |= _unittest_reportflags - - runner = DocTestRunner(optionflags=optionflags, - checker=self._dt_checker, verbose=False) - - try: - runner.DIVIDER = "-"*70 - failures, tries = runner.run( - test, out=new.write, clear_globs=False) - finally: - sys.stdout = old - - if failures: - raise self.failureException(self.format_failure(new.getvalue())) - - def format_failure(self, err): - test = self._dt_test - if test.lineno is None: - lineno = 'unknown line number' - else: - lineno = '%s' % test.lineno - lname = 
'.'.join(test.name.split('.')[-1:]) - return ('Failed doctest test for %s\n' - ' File "%s", line %s, in %s\n\n%s' - % (test.name, test.filename, lineno, lname, err) - ) - - def debug(self): - r"""Run the test case without results and without catching exceptions - - The unit test framework includes a debug method on test cases - and test suites to support post-mortem debugging. The test code - is run in such a way that errors are not caught. This way a - caller can catch the errors and initiate post-mortem debugging. - - The DocTestCase provides a debug method that raises - UnexpectedException errors if there is an unexepcted - exception: - - >>> test = DocTestParser().get_doctest('>>> raise KeyError\n42', - ... {}, 'foo', 'foo.py', 0) - >>> case = DocTestCase(test) - >>> try: - ... case.debug() - ... except UnexpectedException, failure: - ... pass - - The UnexpectedException contains the test, the example, and - the original exception: - - >>> failure.test is test - True - - >>> failure.example.want - '42\n' - - >>> exc_info = failure.exc_info - >>> raise exc_info[0], exc_info[1], exc_info[2] - Traceback (most recent call last): - ... - KeyError - - If the output doesn't match, then a DocTestFailure is raised: - - >>> test = DocTestParser().get_doctest(''' - ... >>> x = 1 - ... >>> x - ... 2 - ... ''', {}, 'foo', 'foo.py', 0) - >>> case = DocTestCase(test) - - >>> try: - ... case.debug() - ... except DocTestFailure, failure: - ... 
pass - - DocTestFailure objects provide access to the test: - - >>> failure.test is test - True - - As well as to the example: - - >>> failure.example.want - '2\n' - - and the actual output: - - >>> failure.got - '1\n' - - """ - - self.setUp() - runner = DebugRunner(optionflags=self._dt_optionflags, - checker=self._dt_checker, verbose=False) - runner.run(self._dt_test) - self.tearDown() - - def id(self): - return self._dt_test.name - - def __repr__(self): - name = self._dt_test.name.split('.') - return "%s (%s)" % (name[-1], '.'.join(name[:-1])) - - __str__ = __repr__ - - def shortDescription(self): - return "Doctest: " + self._dt_test.name - -def DocTestSuite(module=None, globs=None, extraglobs=None, test_finder=None, - **options): - """ - Convert doctest tests for a module to a unittest test suite. - - This converts each documentation string in a module that - contains doctest tests to a unittest test case. If any of the - tests in a doc string fail, then the test case fails. An exception - is raised showing the name of the file containing the test and a - (sometimes approximate) line number. - - The `module` argument provides the module to be tested. The argument - can be either a module or a module name. - - If no argument is given, the calling module is used. - - A number of options may be provided as keyword arguments: - - setUp - A set-up function. This is called before running the - tests in each file. The setUp function will be passed a DocTest - object. The setUp function can access the test globals as the - globs attribute of the test passed. - - tearDown - A tear-down function. This is called after running the - tests in each file. The tearDown function will be passed a DocTest - object. The tearDown function can access the test globals as the - globs attribute of the test passed. - - globs - A dictionary containing initial global variables for the tests. - - optionflags - A set of doctest option flags expressed as an integer. 
- """ - - if test_finder is None: - test_finder = DocTestFinder() - - module = _normalize_module(module) - tests = test_finder.find(module, globs=globs, extraglobs=extraglobs) - if globs is None: - globs = module.__dict__ - if not tests: - # Why do we want to do this? Because it reveals a bug that might - # otherwise be hidden. - raise ValueError(module, "has no tests") - - tests.sort() - suite = unittest.TestSuite() - for test in tests: - if len(test.examples) == 0: - continue - if not test.filename: - filename = module.__file__ - if filename[-4:] in (".pyc", ".pyo"): - filename = filename[:-1] - test.filename = filename - suite.addTest(DocTestCase(test, **options)) - - return suite - -class DocFileCase(DocTestCase): - - def id(self): - return '_'.join(self._dt_test.name.split('.')) - - def __repr__(self): - return self._dt_test.filename - __str__ = __repr__ - - def format_failure(self, err): - return ('Failed doctest test for %s\n File "%s", line 0\n\n%s' - % (self._dt_test.name, self._dt_test.filename, err) - ) - -def DocFileTest(path, module_relative=True, package=None, - globs=None, parser=DocTestParser(), **options): - if globs is None: - globs = {} - else: - globs = globs.copy() - - if package and not module_relative: - raise ValueError("Package may only be specified for module-" - "relative paths.") - - # Relativize the path. - if module_relative: - package = _normalize_module(package) - path = _module_relative_path(package, path) - if "__file__" not in globs: - globs["__file__"] = path - - # Find the file and read it. - name = os.path.basename(path) - doc = open(path).read() - - # Convert it to a test, and wrap it in a DocFileCase. - test = parser.get_doctest(doc, globs, name, path, 0) - return DocFileCase(test, **options) - -def DocFileSuite(*paths, **kw): - """A unittest suite for one or more doctest files. - - The path to each doctest file is given as a string; the - interpretation of that string depends on the keyword argument - "module_relative". 
- - A number of options may be provided as keyword arguments: - - module_relative - If "module_relative" is True, then the given file paths are - interpreted as os-independent module-relative paths. By - default, these paths are relative to the calling module's - directory; but if the "package" argument is specified, then - they are relative to that package. To ensure os-independence, - "filename" should use "/" characters to separate path - segments, and may not be an absolute path (i.e., it may not - begin with "/"). - - If "module_relative" is False, then the given file paths are - interpreted as os-specific paths. These paths may be absolute - or relative (to the current working directory). - - package - A Python package or the name of a Python package whose directory - should be used as the base directory for module relative paths. - If "package" is not specified, then the calling module's - directory is used as the base directory for module relative - filenames. It is an error to specify "package" if - "module_relative" is False. - - setUp - A set-up function. This is called before running the - tests in each file. The setUp function will be passed a DocTest - object. The setUp function can access the test globals as the - globs attribute of the test passed. - - tearDown - A tear-down function. This is called after running the - tests in each file. The tearDown function will be passed a DocTest - object. The tearDown function can access the test globals as the - globs attribute of the test passed. - - globs - A dictionary containing initial global variables for the tests. - - optionflags - A set of doctest option flags expressed as an integer. - - parser - A DocTestParser (or subclass) that should be used to extract - tests from the files. - """ - suite = unittest.TestSuite() - - # We do this here so that _normalize_module is called at the right - # level. 
If it were called in DocFileTest, then this function - # would be the caller and we might guess the package incorrectly. - if kw.get('module_relative', True): - kw['package'] = _normalize_module(kw.get('package')) - - for path in paths: - suite.addTest(DocFileTest(path, **kw)) - - return suite - -###################################################################### -## 9. Debugging Support -###################################################################### - -def script_from_examples(s): - r"""Extract script from text with examples. - - Converts text with examples to a Python script. Example input is - converted to regular code. Example output and all other words - are converted to comments: - - >>> text = ''' - ... Here are examples of simple math. - ... - ... Python has super accurate integer addition - ... - ... >>> 2 + 2 - ... 5 - ... - ... And very friendly error messages: - ... - ... >>> 1/0 - ... To Infinity - ... And - ... Beyond - ... - ... You can use logic if you want: - ... - ... >>> if 0: - ... ... blah - ... ... blah - ... ... - ... - ... Ho hum - ... ''' - - >>> print script_from_examples(text) - # Here are examples of simple math. - # - # Python has super accurate integer addition - # - 2 + 2 - # Expected: - ## 5 - # - # And very friendly error messages: - # - 1/0 - # Expected: - ## To Infinity - ## And - ## Beyond - # - # You can use logic if you want: - # - if 0: - blah - blah - # - # Ho hum - """ - output = [] - for piece in DocTestParser().parse(s): - if isinstance(piece, Example): - # Add the example's source code (strip trailing NL) - output.append(piece.source[:-1]) - # Add the expected output: - want = piece.want - if want: - output.append('# Expected:') - output += ['## '+l for l in want.split('\n')[:-1]] - else: - # Add non-example text. - output += [_comment_line(l) - for l in piece.split('\n')[:-1]] - - # Trim junk on both ends. 
- while output and output[-1] == '#': - output.pop() - while output and output[0] == '#': - output.pop(0) - # Combine the output, and return it. - return '\n'.join(output) - -def testsource(module, name): - """Extract the test sources from a doctest docstring as a script. - - Provide the module (or dotted name of the module) containing the - test to be debugged and the name (within the module) of the object - with the doc string with tests to be debugged. - """ - module = _normalize_module(module) - tests = DocTestFinder().find(module) - test = [t for t in tests if t.name == name] - if not test: - raise ValueError(name, "not found in tests") - test = test[0] - testsrc = script_from_examples(test.docstring) - return testsrc - -def debug_src(src, pm=False, globs=None): - """Debug a single doctest docstring, in argument `src`'""" - testsrc = script_from_examples(src) - debug_script(testsrc, pm, globs) - -def debug_script(src, pm=False, globs=None): - "Debug a test script. `src` is the script, as a string." - import pdb - - # Note that tempfile.NameTemporaryFile() cannot be used. As the - # docs say, a file so created cannot be opened by name a second time - # on modern Windows boxes, and execfile() needs to open it. - srcfilename = tempfile.mktemp(".py", "doctestdebug") - f = open(srcfilename, 'w') - f.write(src) - f.close() - - try: - if globs: - globs = globs.copy() - else: - globs = {} - - if pm: - try: - execfile(srcfilename, globs, globs) - except: - print sys.exc_info()[1] - pdb.post_mortem(sys.exc_info()[2]) - else: - # Note that %r is vital here. '%s' instead can, e.g., cause - # backslashes to get treated as metacharacters on Windows. - pdb.run("execfile(%r)" % srcfilename, globs, globs) - - finally: - os.remove(srcfilename) - -def debug(module, name, pm=False): - """Debug a single doctest docstring. 
- - Provide the module (or dotted name of the module) containing the - test to be debugged and the name (within the module) of the object - with the docstring with tests to be debugged. - """ - module = _normalize_module(module) - testsrc = testsource(module, name) - debug_script(testsrc, pm, module.__dict__) - -###################################################################### -## 10. Example Usage -###################################################################### -class _TestClass: - """ - A pointless class, for sanity-checking of docstring testing. - - Methods: - square() - get() - - >>> _TestClass(13).get() + _TestClass(-12).get() - 1 - >>> hex(_TestClass(13).square().get()) - '0xa9' - """ - - def __init__(self, val): - """val -> _TestClass object with associated value val. - - >>> t = _TestClass(123) - >>> print t.get() - 123 - """ - - self.val = val - - def square(self): - """square() -> square TestClass's associated value - - >>> _TestClass(13).square().get() - 169 - """ - - self.val = self.val ** 2 - return self - - def get(self): - """get() -> return TestClass's associated value. - - >>> x = _TestClass(-42) - >>> print x.get() - -42 - """ - - return self.val - -__test__ = {"_TestClass": _TestClass, - "string": r""" - Example of a string object, searched as-is. - >>> x = 1; y = 2 - >>> x + y, x * y - (3, 2) - """, - - "bool-int equivalence": r""" - In 2.2, boolean expressions displayed - 0 or 1. By default, we still accept - them. This can be disabled by passing - DONT_ACCEPT_TRUE_FOR_1 to the new - optionflags argument. - >>> 4 == 4 - 1 - >>> 4 == 4 - True - >>> 4 > 4 - 0 - >>> 4 > 4 - False - """, - - "blank lines": r""" - Blank lines can be marked with : - >>> print 'foo\n\nbar\n' - foo - - bar - - """, - - "ellipsis": r""" - If the ellipsis flag is used, then '...' 
can be used to - elide substrings in the desired output: - >>> print range(1000) #doctest: +ELLIPSIS - [0, 1, 2, ..., 999] - """, - - "whitespace normalization": r""" - If the whitespace normalization flag is used, then - differences in whitespace are ignored. - >>> print range(30) #doctest: +NORMALIZE_WHITESPACE - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, - 27, 28, 29] - """, - } - -def _test(): - r = unittest.TextTestRunner() - r.run(DocTestSuite()) - -if __name__ == "__main__": - _test() From scoder at codespeak.net Fri Aug 4 16:18:03 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 4 Aug 2006 16:18:03 +0200 (CEST) Subject: [Lxml-checkins] r31002 - lxml/branch/capi/doc Message-ID: <20060804141803.F2BFE10072@code0.codespeak.net> Author: scoder Date: Fri Aug 4 16:18:02 2006 New Revision: 31002 Modified: lxml/branch/capi/doc/objectify.txt Log: small clarification in the docs Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Fri Aug 4 16:18:02 2006 @@ -132,7 +132,7 @@ >>> [ el.tag for el in root.b ] ['b'] -Attributes are accessed as in the normal ElementTree API:: +XML attributes are accessed as in the normal ElementTree API:: >>> c = etree.SubElement(root, "c", myattr="someval") >>> print root.c.get("myattr") @@ -143,9 +143,9 @@ oh-oh In addition to the normal ElementTree API for appending elements to trees, -subtrees can also be added by assigning them to attributes. In this case, the -subtree is automatically deep copied and the tag name of its root is updated -to match the attribute name:: +subtrees can also be added by assigning them to object attributes. 
In this +case, the subtree is automatically deep copied and the tag name of its root is +updated to match the attribute name:: >>> el = etree.Element("yet_another_child") >>> root.new_child = el From scoder at codespeak.net Sat Aug 5 19:21:52 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 5 Aug 2006 19:21:52 +0200 (CEST) Subject: [Lxml-checkins] r31028 - in lxml/branch/capi: . doc src/lxml src/lxml/tests Message-ID: <20060805172152.7315810078@code0.codespeak.net> Author: scoder Date: Sat Aug 5 19:21:46 2006 New Revision: 31028 Added: lxml/branch/capi/doc/element_classes.txt - copied, changed from r30633, lxml/branch/capi/doc/namespace_extensions.txt lxml/branch/capi/src/lxml/classlookup.pxi - copied, changed from r30837, lxml/branch/capi/src/lxml/classlookup.pyx Removed: lxml/branch/capi/doc/elements.txt lxml/branch/capi/doc/namespace_extensions.txt lxml/branch/capi/src/lxml/classlookup.pyx Modified: lxml/branch/capi/doc/mkhtml.py lxml/branch/capi/doc/objectify.txt lxml/branch/capi/setup.py lxml/branch/capi/src/lxml/apihelpers.pxi lxml/branch/capi/src/lxml/etree.pyx lxml/branch/capi/src/lxml/etreepublic.pxd lxml/branch/capi/src/lxml/nsclasses.pxi lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/parser.pxi lxml/branch/capi/src/lxml/public-api.pxi lxml/branch/capi/src/lxml/tests/test_classlookup.py lxml/branch/capi/src/lxml/tests/test_nsclasses.py lxml/branch/capi/src/lxml/tests/test_objectify.py Log: large cleanup and code restructuring * moved lxml.elements.objectify to lxml.objectify * merged lxml.elements.classlookup into lxml.etree (classlookup.pxi) * new _BaseParser function makeelement() to create an element associated with this parser * merged code of Element() and makeelement() methods into new utility function _makeElement * rewrite of default element class lookup to support element classes local to the lookup object * support passing parser as second argument to XML() and HTML() * merged namespace_extensions.txt 
and elements.txt into element_classes.txt that describes all lookup schemes * updated objectify.txt to make a per-parser setup the preferred way of using objectify Copied: lxml/branch/capi/doc/element_classes.txt (from r30633, lxml/branch/capi/doc/namespace_extensions.txt) ============================================================================== --- lxml/branch/capi/doc/namespace_extensions.txt (original) +++ lxml/branch/capi/doc/element_classes.txt Sat Aug 5 19:21:46 2006 @@ -3,8 +3,9 @@ ==================================== lxml has very sophisticated support for custom Element classes. You can -provide your own classes for Elements and have lxml use them by default or -only for a specific tag name in a specific namespace. +provide your own classes for Elements and have lxml use them by default, for +all elements generated by a specific parser or only for a specific tag name in +a specific namespace. Custom Elements must inherit from the ``lxml.etree.ElementBase`` class, which provides the Element interface for subclasses:: @@ -22,18 +23,93 @@ .. contents:: .. - 1 Changing the default element class - 2 Implementing namespaces - 3 Element initialization - 4 Default implementations + 1 Element initialization + 2 Setting up a class lookup scheme + 2.1 Default class lookup + 2.2 Namespace class lookup + 2.3 Attribute based lookup + 2.4 Parser based lookup + 2.5 Custom element class lookup + 3 Implementing namespaces + 4 Resetting the class lookup scheme -Changing the default element class ----------------------------------- +Element initialization +---------------------- + +There is one thing to know up front. Element classes *must not* have a +constructor, neither must there be any internal state (except for the data +stored in the underlying XML tree). Element instances are created and garbage +collected at need, so there is no way to predict when and how often a +constructor would be called. 
Even worse, when the ``__init__`` method is +called, the object may not even be initialized yet to represent the XML tag, +so there is not much use in providing an ``__init__`` method in subclasses. + +However, there is one possible way to do things on element initialization, if +you really need to. ElementBase classes have an ``_init()`` method that can +be overridden. It can be used to modify the XML tree, e.g. to construct +special children or verify and update attributes. + +The semantics of ``_init()`` are as follows: + +* It is called at least once on element instantiation time. That is, when a + Python representation of the element is created by lxml. At that time, the + element object is completely initialized to represent a specific XML element + within the tree. + +* The method has complete access to the XML tree. Modifications can be done + in exactly the same way as anywhere else in the program. + +* Python representations of elements may be created multiple times during the + lifetime of an XML element in the underlying tree. The ``_init()`` code + provided by subclasses must take special care by itself that multiple + executions either are harmless or that they are prevented by some kind of + flag in the XML tree. The latter can be achieved by modifying an attribute + value or by removing or adding a specific child node and then verifying this + before running through the init process. -You can let lxml use your new class for every Element it generates:: +* Any exceptions raised in ``_init()`` will be propagated through the API + call that led to the creation of the Element. So be careful with the code + you write here as its exceptions may turn up in various unexpected places. + + +Setting up a class lookup scheme +-------------------------------- + +The first thing to do when deploying custom element classes is to register a +class lookup scheme.
lxml.etree provides quite a number of different schemes, +that also support class lookup local to a parser or namespace. Most lookups +support fallback chaining, which allows the next lookup mechanism to take over +when the previous one fails to find a class. + + +Default class lookup +.................... + +This is the default lookup mechanism. It always returns the default element +class. Consequently, no further fallbacks are supported, but this scheme is a +good fallback for other custom lookup mechanisms. + +Usage:: + + >>> lookup = etree.ElementDefaultClassLookup() + >>> etree.setElementClassLookup(lookup) + +Or shorter, since it is the default:: + + >>> etree.setElementClassLookup() + +To change the default element implementation, you can pass your new class to +the constructor. While it accepts classes for ``element``, ``comment`` and +``pi`` nodes, most use cases will only override the element class:: + + >>> el = etree.Element("myelement") + >>> print isinstance(el, HonkElement) + False + + >>> lookup = etree.ElementDefaultClassLookup(element=HonkElement) + >>> etree.setElementClassLookup(lookup) - >>> etree.setDefaultElementClass(HonkElement) >>> el = etree.Element("myelement") >>> print isinstance(el, HonkElement) True @@ -45,21 +121,103 @@ >>> el.honking True -To reset lxml.etree to the original element class, pass ``None`` or nothing:: - >>> etree.setDefaultElementClass() - >>> el = etree.Element("myelement") - >>> print isinstance(el, HonkElement) - False +Namespace class lookup +...................... + +This is an advanced lookup mechanism that supports namespace/tag-name specific +element classes. You can select it by calling:: + + >>> lookup = etree.ElementNamespaceClassLookup() + >>> etree.setElementClassLookup(lookup) + +See the separate section on `implementing namespaces`_ below to learn how to +make use of it. + +.. 
_`implementing namespaces`: #implementing-namespaces + +This scheme supports a fallback mechanism that is used in the case where the +namespace is not found or no class was registered for the element name. +Normally, the default class lookup is used here. To change it, pass the +desired fallback lookup scheme to the constructor:: + + >>> fallback = etree.ElementDefaultClassLookup(element=HonkElement) + >>> lookup = etree.ElementNamespaceClassLookup(fallback) + >>> etree.setElementClassLookup(lookup) + + +Attribute based lookup +...................... + +This scheme uses a mapping from attribute values to classes. An attribute +name is set at initialisation time and is then used to find the corresponding +value. It is selected as follows:: + + >>> id_class_mapping = {} # maps attribute values to element classes + >>> lookup = etree.AttributeBasedElementClassLookup('id', id_class_mapping) + >>> etree.setElementClassLookup(lookup) + +This class uses its fallback if the attribute is not found or its value is not +in the mapping. Normally, the default class lookup is used here. If you want +to use the namespace lookup, for example, you can use this code:: + + >>> fallback = etree.ElementNamespaceClassLookup() + >>> lookup = etree.AttributeBasedElementClassLookup( + ... 'id', id_class_mapping, fallback) + >>> etree.setElementClassLookup(lookup) + + +Parser based lookup +................... + +lxml.etree supports a per-parser setup of element lookup schemes. 
You can +enable it as follows:: + + >>> lookup = etree.ParserBasedElementClassLookup() + >>> etree.setElementClassLookup(lookup) + +Now you can set a separate lookup strategy for each parser you create:: + + >>> parser_lookup = etree.ElementDefaultClassLookup(element=HonkElement) + >>> parser = etree.XMLParser() + >>> parser.setElementClassLookup(parser_lookup) + +Whenever you create a document with this parser, its lookup scheme will be +inherited by the document and all subsequent element instantiations for this +document will use it. Note that the parser lookup supports a fallback just +like the previous one. + + +Custom element class lookup +........................... + +This is the most customisable way of finding element classes. It allows you +to implement a custom lookup scheme in a subclass:: + + >>> class MyLookup(etree.CustomElementClassLookup): + ... def lookup(self, node_type, document, namespace, name): + ... return MyElementClass # defined elsewhere + + >>> etree.setElementClassLookup( MyLookup() ) + +The ``lookup()`` method is only required to return either None (which triggers +the fallback mechanism) or a subclass of ``lxml.etree.ElementBase``. It can +otherwise take any decision it wants based on the node type (one of "element", +"comment", "PI"), the XML document of the element, or its namespace or tag +name. Implementing namespaces ----------------------- -lxml allows you to implement namespaces, in a rather literal sense. You can +lxml allows you to implement namespaces, in a rather literal sense. After +setting up the namespace class lookup mechanism as described above, you can build a new element namespace (or retrieve an existing one) by calling the Namespace class:: + >>> lookup = etree.ElementNamespaceClassLookup() + >>> etree.setElementClassLookup(lookup) + >>> namespace = etree.Namespace('http://hui.de/honk') and then register the new element type with that namespace, say, under the tag @@ -90,49 +248,6 @@ .. 
_`extension functions`: extensions.html - -Element initialization ----------------------- - -There is one thing to remember. Element classes *must not* have a -constructor, neither must there be any internal state (except for the data -stored in the underlying XML tree). Element instances are created and garbage -collected at need, so there is no way to predict when and how often a -constructor would be called. Even worse, when the ``__init__`` method is -called, the object may not even be initialized yet to represent the XML tag, -so there is not much use in providing an ``__init__`` method in subclasses. - -However, there is one possible way to do things on element initialization, if -you really need to. ElementBase classes have an ``_init()`` method that can -be overridden. It can be used to modify the XML tree, e.g. to construct -special children or verify and update attributes. - -The semantics of ``_init()`` are as follows: - -* It is called at least once on element instantiation time. That is, when a - Python representation of the element is created by lxml. At that time, the - element object is completely initialized to represent a specific XML element - within the tree. - -* The method has complete access to the XML tree. Modifications can be done - in exactly the same way as anywhere else in the program. - -* Python representations of elements may be created multiple times during the - lifetime of an XML element in the underlying tree. The ``_init()`` code - provided by subclasses must take special care by itself that multiple - executions either are harmless or that they are prevented by some kind of - flag in the XML tree. The latter can be achieved by modifying an attribute - value or by removing or adding a specific child node and then verifying this - before running through the init process. - -* Any exceptions raised in ``_init()`` will be propagated throught the API - call that lead to the creation of the Element. 
So be careful with the code - you write here as its exceptions may turn up in various unexpected places. - - -Default implementations ------------------------ - In the Namespace example above, we associated the HonkElement class only with the 'honk' element. If an XML tree contains different elements in the same namespace, they do not pick up the same implementation:: @@ -191,3 +306,15 @@ Note that you can also combine this with the global default class. Namespace specific classes will simply override the less specific default. + + +Resetting the class lookup scheme +--------------------------------- + +To reset lxml.etree to the original class lookup, simply pass ``None`` or +nothing to the register function:: + + >>> etree.setElementClassLookup() + >>> el = etree.Element("myelement") + >>> print isinstance(el, HonkElement) + False Deleted: /lxml/branch/capi/doc/elements.txt ============================================================================== --- /lxml/branch/capi/doc/elements.txt Sat Aug 5 19:21:46 2006 +++ (empty file) @@ -1,124 +0,0 @@ -============= -lxml.elements -============= - -The lxml.elements package is a collection of Element related modules. It -provides enhanced XML APIs based on element classes and different lookup -schemes for element class implementations. - - -lxml.elements.objectify ------------------------ - -`objectify`_ is an alternative XML API implementation similar in spirit to the -Amara or gnosis.objectify tools. - -.. _`objectify`: objectify.html - - -lxml.elements.classlookup -------------------------- - -The classlookup module contains a set of generic Element class lookup -mechanisms. By default, lxml.etree supports `namespace based class lookup`_. -This module provides access to this method and to the following additional -lookup schemes. - -.. _`namespace based class lookup`: namespace_extensions.html - - -Namespace class lookup -...................... 
- -This is the default lookup mechanism: `namespace based class lookup`_. This -module provides it mainly as a fallback mechanism for other lookups. You can -select the default mechanism by calling:: - - >>> etree.setElementClassLookup() - -or, more explicitly, by doing this:: - - >>> from lxml.elements.classlookup import ElementNamespaceClassLookup - >>> lookup = ElementNamespaceClassLookup() - >>> etree.setElementClassLookup(lookup) - -Note that this class supports a fallback mechanism that is used in the case -where the namespace is not found or no class was registered for the element -name. Normally, the default class lookup is used here. To change it, pass -the desired fallback lookup scheme to the constructor. - - -Default class lookup -.................... - -This is a faster replacement for the default lookup mechanism. It skips the -namespace lookup and always returns the default element class. - -Usage:: - - >>> from lxml.elements.classlookup import ElementDefaultClassLookup - >>> lookup = ElementDefaultClassLookup() - >>> etree.setElementClassLookup(lookup) - - -Attribute based lookup -...................... - -This uses a mapping from attribute values to classes. An attribute name is -set at initialisation time and is then used to find the corresponding value. -It is selected as follows:: - - >>> from lxml.elements.classlookup import AttributeBasedElementClassLookup - >>> lookup = AttributeBasedElementClassLookup('id', id_class_mapping) - >>> etree.setElementClassLookup(lookup) - -Note that this class supports a fallback mechanism that is used in the case -where the attribute is not found or its value is not in the mapping. -Normally, the default class lookup is used here. If you want to use the -namespace lookup, for example, you can use this code:: - - >>> fallback = ElementNamespaceClassLookup() - >>> lookup = AttributeBasedElementClassLookup( - ... 
'id', id_class_mapping, fallback) - >>> etree.setElementClassLookup(lookup) - - -Parser based lookup -................... - -lxml.etree supports a per-parser setup of element lookup schemes. You can -enable it as follows:: - - >>> from lxml.elements.classlookup import ParserBasedElementClassLookup - >>> lookup = ParserBasedElementClassLookup() - >>> etree.setElementClassLookup(lookup) - -Now you can set a separate lookup strategy for each parser you create:: - - >>> parser = etree.XMLParser() - >>> parser.setElementClassLookup( ElementDefaultClassLookup() ) - -Whenever you create a document with this parser, its lookup scheme will be -inherited by the document and all subsequent element instantiations will use -it. Note that the parser lookup supports a fallback just like the previous -one. - - -Custom element class lookup -........................... - -This is the most customisable way of finding element classes. It allows you -to implement a custom lookup scheme in a subclass:: - - >>> from lxml.elements.classlookup import CustomElementClassLookup - >>> class MyLookup(CustomElementClassLookup): - ... def lookup(self, node_type, document, namespace, name): - ... return MyElementClass # defined elsewhere - - >>> etree.setElementClassLookup( MyLookup() ) - -The ``lookup()`` method is only required to return either None (which triggers -its fallback mechanism) or a subclass of ``lxml.etree.ElementBase``. It can -otherwise take any decision it wants based on the node type (one of "element", -"comment", "PI"), the XML document of the element, or its namespace or tag -name. 
Modified: lxml/branch/capi/doc/mkhtml.py ============================================================================== --- lxml/branch/capi/doc/mkhtml.py (original) +++ lxml/branch/capi/doc/mkhtml.py Sat Aug 5 19:21:46 2006 @@ -12,9 +12,9 @@ shutil.copy(pubkey, dirname) for name in ['main.txt', 'intro.txt', 'api.txt', 'compatibility.txt', - 'extensions.txt', 'namespace_extensions.txt', 'sax.txt', + 'extensions.txt', 'element_classes.txt', 'sax.txt', 'build.txt', 'FAQ.txt', 'performance.txt', 'resolvers.txt', - 'capi.txt', 'objectify.txt', 'elements.txt']: + 'capi.txt', 'objectify.txt']: path = os.path.join(doc_dir, name) outname = os.path.splitext(name)[0] + '.html' outpath = os.path.join(dirname, outname) Deleted: /lxml/branch/capi/doc/namespace_extensions.txt ============================================================================== --- /lxml/branch/capi/doc/namespace_extensions.txt Sat Aug 5 19:21:46 2006 +++ (empty file) @@ -1,193 +0,0 @@ -==================================== -Using custom Element classes in lxml -==================================== - -lxml has very sophisticated support for custom Element classes. You can -provide your own classes for Elements and have lxml use them by default or -only for a specific tag name in a specific namespace. - -Custom Elements must inherit from the ``lxml.etree.ElementBase`` class, which -provides the Element interface for subclasses:: - - >>> from lxml import etree - >>> class HonkElement(etree.ElementBase): - ... def honking(self): - ... return self.get('honking') == 'true' - ... honking = property(honking) - -This defines a new Element class ``HonkElement`` with a property ``honking``. - -Note that you cannot (or rather *must not*) instantiate this class yourself. -lxml.etree will do that for you through its normal ElementTree API. - -.. contents:: -.. 
- 1 Changing the default element class - 2 Implementing namespaces - 3 Element initialization - 4 Default implementations - - -Changing the default element class ----------------------------------- - -You can let lxml use your new class for every Element it generates:: - - >>> etree.setDefaultElementClass(HonkElement) - >>> el = etree.Element("myelement") - >>> print isinstance(el, HonkElement) - True - >>> el.honking - False - >>> el = etree.Element("myelement", honking='true') - >>> print etree.tostring(el) - - >>> el.honking - True - -To reset lxml.etree to the original element class, pass ``None`` or nothing:: - - >>> etree.setDefaultElementClass() - >>> el = etree.Element("myelement") - >>> print isinstance(el, HonkElement) - False - - -Implementing namespaces ------------------------ - -lxml allows you to implement namespaces, in a rather literal sense. You can -build a new element namespace (or retrieve an existing one) by calling the -Namespace class:: - - >>> namespace = etree.Namespace('http://hui.de/honk') - -and then register the new element type with that namespace, say, under the tag -name ``honk``:: - - >>> namespace['honk'] = HonkElement - -After this, you create and use your XML elements through the normal API of -lxml:: - - >>> xml = '' - >>> honk_element = etree.XML(xml) - >>> print honk_element.honking - True - -The same works when creating elements by hand:: - - >>> honk_element = etree.Element('{http://hui.de/honk}honk', - ... honking='true') - >>> print honk_element.honking - True - -Essentially, what this allows you to do, is to give elements a custom API -based on their namespace and tag name. - -A somewhat related topic are `extension functions`_ which use a similar -mechanism for registering extension functions in XPath and XSLT. - -.. _`extension functions`: extensions.html - - -Element initialization ----------------------- - -There is one thing to remember. 
Element classes *must not* have a -constructor, neither must there be any internal state (except for the data -stored in the underlying XML tree). Element instances are created and garbage -collected at need, so there is no way to predict when and how often a -constructor would be called. Even worse, when the ``__init__`` method is -called, the object may not even be initialized yet to represent the XML tag, -so there is not much use in providing an ``__init__`` method in subclasses. - -However, there is one possible way to do things on element initialization, if -you really need to. ElementBase classes have an ``_init()`` method that can -be overridden. It can be used to modify the XML tree, e.g. to construct -special children or verify and update attributes. - -The semantics of ``_init()`` are as follows: - -* It is called at least once on element instantiation time. That is, when a - Python representation of the element is created by lxml. At that time, the - element object is completely initialized to represent a specific XML element - within the tree. - -* The method has complete access to the XML tree. Modifications can be done - in exactly the same way as anywhere else in the program. - -* Python representations of elements may be created multiple times during the - lifetime of an XML element in the underlying tree. The ``_init()`` code - provided by subclasses must take special care by itself that multiple - executions either are harmless or that they are prevented by some kind of - flag in the XML tree. The latter can be achieved by modifying an attribute - value or by removing or adding a specific child node and then verifying this - before running through the init process. - -* Any exceptions raised in ``_init()`` will be propagated throught the API - call that lead to the creation of the Element. So be careful with the code - you write here as its exceptions may turn up in various unexpected places. 
- - -Default implementations ------------------------ - -In the Namespace example above, we associated the HonkElement class only with -the 'honk' element. If an XML tree contains different elements in the same -namespace, they do not pick up the same implementation:: - - >>> xml = '' - >>> honk_element = etree.XML(xml) - >>> print honk_element.honking - True - >>> print honk_element[0].honking - Traceback (most recent call last): - ... - AttributeError: 'etree._Element' object has no attribute 'honking' - -You can therefore provide one implementation per element name in each -namespace and have lxml select the right one on the fly. If you want one -element implementation per namespace (ignoring the element name) or prefer -having a common class for most elements except a few, you can specify a -default implementation for an entire namespace by registering that class with -the empty element name (None). - -You may consider following an object oriented approach here. If you build a -class hierarchy of element classes, you can also implement a base class for a -namespace that is used if no specific element class is provided. Again, you -can just pass None as an element name:: - - >>> class HonkNSElement(etree.ElementBase): - ... def honk(self): - ... return "HONK" - >>> namespace[None] = HonkNSElement - - >>> class HonkElement(HonkNSElement): - ... def honking(self): - ... return self.get('honking') == 'true' - ... honking = property(honking) - >>> namespace['honk'] = HonkElement - -Now you can rely on lxml to always return objects of type HonkNSElement or its -subclasses for elements of this namespace:: - - >>> xml = '' - >>> honk_element = etree.XML(xml) - - >>> print type(honk_element), type(honk_element[0]) - - - >>> print honk_element.honking - True - >>> print honk_element.honk() - HONK - >>> print honk_element[0].honk() - HONK - >>> print honk_element[0].honking - Traceback (most recent call last): - ... 
- AttributeError: 'HonkNSElement' object has no attribute 'honking' - -Note that you can also combine this with the global default class. Namespace -specific classes will simply override the less specific default. Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Sat Aug 5 19:21:46 2006 @@ -4,60 +4,82 @@ lxml supports an alternative element API similar to the Amara_ bindery through a custom Element implementation. This API is very different from the -ElementTree API. If it is used, it should be used exclusively, to avoid -common pitfalls when mixing element implementations. +ElementTree API. If it is used, it should not be mixed with other element +implementations, to avoid non-obvious behaviour. .. _Amara: http://uche.ogbuji.net/tech/4suite/amara/ -You can replace the original implementation by the ``objectify`` element class -by simply importing the module and calling the ``register()`` function:: +To make use of ``objectify``, you need both the ``lxml.etree`` module and +``lxml.objectify``:: >>> from lxml import etree - >>> from lxml.elements import objectify - >>> objectify.register() + >>> from lxml import objectify - >>> el = etree.Element("test") - >>> print isinstance(el, objectify.ObjectifiedElement) +To avoid interfering with other modules that use ``lxml.etree``, the normal +way to use ``objectify`` is to register it with a dedicated parser. This +requires setting up ``lxml.etree`` to use `parser specific element classes`_ +first:: + + >>> lookup = etree.ParserBasedElementClassLookup() + >>> etree.setElementClassLookup(lookup) + +.. _`parser specific element classes`: element_classes.html#parser-based-lookup + +The next step is to create a parser that builds objectify documents. The +objectify API is meant for data-centered XML (as opposed to document XML with +mixed content). 
Therefore, we configure the parser to let it remove +whitespace-only text from the parsed document if it is not enclosed by an XML +element. Note that this alters the document infoset, so if you consider the +removed spaces as data in your specific use case, you should go with a normal +parser and just set the element class lookup. Most applications, however, +will work fine with the following setup:: + + >>> parser = etree.XMLParser(remove_blank_text=True) + + >>> lookup = objectify.ObjectifyElementClassLookup() + >>> parser.setElementClassLookup(lookup) + +To create an ``objectify`` tree, you can either parse a document with this +parser:: + + >>> from StringIO import StringIO + >>> xml = StringIO('') + >>> tree = etree.parse(xml, parser) + >>> print isinstance(tree.getroot(), objectify.ObjectifiedElement) True -Note that `namespace specific classes`_ can override this default. If -``objectify`` is in use, it is therefore advisable to let other custom element -classes inherit from the ``ObjectifiedElement`` class (or a subclass) to make -sure that all element classes provide the same API. You can prevent the -lookup of namespace registered classes by passing False for the -``prefer_nsclasses`` keyword argument of the ``register()`` function. - -.. _`namespace specific classes`: namespace_extensions.html - -It is also possible to use objectify's element classes on a more fine-grained -basis. Instead of activating it globally, it can be integrated with the class -lookup framework from `lxml.elements.classlookup`_. This is accomplished by -the class ``ObjectifyElementClassLookup``. By setting it as the local class -lookup scheme of a parser, for example, you can restrict the objectify API to -documents that were parsed by this specific parser. As said above, you really -have to take care in this case to prevent mixing the Element implementations -between documents. 
If you do, however, this provides a very convenient way of -using different XML APIs at the same time, e.g. in different Python modules. - -.. _`lxml.elements.classlookup`: elements.html - -To simulate the default behaviour of looking up namespace registered classes -first and then falling back to the ObjectifiedElement class, you can build a -lookup fallback chain like the following:: - - >>> lookup = etree.ElementNamespaceClassLookup( - ... objectify.ObjectifyElementClassLookup() ) - -Since the objectify API is meant for data-centered XML (as opposed to document -XML with mixed content), it might be worthwhile in this context to change the -default parser:: - - >>> etree.setDefaultParser( etree.XMLParser(remove_blank_text=True) ) - -Now the parser will remove whitespace-only text from the parsed document, -unless it is found enclosed by an XML element. Note that this alters the -document infoset, so if you consider the removed spaces as data in your -specific use case, you should go with the normal parser. +or you can call the ``makeelement()`` method of the parser to create a new +root element from scratch:: + + >>> obj_el = parser.makeelement("test") + >>> print isinstance(obj_el, objectify.ObjectifiedElement) + True + +New subelements will automatically inherit the setup. However, all +independent elements that you create through the normal etree API will not be +associated with the parser and therefore not support the ``objectify`` API:: + + >>> subel = etree.SubElement(obj_el, "sub") + >>> print isinstance(subel, objectify.ObjectifiedElement) + True + + >>> independent_el = etree.Element("new") + >>> print isinstance(independent_el, objectify.ObjectifiedElement) + False + +The ``makeelement()`` method of the parser has the same signature as the +normal ``Element()`` factory known from lxml.etree and can therefore easily +replace the respective calls. 
If you create your parser globally at a module +level, it may be convenient to also assign ``parser.makeelement`` to the name +``Element`` in your module to avoid accidentally calling the wrong factory. +The same applies to the ``XML()`` function of ``lxml.etree``, which must now +use the dedicated parser:: + + >>> Element = parser.makeelement + >>> SubElement = etree.SubElement + >>> def XML(xml): + ... return etree.XML(xml, parser) + .. contents:: .. @@ -78,13 +100,13 @@ behind the usual object attribute access pattern. Asking an element for an attribute will return the sequence of children with corresponding tag names:: - >>> root = etree.Element("root") - >>> b = etree.SubElement(root, "b") + >>> root = Element("root") + >>> b = SubElement(root, "b") >>> print root.b[0].tag b >>> root.index(root.b[0]) 0 - >>> b = etree.SubElement(root, "b") + >>> b = SubElement(root, "b") >>> print root.b[0].tag b >>> print root.b[1].tag @@ -102,9 +124,9 @@ Iteration and slicing also obey the requested tag:: - >>> x1 = etree.SubElement(root, "x") - >>> x2 = etree.SubElement(root, "x") - >>> x3 = etree.SubElement(root, "x") + >>> x1 = SubElement(root, "x") + >>> x2 = SubElement(root, "x") + >>> x3 = SubElement(root, "x") >>> [ el.tag for el in root.x ] ['x', 'x', 'x'] @@ -134,7 +156,7 @@ XML attributes are accessed as in the normal ElementTree API:: - >>> c = etree.SubElement(root, "c", myattr="someval") + >>> c = SubElement(root, "c", myattr="someval") >>> print root.c.get("myattr") someval @@ -147,31 +169,31 @@ case, the subtree is automatically deep copied and the tag name of its root is updated to match the attribute name:: - >>> el = etree.Element("yet_another_child") + >>> el = Element("yet_another_child") >>> root.new_child = el >>> print root.new_child.tag new_child >>> print el.tag yet_another_child - >>> root.y = [ etree.Element("y"), etree.Element("y") ] + >>> root.y = [ Element("y"), Element("y") ] >>> [ el.tag for el in root.y ] ['y', 'y'] The latter is a short form 
for operations on the full slice:: - >>> root.y[:] = [ etree.Element("y") ] + >>> root.y[:] = [ Element("y") ] >>> [ el.tag for el in root.y ] ['y'] You can also replace children that way:: - >>> child1 = etree.SubElement(root, "child") - >>> child2 = etree.SubElement(root, "child") - >>> child3 = etree.SubElement(root, "child") + >>> child1 = SubElement(root, "child") + >>> child2 = SubElement(root, "child") + >>> child3 = SubElement(root, "child") - >>> el = etree.Element("new_child") - >>> subel = etree.SubElement(el, "sub") + >>> el = Element("new_child") + >>> subel = SubElement(el, "sub") >>> root.child = el >>> print root.child.sub.tag @@ -201,9 +223,9 @@ Element without specifying a namespace, the lookup will use the namespace of the parent:: - >>> root = etree.Element("{ns}root") - >>> b = etree.SubElement(root, "{ns}b") - >>> c = etree.SubElement(root, "{other}c") + >>> root = Element("{ns}root") + >>> b = SubElement(root, "{ns}b") + >>> c = SubElement(root, "{other}c") >>> print root.b.tag {ns}b @@ -229,11 +251,11 @@ For both convenience and speed, objectify supports its own path language, represented by the ``ObjectPath`` class:: - >>> root = etree.Element("{ns}root") - >>> b1 = etree.SubElement(root, "{ns}b") - >>> c = etree.SubElement(b1, "{ns}c") - >>> b2 = etree.SubElement(root, "{ns}b") - >>> d = etree.SubElement(root, "{other}d") + >>> root = Element("{ns}root") + >>> b1 = SubElement(root, "{ns}b") + >>> c = SubElement(b1, "{ns}c") + >>> b2 = SubElement(root, "{ns}b") + >>> d = SubElement(root, "{other}d") >>> path = objectify.ObjectPath("root.b.c") >>> print path @@ -306,7 +328,7 @@ ObjectPath objects can be used to manipulate trees:: - >>> root = etree.Element("{ns}root") + >>> root = Element("{ns}root") >>> path = objectify.ObjectPath(".some.child.{other}unknown") >>> path.hasattr(root) @@ -344,7 +366,7 @@ element content behave like them. 
For example, they support the normal math operators:: - >>> root = etree.XML("511truehoi") + >>> root = XML("511truehoi") >>> root.a + root.b 16 >>> root.a += root.b @@ -375,7 +397,7 @@ ``dump()`` function that returns a recursive string representation for elements:: - >>> root = etree.XML(""" + >>> root = XML(""" ... ... 1 ... 1.2 @@ -400,7 +422,7 @@ You can freely switch between different types for the same child:: - >>> root = etree.fromstring("""5""") + >>> root = XML("5") >>> print objectify.dump(root) root = None [ObjectifiedElement] a = 5 [IntElement] @@ -434,7 +456,7 @@ cannot behave as the Python types. Like all other tree elements, they show the normal slicing behaviour of objectify elements:: - >>> root = etree.XML("testtoast") + >>> root = XML("testtoast") >>> print root.a + ' me' # behaves like a string, right? test me >>> len(root.a) # but there's only one 'a' element! @@ -454,7 +476,7 @@ normal ElementTree ``.text`` attribute. Additionally, all data classes provide a ``.pyval`` attribute that returns the value as plain Python type:: - >>> root = etree.XML("test5") + >>> root = XML("test5") >>> root.a.text 'test' >>> root.a.pyval @@ -490,7 +512,7 @@ {http://codespeak.net/lxml/objectify/pytype}pytype >>> ns, name = objectify.PYTYPE_ATTRIBUTE[1:].split('}') - >>> root = etree.XML("""\ + >>> root = XML("""\ ... ... 5 ... 5 @@ -510,7 +532,7 @@ application ever needs to. There is also a utility function ``annotate()`` that recursively generates this attribute for the elements of a tree:: - >>> root = etree.XML("test5") + >>> root = XML("test5") >>> print objectify.dump(root) root = None [ObjectifiedElement] a = 'test' [StringElement] @@ -529,7 +551,7 @@ element annotations. Objectify knows those that can be mapped to normal Python types:: - >>> root = etree.XML('''\ + >>> root = XML('''\ ... ... 5 ... 
5 @@ -578,7 +600,7 @@ >>> xmas_type.register() - >>> root = etree.XML("24.12.200012.24.2000") + >>> root = XML("24.12.200012.24.2000") >>> root.a.callSanta() Ho ho ho! >>> root.b.callSanta() @@ -597,7 +619,7 @@ If you provide XML Schema type information, this will override the type check function defined above:: - >>> root = etree.XML('''\ + >>> root = XML('''\ ... ... 12.24.2000 ... @@ -632,7 +654,7 @@ >>> objectify.enableRecursiveStr() - >>> root = etree.XML(""" + >>> root = XML(""" ... ... 1 ... 1.2 @@ -679,28 +701,15 @@ Resetting the API ----------------- -You can reset the API to the original ElementTree API by calling the -``unregister()`` function. Be aware, though, that this does not immediately -apply to elements to which there is a Python reference. Their Python class -will only be changed after all references are gone and the Python object is -garbage collected. The same applies to registered data classes for elements. - -When you access an element for which there is not currently a Python -representation, it will be created with the currently registered element -class:: - - >>> el = etree.Element("test") - >>> print isinstance(el, objectify.ObjectifiedElement) - True - - >>> objectify.unregister() - - >>> print isinstance(el, objectify.ObjectifiedElement) - True - >>> new_el = etree.Element("test") - >>> print isinstance(new_el, objectify.ObjectifiedElement) - False - -In case you changed the default parser also, here is how to change it back:: - - >>> etree.setDefaultParser() +As the objectify setup is local to a parser, it does not interfere with the +rest of lxml. However, if you stop using the parser you registered +``objectify`` for, you might also want to reset the global class lookup +mechanism back to the default one, to disable the per-parser lookup. 
This is +easily achieved by calling the setup function without arguments:: + + >>> etree.setElementClassLookup() + +Be aware, though, that this does not immediately apply to elements to which +there already is a Python reference. Their Python class will only be changed +after all references are gone and the Python object is garbage collected. The +same applies to registered data classes for elements. Modified: lxml/branch/capi/setup.py ============================================================================== --- lxml/branch/capi/setup.py (original) +++ lxml/branch/capi/setup.py Sat Aug 5 19:21:46 2006 @@ -2,8 +2,7 @@ EXT_MODULES = [ ("etree", "lxml.etree"), - ("objectify", "lxml.elements.objectify"), - ("classlookup", "lxml.elements.classlookup") + ("objectify", "lxml.objectify") ] setup_args = {} Modified: lxml/branch/capi/src/lxml/apihelpers.pxi ============================================================================== --- lxml/branch/capi/src/lxml/apihelpers.pxi (original) +++ lxml/branch/capi/src/lxml/apihelpers.pxi Sat Aug 5 19:21:46 2006 @@ -80,6 +80,37 @@ else: return None +cdef _Element _makeElement(tag, xmlDoc* c_doc, _Document doc, + _BaseParser parser, attrib, nsmap, extra_attrs): + """Create a new element and initialize namespaces and attributes. + + This helper function will reuse as much of the existing document as + possible: + + If 'parser' is None, the parser will be inherited from 'doc' or the + default parser will be used. + + If 'doc' is None, 'c_doc' is used to create a new _Document and the new + element is made its root node. + + If 'c_doc' is also NULL, a new xmlDoc will be created. 
+ """ + cdef xmlNode* c_node + ns_utf, name_utf = _getNsTag(tag) + if c_doc is NULL: + if doc is None: + c_doc = _newDoc() + else: + c_doc = doc._c_doc + c_node = _createElement(c_doc, name_utf) + if doc is None: + tree.xmlDocSetRootElement(c_doc, c_node) + doc = _documentFactory(c_doc, parser) + # add namespaces to node if necessary + doc._setNodeNamespaces(c_node, ns_utf, nsmap) + _initNodeAttributes(c_node, doc, attrib, extra_attrs) + return _elementFactory(doc, c_node) + cdef object _attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node): cdef char* value cdef char* href Copied: lxml/branch/capi/src/lxml/classlookup.pxi (from r30837, lxml/branch/capi/src/lxml/classlookup.pyx) ============================================================================== --- lxml/branch/capi/src/lxml/classlookup.pyx (original) +++ lxml/branch/capi/src/lxml/classlookup.pxi Sat Aug 5 19:21:46 2006 @@ -1,41 +1,164 @@ # Configurable Element class lookup -__doc__ = """Configurable Element class lookup mechanisms. +################################################################################ +# Custom Element classes -This module contains a number of different lookup implementations for Element -classes. +cdef public class ElementBase(_Element) [ type LxmlElementBaseType, + object LxmlElementBase ]: + """All custom Element classes must inherit from this one. + + Note that subclasses *must not* override __init__ or __new__ as it is + absolutely undefined when these objects will be created or destroyed. All + persistent state of Elements must be stored in the underlying XML. If you + really need to initialize the object after creation, you can implement an + ``_init(self)`` method that will be called after object creation. + """ + +cdef class CommentBase(_Comment): + """All custom Comment classes must inherit from this one. + + Note that subclasses *must not* override __init__ or __new__ as it is + absolutely undefined when these objects will be created or destroyed. 
All + persistent state of Comments must be stored in the underlying XML. If you + really need to initialize the object after creation, you can implement an + ``_init(self)`` method that will be called after object creation. + """ + +cdef class PIBase(_ProcessingInstruction): + """All custom Processing Instruction classes must inherit from this one. + + Note that subclasses *must not* override __init__ or __new__ as it is + absolutely undefined when these objects will be created or destroyed. All + persistent state of PIs must be stored in the underlying XML. If you + really need to initialize the object after creation, you can implement an + ``_init(self)`` method that will be called after object creation. + """ + + +################################################################################ +# Element class lookup + +ctypedef object (*_element_class_lookup_function)(object, _Document, xmlNode*) + +# class to store element class lookup functions +cdef public class ElementClassLookup [ type LxmlElementClassLookupType, + object LxmlElementClassLookup ]: + """Superclass of Element class lookups. + """ + cdef _element_class_lookup_function _lookup_function + def __init__(self): + self._lookup_function = NULL # use default lookup + +cdef public class FallbackElementClassLookup(ElementClassLookup) \ + [ type LxmlFallbackElementClassLookupType, + object LxmlFallbackElementClassLookup ]: + """Superclass of Element class lookups with additional fallback. + """ + cdef readonly ElementClassLookup fallback + cdef _element_class_lookup_function _fallback_function + def __init__(self, ElementClassLookup fallback=None): + self._lookup_function = NULL # use default lookup + if fallback is not None: + self.setFallback(fallback) + else: + self._fallback_function = DEFAULT_ELEMENT_CLASS_LOOKUP + + def setFallback(self, ElementClassLookup lookup not None): + """Sets the fallback scheme for this lookup method. 
+ """ + self.fallback = lookup + self._fallback_function = lookup._lookup_function + + cdef object _callFallback(self, doc, xmlNode* c_node): + return self._fallback_function(self.fallback, doc, c_node) + +# default lookup: Namespace classes +cdef _element_class_lookup_function DEFAULT_ELEMENT_CLASS_LOOKUP +DEFAULT_ELEMENT_CLASS_LOOKUP = _find_nselement_class + +cdef _element_class_lookup_function LOOKUP_ELEMENT_CLASS +LOOKUP_ELEMENT_CLASS = DEFAULT_ELEMENT_CLASS_LOOKUP + +cdef object ELEMENT_CLASS_LOOKUP_STATE +ELEMENT_CLASS_LOOKUP_STATE = None + +cdef void _setElementClassLookupFunction( + _element_class_lookup_function function, object state): + global LOOKUP_ELEMENT_CLASS, ELEMENT_CLASS_LOOKUP_STATE + if function is NULL: + LOOKUP_ELEMENT_CLASS = DEFAULT_ELEMENT_CLASS_LOOKUP + ELEMENT_CLASS_LOOKUP_STATE = None + else: + LOOKUP_ELEMENT_CLASS = function + ELEMENT_CLASS_LOOKUP_STATE = state + +def setElementClassLookup(ElementClassLookup lookup = None): + if lookup is None or lookup._lookup_function is NULL: + _setElementClassLookupFunction(NULL, None) + else: + _setElementClassLookupFunction(lookup._lookup_function, lookup) + + +################################################################################ +# Custom Element class lookup schemes -* ElementDefaultClassLookup: always use the default classes. This class is - copied from the lxml.etree module. +cdef class ElementDefaultClassLookup(ElementClassLookup): + """Element class lookup scheme that always returns the default Element + class. 
+ """ + cdef readonly object element_class + cdef readonly object comment_class + cdef readonly object pi_class + def __init__(self, element=None, comment=None, pi=None): + self._lookup_function = _lookupDefaultElementClass + if element is None: + self.element_class = _Element + elif issubclass(element, ElementBase): + self.element_class = element + else: + raise TypeError, "element class must be subclass of ElementBase" + + if comment is None: + self.comment_class = _Comment + elif issubclass(comment, CommentBase): + self.comment_class = comment + else: + raise TypeError, "comment class must be subclass of CommentBase" + + if pi is None: + self.pi_class = _ProcessingInstruction + elif issubclass(pi, PIBase): + self.pi_class = pi + else: + raise TypeError, "PI class must be subclass of PIBase" -* ElementNamespaceClassLookup: find the class in the Namespace registry or use - a fallback lookup mechanism. This class is copied from the lxml.etree - module. - -* AttributeBasedElementClassLookup: lookup the class based on the value of a - specific attribute of the element. - -* ParserBasedElementClassLookup: global lookup scheme that delegates to the - parser specific class lookup mechanism. - -* CustomElementClassLookup: customizable lookup scheme that delegates to a - callback method. -""" - -from python cimport isinstance, getattr, _cstr, Py_ssize_t -from etreepublic cimport _Document -from etreepublic cimport ElementClassLookup, FallbackElementClassLookup -cimport etreepublic as cetree -cimport python -cimport tree - -cdef object etree -from lxml import etree -# initialize C-API of lxml.etree -cetree.import_etree(etree) +cdef object _lookupDefaultElementClass(state, _Document _doc, xmlNode* c_node): + "Trivial class lookup function that always returns the default class." 
+ if c_node.type == tree.XML_ELEMENT_NODE: + if state is None: + return _Element + else: + return (<ElementDefaultClassLookup>state).element_class + elif c_node.type == tree.XML_COMMENT_NODE: + if state is None: + return _Comment + else: + return (<ElementDefaultClassLookup>state).comment_class + elif c_node.type == tree.XML_PI_NODE: + if state is None: + return _ProcessingInstruction + else: + return (<ElementDefaultClassLookup>state).pi_class + else: + assert 0, "Unknown node type: %s" % c_node.type -ElementDefaultClassLookup = etree.ElementDefaultClassLookup -ElementNamespaceClassLookup = etree.ElementNamespaceClassLookup +cdef class ElementNamespaceClassLookup(FallbackElementClassLookup): + """Element class lookup scheme that searches the Element class in the + Namespace registry. + """ + def __init__(self, ElementClassLookup fallback=None): + FallbackElementClassLookup.__init__(self, fallback) + self._lookup_function = _find_nselement_class cdef class AttributeBasedElementClassLookup(FallbackElementClassLookup): """Checks an attribute of an Element and looks up the value in a class @@ -55,7 +178,7 @@ cdef char* _c_name def __init__(self, attribute_name, class_mapping, ElementClassLookup fallback=None): - self._pytag = cetree.getNsTag(attribute_name) + self._pytag = _getNsTag(attribute_name) ns, name = self._pytag if ns is None: self._c_ns = NULL @@ -65,20 +188,20 @@ self._class_mapping = dict(class_mapping) FallbackElementClassLookup.__init__(self, fallback) - self._lookup_function = _attribute_lookup + self._lookup_function = _attribute_class_lookup -cdef object _attribute_lookup(state, _Document doc, tree.xmlNode* c_node): +cdef object _attribute_class_lookup(state, _Document doc, xmlNode* c_node): cdef AttributeBasedElementClassLookup lookup cdef python.PyObject* dict_result lookup = <AttributeBasedElementClassLookup>state if c_node.type == tree.XML_ELEMENT_NODE: - value = cetree.attributeValueFromNsName( + value = _attributeValueFromNsName( c_node, lookup._c_ns, lookup._c_name) dict_result = python.PyDict_GetItem(lookup._class_mapping, value) if dict_result is not 
NULL: return <object>dict_result - return cetree.callLookupFallback(lookup, doc, c_node) + return lookup._callFallback(doc, c_node) cdef class ParserBasedElementClassLookup(FallbackElementClassLookup): @@ -86,18 +209,18 @@ """ def __init__(self, ElementClassLookup fallback=None): FallbackElementClassLookup.__init__(self, fallback) - self._lookup_function = _parser_lookup + self._lookup_function = _parser_class_lookup -cdef object _parser_lookup(state, _Document doc, tree.xmlNode* c_node): - cdef ElementClassLookup lookup +cdef object _parser_class_lookup(state, _Document doc, xmlNode* c_node): + cdef FallbackElementClassLookup lookup cdef ElementClassLookup parser_lookup - lookup = <ElementClassLookup>state + lookup = <FallbackElementClassLookup>state if c_node.type == tree.XML_ELEMENT_NODE: - parser_lookup = cetree.getParserElementLookupFromDocument(doc) + parser_lookup = doc._parser._class_lookup if parser_lookup is not None: return parser_lookup._lookup_function(parser_lookup, doc, c_node) - return cetree.callLookupFallback(lookup, doc, c_node) + return lookup._callFallback(doc, c_node) cdef class CustomElementClassLookup(FallbackElementClassLookup): @@ -117,12 +240,12 @@ """ def __init__(self, ElementClassLookup fallback=None): FallbackElementClassLookup.__init__(self, fallback) - self._lookup_function = _custom_lookup + self._lookup_function = _custom_class_lookup def lookup(self, type, doc, namespace, name): return None -cdef object _custom_lookup(state, _Document doc, tree.xmlNode* c_node): +cdef object _custom_class_lookup(state, _Document doc, xmlNode* c_node): cdef CustomElementClassLookup lookup cdef char* c_str @@ -147,4 +270,4 @@ cls = lookup.lookup(element_type, doc, ns, name) if cls is not None: return cls - return cetree.callLookupFallback(lookup, doc, c_node) + return lookup._callFallback(doc, c_node) Deleted: /lxml/branch/capi/src/lxml/classlookup.pyx ============================================================================== --- /lxml/branch/capi/src/lxml/classlookup.pyx Sat Aug 5 19:21:46 2006 +++ 
(empty file) @@ -1,150 +0,0 @@ -# Configurable Element class lookup - -__doc__ = """Configurable Element class lookup mechanisms. - -This module contains a number of different lookup implementations for Element -classes. - -* ElementDefaultClassLookup: always use the default classes. This class is - copied from the lxml.etree module. - -* ElementNamespaceClassLookup: find the class in the Namespace registry or use - a fallback lookup mechanism. This class is copied from the lxml.etree - module. - -* AttributeBasedElementClassLookup: lookup the class based on the value of a - specific attribute of the element. - -* ParserBasedElementClassLookup: global lookup scheme that delegates to the - parser specific class lookup mechanism. - -* CustomElementClassLookup: customizable lookup scheme that delegates to a - callback method. -""" - -from python cimport isinstance, getattr, _cstr, Py_ssize_t -from etreepublic cimport _Document -from etreepublic cimport ElementClassLookup, FallbackElementClassLookup -cimport etreepublic as cetree -cimport python -cimport tree - -cdef object etree -from lxml import etree -# initialize C-API of lxml.etree -cetree.import_etree(etree) - -ElementDefaultClassLookup = etree.ElementDefaultClassLookup -ElementNamespaceClassLookup = etree.ElementNamespaceClassLookup - -cdef class AttributeBasedElementClassLookup(FallbackElementClassLookup): - """Checks an attribute of an Element and looks up the value in a class - dictionary. - - Arguments: - * attribute name ('{ns}name' style string) - * class mapping (Python dict mapping attribute values to Element classes) - * fallback (optional fallback lookup mechanism) - - A None key in the class mapping will be checked if the attribute is - missing. 
- """ - cdef object _class_mapping - cdef object _pytag - cdef char* _c_ns - cdef char* _c_name - def __init__(self, attribute_name, class_mapping, - ElementClassLookup fallback=None): - self._pytag = cetree.getNsTag(attribute_name) - ns, name = self._pytag - if ns is None: - self._c_ns = NULL - else: - self._c_ns = _cstr(ns) - self._c_name = _cstr(name) - self._class_mapping = dict(class_mapping) - - FallbackElementClassLookup.__init__(self, fallback) - self._lookup_function = _attribute_lookup - -cdef object _attribute_lookup(state, _Document doc, tree.xmlNode* c_node): - cdef AttributeBasedElementClassLookup lookup - cdef python.PyObject* dict_result - - lookup = state - if c_node.type == tree.XML_ELEMENT_NODE: - value = cetree.attributeValueFromNsName( - c_node, lookup._c_ns, lookup._c_name) - dict_result = python.PyDict_GetItem(lookup._class_mapping, value) - if dict_result is not NULL: - return dict_result - return cetree.callLookupFallback(lookup, doc, c_node) - - -cdef class ParserBasedElementClassLookup(FallbackElementClassLookup): - """Element class lookup based on the XML parser. - """ - def __init__(self, ElementClassLookup fallback=None): - FallbackElementClassLookup.__init__(self, fallback) - self._lookup_function = _parser_lookup - -cdef object _parser_lookup(state, _Document doc, tree.xmlNode* c_node): - cdef ElementClassLookup lookup - cdef ElementClassLookup parser_lookup - - lookup = state - if c_node.type == tree.XML_ELEMENT_NODE: - parser_lookup = cetree.getParserElementLookupFromDocument(doc) - if parser_lookup is not None: - return parser_lookup._lookup_function(parser_lookup, doc, c_node) - return cetree.callLookupFallback(lookup, doc, c_node) - - -cdef class CustomElementClassLookup(FallbackElementClassLookup): - """Element class lookup based on a subclass method. - - You can inherit from this class and override the method - - lookup(type, doc, namespace, name) - - to lookup the element class for a node. 
Arguments of the method: - * type: one of 'element', 'comment', 'PI' - * doc: document that the node is in - * namespace: namespace URI of the node (or None for comments/PIs) - * name: name of the element, None for comments, target for PIs - - If you return None from this method, the fallback will be called. - """ - def __init__(self, ElementClassLookup fallback=None): - FallbackElementClassLookup.__init__(self, fallback) - self._lookup_function = _custom_lookup - - def lookup(self, type, doc, namespace, name): - return None - -cdef object _custom_lookup(state, _Document doc, tree.xmlNode* c_node): - cdef CustomElementClassLookup lookup - cdef char* c_str - - lookup = state - - if c_node.type == tree.XML_COMMENT_NODE: - element_type = "comment" - elif c_node.type == tree.XML_PI_NODE: - element_type = "PI" - else: - element_type = "element" - if c_node.name is NULL: - name = None - else: - name = c_node.name - c_str = tree._getNs(c_node) - if c_str is NULL: - ns = None - else: - ns = c_str - - cls = lookup.lookup(element_type, doc, ns, name) - if cls is not None: - return cls - return cetree.callLookupFallback(lookup, doc, c_node) Modified: lxml/branch/capi/src/lxml/etree.pyx ============================================================================== --- lxml/branch/capi/src/lxml/etree.pyx (original) +++ lxml/branch/capi/src/lxml/etree.pyx Sat Aug 5 19:21:46 2006 @@ -1,6 +1,6 @@ cimport tree, python from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement, _getNs -from python cimport isinstance, issubclass, hasattr, callable +from python cimport isinstance, issubclass, hasattr, getattr, callable from python cimport iter, str, _cstr, _isString, Py_ssize_t cimport xpath cimport xinclude @@ -1113,18 +1113,7 @@ def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): """Creates a new element associated with the same document. 
""" - # a little code duplication, but less overhead through doc reuse - cdef xmlNode* c_node - cdef xmlDoc* c_doc - cdef _Document doc - ns_utf, name_utf = _getNsTag(_tag) - doc = self._doc - c_doc = doc._c_doc - c_node = _createElement(c_doc, name_utf) - # add namespaces to node if necessary - doc._setNodeNamespaces(c_node, ns_utf, nsmap) - _initNodeAttributes(c_node, doc, attrib, _extra) - return _elementFactory(doc, c_node) + return _makeElement(_tag, NULL, self._doc, None, attrib, nsmap, _extra) def find(self, path): """Finds the first matching subelement, by tag name or path. @@ -1556,18 +1545,8 @@ def Element(_tag, attrib=None, nsmap=None, **_extra): """Element factory. This function returns an object implementing the Element interface. """ - cdef xmlNode* c_node - cdef xmlDoc* c_doc - cdef _Document doc - ns_utf, name_utf = _getNsTag(_tag) - c_doc = _newDoc() - c_node = _createElement(c_doc, name_utf) - tree.xmlDocSetRootElement(c_doc, c_node) - doc = _documentFactory(c_doc, None) - # add namespaces to node if necessary - doc._setNodeNamespaces(c_node, ns_utf, nsmap) - _initNodeAttributes(c_node, doc, attrib, _extra) - return _elementFactory(doc, c_node) + ### also look at _Element.makeelement() and _BaseParser.makeelement() ### + return _makeElement(_tag, NULL, None, None, attrib, nsmap, _extra) def Comment(text=None): """Comment element factory. This factory function creates a special element that will @@ -1640,20 +1619,24 @@ return _elementTreeFactory(doc, element) -def HTML(text): +def HTML(text, _BaseParser parser=None): """Parses an HTML document from a string constant. This function can be used to embed "HTML literals" in Python code. """ cdef _Document doc - doc = _parseMemoryDocument(text, None, __DEFAULT_HTML_PARSER) + if parser is None: + parser = __DEFAULT_HTML_PARSER + doc = _parseMemoryDocument(text, None, parser) return doc.getroot() -def XML(text): +def XML(text, _BaseParser parser=None): """Parses an XML document from a string constant. 
This function can be used to embed "XML literals" in Python code. """ cdef _Document doc - doc = _parseMemoryDocument(text, None, __DEFAULT_XML_PARSER) + if parser is None: + parser = __DEFAULT_XML_PARSER + doc = _parseMemoryDocument(text, None, parser) return doc.getroot() fromstring = XML @@ -1749,136 +1732,12 @@ ################################################################################ -# Element class lookup - -ctypedef object (*_element_class_lookup_function)(object, _Document, xmlNode*) - -# class to store element class lookup functions -cdef public class ElementClassLookup [ type LxmlElementClassLookupType, - object LxmlElementClassLookup ]: - """Superclass of Element class lookups. - """ - cdef _element_class_lookup_function _lookup_function - def __init__(self): - self._lookup_function = NULL # use default lookup - -cdef public class FallbackElementClassLookup(ElementClassLookup) \ - [ type LxmlFallbackElementClassLookupType, - object LxmlFallbackElementClassLookup ]: - """Superclass of Element class lookups with additional fallback. - """ - cdef readonly ElementClassLookup fallback - cdef _element_class_lookup_function _fallback_function - def __init__(self, ElementClassLookup fallback=None): - self._lookup_function = NULL # use default lookup - if fallback is not None: - self.setFallback(fallback) - else: - self._fallback_function = DEFAULT_ELEMENT_CLASS_LOOKUP - - def setFallback(self, ElementClassLookup lookup not None): - """Sets the fallback scheme for this lookup method. - """ - self.fallback = lookup - self._fallback_function = lookup._lookup_function - - cdef object _callFallback(self, doc, xmlNode* c_node): - return self._fallback_function(self.fallback, doc, c_node) - -cdef class ElementDefaultClassLookup(ElementClassLookup): - """Element class lookup scheme that always returns the default Element - class. 
- """ - def __init__(self): - self._lookup_function = _lookupDefaultElementClass - -cdef object _lookupDefaultElementClass(_state, _Document _doc, xmlNode* c_node): - "Trivial class lookup function that always returns the default class." - if c_node.type == tree.XML_ELEMENT_NODE: - return __DEFAULT_ELEMENT_CLASS - elif c_node.type == tree.XML_COMMENT_NODE: - return __DEFAULT_COMMENT_CLASS - elif c_node.type == tree.XML_PI_NODE: - return __DEFAULT_PI_CLASS - else: - assert 0, "Unknown node type: %s" % c_node.type - -cdef class ElementNamespaceClassLookup(FallbackElementClassLookup): - """Element class lookup scheme that searches the Element class in the - Namespace registry. - """ - def __init__(self, ElementClassLookup fallback=None): - FallbackElementClassLookup.__init__(self, fallback) - self._lookup_function = _find_nselement_class - -# default lookup: Namespace classes -cdef _element_class_lookup_function DEFAULT_ELEMENT_CLASS_LOOKUP -DEFAULT_ELEMENT_CLASS_LOOKUP = _find_nselement_class - -cdef _element_class_lookup_function LOOKUP_ELEMENT_CLASS -LOOKUP_ELEMENT_CLASS = DEFAULT_ELEMENT_CLASS_LOOKUP - -cdef object ELEMENT_CLASS_LOOKUP_STATE -ELEMENT_CLASS_LOOKUP_STATE = None - -cdef void _setElementClassLookupFunction( - _element_class_lookup_function function, object state): - global LOOKUP_ELEMENT_CLASS, ELEMENT_CLASS_LOOKUP_STATE - if function is NULL: - LOOKUP_ELEMENT_CLASS = DEFAULT_ELEMENT_CLASS_LOOKUP - ELEMENT_CLASS_LOOKUP_STATE = None - else: - LOOKUP_ELEMENT_CLASS = function - ELEMENT_CLASS_LOOKUP_STATE = state - -def setElementClassLookup(ElementClassLookup lookup = None): - if lookup is None or lookup._lookup_function is NULL: - _setElementClassLookupFunction(NULL, None) - else: - _setElementClassLookupFunction(lookup._lookup_function, lookup) - - - -################################################################################ -# Custom Element classes - -cdef public class ElementBase(_Element) [ type LxmlElementBaseType, - object LxmlElementBase 
]: - """All custom Element classes must inherit from this one. - - Note that subclasses *must not* override __init__ or __new__ as it is - absolutely undefined when these objects will be created or destroyed. All - persistent state of elements must be stored in the underlying XML. If you - really need to initialize the object after creation, you can implement an - ``_init(self)`` method that will be called after object creation. - """ - -def setDefaultElementClass(cls=None): - global __DEFAULT_ELEMENT_CLASS - if cls is None: - __DEFAULT_ELEMENT_CLASS = _Element - elif not python.PyType_Check(cls) or not issubclass(cls, ElementBase): - raise LxmlRegistryError, \ - "Registered element classes must be subtypes of ElementBase" - else: - __DEFAULT_ELEMENT_CLASS = cls - -cdef object __DEFAULT_ELEMENT_CLASS -__DEFAULT_ELEMENT_CLASS = _Element - -cdef object __DEFAULT_COMMENT_CLASS -__DEFAULT_COMMENT_CLASS = _Comment - -cdef object __DEFAULT_PI_CLASS -__DEFAULT_PI_CLASS = _ProcessingInstruction - - -################################################################################ # Include submodules include "proxy.pxi" # Proxy handling (element backpointers/memory/etc.) include "apihelpers.pxi" # Private helper functions include "xmlerror.pxi" # Error and log handling +include "classlookup.pxi"# Namespace implementation and registry include "nsclasses.pxi" # Namespace implementation and registry include "docloader.pxi" # Support for custom document loaders include "parser.pxi" # XML Parser Modified: lxml/branch/capi/src/lxml/etreepublic.pxd ============================================================================== --- lxml/branch/capi/src/lxml/etreepublic.pxd (original) +++ lxml/branch/capi/src/lxml/etreepublic.pxd Sat Aug 5 19:21:46 2006 @@ -71,16 +71,15 @@ object (*function)(object, _Document, tree.xmlNode*), object state) # lookup function that always returns the default Element class + # note that the first argument is expected to be None! 
cdef object lookupDefaultElementClass(_1, _Document _2, tree.xmlNode* c_node) # lookup function for namespace/tag specific Element classes + # note that the first argument is expected to be None! cdef object lookupNamespaceElementClass(_1, _Document _2, tree.xmlNode* c_node) - # return the element class lookup registered for the parser of this document - cdef ElementClassLookup getParserElementLookupFromDocument(_Document doc) - # call the fallback lookup function of an FallbackElementClassLookup cdef object callLookupFallback(FallbackElementClassLookup lookup, _Document doc, tree.xmlNode* c_node) Modified: lxml/branch/capi/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/capi/src/lxml/nsclasses.pxi (original) +++ lxml/branch/capi/src/lxml/nsclasses.pxi Sat Aug 5 19:21:46 2006 @@ -202,5 +202,5 @@ return dict_result if state is None: - return __DEFAULT_ELEMENT_CLASS + return _lookupDefaultElementClass(None, doc, c_node) return (state)._callFallback(doc, c_node) Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Sat Aug 5 19:21:46 2006 @@ -160,13 +160,13 @@ count = 1 c_node = c_self_node.next while c_node is not NULL: - if tree._isElement(c_node) and \ + if c_node.type == tree.XML_ELEMENT_NODE and \ cetree.tagMatches(c_node, c_href, c_tag): count = count + 1 c_node = c_node.next c_node = c_self_node.prev while c_node is not NULL: - if tree._isElement(c_node) and \ + if c_node.type == tree.XML_ELEMENT_NODE and \ cetree.tagMatches(c_node, c_href, c_tag): count = count + 1 c_node = c_node.prev @@ -364,7 +364,8 @@ char* href, char* name, Py_ssize_t index): while c_node is not NULL: - if tree._isElement(c_node) and cetree.tagMatches(c_node, href, name): + if c_node.type == tree.XML_ELEMENT_NODE and \ + cetree.tagMatches(c_node, 
href, name): index = index - 1 if index < 0: return c_node @@ -1328,12 +1329,10 @@ Passing False for the ``prefer_nsclasses`` keyword argument will prevent the namespace lookup. """ - #etree.setDefaultElementClass(ObjectifiedElement) lookup = ObjectifyElementClassLookup() if prefer_nsclasses: lookup = etree.ElementNamespaceClassLookup(lookup) etree.setElementClassLookup(lookup) def unregister(): - #etree.setDefaultElementClass() etree.setElementClassLookup() Modified: lxml/branch/capi/src/lxml/parser.pxi ============================================================================== --- lxml/branch/capi/src/lxml/parser.pxi (original) +++ lxml/branch/capi/src/lxml/parser.pxi Sat Aug 5 19:21:46 2006 @@ -398,6 +398,11 @@ "Create a new parser with the same configuration." return self._copy() + def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): + """Creates a new element associated with this parser. + """ + return _makeElement(_tag, NULL, None, self, attrib, nsmap, _extra) + cdef xmlDoc* _parseUnicodeDoc(self, utext, char* c_filename) except NULL: """Parse unicode document, share dictionary if possible. 
""" Modified: lxml/branch/capi/src/lxml/public-api.pxi ============================================================================== --- lxml/branch/capi/src/lxml/public-api.pxi (original) +++ lxml/branch/capi/src/lxml/public-api.pxi Sat Aug 5 19:21:46 2006 @@ -31,9 +31,6 @@ cdef public object lookupNamespaceElementClass(state, doc, xmlNode* c_node): return _find_nselement_class(state, doc, c_node) -cdef public ElementClassLookup getParserElementLookupFromDocument(_Document doc): - return doc._parser._class_lookup - cdef public object callLookupFallback(FallbackElementClassLookup lookup, _Document doc, xmlNode* c_node): return lookup._callFallback(doc, c_node) Modified: lxml/branch/capi/src/lxml/tests/test_classlookup.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_classlookup.py (original) +++ lxml/branch/capi/src/lxml/tests/test_classlookup.py Sat Aug 5 19:21:46 2006 @@ -13,8 +13,6 @@ from common_imports import etree, StringIO, HelperTestCase, fileInTestDir from common_imports import SillyFileLike, canonicalize -from lxml.elements import classlookup - xml_str = '''\ @@ -30,7 +28,6 @@ etree = etree def tearDown(self): - etree.setDefaultElementClass() etree.setElementClassLookup() etree.Namespace("myNS").clear() etree.Namespace("otherNS").clear() @@ -42,7 +39,7 @@ ns = etree.Namespace("myNS") ns[None] = TestElement - lookup = classlookup.ElementNamespaceClassLookup() + lookup = etree.ElementNamespaceClassLookup() etree.setElementClassLookup(lookup) root = etree.XML(xml_str) @@ -59,7 +56,7 @@ ns = etree.Namespace("myNS") ns[None] = TestElement - lookup = classlookup.ElementDefaultClassLookup() + lookup = etree.ElementDefaultClassLookup() etree.setElementClassLookup(lookup) root = etree.XML(xml_str) @@ -72,7 +69,7 @@ class_dict = {"A1" : TestElement} - lookup = classlookup.AttributeBasedElementClassLookup( + lookup = etree.AttributeBasedElementClassLookup( "a1", class_dict) 
etree.setElementClassLookup(lookup) @@ -86,7 +83,7 @@ class TestElement(etree.ElementBase): FIND_ME = "custom" - class MyLookup(classlookup.CustomElementClassLookup): + class MyLookup(etree.CustomElementClassLookup): def lookup(self, t, d, ns, name): if name == 'c1': return TestElement @@ -106,7 +103,7 @@ class TestElement2(etree.ElementBase): FIND_ME = "nsclasses" - class MyLookup(classlookup.CustomElementClassLookup): + class MyLookup(etree.CustomElementClassLookup): def lookup(self, t, d, ns, name): if name == 'c1': return TestElement1 @@ -114,7 +111,7 @@ ns = etree.Namespace("otherNS") ns[None] = TestElement2 - lookup = classlookup.ElementNamespaceClassLookup( MyLookup() ) + lookup = etree.ElementNamespaceClassLookup( MyLookup() ) etree.setElementClassLookup(lookup) root = etree.XML(xml_str) @@ -129,10 +126,10 @@ class TestElement(etree.ElementBase): FIND_ME = "parser_based" - lookup = classlookup.ParserBasedElementClassLookup() + lookup = etree.ParserBasedElementClassLookup() etree.setElementClassLookup(lookup) - class MyLookup(classlookup.CustomElementClassLookup): + class MyLookup(etree.CustomElementClassLookup): def lookup(self, t, d, ns, name): return TestElement Modified: lxml/branch/capi/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_nsclasses.py (original) +++ lxml/branch/capi/src/lxml/tests/test_nsclasses.py Sat Aug 5 19:21:46 2006 @@ -144,31 +144,12 @@ etree.Namespace(None).clear() etree.Namespace(u'ns30').clear() - def test_default_element_class(self): - class local_default_class(etree.ElementBase): - pass - - try: - etree.setDefaultElementClass(local_default_class) - self.assert_(isinstance(etree.Element("test"), - local_default_class)) - self.assert_(isinstance(etree.Element("{http://myns}test"), - local_default_class)) - - etree.setDefaultElementClass() - self.assertFalse(isinstance(etree.Element("test"), - local_default_class)) - 
self.assertFalse(isinstance(etree.Element("{http://myns}test"), - local_default_class)) - finally: - etree.setDefaultElementClass() - def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeNamespaceClassesTestCase)]) optionflags = doctest.NORMALIZE_WHITESPACE|doctest.ELLIPSIS suite.addTests( - [doctest.DocFileSuite('../../../doc/namespace_extensions.txt', + [doctest.DocFileSuite('../../../doc/element_classes.txt', optionflags=optionflags)], ) return suite Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Sat Aug 5 19:21:46 2006 @@ -13,7 +13,7 @@ from common_imports import etree, StringIO, HelperTestCase, fileInTestDir from common_imports import SillyFileLike, canonicalize -from lxml.elements import objectify +from lxml import objectify xml_str = '''\ From scoder at codespeak.net Sat Aug 5 20:08:08 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 5 Aug 2006 20:08:08 +0200 (CEST) Subject: [Lxml-checkins] r31033 - in lxml/branch/capi: doc src/lxml Message-ID: <20060805180808.5A6F31007A@code0.codespeak.net> Author: scoder Date: Sat Aug 5 20:08:06 2006 New Revision: 31033 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: lxml.objectify now replicates the Element() and XML() functions to let them use an objectify specific parser Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Sat Aug 5 20:08:06 2006 @@ -26,7 +26,7 @@ .. _`parser specific element classes`: element_classes.html#parser-based-lookup The next step is to create a parser that builds objectify documents. 
The -objectify API is meant for data-centered XML (as opposed to document XML with +objectify API is meant for data-centric XML (as opposed to document XML with mixed content). Therefore, we configure the parser to let it remove whitespace-only text from the parsed document if it is not enclosed by an XML element. Note that this alters the document infoset, so if you consider the @@ -69,16 +69,21 @@ The ``makeelement()`` method of the parser has the same signature as the normal ``Element()`` factory known from lxml.etree and can therefore easily -replace the respective calls. If you create your parser globally at a module -level, it may be convenient to also assign ``parser.makeelement`` to the name -``Element`` in your module to avoid accidentally calling the wrong factory. -The same applies to the ``XML()`` function of ``lxml.etree``, which must now -use the dedicated parser:: - - >>> Element = parser.makeelement - >>> SubElement = etree.SubElement - >>> def XML(xml): - ... return etree.XML(xml, parser) +replace the respective calls. + +For convenience, ``objectify`` also replicates the standard factories +``Element()`` and ``XML()`` from ``lxml.etree`` using a parser that is local +to the ``objectify`` module. So, after setting up the parser based element +lookup above, you can keep using the same API as in ``lxml.etree``, except +that you have to import these functions from a different module:: + + >>> obj_el = objectify.Element("new") + >>> print isinstance(obj_el, objectify.ObjectifiedElement) + True + + >>> el = objectify.XML("") + >>> print isinstance(el, objectify.ObjectifiedElement) + True .. contents:: @@ -100,13 +105,13 @@ behind the usual object attribute access pattern. 
Asking an element for an attribute will return the sequence of children with corresponding tag names:: - >>> root = Element("root") - >>> b = SubElement(root, "b") + >>> root = objectify.Element("root") + >>> b = etree.SubElement(root, "b") >>> print root.b[0].tag b >>> root.index(root.b[0]) 0 - >>> b = SubElement(root, "b") + >>> b = etree.SubElement(root, "b") >>> print root.b[0].tag b >>> print root.b[1].tag @@ -124,9 +129,9 @@ Iteration and slicing also obey the requested tag:: - >>> x1 = SubElement(root, "x") - >>> x2 = SubElement(root, "x") - >>> x3 = SubElement(root, "x") + >>> x1 = etree.SubElement(root, "x") + >>> x2 = etree.SubElement(root, "x") + >>> x3 = etree.SubElement(root, "x") >>> [ el.tag for el in root.x ] ['x', 'x', 'x'] @@ -156,7 +161,7 @@ XML attributes are accessed as in the normal ElementTree API:: - >>> c = SubElement(root, "c", myattr="someval") + >>> c = etree.SubElement(root, "c", myattr="someval") >>> print root.c.get("myattr") someval @@ -169,31 +174,31 @@ case, the subtree is automatically deep copied and the tag name of its root is updated to match the attribute name:: - >>> el = Element("yet_another_child") + >>> el = objectify.Element("yet_another_child") >>> root.new_child = el >>> print root.new_child.tag new_child >>> print el.tag yet_another_child - >>> root.y = [ Element("y"), Element("y") ] + >>> root.y = [ objectify.Element("y"), objectify.Element("y") ] >>> [ el.tag for el in root.y ] ['y', 'y'] The latter is a short form for operations on the full slice:: - >>> root.y[:] = [ Element("y") ] + >>> root.y[:] = [ objectify.Element("y") ] >>> [ el.tag for el in root.y ] ['y'] You can also replace children that way:: - >>> child1 = SubElement(root, "child") - >>> child2 = SubElement(root, "child") - >>> child3 = SubElement(root, "child") + >>> child1 = etree.SubElement(root, "child") + >>> child2 = etree.SubElement(root, "child") + >>> child3 = etree.SubElement(root, "child") - >>> el = Element("new_child") - >>> subel = 
SubElement(el, "sub") + >>> el = objectify.Element("new_child") + >>> subel = etree.SubElement(el, "sub") >>> root.child = el >>> print root.child.sub.tag @@ -223,9 +228,9 @@ Element without specifying a namespace, the lookup will use the namespace of the parent:: - >>> root = Element("{ns}root") - >>> b = SubElement(root, "{ns}b") - >>> c = SubElement(root, "{other}c") + >>> root = objectify.Element("{ns}root") + >>> b = etree.SubElement(root, "{ns}b") + >>> c = etree.SubElement(root, "{other}c") >>> print root.b.tag {ns}b @@ -251,11 +256,11 @@ For both convenience and speed, objectify supports its own path language, represented by the ``ObjectPath`` class:: - >>> root = Element("{ns}root") - >>> b1 = SubElement(root, "{ns}b") - >>> c = SubElement(b1, "{ns}c") - >>> b2 = SubElement(root, "{ns}b") - >>> d = SubElement(root, "{other}d") + >>> root = objectify.Element("{ns}root") + >>> b1 = etree.SubElement(root, "{ns}b") + >>> c = etree.SubElement(b1, "{ns}c") + >>> b2 = etree.SubElement(root, "{ns}b") + >>> d = etree.SubElement(root, "{other}d") >>> path = objectify.ObjectPath("root.b.c") >>> print path @@ -328,7 +333,7 @@ ObjectPath objects can be used to manipulate trees:: - >>> root = Element("{ns}root") + >>> root = objectify.Element("{ns}root") >>> path = objectify.ObjectPath(".some.child.{other}unknown") >>> path.hasattr(root) @@ -366,7 +371,8 @@ element content behave like them. For example, they support the normal math operators:: - >>> root = XML("511truehoi") + >>> root = objectify.XML( + ... "511truehoi") >>> root.a + root.b 16 >>> root.a += root.b @@ -397,7 +403,7 @@ ``dump()`` function that returns a recursive string representation for elements:: - >>> root = XML(""" + >>> root = objectify.XML(""" ... ... 1 ... 
1.2 @@ -422,7 +428,7 @@ You can freely switch between different types for the same child:: - >>> root = XML("5") + >>> root = objectify.XML("5") >>> print objectify.dump(root) root = None [ObjectifiedElement] a = 5 [IntElement] @@ -456,7 +462,7 @@ cannot behave as the Python types. Like all other tree elements, they show the normal slicing behaviour of objectify elements:: - >>> root = XML("testtoast") + >>> root = objectify.XML("testtoast") >>> print root.a + ' me' # behaves like a string, right? test me >>> len(root.a) # but there's only one 'a' element! @@ -476,7 +482,7 @@ normal ElementTree ``.text`` attribute. Additionally, all data classes provide a ``.pyval`` attribute that returns the value as plain Python type:: - >>> root = XML("test5") + >>> root = objectify.XML("test5") >>> root.a.text 'test' >>> root.a.pyval @@ -512,7 +518,7 @@ {http://codespeak.net/lxml/objectify/pytype}pytype >>> ns, name = objectify.PYTYPE_ATTRIBUTE[1:].split('}') - >>> root = XML("""\ + >>> root = objectify.XML("""\ ... ... 5 ... 5 @@ -532,7 +538,7 @@ application ever needs to. There is also a utility function ``annotate()`` that recursively generates this attribute for the elements of a tree:: - >>> root = XML("test5") + >>> root = objectify.XML("test5") >>> print objectify.dump(root) root = None [ObjectifiedElement] a = 'test' [StringElement] @@ -551,7 +557,7 @@ element annotations. Objectify knows those that can be mapped to normal Python types:: - >>> root = XML('''\ + >>> root = objectify.XML('''\ ... ... 5 ... 5 @@ -600,7 +606,7 @@ >>> xmas_type.register() - >>> root = XML("24.12.200012.24.2000") + >>> root = objectify.XML("24.12.200012.24.2000") >>> root.a.callSanta() Ho ho ho! >>> root.b.callSanta() @@ -619,7 +625,7 @@ If you provide XML Schema type information, this will override the type check function defined above:: - >>> root = XML('''\ + >>> root = objectify.XML('''\ ... ... 12.24.2000 ... 
@@ -654,7 +660,7 @@ >>> objectify.enableRecursiveStr() - >>> root = XML(""" + >>> root = objectify.XML(""" ... ... 1 ... 1.2 Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Sat Aug 5 20:08:06 2006 @@ -95,7 +95,37 @@ XML_SCHEMA_INSTANCE_NIL_ATTR = "{%s}nil" % XML_SCHEMA_INSTANCE_NS -# element class for the main API +################################################################################ +# Module level parser setup + +cdef object parser +parser = etree.XMLParser(remove_blank_text=True) +parser.setElementClassLookup( ObjectifyElementClassLookup() ) + +cdef object _XML +_XML = etree.XML + +def XML(xml): + """Objectify specific version of the lxml.etree XML() factory. + + NOTE: requires parser based element class lookup activated in lxml.etree! + """ + return _XML(xml, parser) + +cdef object _makeelement +_makeelement = parser.makeelement + +def Element(*args, **kwargs): + """Objectify specific version of the lxml.etree Element() factory. + + NOTE: requires parser based element class lookup activated in lxml.etree! + """ + return _makeelement(*args, **kwargs) + + +################################################################################ +# Element class for the main API + cdef class ObjectifiedElement(ElementBase): """Main XML Element class. 
From scoder at codespeak.net Sat Aug 5 20:43:40 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 5 Aug 2006 20:43:40 +0200 (CEST) Subject: [Lxml-checkins] r31037 - in lxml/branch/capi: doc src/lxml Message-ID: <20060805184340.7E79510082@code0.codespeak.net> Author: scoder Date: Sat Aug 5 20:43:36 2006 New Revision: 31037 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: switched from XML() to fromstring() in objectify, support for changing default parser used by objectify.Element()/.fromstring() Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Sat Aug 5 20:43:36 2006 @@ -15,10 +15,9 @@ >>> from lxml import etree >>> from lxml import objectify -To avoid interfering with other modules that use ``lxml.etree``, the normal -way to use ``objectify`` is to register it with a dedicated parser. This -requires setting up ``lxml.etree`` to use `parser specific element classes`_ -first:: +The normal way to use ``objectify`` is to register it with a dedicated parser. +This requires setting up ``lxml.etree`` to use `parser specific element +classes`_ first:: >>> lookup = etree.ParserBasedElementClassLookup() >>> etree.setElementClassLookup(lookup) @@ -39,8 +38,37 @@ >>> lookup = objectify.ObjectifyElementClassLookup() >>> parser.setElementClassLookup(lookup) -To create an ``objectify`` tree, you can either parse a document with this -parser:: +If you want additional support for `namespace specific classes`_, you can +register the objectify lookup as a fallback of the namespace lookup. 
Note, +however, that you have to take care in this case, that the namespace classes +inherit from ``objectify.ObjectifiedElement``, not only from the normal +``lxml.etree.ElementBase``, so that they support the ``objectify`` API:: + + >>> lookup = etree.ElementNamespaceClassLookup( + ... objectify.ObjectifyElementClassLookup() ) + >>> parser.setElementClassLookup(lookup) + +.. _`namespace specific classes`: element_classes.html#namespace-class-lookup + + +.. contents:: +.. + 1 Creating objectify trees + 2 Element access through object attributes + 3 Namespace handling + 4 ObjectPath + 5 Python data types + 6 Defining additional data classes + 7 Recursive string representation of elements + 8 What is different from ElementTree? + 9 Resetting the API + + +Creating objectify trees +------------------------ + +To create an ``objectify`` tree, you can either parse a document with the +parser you created:: >>> from StringIO import StringIO >>> xml = StringIO('') @@ -71,31 +99,23 @@ normal ``Element()`` factory known from lxml.etree and can therefore easily replace the respective calls. -For convenience, ``objectify`` also replicates the standard factories -``Element()`` and ``XML()`` from ``lxml.etree`` using a parser that is local -to the ``objectify`` module. So, after setting up the parser based element -lookup above, you can keep using the same API as in ``lxml.etree``, except -that you have to import these functions from a different module:: +For convenience, ``objectify`` also replicates the standard factory +``Element()`` and the ``fromstring()`` function from ``lxml.etree`` using a +parser that is local to the ``objectify`` module. 
So, after setting up the +parser based element lookup above, you can keep using the same API as in +``lxml.etree``, except that you have to import these functions from a +different module:: >>> obj_el = objectify.Element("new") >>> print isinstance(obj_el, objectify.ObjectifiedElement) True - >>> el = objectify.XML("") + >>> el = objectify.fromstring("") >>> print isinstance(el, objectify.ObjectifiedElement) True - -.. contents:: -.. - 1 Element access through object attributes - 2 Namespace handling - 3 ObjectPath - 4 Python data types - 5 Defining additional data classes - 6 Recursive string representation of elements - 7 What is different from ElementTree? - 8 Resetting the API +You can change this parser with ``objectify.setDefaultParser(parser)``, which +also allows adding the above support for namespace specific element classes. Element access through object attributes @@ -371,7 +391,7 @@ element content behave like them. For example, they support the normal math operators:: - >>> root = objectify.XML( + >>> root = objectify.fromstring( ... "511truehoi") >>> root.a + root.b 16 @@ -403,7 +423,7 @@ ``dump()`` function that returns a recursive string representation for elements:: - >>> root = objectify.XML(""" + >>> root = objectify.fromstring(""" ... ... 1 ... 1.2 @@ -428,7 +448,7 @@ You can freely switch between different types for the same child:: - >>> root = objectify.XML("5") + >>> root = objectify.fromstring("5") >>> print objectify.dump(root) root = None [ObjectifiedElement] a = 5 [IntElement] @@ -462,7 +482,7 @@ cannot behave as the Python types. Like all other tree elements, they show the normal slicing behaviour of objectify elements:: - >>> root = objectify.XML("testtoast") + >>> root = objectify.fromstring("testtoast") >>> print root.a + ' me' # behaves like a string, right? test me >>> len(root.a) # but there's only one 'a' element! @@ -482,7 +502,7 @@ normal ElementTree ``.text`` attribute.
Additionally, all data classes provide a ``.pyval`` attribute that returns the value as plain Python type:: - >>> root = objectify.XML("test5") + >>> root = objectify.fromstring("test5") >>> root.a.text 'test' >>> root.a.pyval @@ -518,7 +538,7 @@ {http://codespeak.net/lxml/objectify/pytype}pytype >>> ns, name = objectify.PYTYPE_ATTRIBUTE[1:].split('}') - >>> root = objectify.XML("""\ + >>> root = objectify.fromstring("""\ ... ... 5 ... 5 @@ -538,7 +558,7 @@ application ever needs to. There is also a utility function ``annotate()`` that recursively generates this attribute for the elements of a tree:: - >>> root = objectify.XML("test5") + >>> root = objectify.fromstring("test5") >>> print objectify.dump(root) root = None [ObjectifiedElement] a = 'test' [StringElement] @@ -557,7 +577,7 @@ element annotations. Objectify knows those that can be mapped to normal Python types:: - >>> root = objectify.XML('''\ + >>> root = objectify.fromstring('''\ ... ... 5 ... 5 @@ -606,7 +626,8 @@ >>> xmas_type.register() - >>> root = objectify.XML("24.12.200012.24.2000") + >>> root = objectify.fromstring( + ... "24.12.200012.24.2000") >>> root.a.callSanta() Ho ho ho! >>> root.b.callSanta() @@ -625,7 +646,7 @@ If you provide XML Schema type information, this will override the type check function defined above:: - >>> root = objectify.XML('''\ + >>> root = objectify.fromstring('''\ ... ... 12.24.2000 ... @@ -660,7 +681,7 @@ >>> objectify.enableRecursiveStr() - >>> root = objectify.XML(""" + >>> root = objectify.fromstring(""" ... ... 1 ... 
1.2 Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Sat Aug 5 20:43:36 2006 @@ -98,19 +98,40 @@ ################################################################################ # Module level parser setup +cdef object __DEFAULT_PARSER +__DEFAULT_PARSER = etree.XMLParser(remove_blank_text=True) +__DEFAULT_PARSER.setElementClassLookup( ObjectifyElementClassLookup() ) + cdef object parser -parser = etree.XMLParser(remove_blank_text=True) -parser.setElementClassLookup( ObjectifyElementClassLookup() ) +parser = __DEFAULT_PARSER + +def setDefaultParser(new_parser = None): + """Replace the default parser used by objectify's Element() and + fromstring() functions. + + The new parser must be an etree.XMLParser. + + Call without arguments to reset to the original parser. + """ + global parser + if new_parser is None: + parser = __DEFAULT_PARSER + elif isinstance(new_parser, etree.XMLParser): + parser = new_parser + else: + raise TypeError, "parser must inherit from lxml.etree.XMLParser" -cdef object _XML -_XML = etree.XML +cdef object _fromstring +_fromstring = etree.fromstring -def XML(xml): - """Objectify specific version of the lxml.etree XML() factory. +def fromstring(xml): + """Objectify specific version of the lxml.etree fromstring() function. NOTE: requires parser based element class lookup activated in lxml.etree! """ - return _XML(xml, parser) + return _fromstring(xml, parser) + +XML = fromstring cdef object _makeelement _makeelement = parser.makeelement @@ -1358,6 +1379,9 @@ By default, namespace specific element classes override this lookup. Passing False for the ``prefer_nsclasses`` keyword argument will prevent the namespace lookup. + + Note that this is not the preferred way of using the objectify + module. Consider using a parser specific setup instead. 
""" lookup = ObjectifyElementClassLookup() if prefer_nsclasses: From scoder at codespeak.net Sat Aug 5 21:02:09 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 5 Aug 2006 21:02:09 +0200 (CEST) Subject: [Lxml-checkins] r31039 - lxml/branch/capi/doc Message-ID: <20060805190209.907DF10083@code0.codespeak.net> Author: scoder Date: Sat Aug 5 21:02:08 2006 New Revision: 31039 Modified: lxml/branch/capi/doc/objectify.txt Log: small doc fixes Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Sat Aug 5 21:02:08 2006 @@ -42,7 +42,8 @@ register the objectify lookup as a fallback of the namespace lookup. Note, however, that you have to take care in this case, that the namespace classes inherit from ``objectify.ObjectifiedElement``, not only from the normal -``lxml.etree.ElementBase``, so that they support the ``objectify`` API:: +``lxml.etree.ElementBase``, so that they support the ``objectify`` API. The +above setup code then becomes:: >>> lookup = etree.ElementNamespaceClassLookup( ... 
objectify.ObjectifyElementClassLookup() ) @@ -110,8 +111,8 @@ >>> print isinstance(obj_el, objectify.ObjectifiedElement) True - >>> el = objectify.fromstring("") - >>> print isinstance(el, objectify.ObjectifiedElement) + >>> obj_el = objectify.fromstring("") + >>> print isinstance(obj_el, objectify.ObjectifiedElement) True You can change this parser with ``objectify.setDefaultParser(parser)``, which From scoder at codespeak.net Sat Aug 5 21:13:22 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 5 Aug 2006 21:13:22 +0200 (CEST) Subject: [Lxml-checkins] r31041 - lxml/branch/capi/src/lxml Message-ID: <20060805191322.323C610083@code0.codespeak.net> Author: scoder Date: Sat Aug 5 21:13:20 2006 New Revision: 31041 Modified: lxml/branch/capi/src/lxml/apihelpers.pxi Log: cleanup Modified: lxml/branch/capi/src/lxml/apihelpers.pxi ============================================================================== --- lxml/branch/capi/src/lxml/apihelpers.pxi (original) +++ lxml/branch/capi/src/lxml/apihelpers.pxi Sat Aug 5 21:13:20 2006 @@ -97,11 +97,10 @@ """ cdef xmlNode* c_node ns_utf, name_utf = _getNsTag(tag) - if c_doc is NULL: - if doc is None: - c_doc = _newDoc() - else: - c_doc = doc._c_doc + if doc is not None: + c_doc = doc._c_doc + elif c_doc is NULL: + c_doc = _newDoc() c_node = _createElement(c_doc, name_utf) if doc is None: tree.xmlDocSetRootElement(c_doc, c_node) From scoder at codespeak.net Sat Aug 5 21:55:02 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 5 Aug 2006 21:55:02 +0200 (CEST) Subject: [Lxml-checkins] r31042 - lxml/branch/capi/src/lxml Message-ID: <20060805195502.5F6BA10083@code0.codespeak.net> Author: scoder Date: Sat Aug 5 21:55:00 2006 New Revision: 31042 Modified: lxml/branch/capi/src/lxml/parser.pxi Log: copy class lookup scheme when copying parsers Modified: lxml/branch/capi/src/lxml/parser.pxi ============================================================================== --- 
lxml/branch/capi/src/lxml/parser.pxi (original) +++ lxml/branch/capi/src/lxml/parser.pxi Sat Aug 5 21:55:00 2006 @@ -389,6 +389,7 @@ cdef _BaseParser parser parser = self.__class__() parser._parse_options = self._parse_options + parser._class_lookup = self._class_lookup parser.resolvers = self.resolvers._copy() parser._context = _ResolverContext(parser.resolvers) parser._parser_ctxt._private = parser._context From scoder at codespeak.net Sat Aug 5 21:57:26 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 5 Aug 2006 21:57:26 +0200 (CEST) Subject: [Lxml-checkins] r31043 - lxml/branch/capi/doc Message-ID: <20060805195726.7294210083@code0.codespeak.net> Author: scoder Date: Sat Aug 5 21:57:25 2006 New Revision: 31043 Modified: lxml/branch/capi/doc/element_classes.txt Log: note: parser delegation for class lookup is preferred Modified: lxml/branch/capi/doc/element_classes.txt ============================================================================== --- lxml/branch/capi/doc/element_classes.txt (original) +++ lxml/branch/capi/doc/element_classes.txt Sat Aug 5 21:57:25 2006 @@ -82,6 +82,14 @@ support fallback chaining, which allows the next lookup mechanism to take over when the previous one fails to find a class. +For small projects, setting a lookup scheme globally can be satisfactory. To +avoid interfering with other modules, however, it is usually a better idea to +globally register the parser specific scheme, instantiate a dedicated parser +for your module and then register the required lookup scheme only for the +parser. Registering the per-parser lookup can be done repeatedly by many +modules without side effects and the separate parsers will prevent any +interference. + Default class lookup .................... @@ -122,6 +130,50 @@ True +Parser based lookup +................... + +This is the preferred global lookup scheme for lxml.etree in the case where a +more specific element lookup scheme is required. 
It delegates the class +request to the original parser of the current document. If no specific lookup +scheme was registered for that parser, the global lookup simply calls its own +fallback mechanism. + +You can enable the parser delegation as follows:: + + >>> lookup = etree.ParserBasedElementClassLookup() + >>> etree.setElementClassLookup(lookup) + +To specify a different fallback scheme than the default class lookup, you can +pass it in the constructor:: + + >>> fallback = etree.ElementDefaultClassLookup() + >>> lookup = etree.ParserBasedElementClassLookup(fallback) + >>> etree.setElementClassLookup(lookup) + +With such a global setup, you can now set a separate lookup mechanism for each +parser you create, without interfering with other parsers:: + + >>> parser_lookup = etree.ElementDefaultClassLookup(element=HonkElement) + >>> parser = etree.XMLParser() + >>> parser.setElementClassLookup(parser_lookup) + + >>> element = etree.XML("") + >>> print isinstance(element, HonkElement) + False + +Whenever you create a document with this parser, it will inherit the lookup +scheme and all subsequent element instantiations for this document will use +it:: + + >>> element = etree.fromstring("", parser) + >>> print isinstance(element, HonkElement) + True + >>> el = etree.SubElement(element, "subel") + >>> print isinstance(el, HonkElement) + True + + Namespace class lookup ...................... @@ -167,27 +219,6 @@ >>> etree.setElementClassLookup(lookup) -Parser based lookup -................... - -lxml.etree supports a per-parser setup of element lookup schemes. 
You can -enable it as follows:: - - >>> lookup = etree.ParserBasedElementClassLookup() - >>> etree.setElementClassLookup(lookup) - -Now you can set a separate lookup strategy for each parser you create:: - - >>> parser_lookup = etree.ElementDefaultClassLookup(element=HonkElement) - >>> parser = etree.XMLParser() - >>> parser.setElementClassLookup(parser_lookup) - -Whenever you create a document with this parser, its lookup scheme will be -inherited by the document and all subsequent element instantiations for this -document will use it. Note that the parser lookup supports a fallback just -like the previous one. - - Custom element class lookup ........................... From scoder at codespeak.net Sat Aug 5 22:05:18 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 5 Aug 2006 22:05:18 +0200 (CEST) Subject: [Lxml-checkins] r31044 - lxml/branch/capi/doc Message-ID: <20060805200518.E965810083@code0.codespeak.net> Author: scoder Date: Sat Aug 5 22:05:18 2006 New Revision: 31044 Modified: lxml/branch/capi/doc/element_classes.txt Log: clarifications in docs Modified: lxml/branch/capi/doc/element_classes.txt ============================================================================== --- lxml/branch/capi/doc/element_classes.txt (original) +++ lxml/branch/capi/doc/element_classes.txt Sat Aug 5 22:05:18 2006 @@ -203,12 +203,15 @@ This scheme uses a mapping from attribute values to classes. An attribute name is set at initialisation time and is then used to find the corresponding -value. It is selected as follows:: +value. It is set up as follows:: >>> id_class_mapping = {} # maps attribute values to element classes >>> lookup = etree.AttributeBasedElementClassLookup('id', id_class_mapping) >>> etree.setElementClassLookup(lookup) +Instead of a global setup of this scheme, you should consider using a +per-parser setup. + This class uses its fallback if the attribute is not found or its value is not in the mapping. 
Normally, the default class lookup is used here. If you want to use the namespace lookup, for example, you can use this code:: @@ -231,11 +234,13 @@ >>> etree.setElementClassLookup( MyLookup() ) -The ``lookup()`` method is only required to return either None (which triggers -the fallback mechanism) or a subclass of ``lxml.etree.ElementBase``. It can -otherwise take any decision it wants based on the node type (one of "element", -"comment", "PI"), the XML document of the element, or its namespace or tag -name. +The ``lookup()`` method must either return None (which triggers the fallback +mechanism) or a subclass of ``lxml.etree.ElementBase``. It can take any +decision it wants based on the node type (one of "element", "comment", "PI"), +the XML document of the element, or its namespace or tag name. + +Instead of a global setup of this scheme, you should consider using a +per-parser setup. Implementing namespaces From scoder at codespeak.net Sat Aug 5 22:06:49 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 5 Aug 2006 22:06:49 +0200 (CEST) Subject: [Lxml-checkins] r31045 - lxml/branch/capi/doc Message-ID: <20060805200649.32FC610083@code0.codespeak.net> Author: scoder Date: Sat Aug 5 22:06:48 2006 New Revision: 31045 Modified: lxml/branch/capi/doc/element_classes.txt Log: doc fixes Modified: lxml/branch/capi/doc/element_classes.txt ============================================================================== --- lxml/branch/capi/doc/element_classes.txt (original) +++ lxml/branch/capi/doc/element_classes.txt Sat Aug 5 22:06:48 2006 @@ -26,14 +26,13 @@ 1 Element initialization 2 Setting up a class lookup scheme 2.1 Default class lookup - 2.2 Namespace class lookup - 2.3 Attribute based lookup - 2.4 Parser based lookup + 2.2 Parser based lookup + 2.3 Namespace class lookup + 2.4 Attribute based lookup 2.5 Custom element class lookup 3 Implementing namespaces 4 Resetting the class lookup scheme - Element initialization ---------------------- From 
scoder at codespeak.net Sat Aug 5 22:12:10 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 5 Aug 2006 22:12:10 +0200 (CEST) Subject: [Lxml-checkins] r31046 - lxml/branch/capi/doc Message-ID: <20060805201210.7072510083@code0.codespeak.net> Author: scoder Date: Sat Aug 5 22:12:09 2006 New Revision: 31046 Modified: lxml/branch/capi/doc/element_classes.txt Log: clarifications in docs Modified: lxml/branch/capi/doc/element_classes.txt ============================================================================== --- lxml/branch/capi/doc/element_classes.txt (original) +++ lxml/branch/capi/doc/element_classes.txt Sat Aug 5 22:12:09 2006 @@ -353,3 +353,7 @@ >>> el = etree.Element("myelement") >>> print isinstance(el, HonkElement) False + +Be aware, though, that this does not immediately apply to elements to which +there already is a Python reference. Their Python class will only be changed +after all references are gone and the Python object is garbage collected. From scoder at codespeak.net Sat Aug 5 22:13:45 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 5 Aug 2006 22:13:45 +0200 (CEST) Subject: [Lxml-checkins] r31047 - lxml/branch/capi/doc Message-ID: <20060805201345.1645510083@code0.codespeak.net> Author: scoder Date: Sat Aug 5 22:13:44 2006 New Revision: 31047 Modified: lxml/branch/capi/doc/element_classes.txt Log: doc fixes Modified: lxml/branch/capi/doc/element_classes.txt ============================================================================== --- lxml/branch/capi/doc/element_classes.txt (original) +++ lxml/branch/capi/doc/element_classes.txt Sat Aug 5 22:13:44 2006 @@ -339,9 +339,6 @@ ... AttributeError: 'HonkNSElement' object has no attribute 'honking' -Note that you can also combine this with the global default class. Namespace -specific classes will simply override the less specific default. 
- Resetting the class lookup scheme --------------------------------- From scoder at codespeak.net Sun Aug 6 07:19:47 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Aug 2006 07:19:47 +0200 (CEST) Subject: [Lxml-checkins] r31062 - lxml/branch/lxml-1.0/src/lxml/tests Message-ID: <20060806051947.53D8B10074@code0.codespeak.net> Author: scoder Date: Sun Aug 6 07:19:45 2006 New Revision: 31062 Modified: lxml/branch/lxml-1.0/src/lxml/tests/test_xslt.py Log: test case for crash found by John Krukoff Modified: lxml/branch/lxml-1.0/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/branch/lxml-1.0/src/lxml/tests/test_xslt.py (original) +++ lxml/branch/lxml-1.0/src/lxml/tests/test_xslt.py Sun Aug 6 07:19:45 2006 @@ -511,6 +511,31 @@ """)) self.assertRaises(etree.XSLTApplyError, xslt, etree.XML('')) + def test_xslt_move_result(self): + root = etree.XML('''\ + + + ''') + + xslt = etree.XSLT(etree.XML('''\ + + + + + + + + + + + + + ''')) + + result = xslt(root[0]) + root[:] = result.getroot()[:] + del root # segfaulted before + def test_exslt_regexp_test(self): xslt = etree.XSLT(etree.XML("""\ Author: scoder Date: Sun Aug 6 07:23:34 2006 New Revision: 31063 Modified: lxml/branch/lxml-1.0/src/lxml/parser.pxi lxml/branch/lxml-1.0/src/lxml/xslt.pxd lxml/branch/lxml-1.0/src/lxml/xslt.pxi Log: fix for XSLT crash in new test Modified: lxml/branch/lxml-1.0/src/lxml/parser.pxi ============================================================================== --- lxml/branch/lxml-1.0/src/lxml/parser.pxi (original) +++ lxml/branch/lxml-1.0/src/lxml/parser.pxi Sun Aug 6 07:23:34 2006 @@ -25,23 +25,24 @@ if self._c_dict is not NULL: xmlparser.xmlDictFree(self._c_dict) - cdef void _initParserDict(self, xmlParserCtxt* pctxt): + cdef void _initDictRef(self, xmlDict** c_dict_ref): "Assure we always use the same string dictionary." 
- if self._c_dict is NULL or self._c_dict is pctxt.dict: + cdef xmlDict* c_dict + c_dict = c_dict_ref[0] + if self._c_dict is NULL or self._c_dict is c_dict: return - if pctxt.dict is not NULL: - xmlparser.xmlDictFree(pctxt.dict) - pctxt.dict = self._c_dict - xmlparser.xmlDictReference(pctxt.dict) + if c_dict is not NULL: + xmlparser.xmlDictFree(c_dict) + c_dict_ref[0] = self._c_dict + xmlparser.xmlDictReference(self._c_dict) + + cdef void _initParserDict(self, xmlParserCtxt* pctxt): + "Assure we always use the same string dictionary." + self._initDictRef(&pctxt.dict) cdef void _initXPathParserDict(self, xpath.xmlXPathContext* pctxt): "Assure we always use the same string dictionary." - if self._c_dict is NULL or self._c_dict is pctxt.dict: - return - if pctxt.dict is not NULL: - xmlparser.xmlDictFree(pctxt.dict) - pctxt.dict = self._c_dict - xmlparser.xmlDictReference(pctxt.dict) + self._initDictRef(&pctxt.dict) cdef void _initDocDict(self, xmlDoc* result): "Store dict of last object parsed if no shared dict yet" Modified: lxml/branch/lxml-1.0/src/lxml/xslt.pxd ============================================================================== --- lxml/branch/lxml-1.0/src/lxml/xslt.pxd (original) +++ lxml/branch/lxml-1.0/src/lxml/xslt.pxd Sun Aug 6 07:23:34 2006 @@ -19,6 +19,7 @@ xsltStylesheet* style xmlXPathContext* xpathCtxt xsltDocument* document + xmlDict* dict cdef xsltStylesheet* xsltParseStylesheetDoc(xmlDoc* doc) cdef void xsltFreeStylesheet(xsltStylesheet* sheet) Modified: lxml/branch/lxml-1.0/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/lxml-1.0/src/lxml/xslt.pxi (original) +++ lxml/branch/lxml-1.0/src/lxml/xslt.pxi Sun Aug 6 07:23:34 2006 @@ -337,6 +337,8 @@ _destroyFakeDoc(input_doc._c_doc, c_doc) raise XSLTApplyError, "Error preparing stylesheet run" + __GLOBAL_PARSER_CONTEXT._initDictRef(&transform_ctxt.dict) + self._error_log.connect() xslt.xsltSetTransformErrorFunc(transform_ctxt, 
self._error_log, _receiveGenericError) From scoder at codespeak.net Sun Aug 6 07:24:18 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Aug 2006 07:24:18 +0200 (CEST) Subject: [Lxml-checkins] r31064 - lxml/trunk/src/lxml/tests Message-ID: <20060806052418.F205210074@code0.codespeak.net> Author: scoder Date: Sun Aug 6 07:24:17 2006 New Revision: 31064 Modified: lxml/trunk/src/lxml/tests/test_xslt.py Log: test case for crash found by John Krukoff Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Sun Aug 6 07:24:17 2006 @@ -511,6 +511,31 @@ """)) self.assertRaises(etree.XSLTApplyError, xslt, etree.XML('')) + def test_xslt_move_result(self): + root = etree.XML('''\ + + + ''') + + xslt = etree.XSLT(etree.XML('''\ + + + + + + + + + + + + + ''')) + + result = xslt(root[0]) + root[:] = result.getroot()[:] + del root # segfaulted before + def test_exslt_regexp_test(self): xslt = etree.XSLT(etree.XML("""\ Author: scoder Date: Sun Aug 6 07:59:18 2006 New Revision: 31065 Modified: lxml/branch/capi/doc/main.txt Log: fixed doc references in main.txt, link to objectify Modified: lxml/branch/capi/doc/main.txt ============================================================================== --- lxml/branch/capi/doc/main.txt (original) +++ lxml/branch/capi/doc/main.txt Sun Aug 6 07:59:18 2006 @@ -92,10 +92,10 @@ Documentation ------------- -lxml follows the ElementTree_ API as much as possible, building it on top of -the native libxml2 tree. See also the `ElementTree compatibility overview`_ -and the `benchmark results`_ comparing lxml to the original ElementTree_ and -cElementTree_ implementations. +lxml.etree follows the ElementTree_ API as much as possible, building it on +top of the native libxml2 tree. 
See also the `ElementTree compatibility +overview`_ and the `benchmark results`_ comparing lxml to the original +ElementTree_ and cElementTree_ implementations. Right after the ElementTree_ documentation, the most important place to look is the `lxml.etree API documentation`_. It describes how lxml extends the @@ -105,12 +105,15 @@ `extension functions`_. lxml also offers a `SAX compliant API`_, that works with the SAX support in the standard library. +There is a separate module `lxml.objectify`_ that implements a data-binding +API on top of lxml.etree. See the `objectify and etree`_ FAQ entry for a +comparison. + In addition to the ElementTree API, lxml also features a sophisticated API for `custom element classes`_. This is a simple way to write arbitrary XML driven -APIs on top of lxml. Some common XML APIs are implemented in the -`lxml.elements`_ module. As of version 1.1, lxml.etree features a new -`C-level API`_ that can be used to efficiently extend lxml.etree in external C -modules, including custom element class support. +APIs on top of lxml. As of version 1.1, lxml.etree has a new `C-level API`_ +that can be used to efficiently extend lxml.etree in external C modules, +including custom element class support. .. _ElementTree: http://effbot.org/zone/element-index.htm .. _cElementTree: http://effbot.org/zone/celementtree.htm @@ -118,11 +121,12 @@ .. _`benchmark results`: performance.html .. _`ElementTree compatibility overview`: compatibility.html .. _`lxml.etree API documentation`: api.html -.. _`lxml.elements`: elements.html .. _`extension functions`: extensions.html -.. _`custom element classes`: namespace_extensions.html +.. _`custom element classes`: element_classes.html .. _`SAX compliant API`: sax.html .. _`C-level API`: capi.html +.. _`lxml.objectify`: objectify.html +.. _`objectify and etree`: FAQ.html#what-is-the-difference-between-lxml-etree-and-lxml-objectify .. _XPath: http://www.w3.org/TR/xpath .. 
_`Relax NG`: http://www.relaxng.org/ From scoder at codespeak.net Sun Aug 6 08:01:20 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Aug 2006 08:01:20 +0200 (CEST) Subject: [Lxml-checkins] r31066 - lxml/branch/capi/doc Message-ID: <20060806060120.905C610074@code0.codespeak.net> Author: scoder Date: Sun Aug 6 08:01:03 2006 New Revision: 31066 Modified: lxml/branch/capi/doc/FAQ.txt Log: FAQ entry on etree/objectify, restructured FAQ to use headings instead of enumeration Modified: lxml/branch/capi/doc/FAQ.txt ============================================================================== --- lxml/branch/capi/doc/FAQ.txt (original) +++ lxml/branch/capi/doc/FAQ.txt Sun Aug 6 08:01:03 2006 @@ -6,210 +6,267 @@ .. _compatibility: compatibility.html .. _ElementTree: http://effbot.org/zone/element-index.htm - -#) Is there a tutorial? - - There is a `tutorial for ElementTree`_ which also works for lxml.etree. - The `API documentation`_ also contains many examples. - - .. _`tutorial for ElementTree`: http://effbot.org/zone/element.htm - .. _`API documentation`: api.html - - -#) Where can I find more documentation about lxml? - - There is a lot of documentation as lxml implements the well-known - `ElementTree API`_ and tries to follow its documentation as closely as - possible. There are a couple of issues where lxml cannot keep up - compatibility. They are described in the compatibility_ documentation. - The lxml specific extensions to the API are described by individual files - in the ``doc`` directory of the distribution and on `the web page`_. - - .. _`ElementTree API`: http://effbot.org/zone/element-index.htm - .. _`the web page`: http://codespeak.net/lxml/#documentation - - -#) My application crashes! Why does lxml.etree do that? - - One of the goals of lxml is "no segfaults", so if there is no clear warning - in the documentation that you were doing something potentially harmful, you - have found a bug and we would like to hear about it. 
Please report this - bug to the mailing list. See the next section on how to do that. - - -#) I think I have found a bug in lxml. What should I do? - - a) First, you should look at the `current developer changelog`_ to see if - this is a known problem that has already been fixed in the SVN trunk. - - .. _`current developer changelog`: http://codespeak.net/svn/lxml/trunk/CHANGES.txt - - b) If you are using threads, please see the following section to check if - you touch on one of the potential pitfalls. - - c) Otherwise, we would really like to hear about it. Please report it to - the `mailing list`_ so that we can fix it. It is very helpful in this - case if you can come up with a short code snippet that demonstrates your - problem. Please also report the version of lxml, libxml2 and libxslt - that you are using by calling this:: - - from lxml import etree - print "lxml.etree: ", etree.LXML_VERSION - print "libxml used: ", etree.LIBXML_VERSION - print "libxml compiled: ", etree.LIBXML_COMPILED_VERSION - print "libxslt used: ", etree.LIBXSLT_VERSION - print "libxslt compiled: ", etree.LIBXSLT_COMPILED_VERSION - - .. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev - - -#) Can I use threads to concurrently access the lxml API? - - Yes, although not carelessly. - - lxml frees the GIL (Python's global interpreter lock) internally when - parsing from disk and memory, as long as you use either the default parser - (which is replicated for each thread) or create a parser for each thread - yourself. lxml also allows concurrency during validation (RelaxNG and - XMLSchema) and XSL transformation. You can share RelaxNG, XMLSchema and - XSLT objects between threads. While you can also share parsers between - threads, this will serialize the access to each of them, so it is better to - copy() parsers or to use the default parser. Note that access to the XML() - and HTML() functions is always serialized. If you need to parse from - strings, use StringIO. 
- - Warning: You should generally avoid modifying trees in other threads than - the one it was generated in. Although this should work in many cases, - there are certain scenarios where the termination of a thread that parsed a - tree can crash the application if subtrees of this tree are moved to other - documents. You should be on the safe side when passing trees between - threads if you either - - a) do not modify these trees and do not move its elements to other trees, or - b) do not terminate threads while the trees they parsed are still in use - - -#) Why doesn't the ``pretty_print`` option reformat my XML output? - - Pretty printing (or formatting) an XML document means adding white space to - the content. These modifications are harmless if they only impact elements - in the document that do not carry (text) data. They corrupt your data if - they impact elements that contain data. If lxml cannot distinguish between - whitespace and data, it will not alter your data. Whitespace is therefore - only added between nodes that do not contain data. This is always the case - for trees constructed element-by-element, so no problems should be expected - here. For parsed trees, a good way to assure that no conflicting - whitespace is left in the tree is the ``remove_blank_text`` option:: +.. contents:: +.. + 1 Is there a tutorial? + 2 Where can I find more documentation about lxml? + 3 My application crashes! Why does lxml.etree do that? + 4 I think I have found a bug in lxml. What should I do? + 5 Can I use threads to concurrently access the lxml API? + 6 What is the difference between lxml.etree and lxml.objectify? + 7 Why doesn't the ``pretty_print`` option reformat my XML output? + 8 Why can't lxml parse my XML from unicode strings? + 9 How can I find out which namespace prefixes are used in a document? + 10 How can I specify a default namespace for XPath expressions? + 11 What are the ``findall()`` and ``xpath()`` methods on Element(Tree)? 
+ 12 Why doesn't ``findall()`` support full XPath expressions? + 13 What is the difference between str(xslt(doc)) and xslt(doc).write() ? + 14 Why is my application so slow? + + +Is there a tutorial? +-------------------- + +There is a `tutorial for ElementTree`_ which also works for lxml.etree. The +`API documentation`_ also contains many examples. + +.. _`tutorial for ElementTree`: http://effbot.org/zone/element.htm +.. _`API documentation`: api.html + + +Where can I find more documentation about lxml? +----------------------------------------------- + +There is a lot of documentation as lxml implements the well-known `ElementTree +API`_ and tries to follow its documentation as closely as possible. There are +a couple of issues where lxml cannot keep up compatibility. They are +described in the compatibility_ documentation. The lxml specific extensions +to the API are described by individual files in the ``doc`` directory of the +distribution and on `the web page`_. + +.. _`ElementTree API`: http://effbot.org/zone/element-index.htm +.. _`the web page`: http://codespeak.net/lxml/#documentation + + +My application crashes! Why does lxml.etree do that? +---------------------------------------------------- + +One of the goals of lxml is "no segfaults", so if there is no clear warning in +the documentation that you were doing something potentially harmful, you have +found a bug and we would like to hear about it. Please report this bug to the +mailing list. See the next section on how to do that. + + +I think I have found a bug in lxml. What should I do? +----------------------------------------------------- + +a) First, you should look at the `current developer changelog`_ to see if this + is a known problem that has already been fixed in the SVN trunk. + + .. _`current developer changelog`: http://codespeak.net/svn/lxml/trunk/CHANGES.txt + +b) If you are using threads, please see the following section to check if + you touch on one of the potential pitfalls. 
+ +c) Otherwise, we would really like to hear about it. Please report it to the + `mailing list`_ so that we can fix it. It is very helpful in this case if + you can come up with a short code snippet that demonstrates your problem. + Please also report the version of lxml, libxml2 and libxslt that you are + using by calling this:: + + from lxml import etree + print "lxml.etree: ", etree.LXML_VERSION + print "libxml used: ", etree.LIBXML_VERSION + print "libxml compiled: ", etree.LIBXML_COMPILED_VERSION + print "libxslt used: ", etree.LIBXSLT_VERSION + print "libxslt compiled: ", etree.LIBXSLT_COMPILED_VERSION + + .. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev + + +Can I use threads to concurrently access the lxml API? +------------------------------------------------------ + +Yes, although not carelessly. + +lxml frees the GIL (Python's global interpreter lock) internally when parsing +from disk and memory, as long as you use either the default parser (which is +replicated for each thread) or create a parser for each thread yourself. lxml +also allows concurrency during validation (RelaxNG and XMLSchema) and XSL +transformation. You can share RelaxNG, XMLSchema and XSLT objects between +threads. While you can also share parsers between threads, this will +serialize the access to each of them, so it is better to copy() parsers or to +use the default parser. Note that access to the XML() and HTML() functions is +always serialized. If you need to parse concurrently from strings, use +StringIO. + +Warning: You should generally avoid modifying trees in other threads than the +one it was generated in. Although this should work in many cases, there are +certain scenarios where the termination of a thread that parsed a tree can +crash the application if subtrees of this tree are moved to other documents. 
+You should be on the safe side when passing trees between threads if you +either + +a) do not modify these trees and do not move its elements to other trees, or +b) do not terminate threads while the trees they parsed are still in use + (e.g. by using a fixed size thread-pool or long-running threads in + processing chains) + + +What is the difference between lxml.etree and lxml.objectify? +------------------------------------------------------------- + +The two modules provide different ways of handling XML. However, objectify +builds on top of lxml.etree and therefore inherits most of its capabilities +and the major part of its API. + +* lxml.etree is a generic API for XML and HTML handling. It is `mostly + ElementTree compatible`_ and supports the entire XML infoset. It is well + suited for both mixed content and data centric XML. Its generality makes it + the best choice for most applications. + +* lxml.objectify is a specialized API for XML data handling in a Python object + syntax. It provides a very natural way to deal with data fields stored in a + structurally well defined XML format. Data fields are automatically + converted to Python data types and can be manipulated with normal Python + operators. Look at the examples in the `objectify documentation`_ to see + what it feels like to use it. + + Objectify is not well suited for mixed contents or HTML documents. As it is + built on top of lxml.etree, however, it inherits the normal support for + XPath, XSLT or validation. + +.. _`mostly ElementTree compatible`: compatibility.html +.. _`objectify documentation`: objectify.html + + +Why doesn't the ``pretty_print`` option reformat my XML output? +--------------------------------------------------------------- + +Pretty printing (or formatting) an XML document means adding white space to +the content. These modifications are harmless if they only impact elements in +the document that do not carry (text) data. 
They corrupt your data if they +impact elements that contain data. If lxml cannot distinguish between +whitespace and data, it will not alter your data. Whitespace is therefore +only added between nodes that do not contain data. This is always the case +for trees constructed element-by-element, so no problems should be expected +here. For parsed trees, a good way to assure that no conflicting whitespace +is left in the tree is the ``remove_blank_text`` option:: >>> parser = etree.XMLParser(remove_blank_text=True) >>> tree = etree.parse(file, parser) - This will allow the parser to drop blank text nodes when constructing the - tree. If you now call a serialization function to pretty print this tree, - lxml can add fresh whitespace to the XML tree to indent it. +This will allow the parser to drop blank text nodes when constructing the +tree. If you now call a serialization function to pretty print this tree, +lxml can add fresh whitespace to the XML tree to indent it. -#) Why can't lxml parse my XML from unicode strings? +Why can't lxml parse my XML from unicode strings? +------------------------------------------------- - lxml can read Python unicode strings and even tries to support them if - libxml2 does not. However, if the unicode string declares an XML encoding - internally (``<?xml encoding="..."?>``), parsing is bound to fail, as this - encoding is most likely not the real encoding used in Python unicode. The - same is true for HTML unicode strings that contain charset meta tags. Note - that Python uses different encodings for unicode on different platforms, so - even specifying the real internal unicode encoding is not portable between - Python interpreters. Don't do it. +lxml can read Python unicode strings and even tries to support them if libxml2 +does not. However, if the unicode string declares an XML encoding internally +(``<?xml encoding="..."?>``), parsing is bound to fail, as this encoding is +most likely not the real encoding used in Python unicode. 
The same is true +for HTML unicode strings that contain charset meta tags. Note that Python +uses different encodings for unicode on different platforms, so even +specifying the real internal unicode encoding is not portable between Python +interpreters. Don't do it. - Python unicode strings with XML data or HTML data that carry encoding - information are broken. lxml will not parse them. You must provide - parsable data in a valid encoding. +Python unicode strings with XML data or HTML data that carry encoding +information are broken. lxml will not parse them. You must provide parsable +data in a valid encoding. -#) How can I find out which namespace prefixes are used in a document? +How can I find out which namespace prefixes are used in a document? +------------------------------------------------------------------- - You can traverse the document (``getiterator()``) and collect the prefix - attributes from all Elements into a set. However, it is unlikely that you - really want to do that. You do not need these prefixes, honestly. You - only need the namespace URIs. All namespace comparisons use these, so feel - free to make up your own prefixes when you use XPath expressions or - extension functions. +You can traverse the document (``getiterator()``) and collect the prefix +attributes from all Elements into a set. However, it is unlikely that you +really want to do that. You do not need these prefixes, honestly. You only +need the namespace URIs. All namespace comparisons use these, so feel free to +make up your own prefixes when you use XPath expressions or extension +functions. - The only place where you might consider specifying prefixes is the - serialization of Elements that were created through the API. Here, you can - specify a prefix mapping through the ``nsmap`` argument when creating the - root Element. Its children will then inherit this prefix for - serialization. 
+The only place where you might consider specifying prefixes is the +serialization of Elements that were created through the API. Here, you can +specify a prefix mapping through the ``nsmap`` argument when creating the root +Element. Its children will then inherit this prefix for serialization. -#) How can I specify a default namespace for XPath expressions? +How can I specify a default namespace for XPath expressions? +------------------------------------------------------------ - You can't. In XPath, there is no such thing as a default namespace. Just - use an arbitrary prefix and let the namespace dictionary of the XPath - evaluators map it to your namespace. See also the question above. +You can't. In XPath, there is no such thing as a default namespace. Just use +an arbitrary prefix and let the namespace dictionary of the XPath evaluators +map it to your namespace. See also the question above. -#) What are the ``findall()`` and ``xpath()`` methods on Element(Tree)? +What are the ``findall()`` and ``xpath()`` methods on Element(Tree)? +-------------------------------------------------------------------- - ``findall()`` is part of the original `ElementTree API`_. It supports a - `simple subset of the XPath language`_, without predicates, conditions and - other advanced features. It is very handy for finding specific tags in a - tree. Another important difference is namespace handling, which uses the - ``{namespace}tagname`` notation. This is not supported by XPath. The - findall, find and findtext methods are compatible with other ElementTree - implementations and allow writing portable code that runs on ElementTree, - cElementTree and lxml.etree. +``findall()`` is part of the original `ElementTree API`_. It supports a +`simple subset of the XPath language`_, without predicates, conditions and +other advanced features. It is very handy for finding specific tags in a +tree. 
Another important difference is namespace handling, which uses the +``{namespace}tagname`` notation. This is not supported by XPath. The +findall, find and findtext methods are compatible with other ElementTree +implementations and allow writing portable code that runs on ElementTree, +cElementTree and lxml.etree. - ``xpath()``, on the other hand, supports the complete power of the XPath - language, including predicates, XPath functions and Python extension - functions. The syntax is defined by the `XPath specification`_. If you - need the expressiveness and selectivity of XPath, the ``xpath()`` method, - the ``XPath`` class and the ``XPathEvaluator`` are the best choice_. +``xpath()``, on the other hand, supports the complete power of the XPath +language, including predicates, XPath functions and Python extension +functions. The syntax is defined by the `XPath specification`_. If you need +the expressiveness and selectivity of XPath, the ``xpath()`` method, the +``XPath`` class and the ``XPathEvaluator`` are the best choice_. - .. _`simple subset of the XPath language`: http://effbot.org/zone/element-xpath.htm - .. _`XPath specification`: http://www.w3.org/TR/xpath - .. _choice: performance.html#xpath +.. _`simple subset of the XPath language`: http://effbot.org/zone/element-xpath.htm +.. _`XPath specification`: http://www.w3.org/TR/xpath +.. _choice: performance.html#xpath -#) Why doesn't ``findall()`` support full XPath expressions? +Why doesn't ``findall()`` support full XPath expressions? +--------------------------------------------------------- - It was decided that it is more important to keep compatibility with - ElementTree_ to simplify code migration between the libraries. The main - difference compared to XPath is the ``{namespace}tagname`` notation used in - ``findall()``, which is not valid XPath. +It was decided that it is more important to keep compatibility with +ElementTree_ to simplify code migration between the libraries. 
The main +difference compared to XPath is the ``{namespace}tagname`` notation used in +``findall()``, which is not valid XPath. - ElementTree and lxml.etree use the same implementation, which assures 100% - compatibility. Note that ``findall()`` is `so fast`_ in lxml that a native - implementation would not bring any performance benefits. +ElementTree and lxml.etree use the same implementation, which assures 100% +compatibility. Note that ``findall()`` is `so fast`_ in lxml that a native +implementation would not bring any performance benefits. - .. _`so fast`: performance.html#tree-traversal +.. _`so fast`: performance.html#tree-traversal -#) What is the difference between str(xslt(doc)) and xslt(doc).write() ? +What is the difference between str(xslt(doc)) and xslt(doc).write() ? +--------------------------------------------------------------------- - The str() implementation of the XSLTResultTree class (a subclass of - ElementTree) knows about the output method chosen in the stylesheet - (xsl:output), write() doesn't. If you call write(), the result will be a - normal XML tree serialization in the requested encoding. Calling this - method may also fail for XSLT results that are not XML trees (e.g. string - results). +The str() implementation of the XSLTResultTree class (a subclass of +ElementTree) knows about the output method chosen in the stylesheet +(xsl:output), write() doesn't. If you call write(), the result will be a +normal XML tree serialization in the requested encoding. Calling this method +may also fail for XSLT results that are not XML trees (e.g. string results). - If you call str(), it will return the serialized result as specified by the - XSL transform. This correctly serializes string results to encoded Python - strings and honours ``xsl:output`` options like ``indent``. 
This almost - certainly does what you want, so you should only use ``write()`` if you are - sure that the XSLT result is an XML tree and you want to override the - encoding and indentation options requested by the stylesheet. +If you call str(), it will return the serialized result as specified by the +XSL transform. This correctly serializes string results to encoded Python +strings and honours ``xsl:output`` options like ``indent``. This almost +certainly does what you want, so you should only use ``write()`` if you are +sure that the XSLT result is an XML tree and you want to override the encoding +and indentation options requested by the stylesheet. -#) Why is my application so slow? +Why is my application so slow? +------------------------------ - lxml.etree is a very fast library for processing XML. There are, however, - `a few caveats`_ involved in the mapping of the powerful libxml2 library to - the simple and convenient ElementTree API. Not all operations are as fast - as the simplicity of the API might suggest. The `benchmark page`_ has a - comparison to other ElementTree implementations and a number of tips for - performance tweaking. +lxml.etree is a very fast library for processing XML. There are, however, `a +few caveats`_ involved in the mapping of the powerful libxml2 library to the +simple and convenient ElementTree API. Not all operations are as fast as the +simplicity of the API might suggest. The `benchmark page`_ has a comparison +to other ElementTree implementations and a number of tips for performance +tweaking. - .. _`a few caveats`: performance.html#the-elementtree-api - .. _`benchmark page`: performance.html +.. _`a few caveats`: performance.html#the-elementtree-api +.. 
_`benchmark page`: performance.html From scoder at codespeak.net Sun Aug 6 08:38:35 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Aug 2006 08:38:35 +0200 (CEST) Subject: [Lxml-checkins] r31067 - lxml/branch/capi/doc Message-ID: <20060806063835.B9A0510074@code0.codespeak.net> Author: scoder Date: Sun Aug 6 08:38:34 2006 New Revision: 31067 Modified: lxml/branch/capi/doc/FAQ.txt Log: doc fixes Modified: lxml/branch/capi/doc/FAQ.txt ============================================================================== --- lxml/branch/capi/doc/FAQ.txt (original) +++ lxml/branch/capi/doc/FAQ.txt Sun Aug 6 08:38:34 2006 @@ -118,7 +118,7 @@ The two modules provide different ways of handling XML. However, objectify builds on top of lxml.etree and therefore inherits most of its capabilities -and the major part of its API. +and a large portion of its API. * lxml.etree is a generic API for XML and HTML handling. It is `mostly ElementTree compatible`_ and supports the entire XML infoset. It is well @@ -127,10 +127,10 @@ * lxml.objectify is a specialized API for XML data handling in a Python object syntax. It provides a very natural way to deal with data fields stored in a - structurally well defined XML format. Data fields are automatically - converted to Python data types and can be manipulated with normal Python - operators. Look at the examples in the `objectify documentation`_ to see - what it feels like to use it. + structurally well defined XML format. Data is automatically converted to + Python data types and can be manipulated with normal Python operators. Look + at the examples in the `objectify documentation`_ to see what it feels like + to use it. Objectify is not well suited for mixed contents or HTML documents. 
As it is built on top of lxml.etree, however, it inherits the normal support for From scoder at codespeak.net Sun Aug 6 08:45:41 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Aug 2006 08:45:41 +0200 (CEST) Subject: [Lxml-checkins] r31068 - lxml/branch/capi/src/lxml Message-ID: <20060806064541.29E4210074@code0.codespeak.net> Author: scoder Date: Sun Aug 6 08:45:39 2006 New Revision: 31068 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: misspelled function name Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Sun Aug 6 08:45:39 2006 @@ -404,12 +404,12 @@ else: return default - def descendentpaths(self, prefix=None): - """Returns a list of object path expressions for all descendents. + def descendantpaths(self, prefix=None): + """Returns a list of object path expressions for all descendants. """ if prefix is not None and not python._isString(prefix): prefix = '.'.join(prefix) - return _buildDescendentPaths(self._c_node, prefix) + return _buildDescendantPaths(self._c_node, prefix) cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node, char* href, char* name, @@ -1241,8 +1241,8 @@ cetree.namespacedNameFromNsName(c_href, c_path)) c_node = child._c_node -cdef _buildDescendentPaths(tree.xmlNode* c_node, prefix_string): - """Returns a list of all descendent paths. +cdef _buildDescendantPaths(tree.xmlNode* c_node, prefix_string): + """Returns a list of all descendant paths. 
""" tag = cetree.namespacedName(c_node) if prefix_string: @@ -1253,11 +1253,11 @@ prefix_string = tag path = [prefix_string] path_list = [] - _recursiveBuildDescendentPaths(c_node, path, path_list) + _recursiveBuildDescendantPaths(c_node, path, path_list) return path_list -cdef _recursiveBuildDescendentPaths(tree.xmlNode* c_node, path, path_list): - """Fills the list 'path_list' with all descendent paths, initial prefix +cdef _recursiveBuildDescendantPaths(tree.xmlNode* c_node, path, path_list): + """Fills the list 'path_list' with all descendant paths, initial prefix being in the list 'path'. """ cdef python.PyObject* dict_result @@ -1280,7 +1280,7 @@ tag = tag + '[%d]' % count python.PyDict_SetItem(tags, tag, count) python.PyList_Append(path, tag) - _recursiveBuildDescendentPaths(c_child, path, path_list) + _recursiveBuildDescendantPaths(c_child, path, path_list) del path[-1] c_child = cetree.nextElement(c_child) From scoder at codespeak.net Sun Aug 6 09:04:22 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Aug 2006 09:04:22 +0200 (CEST) Subject: [Lxml-checkins] r31069 - lxml/branch/capi/doc Message-ID: <20060806070422.CDEEC10074@code0.codespeak.net> Author: scoder Date: Sun Aug 6 09:04:21 2006 New Revision: 31069 Modified: lxml/branch/capi/doc/objectify.txt Log: split long into in objectify.txt Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Sun Aug 6 09:04:21 2006 @@ -9,6 +9,23 @@ .. _Amara: http://uche.ogbuji.net/tech/4suite/amara/ +.. contents:: +.. + 1 Setting up lxml.objectify + 2 Creating objectify trees + 3 Element access through object attributes + 4 Namespace handling + 5 ObjectPath + 6 Python data types + 7 Defining additional data classes + 8 Recursive string representation of elements + 9 What is different from ElementTree? 
+ 10 Resetting the API + + +Setting up lxml.objectify +------------------------- + To make use of ``objectify``, you need both the ``lxml.etree`` module and ``lxml.objectify``:: @@ -52,19 +69,6 @@ .. _`namespace specific classes`: element_classes.html#namespace-class-lookup -.. contents:: -.. - 1 Creating objectify trees - 2 Element access through object attributes - 3 Namespace handling - 4 ObjectPath - 5 Python data types - 6 Defining additional data classes - 7 Recursive string representation of elements - 8 What is different from ElementTree? - 9 Resetting the API - - Creating objectify trees ------------------------ From scoder at codespeak.net Sun Aug 6 09:07:23 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Aug 2006 09:07:23 +0200 (CEST) Subject: [Lxml-checkins] r31070 - lxml/branch/capi/doc Message-ID: <20060806070723.F247610074@code0.codespeak.net> Author: scoder Date: Sun Aug 6 09:07:22 2006 New Revision: 31070 Modified: lxml/branch/capi/doc/objectify.txt Log: clarification in docs Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Sun Aug 6 09:07:22 2006 @@ -735,9 +735,10 @@ As the objectify setup is local to a parser, it does not interfere with the rest of lxml. However, if you stop using the parser you registered -``objectify`` for, you might also want to reset the global class lookup -mechanism back to the default one, to disable the per-parser lookup. This is -easily achieved by calling the setup function without arguments:: +``objectify`` for, and you can make sure no other module is still using the +parser delegation, you can set the global class lookup mechanism back to the +default one, to disable the per-parser lookup. 
This is easily achieved by +calling the setup function without arguments:: >>> etree.setElementClassLookup() From scoder at codespeak.net Sun Aug 6 10:27:48 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Aug 2006 10:27:48 +0200 (CEST) Subject: [Lxml-checkins] r31071 - in lxml/branch/capi: doc src/lxml src/lxml/tests Message-ID: <20060806082748.50CD11006E@code0.codespeak.net> Author: scoder Date: Sun Aug 6 10:27:44 2006 New Revision: 31071 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: support indexing in ObjectPath.setattr(), major code cleanup in objectify.pyx, fixed child traversal in various places by ignoring non-element children Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Sun Aug 6 10:27:44 2006 @@ -376,13 +376,15 @@ >>> print root.some.child["{other}unknown"].text my value -Note, however, that indexing is not supported in this context:: +Note, however, that indexing is only supported in this context if the children +exist. Indexing of non existing children will not extend or create a list of +such children but raise an exception:: >>> path = objectify.ObjectPath(".some[1].child.{other}unknown") >>> path.setattr(root, "my value") Traceback (most recent call last): ... - TypeError: this operation does not support indexed paths + ValueError: creating indexed path attributes is not supported It is worth noting that ObjectPath does not depend on the ``objectify`` module or the ObjectifiedElement implementation. 
It can also be used in combination Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Sun Aug 6 10:27:44 2006 @@ -1035,11 +1035,8 @@ """Set the value of the target element in a subtree. If any of the children on the path does not exist, it is created. - Note that paths containing indexed attributes are not supported. """ - if self._indexes is not None: - raise TypeError, "this operation does not support indexed paths" - _createObjectPath(root, self._path_cstr, value) + _createObjectPath(root, self._path_cstr, self._indexes, value) cdef object __SPLIT_PATH __SPLIT_PATH = re.compile( @@ -1180,13 +1177,8 @@ if index is not None: c_index = python.PyInt_AsSsize_t(index) - c_node = cetree.findChildForwards(c_node, 0) - while c_node is not NULL: - if cetree.tagMatches(c_node, c_href, c_path): - if c_index <= 0: - break - c_index = c_index - 1 - c_node = cetree.nextElement(c_node) + c_node = _findFollowingSibling(c_node.children, + c_href, c_path, c_index) if use_default: return default_value @@ -1194,7 +1186,7 @@ tag = cetree.namespacedNameFromNsName(c_href, c_path) raise AttributeError, "no such child: " + tag -cdef _createObjectPath(_Element root, char* c_path, value): +cdef _createObjectPath(_Element root, char* c_path, index_list, value): """Follow the path to find the target element, build the missing children as needed and replace the target element by 'value'. 
""" @@ -1202,6 +1194,7 @@ cdef tree.xmlNode* c_node cdef tree.xmlNode* c_child cdef char* c_href + cdef Py_ssize_t c_index_pos, c_index c_node = root._c_node if c_path[0] != c'\0': c_href = c_path @@ -1214,6 +1207,8 @@ raise ValueError, "root element does not match: need %s, got %s" % \ (cetree.namespacedNameFromNsName(c_href, c_path), root.tag) + if index_list is not None: + c_index_pos = 1 while c_node is not NULL: while c_path[0] != c'\0': c_path = c_path + 1 @@ -1229,12 +1224,21 @@ return c_path = c_path + 1 - c_child = cetree.findChildForwards(c_node, 0) - while c_child is not NULL and \ - not cetree.tagMatches(c_child, c_href, c_path): - c_child = cetree.nextElement(c_child) + c_index = 0 + if index_list is not None: + index = python.PyList_GET_ITEM(index_list, c_index_pos) + python.Py_INCREF(index) + c_index_pos = c_index_pos + 1 + if index is not None: + c_index = python.PyInt_AsSsize_t(index) + + c_child = _findFollowingSibling(c_node.children, + c_href, c_path, c_index) if c_child is not NULL: c_node = c_child + elif c_index > 0: + raise ValueError, \ + "creating indexed path attributes is not supported" else: child = SubElement( cetree.elementFactory(root._doc, c_node), @@ -1268,6 +1272,10 @@ c_href = tree._getNs(c_node) c_child = cetree.findChildForwards(c_node, 0) while c_child is not NULL: + while c_child.type != tree.XML_ELEMENT_NODE: + c_child = c_child.next + if c_child is NULL: + return if c_href is tree._getNs(c_child): tag = c_child.name else: @@ -1277,12 +1285,14 @@ count = 0 else: count = (dict_result) + 1 - tag = tag + '[%d]' % count python.PyDict_SetItem(tags, tag, count) + if count > 0: + tag = tag + '[%d]' % count python.PyList_Append(path, tag) _recursiveBuildDescendantPaths(c_child, path, path_list) del path[-1] - c_child = cetree.nextElement(c_child) + c_child = c_child.next + ################################################################################ # Type annotations @@ -1343,7 +1353,7 @@ if pytype is None: # try to guess type - 
if not cetree.findChildForwards(c_node, 0): + if cetree.findChildForwards(c_node, 0) is NULL: # element has no children => data class if value is None: value = textOf(c_node) Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Sun Aug 6 10:27:44 2006 @@ -20,7 +20,8 @@ 0 1 - 2 + 2 + 3 ''' @@ -50,7 +51,7 @@ def test_child_getattr(self): root = self.etree.XML(xml_str) self.assertEquals("0", getattr(root.c1, "{objectified}c2").text) - self.assertEquals("2", getattr(root.c1, "{otherNS}c2").text) + self.assertEquals("3", getattr(root.c1, "{otherNS}c2").text) def test_child_nonexistant(self): root = self.etree.XML(xml_str) @@ -61,13 +62,14 @@ root = self.etree.XML(xml_str) self.assertEquals("0", root.c1.c2[0].text) self.assertEquals("1", root.c1.c2[1].text) - self.assertRaises(IndexError, operator.itemgetter(2), root.c1.c2) + self.assertEquals("2", root.c1.c2[2].text) + self.assertRaises(IndexError, operator.itemgetter(3), root.c1.c2) def test_child_len(self): root = self.etree.XML(xml_str) self.assertEquals(1, len(root)) self.assertEquals(1, len(root.c1)) - self.assertEquals(2, len(root.c1.c2)) + self.assertEquals(3, len(root.c1.c2)) def test_child_iter(self): root = self.etree.XML(xml_str) @@ -75,7 +77,7 @@ list(iter(root))) self.assertEquals([root.c1], list(iter(root.c1))) - self.assertEquals([root.c1.c2[0], root.c1.c2[1]], + self.assertEquals([root.c1.c2[0], root.c1.c2[1], root.c1.c2[2]], list(iter((root.c1.c2)))) def test_class_lookup(self): @@ -422,6 +424,8 @@ path = objectify.ObjectPath( "root.c1.c2[1]" ) self.assert_(path.hasattr(root)) path = objectify.ObjectPath( "root.c1.c2[2]" ) + self.assert_(path.hasattr(root)) + path = objectify.ObjectPath( "root.c1.c2[3]" ) self.assertFalse(path.hasattr(root)) path = objectify.ObjectPath( "root.c1[1].c2" ) 
self.assertFalse(path.hasattr(root)) @@ -447,19 +451,19 @@ path = objectify.ObjectPath( "root.c1[0].c2[1]" ) self.assertEquals(root.c1.c2[1].text, path(root).text) - path = objectify.ObjectPath( "root.c1.c2[1]" ) - self.assertEquals(root.c1.c2[1].text, path(root).text) + path = objectify.ObjectPath( "root.c1.c2[2]" ) + self.assertEquals(root.c1.c2[2].text, path(root).text) def test_object_path_index_list(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath( ['root', 'c1[0]', 'c2[0]'] ) self.assertEquals(root.c1.c2.text, path(root).text) - path = objectify.ObjectPath( ['root', 'c1[0]', 'c2[1]'] ) - self.assertEquals(root.c1.c2[1].text, path(root).text) + path = objectify.ObjectPath( ['root', 'c1[0]', 'c2[2]'] ) + self.assertEquals(root.c1.c2[2].text, path(root).text) - path = objectify.ObjectPath( ['root', 'c1', 'c2[1]'] ) - self.assertEquals(root.c1.c2[1].text, path(root).text) + path = objectify.ObjectPath( ['root', 'c1', 'c2[2]'] ) + self.assertEquals(root.c1.c2[2].text, path(root).text) def test_object_path_index_fail_parse(self): self.assertRaises(ValueError, objectify.ObjectPath, @@ -468,6 +472,11 @@ ['root', 'c1[0]', 'c2[-1]']) self.assertRaises(ValueError, objectify.ObjectPath, + "root[2].c1.c2") + self.assertRaises(ValueError, objectify.ObjectPath, + ['root[2]', 'c1', 'c2']) + + self.assertRaises(ValueError, objectify.ObjectPath, "root.c1[-1].c2") self.assertRaises(ValueError, objectify.ObjectPath, ['root', 'c1[-1]', 'c2']) @@ -512,8 +521,12 @@ self.assertEquals(root.c1.c2.text, path.find(root).text) path = objectify.ObjectPath( ['root', '{objectified}c1', '{objectified}c2'] ) self.assertEquals(root.c1.c2.text, path.find(root).text) + path = objectify.ObjectPath( ['root', '{objectified}c1', '{objectified}c2[2]'] ) + self.assertEquals(root.c1.c2[2].text, path.find(root).text) path = objectify.ObjectPath( ['root', 'c1', '{objectified}c2'] ) self.assertEquals(root.c1.c2.text, path.find(root).text) + path = objectify.ObjectPath( ['root', 'c1', 
'{objectified}c2[2]'] ) + self.assertEquals(root.c1.c2[2].text, path.find(root).text) path = objectify.ObjectPath( ['root', 'c1', '{otherNS}c2'] ) self.assertEquals(getattr(root.c1, '{otherNS}c2').text, path.find(root).text) @@ -540,27 +553,30 @@ self.assertEquals(new_value, root.c1.c99.c126.honk) self.assertEquals(new_value, path(root).text) - def test_descendent_paths(self): + def test_descendant_paths(self): root = self.etree.XML(xml_str) self.assertEquals( - ['{objectified}root', '{objectified}root.c1', - '{objectified}root.c1.c2', '{objectified}root.c1.c2[1]', + ['{objectified}root', '{objectified}root.c1', + '{objectified}root.c1.c2', + '{objectified}root.c1.c2[1]', '{objectified}root.c1.c2[2]', '{objectified}root.c1.{otherNS}c2'], - root.descendentpaths()) + root.descendantpaths()) - def test_descendent_paths_child(self): + def test_descendant_paths_child(self): root = self.etree.XML(xml_str) self.assertEquals( - ['{objectified}c1', '{objectified}c1.c2', '{objectified}c1.c2[1]', + ['{objectified}c1', '{objectified}c1.c2', + '{objectified}c1.c2[1]', '{objectified}c1.c2[2]', '{objectified}c1.{otherNS}c2'], - root.c1.descendentpaths()) + root.c1.descendantpaths()) - def test_descendent_paths_prefix(self): + def test_descendant_paths_prefix(self): root = self.etree.XML(xml_str) self.assertEquals( ['root.{objectified}c1', 'root.{objectified}c1.c2', - 'root.{objectified}c1.c2[1]', 'root.{objectified}c1.{otherNS}c2'], - root.c1.descendentpaths('root')) + 'root.{objectified}c1.c2[1]', 'root.{objectified}c1.c2[2]', + 'root.{objectified}c1.{otherNS}c2'], + root.c1.descendantpaths('root')) def test_suite(): From scoder at codespeak.net Sun Aug 6 11:17:28 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Aug 2006 11:17:28 +0200 (CEST) Subject: [Lxml-checkins] r31072 - lxml/pyrex Message-ID: <20060806091728.4CC2E1006E@code0.codespeak.net> Author: scoder Date: Sun Aug 6 11:17:27 2006 New Revision: 31072 Removed: lxml/pyrex/MANIFEST Log: dropped 
MANIFEST in favour of MANIFEST.in Deleted: /lxml/pyrex/MANIFEST ============================================================================== --- /lxml/pyrex/MANIFEST Sun Aug 6 11:17:27 2006 +++ (empty file) @@ -1,71 +0,0 @@ -CHANGES.txt -INSTALL.txt -MANIFEST.in -README.txt -ToDo.txt -USAGE.txt -pyrexc.py -setup.py -Demos/Makefile -Demos/Makefile.nodistutils -Demos/Setup.py -Demos/numeric_demo.pyx -Demos/primes.pyx -Demos/pyprimes.py -Demos/run_numeric_demo.py -Demos/run_primes.py -Demos/run_spam.py -Demos/spam.pyx -Doc/About.html -Doc/FAQ.html -Doc/extension_types.html -Doc/index.html -Doc/overview.html -Doc/primes.c -Doc/sharing.html -Doc/special_methods.html -Pyrex/Debugging.py -Pyrex/Utils.py -Pyrex/__init__.py -Pyrex/Compiler/CmdLine.py -Pyrex/Compiler/Code.py -Pyrex/Compiler/DebugFlags.py -Pyrex/Compiler/Errors.py -Pyrex/Compiler/ExprNodes.py -Pyrex/Compiler/Lexicon.pickle -Pyrex/Compiler/Lexicon.py -Pyrex/Compiler/Main.py -Pyrex/Compiler/Naming.py -Pyrex/Compiler/Nodes.py -Pyrex/Compiler/Options.py -Pyrex/Compiler/Parsing.py -Pyrex/Compiler/PyrexTypes.py -Pyrex/Compiler/Scanning.py -Pyrex/Compiler/Symtab.py -Pyrex/Compiler/TypeSlots.py -Pyrex/Compiler/Version.py -Pyrex/Compiler/__init__.py -Pyrex/Distutils/__init__.py -Pyrex/Distutils/build_ext.py -Pyrex/Mac/DarwinSystem.py -Pyrex/Mac/Finder_Std_Suite.py -Pyrex/Mac/MPW_Misc_Suite.py -Pyrex/Mac/MacSystem.py -Pyrex/Mac/MacUtils.py -Pyrex/Mac/PS_Misc_Suite.py -Pyrex/Mac/PyServerMain.py -Pyrex/Mac/TS_Misc_Suite.py -Pyrex/Mac/__init__.py -Pyrex/Plex/Actions.py -Pyrex/Plex/DFA.py -Pyrex/Plex/Errors.py -Pyrex/Plex/Lexicons.py -Pyrex/Plex/Machines.py -Pyrex/Plex/Regexps.py -Pyrex/Plex/Scanners.py -Pyrex/Plex/Timing.py -Pyrex/Plex/Traditional.py -Pyrex/Plex/Transitions.py -Pyrex/Plex/__init__.py -Pyrex/Plex/test_tm.py -bin/pyrexc From scoder at codespeak.net Sun Aug 6 11:18:46 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Aug 2006 11:18:46 +0200 (CEST) Subject: [Lxml-checkins] r31073 - 
lxml/pyrex/Pyrex/Compiler Message-ID: <20060806091846.BC2C310071@code0.codespeak.net> Author: scoder Date: Sun Aug 6 11:18:44 2006 New Revision: 31073 Modified: lxml/pyrex/Pyrex/Compiler/Nodes.py Log: skip normalize exception step in _Pyx_Raise Modified: lxml/pyrex/Pyrex/Compiler/Nodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Nodes.py (original) +++ lxml/pyrex/Pyrex/Compiler/Nodes.py Sun Aug 6 11:18:44 2006 @@ -3754,7 +3754,7 @@ goto raise_error; } else if (PyType_Check(type) || PyClass_Check(type)) - PyErr_NormalizeException(&type, &value, &tb); + ; /* PyErr_NormalizeException(&type, &value, &tb); */ else if (PyInstance_Check(type)) { /* Raising an instance. The value should be a dummy. */ if (value != Py_None) { From scoder at codespeak.net Sun Aug 6 12:28:20 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Aug 2006 12:28:20 +0200 (CEST) Subject: [Lxml-checkins] r31075 - lxml/trunk/src/lxml Message-ID: <20060806102820.848AF1006E@code0.codespeak.net> Author: scoder Date: Sun Aug 6 12:28:19 2006 New Revision: 31075 Modified: lxml/trunk/src/lxml/etree.pyx Log: exception fixes for Python 2.5 Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Aug 6 12:28:19 2006 @@ -21,6 +21,8 @@ cdef object id id = __builtin__.id +cdef object super +super = __builtin__.super del __builtin__ @@ -74,9 +76,24 @@ # module level superclass for all exceptions class LxmlError(Error): def __init__(self, *args): - Error.__init__(self, *args) + _initError(self, *args) self.error_log = __copyGlobalErrorLog() +cdef object _LxmlError +_LxmlError = LxmlError + +def _superError(obj, *args): + super(_LxmlError, obj).__init__(*args) + +cdef object _initError +if issubclass(_LxmlError, object): + _initError = _superError # Python >= 2.5 +else: + _initError = 
Error.__init__ # Python <= 2.4 + +del _superError + + # superclass for all syntax errors class LxmlSyntaxError(LxmlError, SyntaxError): pass From scoder at codespeak.net Sun Aug 6 12:28:30 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Aug 2006 12:28:30 +0200 (CEST) Subject: [Lxml-checkins] r31076 - in lxml/trunk: doc src/lxml Message-ID: <20060806102830.B779F1006E@code0.codespeak.net> Author: scoder Date: Sun Aug 6 12:28:28 2006 New Revision: 31076 Modified: lxml/trunk/doc/extensions.txt lxml/trunk/src/lxml/xpath.pxi Log: let XPathSyntaxError inherit from XPathError Modified: lxml/trunk/doc/extensions.txt ============================================================================== --- lxml/trunk/doc/extensions.txt (original) +++ lxml/trunk/doc/extensions.txt Sun Aug 6 12:28:28 2006 @@ -151,7 +151,7 @@ >>> e2.evaluate('/foo:a') Traceback (most recent call last): ... - XPathSyntaxError: Error in xpath expression. + XPathSyntaxError: error in xpath expression Evaluator-local extensions Modified: lxml/trunk/src/lxml/xpath.pxi ============================================================================== --- lxml/trunk/src/lxml/xpath.pxi (original) +++ lxml/trunk/src/lxml/xpath.pxi Sun Aug 6 12:28:28 2006 @@ -3,7 +3,7 @@ class XPathContextError(XPathError): pass -class XPathSyntaxError(LxmlSyntaxError): +class XPathSyntaxError(LxmlSyntaxError, XPathError): pass ################################################################################ @@ -89,7 +89,7 @@ self._xpathCtxt.lastError.message is not NULL: message = funicode(self._xpathCtxt.lastError.message) else: - message = "Error in xpath expression." 
+ message = "error in xpath expression" raise XPathSyntaxError, message cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, _Document doc): From scoder at codespeak.net Sun Aug 6 18:21:33 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Aug 2006 18:21:33 +0200 (CEST) Subject: [Lxml-checkins] r31078 - lxml/branch/capi/src/lxml Message-ID: <20060806162133.DB68D10063@code0.codespeak.net> Author: scoder Date: Sun Aug 6 18:21:31 2006 New Revision: 31078 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: small cleanup Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Sun Aug 6 18:21:31 2006 @@ -1270,7 +1270,7 @@ python.PyList_Append(path_list, '.'.join(path)) tags = {} c_href = tree._getNs(c_node) - c_child = cetree.findChildForwards(c_node, 0) + c_child = c_node.children while c_child is not NULL: while c_child.type != tree.XML_ELEMENT_NODE: c_child = c_child.next From scoder at codespeak.net Sun Aug 6 18:26:16 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Aug 2006 18:26:16 +0200 (CEST) Subject: [Lxml-checkins] r31079 - lxml/branch/capi/src/lxml Message-ID: <20060806162616.C060C10063@code0.codespeak.net> Author: scoder Date: Sun Aug 6 18:26:15 2006 New Revision: 31079 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: small cleanup Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Sun Aug 6 18:26:15 2006 @@ -1250,7 +1250,7 @@ """ tag = cetree.namespacedName(c_node) if prefix_string: - if not prefix_string.endswith('.'): + if prefix_string[-1] != '.': prefix_string = prefix_string + '.' 
prefix_string = prefix_string + tag else: From scoder at codespeak.net Sun Aug 6 19:54:15 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Aug 2006 19:54:15 +0200 (CEST) Subject: [Lxml-checkins] r31080 - lxml/trunk/src/lxml Message-ID: <20060806175415.878DB10063@code0.codespeak.net> Author: scoder Date: Sun Aug 6 19:54:13 2006 New Revision: 31080 Modified: lxml/trunk/src/lxml/etree.pyx Log: small change Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Aug 6 19:54:13 2006 @@ -86,7 +86,7 @@ super(_LxmlError, obj).__init__(*args) cdef object _initError -if issubclass(_LxmlError, object): +if isinstance(_LxmlError, type): _initError = _superError # Python >= 2.5 else: _initError = Error.__init__ # Python <= 2.4 From scoder at codespeak.net Sun Aug 6 21:37:06 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Aug 2006 21:37:06 +0200 (CEST) Subject: [Lxml-checkins] r31081 - lxml/trunk/doc Message-ID: <20060806193706.8D1A11005A@code0.codespeak.net> Author: scoder Date: Sun Aug 6 21:37:04 2006 New Revision: 31081 Modified: lxml/trunk/doc/build.txt Log: note on requirement for patched Pyrex version Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Sun Aug 6 21:37:04 2006 @@ -1,31 +1,46 @@ How to build lxml from source ============================= -To build lxml from source, you need libxml2 and libxslt properly -installed, include header files (possibly shipped in -dev packages). +To build lxml from source, you need libxml2 and libxslt properly installed, +including header files (possibly shipped in -dev packages). Pyrex ----- -The lxml.etree module is written in Pyrex_. 
Since we ship the -Pyrex-generated .c file with lxml releases however, you should not -need Pyrex to build lxml. +The lxml.etree and lxml.objectify modules are written in Pyrex_. Since we +distribute the Pyrex-generated .c files with lxml releases, however, you do +not need Pyrex to build lxml from the normal release sources. -If you're interested in building from a svn checkout of lxml or want -to be a lxml developer, you do need a working Pyrex installation. +.. _Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ -Pyrex now supports EasyInstall, so you can install it -by running the following command as super-user:: +If you are interested in building lxml from a Subversion checkout or want to +be an lxml developer, you do need a working Pyrex installation. - easy_install Pyrex +* lxml 1.0 and earlier -.. _Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ -.. _easy_install: http://peak.telecommunity.com/DevCenter/EasyInstall + The 1.0 series build with a standard installation of Pyrex 0.9.4.1. Note + that Pyrex up to and including version 0.9.4 has known problems when + compiling lxml with gcc 4.x or Python 2.4. Do not use it. If you want to + build lxml from non-release sources, please install Pyrex version 0.9.4.1 or + later. + + Pyrex now supports EasyInstall_, so you can install it by running the + following command as super-user:: + + easy_install Pyrex + + .. _EasyInstall: http://peak.telecommunity.com/DevCenter/EasyInstall + +* lxml 1.1 and later + + Newer versions of lxml depend on features and bug fixes that are not yet + available in an official Pyrex release. This includes support for the + external C-API of lxml, for Python 2.5 and for 64 bit architectures. + + To build lxml 1.1 and later from non-release or modified sources, you must + therefore install an updated Pyrex version from here: -Note that Pyrex up to and including version 0.9.4 has known problems when -compiling lxml with gcc 4.0 or Python 2.4. Do not use it. 
If you want to -build lxml from non-release sources, please install Pyrex version 0.9.4.1 or -later. + http://codespeak.net/svn/lxml/pyrex/ Subversion From scoder at codespeak.net Mon Aug 7 07:16:04 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Aug 2006 07:16:04 +0200 (CEST) Subject: [Lxml-checkins] r31082 - in lxml/branch/capi: doc src/lxml Message-ID: <20060807051604.5D53A10063@code0.codespeak.net> Author: scoder Date: Mon Aug 7 07:16:00 2006 New Revision: 31082 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: ValueError -> TypeError for setting indexed paths Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Mon Aug 7 07:16:00 2006 @@ -384,7 +384,7 @@ >>> path.setattr(root, "my value") Traceback (most recent call last): ... - ValueError: creating indexed path attributes is not supported + TypeError: creating indexed path attributes is not supported It is worth noting that ObjectPath does not depend on the ``objectify`` module or the ObjectifiedElement implementation. 
It can also be used in combination Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Mon Aug 7 07:16:00 2006 @@ -1237,7 +1237,7 @@ if c_child is not NULL: c_node = c_child elif c_index > 0: - raise ValueError, \ + raise TypeError, \ "creating indexed path attributes is not supported" else: child = SubElement( From scoder at codespeak.net Mon Aug 7 07:20:16 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Aug 2006 07:20:16 +0200 (CEST) Subject: [Lxml-checkins] r31083 - lxml/branch/capi/doc Message-ID: <20060807052016.B025C10063@code0.codespeak.net> Author: scoder Date: Mon Aug 7 07:20:14 2006 New Revision: 31083 Modified: lxml/branch/capi/doc/objectify.txt Log: doc fixes Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Mon Aug 7 07:20:14 2006 @@ -380,7 +380,7 @@ exist. Indexing of non existing children will not extend or create a list of such children but raise an exception:: - >>> path = objectify.ObjectPath(".some[1].child.{other}unknown") + >>> path = objectify.ObjectPath(".{non}existing[1]") >>> path.setattr(root, "my value") Traceback (most recent call last): ... 
From scoder at codespeak.net Mon Aug 7 07:37:01 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Aug 2006 07:37:01 +0200 (CEST) Subject: [Lxml-checkins] r31084 - lxml/branch/capi/doc Message-ID: <20060807053701.494EF10063@code0.codespeak.net> Author: scoder Date: Mon Aug 7 07:36:59 2006 New Revision: 31084 Modified: lxml/branch/capi/doc/FAQ.txt Log: FAQ entry on speedup by threading Modified: lxml/branch/capi/doc/FAQ.txt ============================================================================== --- lxml/branch/capi/doc/FAQ.txt (original) +++ lxml/branch/capi/doc/FAQ.txt Mon Aug 7 07:36:59 2006 @@ -13,15 +13,16 @@ 3 My application crashes! Why does lxml.etree do that? 4 I think I have found a bug in lxml. What should I do? 5 Can I use threads to concurrently access the lxml API? - 6 What is the difference between lxml.etree and lxml.objectify? - 7 Why doesn't the ``pretty_print`` option reformat my XML output? - 8 Why can't lxml parse my XML from unicode strings? - 9 How can I find out which namespace prefixes are used in a document? - 10 How can I specify a default namespace for XPath expressions? - 11 What are the ``findall()`` and ``xpath()`` methods on Element(Tree)? - 12 Why doesn't ``findall()`` support full XPath expressions? - 13 What is the difference between str(xslt(doc)) and xslt(doc).write() ? - 14 Why is my application so slow? + 6 Does my program run faster if I use threads? + 7 What is the difference between lxml.etree and lxml.objectify? + 8 Why doesn't the ``pretty_print`` option reformat my XML output? + 9 Why can't lxml parse my XML from unicode strings? + 10 How can I find out which namespace prefixes are used in a document? + 11 How can I specify a default namespace for XPath expressions? + 12 What are the ``findall()`` and ``xpath()`` methods on Element(Tree)? + 13 Why doesn't ``findall()`` support full XPath expressions? + 14 What is the difference between str(xslt(doc)) and xslt(doc).write() ? 
+ 15 Why is my application so slow? Is there a tutorial? @@ -113,6 +114,22 @@ processing chains) +Does my program run faster if I use threads? +-------------------------------------------- + +Depends. The best way to answer this is timing and profiling. + +The global interpreter lock (GIL) in Python serialises access to the +interpreter, so if the majority of your processing is done in Python code +(traversing trees, modifying elements, etc.), your gain will be close to 0. +The more of your XML processing moves into lxml, however, the higher your +gain. If your application is bound by XML parsing and serialisation, or by +complex XSLTs, your speedup on multi-processor machines can be substantial. + +See the question above to learn which operations free the GIL to support +multi-threading. + + What is the difference between lxml.etree and lxml.objectify? ------------------------------------------------------------- @@ -243,8 +260,8 @@ What is the difference between str(xslt(doc)) and xslt(doc).write() ? --------------------------------------------------------------------- -The str() implementation of the XSLTResultTree class (a subclass of -ElementTree) knows about the output method chosen in the stylesheet +The str() implementation of the XSLTResultTree class (a subclass of the +ElementTree class) knows about the output method chosen in the stylesheet (xsl:output), write() doesn't. If you call write(), the result will be a normal XML tree serialization in the requested encoding. Calling this method may also fail for XSLT results that are not XML trees (e.g. string results). 
From scoder at codespeak.net Mon Aug 7 07:58:16 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Aug 2006 07:58:16 +0200 (CEST) Subject: [Lxml-checkins] r31085 - lxml/branch/capi/doc Message-ID: <20060807055816.C4A6310063@code0.codespeak.net> Author: scoder Date: Mon Aug 7 07:58:12 2006 New Revision: 31085 Modified: lxml/branch/capi/doc/FAQ.txt Log: made FAQ hierarchical Modified: lxml/branch/capi/doc/FAQ.txt ============================================================================== --- lxml/branch/capi/doc/FAQ.txt (original) +++ lxml/branch/capi/doc/FAQ.txt Mon Aug 7 07:58:12 2006 @@ -1,3 +1,4 @@ +========================== Frequently Asked Questions ========================== @@ -8,31 +9,41 @@ .. contents:: .. - 1 Is there a tutorial? - 2 Where can I find more documentation about lxml? - 3 My application crashes! Why does lxml.etree do that? - 4 I think I have found a bug in lxml. What should I do? - 5 Can I use threads to concurrently access the lxml API? - 6 Does my program run faster if I use threads? - 7 What is the difference between lxml.etree and lxml.objectify? - 8 Why doesn't the ``pretty_print`` option reformat my XML output? - 9 Why can't lxml parse my XML from unicode strings? - 10 How can I find out which namespace prefixes are used in a document? - 11 How can I specify a default namespace for XPath expressions? - 12 What are the ``findall()`` and ``xpath()`` methods on Element(Tree)? - 13 Why doesn't ``findall()`` support full XPath expressions? - 14 What is the difference between str(xslt(doc)) and xslt(doc).write() ? - 15 Why is my application so slow? + 1 General questions + 1.1 Is there a tutorial? + 1.2 Where can I find more documentation about lxml? + 1.3 What is the difference between lxml.etree and lxml.objectify? + 1.4 Why is my application so slow? + 2 Bugs + 2.1 My application crashes! Why does lxml.etree do that? + 2.2 I think I have found a bug in lxml. What should I do? 
+ 3 Threading + 3.1 Can I use threads to concurrently access the lxml API? + 3.2 Does my program run faster if I use threads? + 4 Parsing and Serialisation + 4.1 Why doesn't the ``pretty_print`` option reformat my XML output? + 4.2 Why can't lxml parse my XML from unicode strings? + 4.3 What is the difference between str(xslt(doc)) and xslt(doc).write() ? + 5 XPath and Document Traversal + 5.1 What are the ``findall()`` and ``xpath()`` methods on Element(Tree)? + 5.2 Why doesn't ``findall()`` support full XPath expressions? + 5.3 How can I find out which namespace prefixes are used in a document? + 5.4 How can I specify a default namespace for XPath expressions? + +General Questions +================= Is there a tutorial? -------------------- -There is a `tutorial for ElementTree`_ which also works for lxml.etree. The -`API documentation`_ also contains many examples. +There is a `tutorial for ElementTree`_ which also works for ``lxml.etree``. +The `API documentation`_ also contains many examples for ``lxml.etree``. To +learn using ``lxml.objectify``, read the `objectify documentation`_. .. _`tutorial for ElementTree`: http://effbot.org/zone/element.htm .. _`API documentation`: api.html +.. _`objectify documentation`: objectify.html Where can I find more documentation about lxml? @@ -49,6 +60,47 @@ .. _`the web page`: http://codespeak.net/lxml/#documentation +What is the difference between lxml.etree and lxml.objectify? +------------------------------------------------------------- + +The two modules provide different ways of handling XML. However, objectify +builds on top of lxml.etree and therefore inherits most of its capabilities +and a large portion of its API. + +* lxml.etree is a generic API for XML and HTML handling. It aims for + ElementTree compatibility_ and supports the entire XML infoset. It is well + suited for both mixed content and data centric XML. Its generality makes it + the best choice for most applications. 
+ +* lxml.objectify is a specialized API for XML data handling in a Python object + syntax. It provides a very natural way to deal with data fields stored in a + structurally well defined XML format. Data is automatically converted to + Python data types and can be manipulated with normal Python operators. Look + at the examples in the `objectify documentation`_ to see what it feels like + to use it. + + Objectify is not well suited for mixed contents or HTML documents. As it is + built on top of lxml.etree, however, it inherits the normal support for + XPath, XSLT or validation. + + +Why is my application so slow? +------------------------------ + +lxml.etree is a very fast library for processing XML. There are, however, `a +few caveats`_ involved in the mapping of the powerful libxml2 library to the +simple and convenient ElementTree API. Not all operations are as fast as the +simplicity of the API might suggest. The `benchmark page`_ has a comparison +to other ElementTree implementations and a number of tips for performance +tweaking. + +.. _`a few caveats`: performance.html#the-elementtree-api +.. _`benchmark page`: performance.html + + +Bugs +==== + My application crashes! Why does lxml.etree do that? ---------------------------------------------------- @@ -85,6 +137,9 @@ .. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev +Threading +========= + Can I use threads to concurrently access the lxml API? ------------------------------------------------------ @@ -130,31 +185,8 @@ multi-threading. -What is the difference between lxml.etree and lxml.objectify? -------------------------------------------------------------- - -The two modules provide different ways of handling XML. However, objectify -builds on top of lxml.etree and therefore inherits most of its capabilities -and a large portion of its API. - -* lxml.etree is a generic API for XML and HTML handling. It is `mostly - ElementTree compatible`_ and supports the entire XML infoset. 
It is well - suited for both mixed content and data centric XML. Its generality makes it - the best choice for most applications. - -* lxml.objectify is a specialized API for XML data handling in a Python object - syntax. It provides a very natural way to deal with data fields stored in a - structurally well defined XML format. Data is automatically converted to - Python data types and can be manipulated with normal Python operators. Look - at the examples in the `objectify documentation`_ to see what it feels like - to use it. - - Objectify is not well suited for mixed contents or HTML documents. As it is - built on top of lxml.etree, however, it inherits the normal support for - XPath, XSLT or validation. - -.. _`mostly ElementTree compatible`: compatibility.html -.. _`objectify documentation`: objectify.txt +Parsing and Serialisation +========================= Why doesn't the ``pretty_print`` option reformat my XML output? @@ -195,29 +227,25 @@ data in a valid encoding. -How can I find out which namespace prefixes are used in a document? -------------------------------------------------------------------- - -You can traverse the document (``getiterator()``) and collect the prefix -attributes from all Elements into a set. However, it is unlikely that you -really want to do that. You do not need these prefixes, honestly. You only -need the namespace URIs. All namespace comparisons use these, so feel free to -make up your own prefixes when you use XPath expressions or extension -functions. - -The only place where you might consider specifying prefixes is the -serialization of Elements that were created through the API. Here, you can -specify a prefix mapping through the ``nsmap`` argument when creating the root -Element. Its children will then inherit this prefix for serialization. +What is the difference between str(xslt(doc)) and xslt(doc).write() ? 
+--------------------------------------------------------------------- +The str() implementation of the XSLTResultTree class (a subclass of the +ElementTree class) knows about the output method chosen in the stylesheet +(xsl:output), write() doesn't. If you call write(), the result will be a +normal XML tree serialization in the requested encoding. Calling this method +may also fail for XSLT results that are not XML trees (e.g. string results). -How can I specify a default namespace for XPath expressions? ------------------------------------------------------------- +If you call str(), it will return the serialized result as specified by the +XSL transform. This correctly serializes string results to encoded Python +strings and honours ``xsl:output`` options like ``indent``. This almost +certainly does what you want, so you should only use ``write()`` if you are +sure that the XSLT result is an XML tree and you want to override the encoding +and indentation options requested by the stylesheet. -You can't. In XPath, there is no such thing as a default namespace. Just use -an arbitrary prefix and let the namespace dictionary of the XPath evaluators -map it to your namespace. See also the question above. +XPath and Document Traversal +============================ What are the ``findall()`` and ``xpath()`` methods on Element(Tree)? -------------------------------------------------------------------- @@ -257,33 +285,25 @@ .. _`so fast`: performance.html#tree-traversal -What is the difference between str(xslt(doc)) and xslt(doc).write() ? ---------------------------------------------------------------------- - -The str() implementation of the XSLTResultTree class (a subclass of the -ElementTree class) knows about the output method chosen in the stylesheet -(xsl:output), write() doesn't. If you call write(), the result will be a -normal XML tree serialization in the requested encoding. Calling this method -may also fail for XSLT results that are not XML trees (e.g. 
string results). - -If you call str(), it will return the serialized result as specified by the -XSL transform. This correctly serializes string results to encoded Python -strings and honours ``xsl:output`` options like ``indent``. This almost -certainly does what you want, so you should only use ``write()`` if you are -sure that the XSLT result is an XML tree and you want to override the encoding -and indentation options requested by the stylesheet. +How can I find out which namespace prefixes are used in a document? +------------------------------------------------------------------- +You can traverse the document (``getiterator()``) and collect the prefix +attributes from all Elements into a set. However, it is unlikely that you +really want to do that. You do not need these prefixes, honestly. You only +need the namespace URIs. All namespace comparisons use these, so feel free to +make up your own prefixes when you use XPath expressions or extension +functions. -Why is my application so slow? ------------------------------- +The only place where you might consider specifying prefixes is the +serialization of Elements that were created through the API. Here, you can +specify a prefix mapping through the ``nsmap`` argument when creating the root +Element. Its children will then inherit this prefix for serialization. -lxml.etree is a very fast library for processing XML. There are, however, `a -few caveats`_ involved in the mapping of the powerful libxml2 library to the -simple and convenient ElementTree API. Not all operations are as fast as the -simplicity of the API might suggest. The `benchmark page`_ has a comparison -to other ElementTree implementations and a number of tips for performance -tweaking. -.. _`a few caveats`: performance.html#the-elementtree-api -.. _`benchmark page`: performance.html +How can I specify a default namespace for XPath expressions? +------------------------------------------------------------ +You can't. 
In XPath, there is no such thing as a default namespace. Just use +an arbitrary prefix and let the namespace dictionary of the XPath evaluators +map it to your namespace. See also the question above. From scoder at codespeak.net Mon Aug 7 07:59:12 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Aug 2006 07:59:12 +0200 (CEST) Subject: [Lxml-checkins] r31086 - lxml/branch/capi/doc Message-ID: <20060807055912.DE7DC10063@code0.codespeak.net> Author: scoder Date: Mon Aug 7 07:59:11 2006 New Revision: 31086 Modified: lxml/branch/capi/doc/objectify.txt Log: try to make clear what objectify actually is before describing its setup Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Mon Aug 7 07:59:11 2006 @@ -2,10 +2,20 @@ lxml.objectify ============== -lxml supports an alternative element API similar to the Amara_ bindery through -a custom Element implementation. This API is very different from the -ElementTree API. If it is used, it should not be mixed with other element -implementations, to avoid non-obvious behaviour. +lxml supports an alternative API similar to the Amara_ bindery through a +custom Element implementation. The main idea is to hide the usage of XML +behind normal Python objects, sometimes referred to as data-binding. It +allows you to use XML as if you were dealing with a normal Python object +hierarchy. + +Accessing the children of an XML element deploys object attribute access. If +there are multiple children with the same name, slicing and indexing can be +used. Python data types are extracted from XML content automatically and made +available to the normal Python operators. + +This API is very different from the ElementTree API. If it is used, it should +not be mixed with other element implementations, to avoid non-obvious +behaviour. .. 
_Amara: http://uche.ogbuji.net/tech/4suite/amara/ From scoder at codespeak.net Mon Aug 7 08:01:17 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Aug 2006 08:01:17 +0200 (CEST) Subject: [Lxml-checkins] r31087 - lxml/branch/capi/doc Message-ID: <20060807060117.EEA2110053@code0.codespeak.net> Author: scoder Date: Mon Aug 7 08:01:16 2006 New Revision: 31087 Modified: lxml/branch/capi/doc/FAQ.txt Log: small clarification in FAQ Modified: lxml/branch/capi/doc/FAQ.txt ============================================================================== --- lxml/branch/capi/doc/FAQ.txt (original) +++ lxml/branch/capi/doc/FAQ.txt Mon Aug 7 08:01:16 2006 @@ -92,7 +92,8 @@ simple and convenient ElementTree API. Not all operations are as fast as the simplicity of the API might suggest. The `benchmark page`_ has a comparison to other ElementTree implementations and a number of tips for performance -tweaking. +tweaking. As with any Python application, the rule of thumb is: the more of +your processing runs in C, the faster your application gets. .. _`a few caveats`: performance.html#the-elementtree-api .. _`benchmark page`: performance.html From scoder at codespeak.net Mon Aug 7 08:07:59 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Aug 2006 08:07:59 +0200 (CEST) Subject: [Lxml-checkins] r31088 - lxml/branch/capi/doc Message-ID: <20060807060759.F319710053@code0.codespeak.net> Author: scoder Date: Mon Aug 7 08:07:57 2006 New Revision: 31088 Modified: lxml/branch/capi/doc/FAQ.txt Log: small clarifications in FAQ Modified: lxml/branch/capi/doc/FAQ.txt ============================================================================== --- lxml/branch/capi/doc/FAQ.txt (original) +++ lxml/branch/capi/doc/FAQ.txt Mon Aug 7 08:07:57 2006 @@ -155,16 +155,16 @@ serialize the access to each of them, so it is better to copy() parsers or to use the default parser. Note that access to the XML() and HTML() functions is always serialized. 
If you need to parse concurrently from strings, use -StringIO. +``parse()`` with ``StringIO``. Warning: You should generally avoid modifying trees in other threads than the one it was generated in. Although this should work in many cases, there are certain scenarios where the termination of a thread that parsed a tree can -crash the application if subtrees of this tree are moved to other documents. +crash the application if subtrees of this tree were moved to other documents. You should be on the safe side when passing trees between threads if you either -a) do not modify these trees and do not move its elements to other trees, or +a) do not modify these trees and do not move their elements to other trees, or b) do not terminate threads while the trees they parsed are still in use (e.g. by using a fixed size thread-pool or long-running threads in processing chains) From scoder at codespeak.net Mon Aug 7 08:12:09 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Aug 2006 08:12:09 +0200 (CEST) Subject: [Lxml-checkins] r31089 - lxml/branch/capi/doc Message-ID: <20060807061209.3AB3C10053@code0.codespeak.net> Author: scoder Date: Mon Aug 7 08:12:07 2006 New Revision: 31089 Modified: lxml/branch/capi/doc/FAQ.txt Log: typo Modified: lxml/branch/capi/doc/FAQ.txt ============================================================================== --- lxml/branch/capi/doc/FAQ.txt (original) +++ lxml/branch/capi/doc/FAQ.txt Mon Aug 7 08:12:07 2006 @@ -175,7 +175,7 @@ Depends. The best way to answer this is timing and profiling. -The global interpreter lock (GIL) in Python serialises access to the +The global interpreter lock (GIL) in Python serializes access to the interpreter, so if the majority of your processing is done in Python code (traversing trees, modifying elements, etc.), your gain will be close to 0. 
The more of your XML processing moves into lxml, however, the higher your From scoder at codespeak.net Mon Aug 7 08:52:39 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Aug 2006 08:52:39 +0200 (CEST) Subject: [Lxml-checkins] r31090 - lxml/branch/capi/doc Message-ID: <20060807065239.BD1DE10053@code0.codespeak.net> Author: scoder Date: Mon Aug 7 08:52:38 2006 New Revision: 31090 Modified: lxml/branch/capi/doc/objectify.txt Log: objectify.txt: show how to access children that are not valid Python identifiers Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Mon Aug 7 08:52:38 2006 @@ -284,6 +284,25 @@ >>> print root["{other}c"].tag {other}c +The same approach must be used to access children with tag names that are not +valid Python identifiers:: + + >>> el = etree.SubElement(root, "{ns}tag-name") + >>> print root["tag-name"].tag + {ns}tag-name + + >>> new_el = objectify.Element("{ns}new-element") + >>> el = etree.SubElement(new_el, "{ns}child") + >>> el = etree.SubElement(new_el, "{ns}child") + + >>> root["tag-name"] = [ new_el, new_el ] + >>> print root["tag-name"].tag + {ns}tag-name + >>> print root["tag-name"].child.tag + {ns}child + >>> print root["tag-name"][1].child.tag + {ns}child + ObjectPath ---------- From scoder at codespeak.net Mon Aug 7 09:16:41 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Aug 2006 09:16:41 +0200 (CEST) Subject: [Lxml-checkins] r31091 - lxml/branch/capi/doc Message-ID: <20060807071641.4E56E10063@code0.codespeak.net> Author: scoder Date: Mon Aug 7 09:16:39 2006 New Revision: 31091 Modified: lxml/branch/capi/doc/objectify.txt Log: small clarification in doctests Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt 
(original) +++ lxml/branch/capi/doc/objectify.txt Mon Aug 7 09:16:39 2006 @@ -294,10 +294,16 @@ >>> new_el = objectify.Element("{ns}new-element") >>> el = etree.SubElement(new_el, "{ns}child") >>> el = etree.SubElement(new_el, "{ns}child") + >>> el = etree.SubElement(new_el, "{ns}child") >>> root["tag-name"] = [ new_el, new_el ] + >>> print len(root["tag-name"]) + 2 >>> print root["tag-name"].tag {ns}tag-name + + >>> print len(root["tag-name"].child) + 3 >>> print root["tag-name"].child.tag {ns}child >>> print root["tag-name"][1].child.tag From scoder at codespeak.net Mon Aug 7 12:25:13 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Aug 2006 12:25:13 +0200 (CEST) Subject: [Lxml-checkins] r31099 - lxml/trunk/src/lxml Message-ID: <20060807102513.332CF1006E@code0.codespeak.net> Author: scoder Date: Mon Aug 7 12:25:10 2006 New Revision: 31099 Modified: lxml/trunk/src/lxml/etree_defs.h lxml/trunk/src/lxml/parser.pxi Log: added 'compact' parser option with clear warning not to modify the document afterwards Modified: lxml/trunk/src/lxml/etree_defs.h ============================================================================== --- lxml/trunk/src/lxml/etree_defs.h (original) +++ lxml/trunk/src/lxml/etree_defs.h Mon Aug 7 12:25:10 2006 @@ -16,14 +16,16 @@ #endif #endif -/* XML_PARSE_COMPACT was added in libxml2 2.6.21 */ -/* +/* libxml2 version specific setup */ #include "libxml/xmlversion.h" #if LIBXML_VERSION < 20621 +/* (X|HT)ML_PARSE_COMPACT were added in libxml2 2.6.21 */ #define XML_PARSE_COMPACT 0 #define HTML_PARSE_COMPACT 0 + +/* HTML_PARSE_RECOVER was added in libxml2 2.6.21 */ +#define HTML_PARSE_RECOVER XML_PARSE_RECOVER #endif -*/ /* Redefinition of some Python builtins as C functions */ #define isinstance(o,c) PyObject_IsInstance(o,c) Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi 
Mon Aug 7 12:25:10 2006 @@ -587,7 +587,7 @@ parser configuration. A DTD will also be loaded if validation or attribute default values are requested. - Available keyword arguments: + Available boolean keyword arguments: * attribute_defaults - read default attributes from DTD * dtd_validation - validate (if DTD is available) * load_dtd - use DTD for parsing @@ -596,12 +596,18 @@ * recover - try hard to parse through broken XML * remove_blank_text - discard blank text nodes - Note that you must not share parsers between threads. This applies also - to the default parser. + For read-only documents that will not be altered after parsing, you can + also pass the following keyword arguments: + * compact - compactly store element text + + Note that you should avoid sharing parsers between threads. This does not + apply to the default parser. + + You must not modify documents that were parsed with the 'compact' option. """ def __init__(self, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=False, ns_clean=False, - recover=False, remove_blank_text=False): + recover=False, remove_blank_text=False, compact=False): cdef int parse_options _BaseParser.__init__(self) @@ -622,6 +628,8 @@ parse_options = parse_options | xmlparser.XML_PARSE_RECOVER if remove_blank_text: parse_options = parse_options | xmlparser.XML_PARSE_NOBLANKS + if compact: + parse_options = parse_options | xmlparser.XML_PARSE_COMPACT self._parse_options = parse_options @@ -700,26 +708,32 @@ tree. By default, it can read broken (non well-formed) HTML, depending on the capabilities of libxml2. Use the 'recover' option to switch this off. - Available keyword arguments: + Available boolean keyword arguments: * recover - try hard to parse through broken HTML (default: True) * no_network - prevent network access * remove_blank_text - discard empty text nodes - Note that you must not share parsers between threads. 
+ For read-only documents that will not be altered after parsing, you can + also pass the following keyword arguments: + * compact - compactly store element text + + Note that you should avoid sharing parsers between threads. You must not + modify documents that were parsed with the 'compact' option. """ - def __init__(self, recover=True, no_network=False, remove_blank_text=False): + def __init__(self, recover=True, no_network=False, remove_blank_text=False, + compact=False): cdef int parse_options _BaseParser.__init__(self) parse_options = _HTML_DEFAULT_PARSE_OPTIONS if recover: - # XXX: make it compile on libxml2 < 2.6.21 - #parse_options = parse_options | htmlparser.HTML_PARSE_RECOVER - parse_options = parse_options | xmlparser.XML_PARSE_RECOVER + parse_options = parse_options | htmlparser.HTML_PARSE_RECOVER if no_network: parse_options = parse_options | htmlparser.HTML_PARSE_NONET if remove_blank_text: parse_options = parse_options | htmlparser.HTML_PARSE_NOBLANKS + if compact: + parse_options = parse_options | htmlparser.HTML_PARSE_COMPACT self._parse_options = parse_options From scoder at codespeak.net Mon Aug 7 13:39:41 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Aug 2006 13:39:41 +0200 (CEST) Subject: [Lxml-checkins] r31102 - in lxml/trunk/src/lxml: . tests Message-ID: <20060807113941.28A961006E@code0.codespeak.net> Author: scoder Date: Mon Aug 7 13:39:38 2006 New Revision: 31102 Modified: lxml/trunk/src/lxml/iterparse.pxi lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/tests/test.xml lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_etree.py Log: support parser options in iterparse() Modified: lxml/trunk/src/lxml/iterparse.pxi ============================================================================== --- lxml/trunk/src/lxml/iterparse.pxi (original) +++ lxml/trunk/src/lxml/iterparse.pxi Mon Aug 7 13:39:38 2006 @@ -212,13 +212,27 @@ those elements that match the given tag. 
By default, events are generated for all elements. Note that the 'start-ns' and 'end-ns' events are not impacted by this restriction. + + The other keyword arguments in the constructor are mainly based on the + libxml2 parser configuration. A DTD will also be loaded if validation or + attribute default values are requested. + + Available boolean keyword arguments: + * attribute_defaults - read default attributes from DTD + * dtd_validation - validate (if DTD is available) + * load_dtd - use DTD for parsing + * no_network - prevent network access + * remove_blank_text - discard blank text nodes """ cdef object _source cdef object _filename cdef readonly object root - def __init__(self, source, events=("end",), tag=None): + def __init__(self, source, events=("end",), tag=None, + attribute_defaults=False, dtd_validation=False, + load_dtd=False, no_network=False, remove_blank_text=False): cdef _IterparseResolverContext context cdef char* c_filename + cdef int parse_options if not hasattr(source, 'read'): self._filename = source source = open(source, 'rb') @@ -230,11 +244,27 @@ c_filename = NULL self._source = source - _BaseParser.__init__(self) + _BaseParser.__init__(self, _IterparseResolverContext) + + parse_options = _XML_DEFAULT_PARSE_OPTIONS + if load_dtd: + parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD + if dtd_validation: + parse_options = parse_options | xmlparser.XML_PARSE_DTDVALID | \ + xmlparser.XML_PARSE_DTDLOAD + if attribute_defaults: + parse_options = parse_options | xmlparser.XML_PARSE_DTDATTR | \ + xmlparser.XML_PARSE_DTDLOAD + if no_network: + parse_options = parse_options | xmlparser.XML_PARSE_NONET + if remove_blank_text: + parse_options = parse_options | xmlparser.XML_PARSE_NOBLANKS + self._parse_options = parse_options context = <_IterparseResolverContext>self._context context._setEventFilter(events, tag) context._wrapCallbacks(self._parser_ctxt.sax) + xmlparser.xmlCtxtUseOptions(self._parser_ctxt, parse_options) 
xmlparser.xmlCtxtResetPush(self._parser_ctxt, NULL, 0, c_filename, NULL) def __iter__(self): Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Mon Aug 7 13:39:38 2006 @@ -337,7 +337,7 @@ cdef object _lockParser cdef object _unlockParser - def __init__(self): + def __init__(self, context_class=_ResolverContext): cdef xmlParserCtxt* pctxt if isinstance(self, HTMLParser): self._parser_type = LXML_HTML_PARSER @@ -363,10 +363,7 @@ self._unlockParser = lock.release self._error_log = _ErrorLog() self.resolvers = _ResolverRegistry() - if self._parser_type == LXML_ITERPARSE_PARSER: - self._context = _IterparseResolverContext(self.resolvers) - else: - self._context = _ResolverContext(self.resolvers) + self._context = context_class(self.resolvers) pctxt._private = self._context def __dealloc__(self): @@ -598,7 +595,7 @@ For read-only documents that will not be altered after parsing, you can also pass the following keyword arguments: - * compact - compactly store element text + * compact - compactly store short element text content Note that you should avoid sharing parsers between threads. This does not apply to the default parser. @@ -715,7 +712,7 @@ For read-only documents that will not be altered after parsing, you can also pass the following keyword arguments: - * compact - compactly store element text + * compact - compactly store short element text content Note that you should avoid sharing parsers between threads. You must not modify documents that were parsed with the 'compact' option. 
Modified: lxml/trunk/src/lxml/tests/test.xml ============================================================================== --- lxml/trunk/src/lxml/tests/test.xml (original) +++ lxml/trunk/src/lxml/tests/test.xml Mon Aug 7 13:39:38 2006 @@ -1 +1,2 @@ - \ No newline at end of file + + Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Mon Aug 7 13:39:38 2006 @@ -1820,6 +1820,17 @@ [('end', root[0]), ('end', root[1]), ('end', root)], events) + def test_iterparse_file(self): + iterparse = self.etree.iterparse + iterator = iterparse(fileInTestDir("test.xml")) + self.assertEquals(None, + iterator.root) + events = list(iterator) + root = iterator.root + self.assertEquals( + [('end', root[0]), ('end', root)], + events) + def test_iterparse_start(self): iterparse = self.etree.iterparse f = StringIO('') Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Mon Aug 7 13:39:38 2006 @@ -48,7 +48,22 @@ self.assertEquals("TEST", root.get("attr")) self.assertRaises(TypeError, root.set, "newattr", 5) + def test_parse_file_dtd(self): + parse = self.etree.parse + parser = self.etree.XMLParser(attribute_defaults=True) + + tree = parse(fileInTestDir('test.xml'), parser) + root = tree.getroot() + + self.assertEquals( + "valueA", + root.get("default")) + self.assertEquals( + "valueB", + root[0].get("default")) + def test_parse_error(self): + # ET raises ExpatError parse = self.etree.parse # from StringIO f = StringIO('') @@ -56,6 +71,7 @@ f.close() def test_parse_parser_type_error(self): + # ET raises IOError only parse = self.etree.parse self.assertRaises(TypeError, parse, 'notthere.xml', object()) @@ -90,6 +106,29 @@ # ET 
raises ExpatError, lxml raises XMLSyntaxError self.assertRaises(self.etree.XMLSyntaxError, list, iterparse(f)) + def test_iterparse_file_dtd(self): + iterparse = self.etree.iterparse + iterator = iterparse(fileInTestDir("test.xml"), events=("start",), + attribute_defaults=True) + attributes = [ element.get("default") + for event, element in iterator ] + self.assertEquals( + ["valueA", "valueB"], + attributes) + + def test_iterparse_strip(self): + iterparse = self.etree.iterparse + f = StringIO(""" + \n \n b test \n + + \n\t \n \n """) + iterator = iterparse(f, remove_blank_text=True) + text = [ (element.text, element.tail) + for event, element in iterator ] + self.assertEquals( + [(None, None), (" b test ", None), (" \n ", None)], + text) + def test_iterparse_tag(self): iterparse = self.etree.iterparse f = StringIO('') From scoder at codespeak.net Mon Aug 7 16:42:02 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Aug 2006 16:42:02 +0200 (CEST) Subject: [Lxml-checkins] r31111 - lxml/trunk/src/lxml/tests Message-ID: <20060807144202.9843310075@code0.codespeak.net> Author: scoder Date: Mon Aug 7 16:42:01 2006 New Revision: 31111 Modified: lxml/trunk/src/lxml/tests/test_etree.py Log: fixed test case Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Mon Aug 7 16:42:01 2006 @@ -126,7 +126,7 @@ text = [ (element.text, element.tail) for event, element in iterator ] self.assertEquals( - [(None, None), (" b test ", None), (" \n ", None)], + [(" b test ", None), (" \n ", None), (None, None)], text) def test_iterparse_tag(self): From scoder at codespeak.net Mon Aug 7 17:15:10 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Aug 2006 17:15:10 +0200 (CEST) Subject: [Lxml-checkins] r31120 - lxml/trunk/src/lxml Message-ID: 
<20060807151510.4C67010072@code0.codespeak.net> Author: scoder Date: Mon Aug 7 17:15:07 2006 New Revision: 31120 Modified: lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/relaxng.pxi lxml/trunk/src/lxml/xmlschema.pxi lxml/trunk/src/lxml/xpath.pxi lxml/trunk/src/lxml/xslt.pxi Log: fixed various threading issues: more unlocking, prevent concurrent XSLT from different threads if it was parsed outside the current or the main thread Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Mon Aug 7 17:15:07 2006 @@ -120,6 +120,16 @@ __GLOBAL_PARSER_CONTEXT = _ParserContext() __GLOBAL_PARSER_CONTEXT.initMainParserContext() +cdef int _checkThreadDict(xmlDict* c_dict): + """Check that c_dict is either the local thread dictionary or the global + parent dictionary. + """ + if __GLOBAL_PARSER_CONTEXT._c_dict is c_dict: + return 1 # main thread + if __GLOBAL_PARSER_CONTEXT._getThreadDict(NULL) is c_dict: + return 1 # local thread dict + return 0 + ############################################################ ## support for Python unicode I/O ############################################################ @@ -777,32 +787,41 @@ return result cdef xmlDoc* _copyDoc(xmlDoc* c_doc, int recursive): + cdef python.PyThreadState* state cdef xmlDoc* result + if recursive: + state = python.PyEval_SaveThread() result = tree.xmlCopyDoc(c_doc, recursive) _bugFixURL(c_doc, result) + if recursive: + python.PyEval_RestoreThread(state) __GLOBAL_PARSER_CONTEXT.initDocDict(result) return result cdef xmlDoc* _copyDocRoot(xmlDoc* c_doc, xmlNode* c_new_root): "Recursively copy the document and make c_new_root the new root node." 
+ cdef python.PyThreadState* state cdef xmlDoc* result cdef xmlNode* c_node result = tree.xmlCopyDoc(c_doc, 0) # non recursive _bugFixURL(c_doc, result) __GLOBAL_PARSER_CONTEXT.initDocDict(result) + state = python.PyEval_SaveThread() c_node = tree.xmlDocCopyNode(c_new_root, result, 1) # recursive tree.xmlDocSetRootElement(result, c_node) _copyTail(c_new_root.next, c_node) + python.PyEval_RestoreThread(state) return result cdef void _bugFixURL(xmlDoc* c_source_doc, xmlDoc* c_target_doc): - """libxml2 <= 2.6.17 had a bug that prevented them from copying the - document URL in xmlDocCopy()""" + """libxml2 <= 2.6.17 had a bug that prevented it from copying the document + URL in xmlDocCopy()""" if c_source_doc.URL is not NULL and _LIBXML_VERSION_INT < 20618: if c_target_doc.URL is not NULL: tree.xmlFree(c_target_doc.URL) c_target_doc.URL = tree.xmlStrdup(c_source_doc.URL) + ############################################################ ## API level helper functions for _Document creation ## (here we convert to UTF-8) Modified: lxml/trunk/src/lxml/relaxng.pxi ============================================================================== --- lxml/trunk/src/lxml/relaxng.pxi (original) +++ lxml/trunk/src/lxml/relaxng.pxi Mon Aug 7 17:15:07 2006 @@ -87,16 +87,14 @@ self._error_log.disconnect() raise RelaxNGError, "Failed to create validation context" - state = python.PyEval_SaveThread() - c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) + state = python.PyEval_SaveThread() ret = relaxng.xmlRelaxNGValidateDoc(valid_ctxt, c_doc) + python.PyEval_RestoreThread(state) _destroyFakeDoc(doc._c_doc, c_doc) relaxng.xmlRelaxNGFreeValidCtxt(valid_ctxt) - python.PyEval_RestoreThread(state) - self._error_log.disconnect() if ret == -1: raise RelaxNGValidateError, "Internal error in Relax NG validation" Modified: lxml/trunk/src/lxml/xmlschema.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlschema.pxi (original) +++ 
lxml/trunk/src/lxml/xmlschema.pxi Mon Aug 7 17:15:07 2006 @@ -82,16 +82,14 @@ self._error_log.disconnect() raise XMLSchemaError, "Failed to create validation context" - state = python.PyEval_SaveThread() - c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) + state = python.PyEval_SaveThread() ret = xmlschema.xmlSchemaValidateDoc(valid_ctxt, c_doc) + python.PyEval_RestoreThread(state) _destroyFakeDoc(doc._c_doc, c_doc) xmlschema.xmlSchemaFreeValidCtxt(valid_ctxt) - python.PyEval_RestoreThread(state) - self._error_log.disconnect() if ret == -1: raise XMLSchemaValidateError, "Internal error in XML Schema validation." Modified: lxml/trunk/src/lxml/xpath.pxi ============================================================================== --- lxml/trunk/src/lxml/xpath.pxi (original) +++ lxml/trunk/src/lxml/xpath.pxi Mon Aug 7 17:15:07 2006 @@ -249,6 +249,7 @@ self._raise_parse_error() def __call__(self, _etree_or_element, **_variables): + cdef python.PyThreadState* state cdef xpath.xmlXPathContext* xpathCtxt cdef xpath.xmlXPathObject* xpathObj cdef _Document document @@ -266,7 +267,9 @@ context.register_context(xpathCtxt, document) try: context.registerVariables(_variables) + state = python.PyEval_SaveThread() xpathObj = xpath.xmlXPathCompiledEval(self._xpath, xpathCtxt) + python.PyEval_RestoreThread(state) finally: context.unregister_context() return self._handle_result(xpathObj, document) Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Mon Aug 7 17:15:07 2006 @@ -53,7 +53,7 @@ c_doc = (<_XSLTResolverContext>context)._c_style_doc if c_doc is not NULL and c_doc.URL is not NULL: if cstd.strcmp(c_uri, c_doc.URL) == 0: - return c_doc + return _copyDoc(c_doc, 1) return NULL cdef xmlDoc* _xslt_resolve_from_python(char* c_uri, void* context, @@ -134,7 +134,7 @@ c_doc = _xslt_resolve_stylesheet(c_uri, c_pcontext) if c_doc is not NULL: 
python.PyGILState_Release(gil_state) - return _copyDoc(c_doc, 1) + return c_doc c_doc = _xslt_resolve_from_python(c_uri, c_pcontext, parse_options, &error) if c_doc is NULL and not error: @@ -353,6 +353,9 @@ cdef char** params cdef Py_ssize_t i, kw_count + if not _checkThreadDict(self._c_style.doc.dict): + raise RuntimeError, "stylesheet is not usable in this thread" + input_doc = _documentOrRaise(_input) root_node = _rootNodeOrRaise(_input) From scoder at codespeak.net Mon Aug 7 17:33:08 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Aug 2006 17:33:08 +0200 (CEST) Subject: [Lxml-checkins] r31127 - lxml/branch/capi/doc Message-ID: <20060807153308.E653610074@code0.codespeak.net> Author: scoder Date: Mon Aug 7 17:33:07 2006 New Revision: 31127 Modified: lxml/branch/capi/doc/FAQ.txt Log: FAQ: concurrent XSLT requires parsing in the main thread Modified: lxml/branch/capi/doc/FAQ.txt ============================================================================== --- lxml/branch/capi/doc/FAQ.txt (original) +++ lxml/branch/capi/doc/FAQ.txt Mon Aug 7 17:33:07 2006 @@ -157,6 +157,10 @@ always serialized. If you need to parse concurrently from strings, use ``parse()`` with ``StringIO``. +Due to the way libxslt handles threading, concurrent access to stylesheets is +currently only possible if it was parsed in the main thread. Parsing and +using a stylesheet inside one thread also works. + Warning: You should generally avoid modifying trees in other threads than the one it was generated in. 
Although this should work in many cases, there are certain scenarios where the termination of a thread that parsed a tree can From scoder at codespeak.net Mon Aug 7 18:12:45 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Aug 2006 18:12:45 +0200 (CEST) Subject: [Lxml-checkins] r31128 - lxml/branch/capi/doc Message-ID: <20060807161245.BDCA010071@code0.codespeak.net> Author: scoder Date: Mon Aug 7 18:12:44 2006 New Revision: 31128 Modified: lxml/branch/capi/doc/objectify.txt Log: reference gnosis.xml.objectify Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Mon Aug 7 18:12:44 2006 @@ -2,11 +2,11 @@ lxml.objectify ============== -lxml supports an alternative API similar to the Amara_ bindery through a -custom Element implementation. The main idea is to hide the usage of XML -behind normal Python objects, sometimes referred to as data-binding. It -allows you to use XML as if you were dealing with a normal Python object -hierarchy. +lxml supports an alternative API similar to the Amara_ bindery or +gnosis.xml.objectify_ through a custom Element implementation. The main idea +is to hide the usage of XML behind normal Python objects, sometimes referred +to as data-binding. It allows you to use XML as if you were dealing with a +normal Python object hierarchy. Accessing the children of an XML element deploys object attribute access. If there are multiple children with the same name, slicing and indexing can be @@ -18,6 +18,7 @@ behaviour. .. _Amara: http://uche.ogbuji.net/tech/4suite/amara/ +.. _gnosis.xml.objectify: http://gnosis.cx/download/ .. contents:: .. 
From scoder at codespeak.net Mon Aug 7 18:17:39 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Aug 2006 18:17:39 +0200 (CEST) Subject: [Lxml-checkins] r31129 - lxml/branch/capi/src/lxml Message-ID: <20060807161739.E451010071@code0.codespeak.net> Author: scoder Date: Mon Aug 7 18:17:38 2006 New Revision: 31129 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: make type_check available from PyType objects, readable repr() of PyType Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Mon Aug 7 18:17:38 2006 @@ -733,7 +733,7 @@ matching type will be used. """ cdef readonly object name - cdef object _type_check + cdef readonly object type_check cdef object _type cdef object _schema_types def __init__(self, name, type_check, type_class): @@ -745,9 +745,12 @@ raise TypeError, "Type class must inherit from ObjectifiedElement" self.name = name self._type = type_class - self._type_check = type_check + self.type_check = type_check self._schema_types = [] + def __repr__(self): + return "PyType(%s, %s)" % (self.name, self._type.__name__) + def register(self, before=None, after=None): """Register the type. @@ -756,12 +759,12 @@ the type list. If any of them is not currently known, it is simply ignored. Raises ValueError if the dependencies cannot be fulfilled. 
""" - if self._type_check is not None: + if self.type_check is not None: for item in _TYPE_CHECKS: - if item[0] is self._type_check: + if item[0] is self.type_check: _TYPE_CHECKS.remove(item) break - entry = (self._type_check, self) + entry = (self.type_check, self) first_pos = 0 last_pos = -1 if before or after: @@ -791,10 +794,10 @@ for xs_type, pytype in _SCHEMA_TYPE_DICT.items(): if pytype is self: del _SCHEMA_TYPE_DICT[xs_type] - if self._type_check is None: + if self.type_check is None: return try: - _TYPE_CHECKS.remove( (self._type_check, self) ) + _TYPE_CHECKS.remove( (self.type_check, self) ) except ValueError: pass @@ -1330,7 +1333,7 @@ if pytype is not None: value = textOf(c_node) try: - if not (<PyType>pytype)._type_check(value): + if not (<PyType>pytype).type_check(value): pytype = None except _ValueError: pytype = None From scoder at codespeak.net Mon Aug 7 18:33:10 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Aug 2006 18:33:10 +0200 (CEST) Subject: [Lxml-checkins] r31132 - in lxml/branch/capi: doc src/lxml Message-ID: <20060807163310.D239810074@code0.codespeak.net> Author: scoder Date: Mon Aug 7 18:33:09 2006 New Revision: 31132 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: let ObjectPath.setattr() support list assignment Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Mon Aug 7 18:33:09 2006 @@ -412,6 +412,13 @@ >>> print root.some.child["{other}unknown"].text my value +As with attribute assignment, ``setattr()`` accepts lists: + + >>> path.setattr(root, ["v1", "v2", "v3"]) + >>> [ el.text for el in path.find(root) ] + ['v1', 'v2', 'v3'] + + Note, however, that indexing is only supported in this context if the children exist. 
Indexing of non existing children will not extend or create a list of such children but raise an exception:: Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Mon Aug 7 18:33:09 2006 @@ -459,6 +459,9 @@ new_element = cetree.deepcopyNodeToDocument( element._doc, (<_Element>value)._c_node) new_element.tag = element.tag + elif python.PyList_Check(value) or python.PyTuple_Check(value): + element.__setslice__(0, python.PY_SSIZE_T_MAX, value) + return else: new_element = element.makeelement(element.tag) _setElementValue(new_element, value) From scoder at codespeak.net Mon Aug 7 18:50:47 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 7 Aug 2006 18:50:47 +0200 (CEST) Subject: [Lxml-checkins] r31134 - in lxml/branch/capi/src/lxml: . tests Message-ID: <20060807165047.593151007C@code0.codespeak.net> Author: scoder Date: Mon Aug 7 18:50:46 2006 New Revision: 31134 Modified: lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: __richcmp__ and __nonzero__ for numbers/strings/None Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Mon Aug 7 18:50:46 2006 @@ -564,7 +564,7 @@ return abs( _numericValueOf(self) ) def __nonzero__(self): - return _numericValueOf(self) + return _numericValueOf(self) != 0 def __invert__(self): return ~ _numericValueOf(self) @@ -614,6 +614,12 @@ else: return len(text) + def __nonzero__(self): + text = textOf(self._c_node) + if text is None: + return False + return len(text) > 0 + def __richcmp__(self, other, int op): if hasattr(other, 'pyval'): other = other.pyval @@ -652,6 +658,17 @@ def __str__(self): return "None" + def __nonzero__(self): 
+ return False + + def __richcmp__(self, other, int op): + if other is None or self is None: + return python.PyObject_RichCompare(None, None, op) + if isinstance(self, NoneElement): + return python.PyObject_RichCompare(None, other, op) + else: + return python.PyObject_RichCompare(self, None, op) + property pyval: def __get__(self): return None Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Mon Aug 7 18:50:46 2006 @@ -178,6 +178,8 @@ self.assertFalse(isinstance(root.none, objectify.NoneElement)) self.assertFalse(isinstance(root.none[0], objectify.NoneElement)) self.assert_(isinstance(root.none[1], objectify.NoneElement)) + self.assertEquals(root.none[1], None) + self.assertFalse(root.none[1]) def test_type_bool(self): Element = self.etree.Element @@ -249,6 +251,11 @@ self.assert_(root.b[0] > 5) self.assert_(5 < root.b[0]) + root.b = "test" + self.assert_(root.b) + root.b = "" + self.assertFalse(root.b) + def test_type_int_cmp(self): XML = self.etree.XML root = XML(u'<root><b>5</b><b>6</b></root>') @@ -265,6 +272,11 @@ self.assert_(root.b[0] < "5") self.assert_("5" > root.b[0]) + root.b = 5 + self.assert_(root.b) + root.b = 0 + self.assertFalse(root.b) + def test_type_bool_cmp(self): XML = self.etree.XML root = XML(u'<root><b>false</b><b>true</b></root>') @@ -276,11 +288,19 @@ self.assertFalse(root.b[0] >= root.b[1]) self.assertFalse(root.b[0] > root.b[1]) + self.assertFalse(root.b[0]) + self.assert_(root.b[1]) + self.assertEquals(root.b[0], False) self.assertEquals(False, root.b[0]) self.assert_(root.b[0] < 5) self.assert_(5 > root.b[0]) + root.b = True + self.assert_(root.b) + root.b = False + self.assertFalse(root.b) + def test_type_annotation(self): XML = self.etree.XML root = XML(u'''\ From scoder at codespeak.net Tue Aug 8 13:33:39 2006 From: scoder at codespeak.net 
(scoder at codespeak.net) Date: Tue, 8 
Aug 2006 13:33:39 +0200 (CEST) Subject: [Lxml-checkins] r31159 - in lxml/branch/capi/src/lxml: . tests Message-ID: <20060808113339.2514E10063@code0.codespeak.net> Author: scoder Date: Tue Aug 8 13:33:37 2006 New Revision: 31159 Modified: lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: support negative indexes for attributes and ObjectPath Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Aug 8 13:33:37 2006 @@ -296,8 +296,12 @@ return self else: raise IndexError, key + if key < 0: + c_node = c_parent.last + else: + c_node = c_parent.children c_node = _findFollowingSibling( - c_parent.children, tree._getNs(c_self_node), c_self_node.name, key) + c_node, tree._getNs(c_self_node), c_self_node.name, key) if c_node is NULL: raise IndexError, key return elementFactory(self._doc, c_node) @@ -328,9 +332,12 @@ if c_parent is NULL: # the 'root[i] = ...' 
case raise TypeError, "index assignment to root element is invalid" + if key < 0: + c_node = c_parent.last + else: + c_node = c_parent.children c_node = _findFollowingSibling( - c_parent.children, - tree._getNs(c_self_node), c_self_node.name, key) + c_node, tree._getNs(c_self_node), c_self_node.name, key) if c_node is NULL: raise IndexError, key element = elementFactory(self._doc, c_node) @@ -414,13 +421,19 @@ cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node, char* href, char* name, Py_ssize_t index): + cdef tree.xmlNode* (*next)(tree.xmlNode*) + if index >= 0: + next = cetree.nextElement + else: + index = -1 - index + next = cetree.previousElement while c_node is not NULL: if c_node.type == tree.XML_ELEMENT_NODE and \ cetree.tagMatches(c_node, href, name): index = index - 1 if index < 0: return c_node - c_node = c_node.next + c_node = next(c_node) return NULL cdef object _lookupChild(_Element parent, tag): @@ -1063,7 +1076,7 @@ cdef object __SPLIT_PATH __SPLIT_PATH = re.compile( - r"(\.?)\s*(?:\{([^}]*)\})?\s*(\w+)\s*(?:\[\s*([0-9]+)\s*\])?", + r"(\.?)\s*(?:\{([^}]*)\})?\s*(\w+)\s*(?:\[\s*([-0-9]+)\s*\])?", re.U).findall cdef _parseObjectPathString(path): @@ -1083,8 +1096,6 @@ else: index = python.PyNumber_Int(index) has_index = 1 - if index < 0: - raise ValueError, "index must be >= 0" has_dot = _cstr(dot)[0] == c'.' 
if python.PyList_GET_SIZE(new_path) == 0: if has_dot: @@ -1141,8 +1152,6 @@ if index == 0: index = None else: - if index < 0: - raise ValueError, "index must be >= 0" has_index = 1 if ns is None: entry = "\0%s\0" % name @@ -1200,8 +1209,11 @@ if index is not None: c_index = python.PyInt_AsSsize_t(index) - c_node = _findFollowingSibling(c_node.children, - c_href, c_path, c_index) + if c_index < 0: + c_node = c_node.last + else: + c_node = c_node.children + c_node = _findFollowingSibling(c_node, c_href, c_path, c_index) if use_default: return default_value @@ -1255,8 +1267,11 @@ if index is not None: c_index = python.PyInt_AsSsize_t(index) - c_child = _findFollowingSibling(c_node.children, - c_href, c_path, c_index) + if c_index < 0: + c_child = c_node.last + else: + c_child = c_node.children + c_child = _findFollowingSibling(c_child, c_href, c_path, c_index) if c_child is not NULL: c_node = c_child elif c_index > 0: Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Tue Aug 8 13:33:37 2006 @@ -65,6 +65,14 @@ self.assertEquals("2", root.c1.c2[2].text) self.assertRaises(IndexError, operator.itemgetter(3), root.c1.c2) + def test_child_index_neg(self): + root = self.etree.XML(xml_str) + self.assertEquals("0", root.c1.c2[0].text) + self.assertEquals("0", root.c1.c2[-3].text) + self.assertEquals("1", root.c1.c2[-2].text) + self.assertEquals("2", root.c1.c2[-1].text) + self.assertRaises(IndexError, operator.itemgetter(-4), root.c1.c2) + def test_child_len(self): root = self.etree.XML(xml_str) self.assertEquals(1, len(root)) @@ -474,6 +482,12 @@ path = objectify.ObjectPath( "root.c1.c2[2]" ) self.assertEquals(root.c1.c2[2].text, path(root).text) + path = objectify.ObjectPath( "root.c1.c2[-1]" ) + self.assertEquals(root.c1.c2[-1].text, path(root).text) + + path = 
objectify.ObjectPath( "root.c1.c2[-3]" ) + self.assertEquals(root.c1.c2[-3].text, path(root).text) + def test_object_path_index_list(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath( ['root', 'c1[0]', 'c2[0]'] ) @@ -485,11 +499,17 @@ path = objectify.ObjectPath( ['root', 'c1', 'c2[2]'] ) self.assertEquals(root.c1.c2[2].text, path(root).text) + path = objectify.ObjectPath( ['root', 'c1', 'c2[-1]'] ) + self.assertEquals(root.c1.c2[-1].text, path(root).text) + + path = objectify.ObjectPath( ['root', 'c1', 'c2[-3]'] ) + self.assertEquals(root.c1.c2[-3].text, path(root).text) + def test_object_path_index_fail_parse(self): self.assertRaises(ValueError, objectify.ObjectPath, - "root.c1[0].c2[-1]") + "root.c1[0].c2[-1-2]") self.assertRaises(ValueError, objectify.ObjectPath, - ['root', 'c1[0]', 'c2[-1]']) + ['root', 'c1[0]', 'c2[-1-2]']) self.assertRaises(ValueError, objectify.ObjectPath, "root[2].c1.c2") @@ -497,11 +517,6 @@ ['root[2]', 'c1', 'c2']) self.assertRaises(ValueError, objectify.ObjectPath, - "root.c1[-1].c2") - self.assertRaises(ValueError, objectify.ObjectPath, - ['root', 'c1[-1]', 'c2']) - - self.assertRaises(ValueError, objectify.ObjectPath, ".") self.assertRaises(ValueError, objectify.ObjectPath, ['']) @@ -519,6 +534,12 @@ path = objectify.ObjectPath(".c1[9999].c2[0]") self.assertRaises(AttributeError, path, root) + path = objectify.ObjectPath("root.c1[-2].c2") + self.assertRaises(AttributeError, path, root) + + path = objectify.ObjectPath("root.c1[0].c2[-4]") + self.assertRaises(AttributeError, path, root) + def test_object_path_ns(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath( "{objectified}root.c1.c2" ) From scoder at codespeak.net Tue Aug 8 15:51:30 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 8 Aug 2006 15:51:30 +0200 (CEST) Subject: [Lxml-checkins] r31163 - in lxml/branch/capi/src/lxml: . 
tests Message-ID: <20060808135130.3D69910069@code0.codespeak.net> Author: scoder Date: Tue Aug 8 15:51:28 2006 New Revision: 31163 Modified: lxml/branch/capi/src/lxml/apihelpers.pxi lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: rewrite of ObjectPath to let it support namespace-less children of parents with namespaces Modified: lxml/branch/capi/src/lxml/apihelpers.pxi ============================================================================== --- lxml/branch/capi/src/lxml/apihelpers.pxi (original) +++ lxml/branch/capi/src/lxml/apihelpers.pxi Tue Aug 8 15:51:28 2006 @@ -363,6 +363,19 @@ return c_node cdef int _tagMatches(xmlNode* c_node, char* c_href, char* c_name): + """Tests if the node matches namespace URI and tag name. + + A node matches if it matches both c_href and c_name. + + A node matches c_href if any of the following is true: + * c_href is NULL + * its namespace is NULL and c_href is the empty string + * its namespace string equals the c_href string + + A node matches c_name if any of the following is true: + * c_name is NULL + * its name string equals the c_name string + """ cdef char* c_node_href if c_name is NULL: if c_href is NULL: @@ -371,20 +384,21 @@ else: c_node_href = _getNs(c_node) if c_node_href is NULL: - return 0 + return c_href[0] == '\0' else: return cstd.strcmp(c_node_href, c_href) == 0 elif c_href is NULL: if _getNs(c_node) is not NULL: return 0 return cstd.strcmp(c_node.name, c_name) == 0 - else: + elif cstd.strcmp(c_node.name, c_name) == 0: c_node_href = _getNs(c_node) if c_node_href is NULL: - return 0 + return c_href[0] == '\0' else: - return cstd.strcmp(c_node.name, c_name) == 0 and \ - cstd.strcmp(c_node_href, c_href) == 0 + return cstd.strcmp(c_node_href, c_href) == 0 + else: + return 0 cdef void _removeNode(xmlNode* c_node): """Unlink and free a node and subnodes if possible. 
Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Aug 8 15:51:28 2006 @@ -1019,26 +1019,36 @@ ################################################################################ # ObjectPath +ctypedef struct _ObjectPath: + char* href + char* name + Py_ssize_t index + cdef class ObjectPath: """Immutable object that represents a compiled object path. Example for a path: 'root.child[1].{other}child[25]' """ cdef readonly object find - cdef object _indexes cdef object _path cdef object _path_str - cdef char* _path_cstr + cdef _ObjectPath* _c_path + cdef Py_ssize_t _path_len def __init__(self, path): if python._isString(path): - self._path, self._indexes = _parseObjectPathString(path) + self._path = _parseObjectPathString(path) self._path_str = path else: - self._path, self._indexes = _parseObjectPathList(path) + self._path = _parseObjectPathList(path) self._path_str = '.'.join(path) - self._path_cstr = _cstr(self._path) + self._path_len = python.PyList_GET_SIZE(self._path) + self._c_path = _buildObjectPathSegments(self._path) self.find = self.__call__ + def __dealloc__(self): + if self._c_path is not NULL: + python.PyMem_Free(self._c_path) + def __str__(self): return self._path_str @@ -1057,12 +1067,12 @@ use_default = 1 elif use_default > 1: raise TypeError, "invalid number of arguments: needs one or two" - return _findObjectPath(root, self._path_cstr, self._indexes, + return _findObjectPath(root, self._c_path, self._path_len, default, use_default) def hasattr(self, _Element root not None): try: - _findObjectPath(root, self._path_cstr, self._indexes, None, 0) + _findObjectPath(root, self._c_path, self._path_len, None, 0) except AttributeError: return False return True @@ -1072,48 +1082,47 @@ If any of the children on the path does not exist, it is created. 
""" - _createObjectPath(root, self._path_cstr, self._indexes, value) + _createObjectPath(root, self._c_path, self._path_len, value) -cdef object __SPLIT_PATH -__SPLIT_PATH = re.compile( +cdef object __MATCH_PATH_SEGMENT +__MATCH_PATH_SEGMENT = re.compile( r"(\.?)\s*(?:\{([^}]*)\})?\s*(\w+)\s*(?:\[\s*([-0-9]+)\s*\])?", - re.U).findall + re.U).match cdef _parseObjectPathString(path): """Parse object path string into a 'hrefOnameOhrefOnameOOO' string and an index list. The index list is None if no index was used in the path. """ - cdef int has_index cdef int has_dot new_path = [] - indexes = [] - has_index = 0 path = cetree.utf8(path.strip()) - for dot, ns, name, index in __SPLIT_PATH(path): - if index is not None: - if python.PyString_GET_SIZE(index) == 0 or index == '0': - index = None - else: - index = python.PyNumber_Int(index) - has_index = 1 + path_pos = 0 + while python.PyString_GET_SIZE(path) > 0: + match = __MATCH_PATH_SEGMENT(path, path_pos) + if match is None: + break + + dot, ns, name, index = match.groups() + if index is None or python.PyString_GET_SIZE(index) == 0: + index = 0 + else: + index = python.PyNumber_Int(index) has_dot = _cstr(dot)[0] == c'.' 
if python.PyList_GET_SIZE(new_path) == 0: if has_dot: - # == path '.child' => ignore root - python.PyList_Append(new_path, "\0\0") - python.PyList_Append(indexes, None) - elif index is not None: + # path '.child' => ignore root + python.PyList_Append(new_path, (None, None, 0)) + elif index != 0: raise ValueError, "index not allowed on root node" elif not has_dot: raise ValueError, "invalid path" - python.PyList_Append(new_path, "%s\0%s\0" % (ns, name)) - python.PyList_Append(indexes, index) - if python.PyList_GET_SIZE(new_path) == 0: + python.PyList_Append(new_path, (ns, name, index)) + + path_pos = match.end() + if python.PyList_GET_SIZE(new_path) == 0 or \ + python.PyString_GET_SIZE(path) > path_pos: raise ValueError, "invalid path" - python.PyList_Append(new_path, "\0\0") - if not has_index: - indexes = None - return ''.join(new_path), indexes + return new_path cdef _parseObjectPathList(path): """Parse object path sequence into a 'hrefOnameOhrefOnameOOO' string and @@ -1122,24 +1131,20 @@ cdef char* index_pos cdef char* index_end cdef char* c_name - cdef int has_index new_path = [] - indexes = [] - has_index = 0 for item in path: item = item.strip() if python.PyList_GET_SIZE(new_path) == 0 and item == '': - entry = "\0\0" # == path '.child' => ignore root - index = None + # path '.child' => ignore root + ns = name = None + index = 0 else: ns, name = cetree.getNsTag(item) c_name = _cstr(name) index_pos = cstd.strchr(c_name, c'[') if index_pos is NULL: - index = None + index = 0 else: - if python.PyList_GET_SIZE(new_path) == 0: - raise ValueError, "index not allowed on root node" name = python.PyString_FromStringAndSize( c_name, (index_pos - c_name)) index_pos = index_pos + 1 @@ -1149,79 +1154,81 @@ index = python.PyNumber_Int( python.PyString_FromStringAndSize( index_pos, (index_end - index_pos))) - if index == 0: - index = None - else: - has_index = 1 - if ns is None: - entry = "\0%s\0" % name - else: - entry = "%s\0%s\0" % (ns, name) - 
python.PyList_Append(new_path, entry) - python.PyList_Append(indexes, index) + if python.PyList_GET_SIZE(new_path) == 0 and index != 0: + raise ValueError, "index not allowed on root node" + python.PyList_Append(new_path, (ns, name, index)) if python.PyList_GET_SIZE(new_path) == 0 or \ - (python.PyList_GET_SIZE(new_path) == 1 and new_path[0] == '\0\0'): + (python.PyList_GET_SIZE(new_path) == 1 and \ + new_path[0] == (None, None, 0)): raise ValueError, "invalid path" - python.PyList_Append(new_path, "\0\0") - if not has_index: - indexes = None - return ''.join(new_path), indexes + return new_path -cdef _findObjectPath(_Element root, char* c_path, index_list, +cdef _ObjectPath* _buildObjectPathSegments(path_list) except NULL: + cdef _ObjectPath* c_path + cdef _ObjectPath* c_path_segments + cdef Py_ssize_t c_len + c_len = python.PyList_GET_SIZE(path_list) + c_path_segments = <_ObjectPath*>python.PyMem_Malloc(sizeof(_ObjectPath) * + c_len) + if c_path_segments is NULL: + PyErr_NoMemory() + return NULL + c_path = c_path_segments + for href, name, index in path_list: + if href is None: + c_path[0].href = NULL + else: + c_path[0].href = _cstr(href) + if name is None: + c_path[0].name = NULL + else: + c_path[0].name = _cstr(name) + c_path[0].index = index + c_path = c_path + 1 + return c_path_segments + +cdef _findObjectPath(_Element root, _ObjectPath* c_path, Py_ssize_t c_path_len, default_value, int use_default): """Follow the path to find the target element. 
""" cdef tree.xmlNode* c_node cdef char* c_href - cdef Py_ssize_t c_index_pos, c_index + cdef char* c_name + cdef Py_ssize_t c_index c_node = root._c_node - if c_path[0] != c'\0': - c_href = c_path - while c_path[0] != c'\0': - c_path = c_path + 1 - else: + c_name = c_path[0].name + c_href = c_path[0].href + if c_href is NULL or c_href[0] == c'\0': c_href = tree._getNs(c_node) - c_path = c_path + 1 - if c_path[0] != c'\0' and not cetree.tagMatches(c_node, c_href, c_path): + if not cetree.tagMatches(c_node, c_href, c_name): raise ValueError, "root element does not match: need %s, got %s" % \ - (cetree.namespacedNameFromNsName(c_href, c_path), root.tag) + (cetree.namespacedNameFromNsName(c_href, c_name), root.tag) - if index_list is not None: - c_index_pos = 1 while c_node is not NULL: - while c_path[0] != c'\0': - c_path = c_path + 1 - c_path = c_path + 1 - if c_path[0] != c'\0': - c_href = c_path - while c_path[0] != c'\0': - c_path = c_path + 1 - elif c_path[1] == c'\0': - # '\0\0' found, all done + c_path_len = c_path_len - 1 + if c_path_len <= 0: return cetree.elementFactory(root._doc, c_node) - c_path = c_path + 1 - c_index = 0 - if index_list is not None: - index = python.PyList_GET_ITEM(index_list, c_index_pos) - python.Py_INCREF(index) - c_index_pos = c_index_pos + 1 - if index is not None: - c_index = python.PyInt_AsSsize_t(index) + c_path = c_path + 1 + if c_path[0].href is not NULL: + c_href = c_path[0].href # otherwise: keep parent namespace + c_name = c_path[0].name + c_index = c_path[0].index if c_index < 0: c_node = c_node.last else: c_node = c_node.children - c_node = _findFollowingSibling(c_node, c_href, c_path, c_index) + c_node = _findFollowingSibling(c_node, c_href, c_name, c_index) if use_default: return default_value else: - tag = cetree.namespacedNameFromNsName(c_href, c_path) + tag = cetree.namespacedNameFromNsName(c_href, c_name) raise AttributeError, "no such child: " + tag -cdef _createObjectPath(_Element root, char* c_path, index_list, 
value): +cdef _createObjectPath(_Element root, _ObjectPath* c_path, + Py_ssize_t c_path_len, value): """Follow the path to find the target element, build the missing children as needed and replace the target element by 'value'. """ @@ -1229,49 +1236,35 @@ cdef tree.xmlNode* c_node cdef tree.xmlNode* c_child cdef char* c_href - cdef Py_ssize_t c_index_pos, c_index + cdef char* c_name + cdef Py_ssize_t c_index c_node = root._c_node - if c_path[0] != c'\0': - c_href = c_path - while c_path[0] != c'\0': - c_path = c_path + 1 - else: + c_name = c_path[0].name + c_href = c_path[0].href + if c_href is NULL or c_href[0] == c'\0': c_href = tree._getNs(c_node) - c_path = c_path + 1 - if c_path[0] != c'\0' and not cetree.tagMatches(c_node, c_href, c_path): + if not cetree.tagMatches(c_node, c_href, c_name): raise ValueError, "root element does not match: need %s, got %s" % \ - (cetree.namespacedNameFromNsName(c_href, c_path), root.tag) + (cetree.namespacedNameFromNsName(c_href, c_name), root.tag) - if index_list is not None: - c_index_pos = 1 while c_node is not NULL: - while c_path[0] != c'\0': - c_path = c_path + 1 - c_path = c_path + 1 - if c_path[0] != c'\0': - c_href = c_path - while c_path[0] != c'\0': - c_path = c_path + 1 - elif c_path[1] == c'\0': - # '\0\0' found => done, all children were there + c_path_len = c_path_len - 1 + if c_path_len <= 0: element = cetree.elementFactory(root._doc, c_node) _replaceElement(element, value) return - c_path = c_path + 1 - c_index = 0 - if index_list is not None: - index = python.PyList_GET_ITEM(index_list, c_index_pos) - python.Py_INCREF(index) - c_index_pos = c_index_pos + 1 - if index is not None: - c_index = python.PyInt_AsSsize_t(index) + c_path = c_path + 1 + if c_path[0].href is not NULL: + c_href = c_path[0].href # otherwise: keep parent namespace + c_name = c_path[0].name + c_index = c_path[0].index if c_index < 0: c_child = c_node.last else: c_child = c_node.children - c_child = _findFollowingSibling(c_child, c_href, 
c_path, c_index) + c_child = _findFollowingSibling(c_child, c_href, c_name, c_index) if c_child is not NULL: c_node = c_child elif c_index > 0: @@ -1280,7 +1273,7 @@ else: child = SubElement( cetree.elementFactory(root._doc, c_node), - cetree.namespacedNameFromNsName(c_href, c_path)) + cetree.namespacedNameFromNsName(c_href, c_name)) c_node = child._c_node cdef _buildDescendantPaths(tree.xmlNode* c_node, prefix_string): @@ -1316,6 +1309,9 @@ return if c_href is tree._getNs(c_child): tag = c_child.name + elif c_href is not NULL and tree._getNs(c_child) is NULL: + # special case: parent has namespace, child does not + tag = '{}' + c_child.name else: tag = cetree.namespacedName(c_child) dict_result = python.PyDict_GetItem(tags, tag) Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Tue Aug 8 15:51:28 2006 @@ -16,14 +16,15 @@ from lxml import objectify xml_str = '''\ - - - 0 - 1 - 2 + + + 0 + 1 + 2 3 - -''' + 3 + +''' class ObjectifyTestCase(HelperTestCase): """Test cases for lxml.elementlib.objectify @@ -600,7 +601,7 @@ ['{objectified}root', '{objectified}root.c1', '{objectified}root.c1.c2', '{objectified}root.c1.c2[1]', '{objectified}root.c1.c2[2]', - '{objectified}root.c1.{otherNS}c2'], + '{objectified}root.c1.{otherNS}c2', '{objectified}root.c1.{}c2'], root.descendantpaths()) def test_descendant_paths_child(self): @@ -608,7 +609,7 @@ self.assertEquals( ['{objectified}c1', '{objectified}c1.c2', '{objectified}c1.c2[1]', '{objectified}c1.c2[2]', - '{objectified}c1.{otherNS}c2'], + '{objectified}c1.{otherNS}c2', '{objectified}c1.{}c2'], root.c1.descendantpaths()) def test_descendant_paths_prefix(self): @@ -616,7 +617,8 @@ self.assertEquals( ['root.{objectified}c1', 'root.{objectified}c1.c2', 'root.{objectified}c1.c2[1]', 'root.{objectified}c1.c2[2]', - 
'root.{objectified}c1.{otherNS}c2'], + 'root.{objectified}c1.{otherNS}c2', + 'root.{objectified}c1.{}c2'], root.c1.descendantpaths('root')) From scoder at codespeak.net Tue Aug 8 15:52:50 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 8 Aug 2006 15:52:50 +0200 (CEST) Subject: [Lxml-checkins] r31164 - in lxml/branch/capi/src/lxml: . tests Message-ID: <20060808135250.B9C8910069@code0.codespeak.net> Author: scoder Date: Tue Aug 8 15:52:49 2006 New Revision: 31164 Modified: lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: ObjectifiedElement.addattr() Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Aug 8 15:52:49 2006 @@ -276,6 +276,10 @@ child = _lookupChild(self, tag) self.remove(child) + def addattr(self, tag, value): + element = SubElement(self, _buildChildTag(self, tag)) + _setElementValue(element, value) + def __getitem__(self, key): """Return a sibling, counting from the first child of the parent. 
Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Tue Aug 8 15:52:49 2006 @@ -59,6 +59,20 @@ self.assertRaises(AttributeError, getattr, root.c1, "NOT_THERE") self.assertRaises(AttributeError, getattr, root.c1, "{unknownNS}c2") + def test_addattr(self): + root = self.etree.XML(xml_str) + self.assertEquals(1, len(root.c1)) + root.addattr("c1", "test") + self.assertEquals(2, len(root.c1)) + self.assertEquals("test", root.c1[1].text) + + def test_child_addattr(self): + root = self.etree.XML(xml_str) + self.assertEquals(3, len(root.c1.c2)) + root.c1.addattr("c2", 3) + self.assertEquals(4, len(root.c1.c2)) + self.assertEquals("3", root.c1.c2[3].text) + def test_child_index(self): root = self.etree.XML(xml_str) self.assertEquals("0", root.c1.c2[0].text) From scoder at codespeak.net Tue Aug 8 16:50:07 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 8 Aug 2006 16:50:07 +0200 (CEST) Subject: [Lxml-checkins] r31167 - in lxml/branch/capi: doc src/lxml src/lxml/tests Message-ID: <20060808145007.D2F0610063@code0.codespeak.net> Author: scoder Date: Tue Aug 8 16:50:05 2006 New Revision: 31167 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: ObjectPath.addattr() to append a value instead of replacing the current one Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Tue Aug 8 16:50:05 2006 @@ -412,6 +412,14 @@ >>> print root.some.child["{other}unknown"].text my value + >>> print len( path.find(root) ) + 1 + >>> path.addattr(root, "my new value") + >>> print len( path.find(root) ) + 2 + >>> [ el.text for el 
in path.find(root) ] + ['my value', 'my new value'] + As with attribute assignment, ``setattr()`` accepts lists: >>> path.setattr(root, ["v1", "v2", "v3"]) Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Aug 8 16:50:05 2006 @@ -1086,7 +1086,14 @@ If any of the children on the path does not exist, it is created. """ - _createObjectPath(root, self._c_path, self._path_len, value) + _createObjectPath(root, self._c_path, self._path_len, 1, value) + + def addattr(self, _Element root not None, value): + """Append a value to the target element in a subtree. + + If any of the children on the path does not exist, it is created. + """ + _createObjectPath(root, self._c_path, self._path_len, 0, value) cdef object __MATCH_PATH_SEGMENT __MATCH_PATH_SEGMENT = re.compile( @@ -1232,9 +1239,10 @@ raise AttributeError, "no such child: " + tag cdef _createObjectPath(_Element root, _ObjectPath* c_path, - Py_ssize_t c_path_len, value): + Py_ssize_t c_path_len, int replace, value): """Follow the path to find the target element, build the missing children - as needed and replace the target element by 'value'. + as needed and set the target element to 'value'. If replace is true, an + existing value is replaced, otherwise the new value is added. 
""" cdef _Element child cdef tree.xmlNode* c_node @@ -1242,6 +1250,9 @@ cdef char* c_href cdef char* c_name cdef Py_ssize_t c_index + if c_path_len == 1: + raise TypeError, "cannot update root node" + c_node = root._c_node c_name = c_path[0].name c_href = c_path[0].href @@ -1251,13 +1262,8 @@ raise ValueError, "root element does not match: need %s, got %s" % \ (cetree.namespacedNameFromNsName(c_href, c_name), root.tag) - while c_node is not NULL: + while c_path_len > 1: c_path_len = c_path_len - 1 - if c_path_len <= 0: - element = cetree.elementFactory(root._doc, c_node) - _replaceElement(element, value) - return - c_path = c_path + 1 if c_path[0].href is not NULL: c_href = c_path[0].href # otherwise: keep parent namespace @@ -1269,17 +1275,31 @@ else: c_child = c_node.children c_child = _findFollowingSibling(c_child, c_href, c_name, c_index) + if c_child is not NULL: c_node = c_child - elif c_index > 0: + elif c_index != 0: raise TypeError, \ "creating indexed path attributes is not supported" else: child = SubElement( cetree.elementFactory(root._doc, c_node), cetree.namespacedNameFromNsName(c_href, c_name)) + if c_path_len == 1: + _setElementValue(child, value) + return c_node = child._c_node + # if we get here, the entire path was already there + if replace: + element = cetree.elementFactory(root._doc, c_node) + _replaceElement(element, value) + else: + element = SubElement( + cetree.elementFactory(root._doc, c_node.parent), + cetree.namespacedName(c_node)) + _setElementValue(element, value) + cdef _buildDescendantPaths(tree.xmlNode* c_node, prefix_string): """Returns a list of all descendant paths. 
""" Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Tue Aug 8 16:50:05 2006 @@ -600,13 +600,35 @@ def test_object_path_set_create(self): root = self.etree.XML(xml_str) - path = objectify.ObjectPath( "root.c1.c99.c126.honk" ) + path = objectify.ObjectPath( "root.c1.c99" ) self.assertRaises(AttributeError, path.find, root) new_value = "my new value" path.setattr(root, new_value) - self.assertEquals(new_value, root.c1.c99.c126.honk) + self.assertEquals(1, len(root.c1.c99)) + self.assertEquals(new_value, root.c1.c99.text) + self.assertEquals(new_value, path(root).text) + + def test_object_path_addattr(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( "root.c1.c2" ) + self.assertEquals(3, len(root.c1.c2)) + path.addattr(root, "test") + self.assertEquals(4, len(root.c1.c2)) + self.assertEquals(["0", "1", "2", "test"], + [el.text for el in root.c1.c2]) + + def test_object_path_addattr_create(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( "root.c1.c99" ) + self.assertRaises(AttributeError, path.find, root) + + new_value = "my new value" + path.addattr(root, new_value) + + self.assertEquals(1, len(root.c1.c99)) + self.assertEquals(new_value, root.c1.c99.text) self.assertEquals(new_value, path(root).text) def test_descendant_paths(self): From scoder at codespeak.net Tue Aug 8 16:50:51 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 8 Aug 2006 16:50:51 +0200 (CEST) Subject: [Lxml-checkins] r31168 - lxml/branch/capi/src/lxml Message-ID: <20060808145051.4632710071@code0.codespeak.net> Author: scoder Date: Tue Aug 8 16:50:50 2006 New Revision: 31168 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: prevent StringElement.pyval from returning None Modified: lxml/branch/capi/src/lxml/objectify.pyx 
============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Aug 8 16:50:50 2006 @@ -622,7 +622,7 @@ """ property pyval: def __get__(self): - return textOf(self._c_node) + return textOf(self._c_node) or '' def strlen(self): text = textOf(self._c_node) From scoder at codespeak.net Tue Aug 8 16:53:33 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 8 Aug 2006 16:53:33 +0200 (CEST) Subject: [Lxml-checkins] r31169 - lxml/branch/capi/src/lxml Message-ID: <20060808145333.41D8210069@code0.codespeak.net> Author: scoder Date: Tue Aug 8 16:53:31 2006 New Revision: 31169 Modified: lxml/branch/capi/src/lxml/apihelpers.pxi Log: build fix Modified: lxml/branch/capi/src/lxml/apihelpers.pxi ============================================================================== --- lxml/branch/capi/src/lxml/apihelpers.pxi (original) +++ lxml/branch/capi/src/lxml/apihelpers.pxi Tue Aug 8 16:53:31 2006 @@ -384,7 +384,7 @@ else: c_node_href = _getNs(c_node) if c_node_href is NULL: - return c_href[0] == '\0' + return c_href[0] == c'\0' else: return cstd.strcmp(c_node_href, c_href) == 0 elif c_href is NULL: @@ -394,7 +394,7 @@ elif cstd.strcmp(c_node.name, c_name) == 0: c_node_href = _getNs(c_node) if c_node_href is NULL: - return c_href[0] == '\0' + return c_href[0] == c'\0' else: return cstd.strcmp(c_node_href, c_href) == 0 else: From scoder at codespeak.net Tue Aug 8 17:06:55 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 8 Aug 2006 17:06:55 +0200 (CEST) Subject: [Lxml-checkins] r31170 - lxml/branch/capi/src/lxml Message-ID: <20060808150655.EEB7710068@code0.codespeak.net> Author: scoder Date: Tue Aug 8 17:06:54 2006 New Revision: 31170 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: cleanup Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== 
--- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Aug 8 17:06:54 2006 @@ -1000,7 +1000,7 @@ if dict_result is not NULL: return (dict_result)._type raise ValueError, "Invalid pytype attribute in element '%s'" % \ - cetree.namespacedNameFromNsName(tree._getNs(c_node), c_node.name) + cetree.namespacedName(c_node) # check for XML Schema type hint value = cetree.attributeValueFromNsName( From scoder at codespeak.net Tue Aug 8 17:32:22 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 8 Aug 2006 17:32:22 +0200 (CEST) Subject: [Lxml-checkins] r31172 - in lxml/branch/capi: doc src/lxml src/lxml/tests Message-ID: <20060808153222.AD10010069@code0.codespeak.net> Author: scoder Date: Tue Aug 8 17:32:21 2006 New Revision: 31172 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: require external data types to inherit from ObjectifiedDataElement Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Tue Aug 8 17:32:21 2006 @@ -655,15 +655,16 @@ Defining additional data classes -------------------------------- -Data classes can either inherit from ``ObjectifiedElement`` directly or from -one of the specialised classes like ``NumberElement`` or ``BoolElement``. The -numeric types require an initial call to ``self._setValueParser(function)`` to -set the type conversion funtion (string -> Python type). This call should be -placed into the element ``_init()`` method. +Data classes can either inherit from ``ObjectifiedDataElement`` directly or +from one of the specialised classes like ``NumberElement`` or ``BoolElement``. 
+The numeric types require an initial call to the NumberElement method +``self._setValueParser(function)`` to set their type conversion funtion +(string -> numeric Python type). This call should be placed into the element +``_init()`` method. The registration of data classes uses the ``PyType`` class:: - >>> class ChristmasDate(objectify.ObjectifiedElement): + >>> class ChristmasDate(objectify.ObjectifiedDataElement): ... def callSanta(self): ... print "Ho ho ho!" Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Aug 8 17:32:21 2006 @@ -513,7 +513,15 @@ ################################################################################ # Data type support in subclasses -cdef class NumberElement(ObjectifiedElement): +cdef class ObjectifiedDataElement(ObjectifiedElement): + property pyval: + def __get__(self): + return textOf(self._c_node) + + def __str__(self): + return textOf(self._c_node) or '' + +cdef class NumberElement(ObjectifiedDataElement): cdef object _type def _setValueParser(self, function): "Set the function that parses the Python value from a string." @@ -613,7 +621,7 @@ def _init(self): self._type = float -cdef class StringElement(ObjectifiedElement): +cdef class StringElement(ObjectifiedDataElement): """String data class. 
Note that this class does *not* support the sequence protocol of strings: @@ -643,9 +651,6 @@ return python.PyObject_RichCompare( _strValueOf(self), other, op) - def __str__(self): - return textOf(self._c_node) or '' - def __add__(self, other): text = _strValueOf(self) other = _strValueOf(other) @@ -671,7 +676,7 @@ other = _strValueOf(other) return _strValueOf(self) % other -cdef class NoneElement(ObjectifiedElement): +cdef class NoneElement(ObjectifiedDataElement): def __str__(self): return "None" @@ -690,7 +695,7 @@ def __get__(self): return None -cdef class BoolElement(ObjectifiedElement): +cdef class BoolElement(ObjectifiedDataElement): """Boolean type base on string values: 'true' or 'false'. """ cdef int _boolval(self) except -1: @@ -759,9 +764,10 @@ """User defined type. Named type that contains a type check function and a type class that - inherits from ObjectifiedElement. The type check must take a string as - argument and raise a ValueError if it cannot handle the string value. It - may be None in which case it is not considered for type guessing. + inherits from ObjectifiedDataElement. The type check must take a string + as argument and raise ValueError or TypeError if it cannot handle the + string value. It may be None in which case it is not considered for type + guessing. 
Example: PyType('int', int, MyIntClass).register() @@ -778,8 +784,9 @@ raise TypeError, "Type name must be a string" if type_check is not None and not callable(type_check): raise TypeError, "Type check function must be callable (or None)" - if not issubclass(type_class, ObjectifiedElement): - raise TypeError, "Type class must inherit from ObjectifiedElement" + if not issubclass(type_class, ObjectifiedDataElement): + raise TypeError, \ + "Data classes must inherit from ObjectifiedDataElement" self.name = name self._type = type_class self.type_check = type_check Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Tue Aug 8 17:32:21 2006 @@ -399,7 +399,7 @@ orig_types[0].unregister() self.assertEquals(orig_types[1:], objectify.getRegisteredTypes()) - class NewType(objectify.ObjectifiedElement): + class NewType(objectify.ObjectifiedDataElement): pass def checkMyType(s): From scoder at codespeak.net Tue Aug 8 17:53:22 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 8 Aug 2006 17:53:22 +0200 (CEST) Subject: [Lxml-checkins] r31175 - lxml/branch/capi/src/lxml Message-ID: <20060808155322.26CD01006E@code0.codespeak.net> Author: scoder Date: Tue Aug 8 17:53:19 2006 New Revision: 31175 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: use ObjectifiedElement if node has no parent Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Aug 8 17:53:19 2006 @@ -991,7 +991,7 @@ cdef object _lookupElementClass(state, _Document doc, tree.xmlNode* c_node): cdef python.PyObject* dict_result # if element has children => no data class - if cetree.findChildForwards(c_node, 0): + if 
c_node.parent is NULL or cetree.findChildForwards(c_node, 0) is not NULL: return ObjectifiedElement # if element is defined as xsi:nil, return NoneElement class From scoder at codespeak.net Tue Aug 8 18:07:14 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 8 Aug 2006 18:07:14 +0200 (CEST) Subject: [Lxml-checkins] r31176 - lxml/branch/lxml-1.0 Message-ID: <20060808160714.B31CA1006E@code0.codespeak.net> Author: scoder Date: Tue Aug 8 18:07:13 2006 New Revision: 31176 Modified: lxml/branch/lxml-1.0/CHANGES.txt lxml/branch/lxml-1.0/version.txt Log: prepare release of 1.0.3 Modified: lxml/branch/lxml-1.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.0/CHANGES.txt (original) +++ lxml/branch/lxml-1.0/CHANGES.txt Tue Aug 8 18:07:13 2006 @@ -14,6 +14,8 @@ Bugs fixed ---------- +* Crash when mixing elements from XSLT results into other trees + * Copying/deepcopying did not work for ElementTree objects * Setting an attribute to a non-string value did not raise an exception Modified: lxml/branch/lxml-1.0/version.txt ============================================================================== --- lxml/branch/lxml-1.0/version.txt (original) +++ lxml/branch/lxml-1.0/version.txt Tue Aug 8 18:07:13 2006 @@ -1 +1 @@ -1.0.2 +1.0.3 From scoder at codespeak.net Tue Aug 8 18:10:43 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 8 Aug 2006 18:10:43 +0200 (CEST) Subject: [Lxml-checkins] r31177 - in lxml/branch/lxml-1.0: . 
doc Message-ID: <20060808161043.9AB3210069@code0.codespeak.net> Author: scoder Date: Tue Aug 8 18:10:42 2006 New Revision: 31177 Modified: lxml/branch/lxml-1.0/CHANGES.txt lxml/branch/lxml-1.0/doc/main.txt Log: prepare release of 1.0.3 Modified: lxml/branch/lxml-1.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.0/CHANGES.txt (original) +++ lxml/branch/lxml-1.0/CHANGES.txt Tue Aug 8 18:10:42 2006 @@ -2,9 +2,8 @@ lxml changelog ============== -======= -current -======= +1.0.3 (2006-08-08) +================== Features added -------------- Modified: lxml/branch/lxml-1.0/doc/main.txt ============================================================================== --- lxml/branch/lxml-1.0/doc/main.txt (original) +++ lxml/branch/lxml-1.0/doc/main.txt Tue Aug 8 18:10:42 2006 @@ -29,6 +29,8 @@ .. _`installation instructions`: installation.html +* `lxml 1.0.3`_, released 2006-08-08 (`changes for 1.0.3`_) + * `lxml 1.0.2`_, released 2006-06-27 (`changes for 1.0.2`_) * `lxml 1.0.1`_, released 2006-06-09 (`changes for 1.0.1`_) @@ -51,6 +53,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 1.0.3`: lxml-1.0.3.tgz .. _`lxml 1.0.2`: lxml-1.0.2.tgz .. _`lxml 1.0.1`: lxml-1.0.1.tgz .. _`lxml 1.0`: lxml-1.0.tgz @@ -63,6 +66,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`CHANGES for 1.0.3`: changes-1.0.3.html .. _`CHANGES for 1.0.2`: changes-1.0.2.html .. _`CHANGES for 1.0.1`: changes-1.0.1.html .. _`CHANGES for 1.0`: changes-1.0.html From scoder at codespeak.net Tue Aug 8 19:57:02 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 8 Aug 2006 19:57:02 +0200 (CEST) Subject: [Lxml-checkins] r31181 - in lxml/trunk: . 
benchmark doc src/lxml src/lxml/tests Message-ID: <20060808175702.74A5510069@code0.codespeak.net> Author: scoder Date: Tue Aug 8 19:56:33 2006 New Revision: 31181 Added: lxml/trunk/benchmark/ - copied from r31177, lxml/branch/capi/benchmark/ lxml/trunk/benchmark/bench_etree.py - copied unchanged from r31177, lxml/branch/capi/benchmark/bench_etree.py lxml/trunk/benchmark/bench_xpath.py - copied unchanged from r31177, lxml/branch/capi/benchmark/bench_xpath.py lxml/trunk/benchmark/bench_xslt.py - copied unchanged from r31177, lxml/branch/capi/benchmark/bench_xslt.py lxml/trunk/benchmark/benchbase.py - copied unchanged from r31177, lxml/branch/capi/benchmark/benchbase.py lxml/trunk/doc/capi.txt lxml/trunk/doc/element_classes.txt - copied unchanged from r31177, lxml/branch/capi/doc/element_classes.txt lxml/trunk/doc/objectify.txt - copied unchanged from r31177, lxml/branch/capi/doc/objectify.txt lxml/trunk/src/lxml/classlookup.pxi - copied unchanged from r31177, lxml/branch/capi/src/lxml/classlookup.pxi lxml/trunk/src/lxml/etreepublic.pxd - copied unchanged from r31177, lxml/branch/capi/src/lxml/etreepublic.pxd lxml/trunk/src/lxml/objectify.pyx - copied unchanged from r31177, lxml/branch/capi/src/lxml/objectify.pyx lxml/trunk/src/lxml/public-api.pxi - copied unchanged from r31177, lxml/branch/capi/src/lxml/public-api.pxi lxml/trunk/src/lxml/tests/test_classlookup.py - copied unchanged from r31177, lxml/branch/capi/src/lxml/tests/test_classlookup.py lxml/trunk/src/lxml/tests/test_objectify.py - copied unchanged from r31177, lxml/branch/capi/src/lxml/tests/test_objectify.py Removed: lxml/trunk/bench.py lxml/trunk/doc/namespace_extensions.txt Modified: lxml/trunk/MANIFEST.in lxml/trunk/doc/FAQ.txt lxml/trunk/doc/api.txt lxml/trunk/doc/compatibility.txt lxml/trunk/doc/main.txt lxml/trunk/doc/mkhtml.py lxml/trunk/setup.py lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/etree_defs.h lxml/trunk/src/lxml/htmlparser.pxd 
lxml/trunk/src/lxml/nsclasses.pxi lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/relaxng.pxi lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_etree.py lxml/trunk/src/lxml/tests/test_htmlparser.py lxml/trunk/src/lxml/tests/test_nsclasses.py lxml/trunk/src/lxml/tree.pxd lxml/trunk/src/lxml/xmlparser.pxd lxml/trunk/src/lxml/xmlschema.pxi lxml/trunk/src/lxml/xpath.pxd Log: merged CAPI branch: C-API, objectify, classlookup, etc. Modified: lxml/trunk/MANIFEST.in ============================================================================== --- lxml/trunk/MANIFEST.in (original) +++ lxml/trunk/MANIFEST.in Tue Aug 8 19:56:33 2006 @@ -3,8 +3,9 @@ include update-error-constants.py include MANIFEST.in version.txt include CHANGES.txt CREDITS.txt INSTALL.txt LICENSES.txt README.txt TODO.txt -recursive-include src *.pyx *.pxd *.pxi *.py etree.c etree_defs.h +recursive-include src *.pyx *.pxd *.pxi *.py etree.c etree.h etree_defs.h recursive-include src/lxml/tests *.rng *.xslt *.xml +recursive-include benchmark *.py recursive-include doc *.txt *.html *.css *.xml *.mgp pubkey.asc recursive-include doc mkhtml.py rest2html.py -exclude doc/pyrex.txt +exclude doc/pyrex.txt src/lxml/etree.pxi Deleted: /lxml/trunk/bench.py ============================================================================== --- /lxml/trunk/bench.py Tue Aug 8 19:56:33 2006 +++ (empty file) @@ -1,769 +0,0 @@ -import sys, string, time, copy, gc -from itertools import * -from StringIO import StringIO - -TREE_FACTOR = 1 # increase tree size with '-l / '-L' cmd option - -_TEXT = "some ASCII text" * TREE_FACTOR -_UTEXT = u"some klingon: \F8D2" * TREE_FACTOR -_ATTRIBUTES = { - '{attr}test1' : _TEXT, - '{attr}test2' : _TEXT, - 'bla1' : _TEXT, - 'bla2' : _TEXT, - 'bla3' : _TEXT - } - -def with_attributes(*use_attributes): - "Decorator for benchmarks that use attributes" - vmap = {False : 0, True : 1} - values = [ vmap[bool(v)] for v in use_attributes ] - def 
set_value(function): - try: - function.ATTRIBUTES.update(values) - except AttributeError: - function.ATTRIBUTES = set(values) - return function - return set_value - -def with_text(no_text=False, text=False, utext=False): - "Decorator for benchmarks that use text" - values = [] - if no_text: - values.append(0) - if text: - values.append(1) - if utext: - values.append(2) - def set_value(function): - try: - function.TEXT.add(values) - except AttributeError: - function.TEXT = set(values) - return function - return set_value - -def onlylib(*libs): - "Decorator to restrict benchmarks to specific libraries" - def set_libs(function): - if libs: - function.LIBS = libs - return function - return set_libs - -def serialized(function): - "Decorator for benchmarks that require serialized XML data" - function.STRING = True - return function - -class SkippedTest(Exception): - pass - -class BenchMarkBase(object): - atoz = string.ascii_lowercase - - _LIB_NAME_MAP = { - 'etree' : 'lxe', - 'ElementTree' : 'ET', - 'cElementTree' : 'cET' - } - - SEARCH_TAG = "{cdefg}a00001" - - def __init__(self, etree): - self.etree = etree - libname = etree.__name__.split('.')[-1] - self.lib_name = self._LIB_NAME_MAP.get(libname, libname) - - if libname == 'etree': - deepcopy = copy.deepcopy - def set_property(root, fname): - setattr(self, fname, lambda : deepcopy(root)) - xml = self._serialize_tree(root) - setattr(self, fname + '_xml', lambda : xml) - else: - def set_property(root, fname): - setattr(self, fname, self.et_make_clone_factory(root)) - xml = self._serialize_tree(root) - setattr(self, fname + '_xml', lambda : xml) - - attribute_list = list(izip(count(), ({}, _ATTRIBUTES))) - text_list = list(izip(count(), (None, _TEXT, _UTEXT))) - build_name = self._tree_builder_name - - self.setup_times = [] - for tree in self._all_trees(): - times = [] - self.setup_times.append(times) - setup = getattr(self, '_setup_tree%d' % tree) - for an, attributes in attribute_list: - for tn, text in text_list: - 
root, t = setup(text, attributes) - times.append(t) - set_property(root, build_name(tree, tn, an)) - - def _tree_builder_name(self, tree, tn, an): - return '_root%d_T%d_A%d' % (tree, tn, an) - - def tree_builder(self, tree, tn, an, serial): - name = self._tree_builder_name(tree, tn, an) - if serial: - name += '_xml' - return getattr(self, name) - - def _serialize_tree(self, root): - return self.etree.tostring(root, 'UTF-8') - - def et_make_clone_factory(self, elem): - def generate_elem(append, elem, level): - var = "e" + str(level) - arg = repr(elem.tag) - if elem.attrib: - arg += ", **%r" % elem.attrib - if level == 1: - append(" e1 = Element(%s)" % arg) - else: - append(" %s = SubElement(e%d, %s)" % (var, level-1, arg)) - if elem.text: - append(" %s.text = %r" % (var, elem.text)) - if elem.tail: - append(" %s.tail = %r" % (var, elem.tail)) - for e in elem: - generate_elem(append, e, level+1) - # generate code for a function that creates a tree - output = ["def element_factory():"] - generate_elem(output.append, elem, 1) - output.append(" return e1") - # setup global function namespace - namespace = { - "Element" : self.etree.Element, - "SubElement" : self.etree.SubElement - } - # create function object - exec "\n".join(output) in namespace - return namespace["element_factory"] - - def _all_trees(self): - all_trees = [] - for name in dir(self): - if name.startswith('_setup_tree'): - all_trees.append(int(name[11:])) - return all_trees - - def _setup_tree1(self, text, attributes): - "tree with 26 2nd level and 520 * TREE_FACTOR 3rd level children" - atoz = self.atoz - SubElement = self.etree.SubElement - current_time = time.time - t = current_time() - root = self.etree.Element('{abc}rootnode') - for ch1 in atoz: - el = SubElement(root, "{bcd}"+ch1*5, attributes) - el.text = text - for ch2 in atoz: - for i in range(20 * TREE_FACTOR): - SubElement(el, "{cdefg}%s%05d" % (ch2, i)) - t = current_time() - t - return (root, t) - - def _setup_tree2(self, text, attributes): 
- "tree with 520 * TREE_FACTOR 2nd level and 26 3rd level children" - atoz = self.atoz - SubElement = self.etree.SubElement - current_time = time.time - t = current_time() - root = self.etree.Element('{abc}rootnode') - for ch1 in atoz: - for i in range(20 * TREE_FACTOR): - el = SubElement(root, "{bcd}"+ch1*5, attributes) - el.text = text - for ch2 in atoz: - SubElement(el, "{cdefg}%s%05d" % (ch2, i)) - t = current_time() - t - return (root, t) - - def _setup_tree3(self, text, attributes): - "tree of depth 8 + TREE_FACTOR with 3 children per node" - SubElement = self.etree.SubElement - current_time = time.time - t = current_time() - root = self.etree.Element('{abc}rootnode') - children = [root] - for i in range(6 + TREE_FACTOR): - tag_no = count().next - children = [ SubElement(c, "{cdefg}a%05d" % i, attributes) - for i,c in enumerate(chain(children, children, children)) ] - for child in root: - child.text = text - t = current_time() - t - return (root, t) - - def _setup_tree4(self, text, attributes): - "small tree with 26 2nd level and 2 3rd level children" - SubElement = self.etree.SubElement - current_time = time.time - t = current_time() - root = self.etree.Element('{abc}rootnode') - children = [root] - for ch1 in self.atoz: - el = SubElement(root, "{bcd}"+ch1*5, attributes) - el.text = text - SubElement(el, "{cdefg}a00001", attributes) - SubElement(el, "{cdefg}a00002", attributes) - t = current_time() - t - return (root, t) - - def benchmarks(self): - """Returns a list of all benchmarks. - - A benchmark is a tuple containing a method name and a list of tree - numbers. Trees are prepared by the setup function. 
- """ - all_trees = self._all_trees() - benchmarks = [] - for name in dir(self): - if not name.startswith('bench_'): - continue - method = getattr(self, name) - if hasattr(method, 'LIBS') and self.lib_name not in method.LIBS: - method_call = None - else: - method_call = method - if method.__doc__: - tree_sets = method.__doc__.split() - else: - tree_sets = () - if tree_sets: - tree_tuples = [ map(int, tree_set.split(',')) - for tree_set in tree_sets ] - else: - try: - function = getattr(method, 'im_func', method) - arg_count = method.func_code.co_argcount - 1 - except AttributeError: - arg_count = 1 - tree_tuples = self._permutations(all_trees, arg_count) - - serialized = getattr(method, 'STRING', False) - - for tree_tuple in tree_tuples: - for tn in sorted(getattr(method, 'TEXT', (0,))): - for an in sorted(getattr(method, 'ATTRIBUTES', (0,))): - benchmarks.append((name, method_call, tree_tuple, - tn, an, serialized)) - - return benchmarks - - def _permutations(self, seq, count): - def _permutations(prefix, remainder, count): - if count == 0: - return [ prefix[:] ] - count -= 1 - perms = [] - prefix.append(None) - for pos, el in enumerate(remainder): - new_remainder = remainder[:pos] + remainder[pos+1:] - prefix[-1] = el - perms.extend( _permutations(prefix, new_remainder, count) ) - prefix.pop() - return perms - return _permutations([], seq, count) - - -############################################################ -# Benchmarks -############################################################ - -class BenchMark(BenchMarkBase): - def bench_iter_children(self, root): - for child in root: - pass - - def bench_iter_children_reversed(self, root): - for child in reversed(root): - pass - - @with_attributes(True, False) - @with_text(text=True, utext=True) - def bench_tostring_utf8(self, root): - self.etree.tostring(root, 'UTF-8') - - @with_attributes(True, False) - @with_text(text=True, utext=True) - def bench_tostring_utf16(self, root): - self.etree.tostring(root, 'UTF-16') - 
- @with_attributes(True, False) - @with_text(text=True, utext=True) - def bench_tostring_utf8_unicode_XML(self, root): - xml = unicode(self.etree.tostring(root, 'UTF-8'), 'UTF-8') - self.etree.XML(xml) - - @with_attributes(True, False) - @with_text(text=True, utext=True) - def bench_write_utf8_parse_stringIO(self, root): - f = StringIO() - self.etree.ElementTree(root).write(f, 'UTF-8') - f.seek(0) - self.etree.parse(f) - - @with_attributes(True, False) - @with_text(text=True, utext=True) - @serialized - def bench_parse_stringIO(self, root_xml): - f = StringIO(root_xml) - self.etree.parse(f) - - @with_attributes(True, False) - @with_text(text=True, utext=True) - @serialized - def bench_XML(self, root_xml): - self.etree.XML(root_xml) - - @with_attributes(True, False) - @with_text(text=True, utext=True) - @serialized - def bench_iterparse_stringIO(self, root_xml): - f = StringIO(root_xml) - for event, element in self.etree.iterparse(f): - pass - - @with_attributes(True, False) - @with_text(text=True, utext=True) - @serialized - def bench_iterparse_stringIO_clear(self, root_xml): - f = StringIO(root_xml) - for event, element in self.etree.iterparse(f): - element.clear() - - def bench_append_from_document(self, root1, root2): - # == "1,2 2,3 1,3 3,1 3,2 2,1" # trees 1 and 2, or 2 and 3, or ... 
- for el in root2: - root1.append(el) - - def bench_insert_from_document(self, root1, root2): - for el in root2: - root1.insert(len(root1)/2, el) - - def bench_rotate_children(self, root): - # == "1 2 3" # runs on any single tree independently - for i in range(100): - el = root[0] - del root[0] - root.append(el) - - def bench_reorder(self, root): - for i in range(1,len(root)/2): - el = root[0] - del root[0] - root[-i:-i] = [ el ] - - def bench_reorder_slice(self, root): - for i in range(1,len(root)/2): - els = root[0:1] - del root[0] - root[-i:-i] = els - - def bench_clear(self, root): - root.clear() - - def bench_has_children(self, root): - for child in root: - if child and child and child and child and child: - pass - - def bench_len(self, root): - for child in root: - map(len, repeat(child, 20)) - - def bench_create_subelements(self, root): - SubElement = self.etree.SubElement - for child in root: - SubElement(child, '{test}test') - - def bench_append_elements(self, root): - Element = self.etree.Element - for child in root: - el = Element('{test}test') - child.append(el) - - def bench_makeelement(self, root): - empty_attrib = {} - for child in root: - child.makeelement('{test}test', empty_attrib) - - def bench_create_elements(self, root): - Element = self.etree.Element - for child in root: - Element('{test}test') - - def bench_replace_children_element(self, root): - Element = self.etree.Element - for child in root: - el = Element('{test}test') - child[:] = [el] - - def bench_replace_children(self, root): - Element = self.etree.Element - for child in root: - child[:] = [ child[0] ] - - def bench_remove_children(self, root): - for child in root: - root.remove(child) - - def bench_remove_children_reversed(self, root): - for child in reversed(root[:]): - root.remove(child) - - def bench_set_attributes(self, root): - for child in root: - child.set('a', 'bla') - - @with_attributes(True) - def bench_get_attributes(self, root): - for child in root: - child.get('bla1') - 
child.get('{attr}test1') - - def bench_setget_attributes(self, root): - for child in root: - child.set('a', 'bla') - for child in root: - child.get('a') - - def bench_root_getchildren(self, root): - root.getchildren() - - def bench_getchildren(self, root): - for child in root: - child.getchildren() - - def bench_get_children_slice(self, root): - for child in root: - child[:] - - def bench_get_children_slice_2x(self, root): - for child in root: - children = child[:] - child[:] - - def bench_deepcopy(self, root): - for child in root: - copy.deepcopy(child) - - def bench_deepcopy_all(self, root): - copy.deepcopy(root) - - def bench_tag(self, root): - for child in root: - child.tag - - def bench_tag_repeat(self, root): - for child in root: - for i in repeat(0, 100): - child.tag - - @with_text(utext=True, text=True, no_text=True) - def bench_text(self, root): - for child in root: - child.text - - @with_text(utext=True, text=True, no_text=True) - def bench_text_repeat(self, root): - repeat = range(500) - for child in root: - for i in repeat: - child.text - - def bench_set_text(self, root): - text = _TEXT - for child in root: - child.text = text - - def bench_set_utext(self, root): - text = _UTEXT - for child in root: - child.text = text - - @onlylib('lxe') - def bench_index(self, root): - for child in root: - root.index(child) - - @onlylib('lxe') - def bench_index_slice(self, root): - for child in root[5:100]: - root.index(child, 5, 100) - - @onlylib('lxe') - def bench_index_slice_neg(self, root): - for child in root[-100:-5]: - root.index(child, start=-100, stop=-5) - - def bench_getiterator_all(self, root): - list(root.getiterator()) - - def bench_getiterator_islice(self, root): - list(islice(root.getiterator(), 10, 110)) - - def bench_getiterator_tag(self, root): - list(islice(root.getiterator(self.SEARCH_TAG), 3, 10)) - - def bench_getiterator_tag_all(self, root): - list(root.getiterator(self.SEARCH_TAG)) - - def bench_getiterator_tag_text(self, root): - [ e.text for 
e in root.getiterator(self.SEARCH_TAG) ] - - def bench_findall(self, root): - root.findall(".//*") - - def bench_findall_tag(self, root): - root.findall(".//" + self.SEARCH_TAG) - - @onlylib('lxe') - def bench_xpath_class(self, root): - xpath = self.etree.XPath("./*[0]") - for child in root: - xpath(child) - - @onlylib('lxe') - def bench_xpath_class_repeat(self, root): - for child in root: - xpath = self.etree.XPath("./*[0]") - xpath(child) - - @onlylib('lxe') - def bench_xpath_element(self, root): - xpath = self.etree.XPathElementEvaluator(root) - for child in root: - xpath.evaluate("./*[0]") - - @onlylib('lxe') - def bench_xpath_method(self, root): - for child in root: - child.xpath("./*[0]") - - @onlylib('lxe') - def bench_xpath_extensions_old(self, root): - def return_child(_, element): - if element: - return element[0] - else: - return () - extensions = {(None, 'child') : return_child} - xpath = self.etree.XPath("child(.)", extensions=extensions) - for child in root: - xpath(child) - - @onlylib('lxe') - def bench_xslt_extensions_old(self, root): - tree = self.etree.XML("""\ - - TEST - - - - - - - - -""") - def return_child(_, elements): - return elements[0][0] - - extensions = {('testns', 'child') : return_child} - - transform = self.etree.XSLT(tree, extensions) - for i in range(10): - transform(root) - - @onlylib('lxe') - def bench_xslt_document(self, root): - transform = self.etree.XSLT(self.etree.XML("""\ - - TEST - - - - - - - - -""")) - transform(root) - - -############################################################ -# Main program -############################################################ - -if __name__ == '__main__': - import_lxml = True - callgrind_zero = False - if len(sys.argv) > 1: - try: - sys.argv.remove('-i') - # run benchmark 'inplace' - sys.path.insert(0, 'src') - except ValueError: - pass - - try: - sys.argv.remove('-nolxml') - # run without lxml - import_lxml = False - except ValueError: - pass - - try: - sys.argv.remove('-z') - # reset 
callgrind after tree setup - callgrind_zero = True - except ValueError: - pass - - try: - sys.argv.remove('-l') - # use large trees - TREE_FACTOR *= 2 - except ValueError: - pass - - try: - sys.argv.remove('-L') - # use LARGE trees - TREE_FACTOR *= 2 - except ValueError: - pass - - _etrees = [] - if import_lxml: - from lxml import etree - _etrees.append(etree) - - if len(sys.argv) > 1: - if '-a' in sys.argv or '-c' in sys.argv: - # 'all' or 'C-implementations' ? - try: - import cElementTree as cET - _etrees.append(cET) - except ImportError: - pass - - try: - # 'all' ? - sys.argv.remove('-a') - from elementtree import ElementTree as ET - _etrees.append(ET) - except (ValueError, ImportError): - pass - - if not _etrees: - print "No library to test. Exiting." - sys.exit(1) - - print "Preparing test suites and trees ..." - - benchmark_suites = map(BenchMark, _etrees) - - # sorted by name and tree tuple - benchmarks = [ sorted(b.benchmarks()) for b in benchmark_suites ] - - if len(sys.argv) > 1: - selected = [] - for name in sys.argv[1:]: - selected.append(name) - benchmarks = [ [ b for b in bs - if [ match for match in selected - if match in b[0] ] ] - for bs in benchmarks ] - - import time - def run_bench(suite, method_name, method_call, tree_set, tn, an, serial): - if method_call is None: - raise SkippedTest - - current_time = time.time - call_repeat = range(10) - - tree_builders = [ suite.tree_builder(tree, tn, an, serial) - for tree in tree_set ] - - times = [] - args = () - for i in range(3): - gc.collect() - gc.disable() - t = 0 - for i in call_repeat: - args = [ build() for build in tree_builders ] - t_one_call = current_time() - method_call(*args) - t += current_time() - t_one_call - t = 1000.0 * t / len(call_repeat) - times.append(t) - gc.enable() - del args - return times - - def build_treeset_name(trees, tn, an, serialized): - text = {0:'-', 1:'S', 2:'U'}[tn] - attr = {0:'-', 1:'A'}[an] - ser = {True:'X', False:'T'}[serialized] - return "%s%s%s T%s" % (text, 
attr, ser, ',T'.join(imap(str, trees))[:6]) - - - print "Running benchmark on", ', '.join(b.lib_name - for b in benchmark_suites) - print - - print "Setup times for trees in seconds:" - for b in benchmark_suites: - print "%-3s: " % b.lib_name, - for an in (0,1): - for tn in (0,1,2): - print ' %s ' % build_treeset_name((), tn, an, False)[:2], - print - for i, tree_times in enumerate(b.setup_times): - print " T%d:" % (i+1), ' '.join("%6.4f" % t for t in tree_times) - print - - if callgrind_zero: - cmd = open("callgrind.cmd", 'w') - cmd.write('Zero\n') - cmd.close() - - for bench_calls in izip(*benchmarks): - for lib, (bench, benchmark_setup) in enumerate(izip(benchmark_suites, bench_calls)): - bench_name = benchmark_setup[0] - tree_set_name = build_treeset_name(*benchmark_setup[-4:]) - print "%-3s: %-28s" % (bench.lib_name, bench_name[6:34]), - print "(%-10s)" % tree_set_name, - sys.stdout.flush() - - try: - result = run_bench(bench, *benchmark_setup) - except SkippedTest: - print "skipped" - except KeyboardInterrupt: - print "interrupted by user" - sys.exit(1) - except Exception, e: - print "failed: %s: %s" % (e.__class__.__name__, e) - else: - print "%9.4f msec/pass, best of (" % min(result), - for t in result: - print "%9.4f" % t, - print ")" - - if len(benchmark_suites) > 1: - print # empty line between different benchmarks Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Tue Aug 8 19:56:33 2006 @@ -1,3 +1,4 @@ +========================== Frequently Asked Questions ========================== @@ -6,239 +7,339 @@ .. _compatibility: compatibility.html .. _ElementTree: http://effbot.org/zone/element-index.htm - -#) Is there a tutorial? - - There is a `tutorial for ElementTree`_ which also works for lxml.etree. - The `API documentation`_ also contains many examples. - - .. _`tutorial for ElementTree`: http://effbot.org/zone/element.htm - .. 
_`API documentation`: api.html - - -#) Where can I find more documentation about lxml? - - There is a lot of documentation as lxml implements the well-known - `ElementTree API`_ and tries to follow its documentation as closely as - possible. There are a couple of issues where lxml cannot keep up - compatibility. They are described in the compatibility_ documentation. - The lxml specific extensions to the API are described by individual files - in the ``doc`` directory of the distribution and on `the web page`_. - - .. _`ElementTree API`: http://effbot.org/zone/element-index.htm - .. _`the web page`: http://codespeak.net/lxml/#documentation - - -#) My application crashes! Why does lxml.etree do that? - - One of the goals of lxml is "no segfaults", so if there is no clear warning - in the documentation that you were doing something potentially harmful, you - have found a bug and we would like to hear about it. Please report this - bug to the mailing list. See the next section on how to do that. - - -#) I think I have found a bug in lxml. What should I do? - - a) First, you should look at the `current developer changelog`_ to see if - this is a known problem that has already been fixed in the SVN trunk. - - .. _`current developer changelog`: http://codespeak.net/svn/lxml/trunk/CHANGES.txt - - b) If you are using threads, please see the following section to check if - you touch on one of the potential pitfalls. - - c) Otherwise, we would really like to hear about it. Please report it to - the `mailing list`_ so that we can fix it. It is very helpful in this - case if you can come up with a short code snippet that demonstrates your - problem. 
Please also report the version of lxml, libxml2 and libxslt - that you are using by calling this:: - - from lxml import etree - print "lxml.etree: ", etree.LXML_VERSION - print "libxml used: ", etree.LIBXML_VERSION - print "libxml compiled: ", etree.LIBXML_COMPILED_VERSION - print "libxslt used: ", etree.LIBXSLT_VERSION - print "libxslt compiled: ", etree.LIBXSLT_COMPILED_VERSION - - .. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev - - -#) Can I use threads to concurrently access the lxml API? - - Yes, although not carelessly. - - lxml frees the GIL (Python's global interpreter lock) internally when - parsing from disk and memory, as long as you use either the default parser - (which is replicated for each thread) or create a parser for each thread - yourself. lxml also allows concurrency during validation (RelaxNG and - XMLSchema) and XSL transformation. You can share RelaxNG, XMLSchema and - XSLT objects between threads. While you can also share parsers between - threads, this will serialize the access to each of them, so it is better to - copy() parsers or to use the default parser. Note that access to the XML() - and HTML() functions is always serialized. If you need to parse from - strings, use StringIO. - - Warning: You should generally avoid modifying trees in other threads than - the one it was generated in. Although this should work in many cases, - there are certain scenarios where the termination of a thread that parsed a - tree can crash the application if subtrees of this tree are moved to other - documents. You should be on the safe side when passing trees between - threads if you either - - a) do not modify these trees and do not move its elements to other trees, or - b) do not terminate threads while the trees they parsed are still in use - - -#) Why can't I just delete parents or clear the root node in iterparse()? - - The ``iterparse()`` implementation is based on the libxml2 parser. 
It - requires the tree to be intact to finish parsing. If you delete or modify - parents of the current node, chances are you modify the structure in a way - that breaks the parser. Normally, this will result in a segfault. Please - refer to the `iterparse section`_ of the lxml API documentation to find out - what you can do and what you can't do. - - .. _`iterparse section`: api.html#iterparse-and-iterwalk - - -#) Why doesn't the ``pretty_print`` option reformat my XML output? - - Pretty printing (or formatting) an XML document means adding white space to - the content. These modifications are harmless if they only impact elements - in the document that do not carry (text) data. They corrupt your data if - they impact elements that contain data. If lxml cannot distinguish between - whitespace and data, it will not alter your data. Whitespace is therefore - only added between nodes that do not contain data. This is always the case - for trees constructed element-by-element, so no problems should be expected - here. For parsed trees, a good way to assure that no conflicting - whitespace is left in the tree is the ``remove_blank_text`` option:: +.. contents:: +.. + 1 General Questions + 1.1 Is there a tutorial? + 1.2 Where can I find more documentation about lxml? + 1.3 What is the difference between lxml.etree and lxml.objectify? + 1.4 Why is my application so slow? + 1.5 Why do I get errors about missing UCS4 symbols when installing lxml? + 2 Bugs + 2.1 My application crashes! Why does lxml.etree do that? + 2.2 I think I have found a bug in lxml. What should I do? + 3 Threading + 3.1 Can I use threads to concurrently access the lxml API? + 3.2 Does my program run faster if I use threads? + 4 Parsing and Serialisation + 4.1 Why doesn't the ``pretty_print`` option reformat my XML output? + 4.2 Why can't lxml parse my XML from unicode strings? + 4.3 What is the difference between str(xslt(doc)) and xslt(doc).write() ? 
+ 4.4 Why can't I just delete parents or clear the root node in iterparse()? + 5 XPath and Document Traversal + 5.1 What are the ``findall()`` and ``xpath()`` methods on Element(Tree)? + 5.2 Why doesn't ``findall()`` support full XPath expressions? + 5.3 How can I find out which namespace prefixes are used in a document? + 5.4 How can I specify a default namespace for XPath expressions? + + +General Questions +================= + +Is there a tutorial? +-------------------- + +There is a `tutorial for ElementTree`_ which also works for ``lxml.etree``. +The `API documentation`_ also contains many examples for ``lxml.etree``. To +learn using ``lxml.objectify``, read the `objectify documentation`_. + +.. _`tutorial for ElementTree`: http://effbot.org/zone/element.htm +.. _`API documentation`: api.html +.. _`objectify documentation`: objectify.html + + +Where can I find more documentation about lxml? +----------------------------------------------- + +There is a lot of documentation as lxml implements the well-known `ElementTree +API`_ and tries to follow its documentation as closely as possible. There are +a couple of issues where lxml cannot keep up compatibility. They are +described in the compatibility_ documentation. The lxml specific extensions +to the API are described by individual files in the ``doc`` directory of the +distribution and on `the web page`_. + +.. _`ElementTree API`: http://effbot.org/zone/element-index.htm +.. _`the web page`: http://codespeak.net/lxml/#documentation + + +What is the difference between lxml.etree and lxml.objectify? +------------------------------------------------------------- + +The two modules provide different ways of handling XML. However, objectify +builds on top of lxml.etree and therefore inherits most of its capabilities +and a large portion of its API. + +* lxml.etree is a generic API for XML and HTML handling. It aims for + ElementTree compatibility_ and supports the entire XML infoset. 
It is well + suited for both mixed content and data centric XML. Its generality makes it + the best choice for most applications. + +* lxml.objectify is a specialized API for XML data handling in a Python object + syntax. It provides a very natural way to deal with data fields stored in a + structurally well defined XML format. Data is automatically converted to + Python data types and can be manipulated with normal Python operators. Look + at the examples in the `objectify documentation`_ to see what it feels like + to use it. + + Objectify is not well suited for mixed contents or HTML documents. As it is + built on top of lxml.etree, however, it inherits the normal support for + XPath, XSLT or validation. + + +Why is my application so slow? +------------------------------ + +lxml.etree is a very fast library for processing XML. There are, however, `a +few caveats`_ involved in the mapping of the powerful libxml2 library to the +simple and convenient ElementTree API. Not all operations are as fast as the +simplicity of the API might suggest. The `benchmark page`_ has a comparison +to other ElementTree implementations and a number of tips for performance +tweaking. As with any Python application, the rule of thumb is: the more of +your processing runs in C, the faster your application gets. + +.. _`a few caveats`: performance.html#the-elementtree-api +.. _`benchmark page`: performance.html + + +Why do I get errors about missing UCS4 symbols when installing lxml? +-------------------------------------------------------------------- + +Most likely, you use a Python installation that was configured for internal +use of UCS2 unicode, meaning 16-bit unicode. The lxml egg distributions are +generally compiled on platforms that use UCS4, a 32-bit unicode encoding, as +this is used on the majority of platforms. Sadly, both are not compatible, so +the eggs can only support the one they were compiled with. 
+ +This means that you have to compile lxml from sources for your system. Note +that you do not need Pyrex for this, the lxml source distribution is directly +compilable on both platform types. See the `build instructions`_ on how to do +this. + +.. _`build instructions`: build.html + + +Bugs +==== + +My application crashes! Why does lxml.etree do that? +---------------------------------------------------- + +One of the goals of lxml is "no segfaults", so if there is no clear warning in +the documentation that you were doing something potentially harmful, you have +found a bug and we would like to hear about it. Please report this bug to the +mailing list. See the next section on how to do that. + + +I think I have found a bug in lxml. What should I do? +----------------------------------------------------- + +a) First, you should look at the `current developer changelog`_ to see if this + is a known problem that has already been fixed in the SVN trunk. + + .. _`current developer changelog`: http://codespeak.net/svn/lxml/trunk/CHANGES.txt + +b) If you are using threads, please see the following section to check if + you touch on one of the potential pitfalls. + +c) Otherwise, we would really like to hear about it. Please report it to the + `mailing list`_ so that we can fix it. It is very helpful in this case if + you can come up with a short code snippet that demonstrates your problem. + Please also report the version of lxml, libxml2 and libxslt that you are + using by calling this:: + + from lxml import etree + print "lxml.etree: ", etree.LXML_VERSION + print "libxml used: ", etree.LIBXML_VERSION + print "libxml compiled: ", etree.LIBXML_COMPILED_VERSION + print "libxslt used: ", etree.LIBXSLT_VERSION + print "libxslt compiled: ", etree.LIBXSLT_COMPILED_VERSION + + .. _`mailing list`: http://codespeak.net/mailman/listinfo/lxml-dev + + +Threading +========= + +Can I use threads to concurrently access the lxml API? 
+------------------------------------------------------ + +Yes, although not carelessly. + +lxml frees the GIL (Python's global interpreter lock) internally when parsing +from disk and memory, as long as you use either the default parser (which is +replicated for each thread) or create a parser for each thread yourself. lxml +also allows concurrency during validation (RelaxNG and XMLSchema) and XSL +transformation. You can share RelaxNG, XMLSchema and XSLT objects between +threads. While you can also share parsers between threads, this will +serialize the access to each of them, so it is better to copy() parsers or to +use the default parser. Note that access to the XML() and HTML() functions is +always serialized. If you need to parse concurrently from strings, use +``parse()`` with ``StringIO``. + +Due to the way libxslt handles threading, concurrent access to stylesheets is +currently only possible if it was parsed in the main thread. Parsing and +using a stylesheet inside one thread also works. + +Warning: You should generally avoid modifying trees in other threads than the +one it was generated in. Although this should work in many cases, there are +certain scenarios where the termination of a thread that parsed a tree can +crash the application if subtrees of this tree were moved to other documents. +You should be on the safe side when passing trees between threads if you +either + +a) do not modify these trees and do not move their elements to other trees, or +b) do not terminate threads while the trees they parsed are still in use + (e.g. by using a fixed size thread-pool or long-running threads in + processing chains) + + +Does my program run faster if I use threads? +-------------------------------------------- + +Depends. The best way to answer this is timing and profiling. 
+ +The global interpreter lock (GIL) in Python serializes access to the +interpreter, so if the majority of your processing is done in Python code +(traversing trees, modifying elements, etc.), your gain will be close to 0. +The more of your XML processing moves into lxml, however, the higher your +gain. If your application is bound by XML parsing and serialisation, or by +complex XSLTs, your speedup on multi-processor machines can be substantial. + +See the question above to learn which operations free the GIL to support +multi-threading. + + +Parsing and Serialisation +========================= + +Why doesn't the ``pretty_print`` option reformat my XML output? +--------------------------------------------------------------- + +Pretty printing (or formatting) an XML document means adding white space to +the content. These modifications are harmless if they only impact elements in +the document that do not carry (text) data. They corrupt your data if they +impact elements that contain data. If lxml cannot distinguish between +whitespace and data, it will not alter your data. Whitespace is therefore +only added between nodes that do not contain data. This is always the case +for trees constructed element-by-element, so no problems should be expected +here. For parsed trees, a good way to assure that no conflicting whitespace +is left in the tree is the ``remove_blank_text`` option:: >>> parser = etree.XMLParser(remove_blank_text=True) >>> tree = etree.parse(file, parser) - This will allow the parser to drop blank text nodes when constructing the - tree. If you now call a serialization function to pretty print this tree, - lxml can add fresh whitespace to the XML tree to indent it. - - -#) Why can't lxml parse my XML from unicode strings? - - lxml can read Python unicode strings and even tries to support them if - libxml2 does not. 
However, if the unicode string declares an XML encoding - internally (``<?xml encoding="..."?>``), parsing is bound to fail, as this - encoding is most likely not the real encoding used in Python unicode. The - same is true for HTML unicode strings that contain charset meta tags. Note - that Python uses different encodings for unicode on different platforms, so - even specifying the real internal unicode encoding is not portable between - Python interpreters. Don't do it. - - Python unicode strings with XML data or HTML data that carry encoding - information are broken. lxml will not parse them. You must provide - parsable data in a valid encoding. - +This will allow the parser to drop blank text nodes when constructing the +tree. If you now call a serialization function to pretty print this tree, +lxml can add fresh whitespace to the XML tree to indent it. -#) Why do I get errors about missing UCS4 symbols when installing lxml? - Most likely, you use a Python installation that was configured for internal - use of UCS2 unicode, meaning 16-bit unicode. The lxml egg distributions - are generally compiled on platforms that use UCS4, a 32-bit unicode - encoding, as this is used on the majority of platforms. Sadly, both are - not compatible, so the eggs can only support the one they were compiled - with. +Why can't lxml parse my XML from unicode strings? +------------------------------------------------- - This means that you have to compile lxml from sources for your system. - Note that you do not need Pyrex for this, the lxml source distribution is - directly compilable on both platform types. See the `build instructions`_ - on how to do this. +lxml can read Python unicode strings and even tries to support them if libxml2 +does not. However, if the unicode string declares an XML encoding internally +(``<?xml encoding="..."?>``), parsing is bound to fail, as this encoding is +most likely not the real encoding used in Python unicode. The same is true +for HTML unicode strings that contain charset meta tags.
Note that Python +uses different encodings for unicode on different platforms, so even +specifying the real internal unicode encoding is not portable between Python +interpreters. Don't do it. - .. _`build instructions`: build.html +Python unicode strings with XML data or HTML data that carry encoding +information are broken. lxml will not parse them. You must provide parsable +data in a valid encoding. -#) How can I find out which namespace prefixes are used in a document? +What is the difference between str(xslt(doc)) and xslt(doc).write() ? +--------------------------------------------------------------------- - You can traverse the document (``getiterator()``) and collect the prefix - attributes from all Elements into a set. However, it is unlikely that you - really want to do that. You do not need these prefixes, honestly. You - only need the namespace URIs. All namespace comparisons use these, so feel - free to make up your own prefixes when you use XPath expressions or - extension functions. +The str() implementation of the XSLTResultTree class (a subclass of the +ElementTree class) knows about the output method chosen in the stylesheet +(xsl:output), write() doesn't. If you call write(), the result will be a +normal XML tree serialization in the requested encoding. Calling this method +may also fail for XSLT results that are not XML trees (e.g. string results). - The only place where you might consider specifying prefixes is the - serialization of Elements that were created through the API. Here, you can - specify a prefix mapping through the ``nsmap`` argument when creating the - root Element. Its children will then inherit this prefix for - serialization. +If you call str(), it will return the serialized result as specified by the +XSL transform. This correctly serializes string results to encoded Python +strings and honours ``xsl:output`` options like ``indent``. 
This almost +certainly does what you want, so you should only use ``write()`` if you are +sure that the XSLT result is an XML tree and you want to override the encoding +and indentation options requested by the stylesheet. -#) How can I specify a default namespace for XPath expressions? +Why can't I just delete parents or clear the root node in iterparse()? +---------------------------------------------------------------------- - You can't. In XPath, there is no such thing as a default namespace. Just - use an arbitrary prefix and let the namespace dictionary of the XPath - evaluators map it to your namespace. See also the question above. +The ``iterparse()`` implementation is based on the libxml2 parser. It +requires the tree to be intact to finish parsing. If you delete or modify +parents of the current node, chances are you modify the structure in a way +that breaks the parser. Normally, this will result in a segfault. Please +refer to the `iterparse section`_ of the lxml API documentation to find out +what you can do and what you can't do. +.. _`iterparse section`: api.html#iterparse-and-iterwalk -#) What are the ``findall()`` and ``xpath()`` methods on Element(Tree)? - ``findall()`` is part of the original `ElementTree API`_. It supports a - `simple subset of the XPath language`_, without predicates, conditions and - other advanced features. It is very handy for finding specific tags in a - tree. Another important difference is namespace handling, which uses the - ``{namespace}tagname`` notation. This is not supported by XPath. The - findall, find and findtext methods are compatible with other ElementTree - implementations and allow writing portable code that runs on ElementTree, - cElementTree and lxml.etree. +XPath and Document Traversal +============================ - ``xpath()``, on the other hand, supports the complete power of the XPath - language, including predicates, XPath functions and Python extension - functions. 
The syntax is defined by the `XPath specification`_. If you - need the expressiveness and selectivity of XPath, the ``xpath()`` method, - the ``XPath`` class and the ``XPathEvaluator`` are the best choice_. +What are the ``findall()`` and ``xpath()`` methods on Element(Tree)? +-------------------------------------------------------------------- - .. _`simple subset of the XPath language`: http://effbot.org/zone/element-xpath.htm - .. _`XPath specification`: http://www.w3.org/TR/xpath - .. _choice: performance.html#xpath +``findall()`` is part of the original `ElementTree API`_. It supports a +`simple subset of the XPath language`_, without predicates, conditions and +other advanced features. It is very handy for finding specific tags in a +tree. Another important difference is namespace handling, which uses the +``{namespace}tagname`` notation. This is not supported by XPath. The +findall, find and findtext methods are compatible with other ElementTree +implementations and allow writing portable code that runs on ElementTree, +cElementTree and lxml.etree. +``xpath()``, on the other hand, supports the complete power of the XPath +language, including predicates, XPath functions and Python extension +functions. The syntax is defined by the `XPath specification`_. If you need +the expressiveness and selectivity of XPath, the ``xpath()`` method, the +``XPath`` class and the ``XPathEvaluator`` are the best choice_. -#) Why doesn't ``findall()`` support full XPath expressions? +.. _`simple subset of the XPath language`: http://effbot.org/zone/element-xpath.htm +.. _`XPath specification`: http://www.w3.org/TR/xpath +.. _choice: performance.html#xpath - It was decided that it is more important to keep compatibility with - ElementTree_ to simplify code migration between the libraries. The main - difference compared to XPath is the ``{namespace}tagname`` notation used in - ``findall()``, which is not valid XPath. 
- ElementTree and lxml.etree use the same implementation, which assures 100% - compatibility. Note that ``findall()`` is `so fast`_ in lxml that a native - implementation would not bring any performance benefits. +Why doesn't ``findall()`` support full XPath expressions? +--------------------------------------------------------- - .. _`so fast`: performance.html#tree-traversal +It was decided that it is more important to keep compatibility with +ElementTree_ to simplify code migration between the libraries. The main +difference compared to XPath is the ``{namespace}tagname`` notation used in +``findall()``, which is not valid XPath. +ElementTree and lxml.etree use the same implementation, which assures 100% +compatibility. Note that ``findall()`` is `so fast`_ in lxml that a native +implementation would not bring any performance benefits. -#) What is the difference between str(xslt(doc)) and xslt(doc).write() ? +.. _`so fast`: performance.html#tree-traversal - The str() implementation of the XSLTResultTree class (a subclass of - ElementTree) knows about the output method chosen in the stylesheet - (xsl:output), write() doesn't. If you call write(), the result will be a - normal XML tree serialization in the requested encoding. Calling this - method may also fail for XSLT results that are not XML trees (e.g. string - results). - If you call str(), it will return the serialized result as specified by the - XSL transform. This correctly serializes string results to encoded Python - strings and honours ``xsl:output`` options like ``indent``. This almost - certainly does what you want, so you should only use ``write()`` if you are - sure that the XSLT result is an XML tree and you want to override the - encoding and indentation options requested by the stylesheet. +How can I find out which namespace prefixes are used in a document? 
+------------------------------------------------------------------- +You can traverse the document (``getiterator()``) and collect the prefix +attributes from all Elements into a set. However, it is unlikely that you +really want to do that. You do not need these prefixes, honestly. You only +need the namespace URIs. All namespace comparisons use these, so feel free to +make up your own prefixes when you use XPath expressions or extension +functions. -#) Why is my application so slow? +The only place where you might consider specifying prefixes is the +serialization of Elements that were created through the API. Here, you can +specify a prefix mapping through the ``nsmap`` argument when creating the root +Element. Its children will then inherit this prefix for serialization. - lxml.etree is a very fast library for processing XML. There are, however, - `a few caveats`_ involved in the mapping of the powerful libxml2 library to - the simple and convenient ElementTree API. Not all operations are as fast - as the simplicity of the API might suggest. The `benchmark page`_ has a - comparison to other ElementTree implementations and a number of tips for - performance tweaking. - .. _`a few caveats`: performance.html#the-elementtree-api - .. _`benchmark page`: performance.html +How can I specify a default namespace for XPath expressions? +------------------------------------------------------------ +You can't. In XPath, there is no such thing as a default namespace. Just use +an arbitrary prefix and let the namespace dictionary of the XPath evaluators +map it to your namespace. See also the question above. Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Tue Aug 8 19:56:33 2006 @@ -8,18 +8,20 @@ .. contents:: .. 
1 lxml.etree - 2 Trees and Documents - 3 Iteration - 4 Parsers - 5 iterparse and iterwalk - 6 Error handling on exceptions - 7 Python unicode strings - 8 XPath - 9 XSLT - 10 RelaxNG - 11 XMLSchema - 12 xinclude - 13 write_c14n on ElementTree + 2 Other Element APIs + 3 Trees and Documents + 4 Iteration + 5 Parsers + 6 iterparse and iterwalk + 7 Error handling on exceptions + 8 Python unicode strings + 9 XPath + 10 XSLT + 11 RelaxNG + 12 XMLSchema + 13 xinclude + 14 write_c14n on ElementTree + lxml.etree ---------- @@ -43,6 +45,25 @@ >>> from StringIO import StringIO +Other Element APIs +------------------ + +While lxml.etree itself uses the ElementTree API, it is possible to replace +the Element implementation by `custom element subclasses`_. This has been +used to implement well-known XML APIs on top of lxml. The ``lxml.elements`` +package contains examples. Currently, there is a data-binding implementation +called `objectify`_, which is similar to the `Amara bindery`_ tool. + +Additionally, the `lxml.elements.classlookup`_ module provides a number of +different schemes to customize the mapping between libxml2 nodes and the +Element classes used by lxml.etree. + +.. _`custom element subclasses`: namespace_extensions.html +.. _`objectify`: objectify.html +.. _`lxml.elements.classlookup`: elements.html#lxml.elements.classlookup +.. _`Amara bindery`: http://uche.ogbuji.net/tech/4suite/amara/ + + Trees and Documents ------------------- Added: lxml/trunk/doc/capi.txt ============================================================================== --- (empty file) +++ lxml/trunk/doc/capi.txt Tue Aug 8 19:56:33 2006 @@ -0,0 +1,84 @@ +============================== +The public C-API of lxml.etree +============================== + +As of version 1.1, lxml.etree provides a public C-API. This allows external +C extensions to efficiently access public functions and classes of lxml, +without going through the Python API. 
+ +The API is described in the file `etreepublic.pxd`_, which is directly +c-importable by Pyrex modules. + +.. _`etreepublic.pxd`: http://codespeak.net/svn/lxml/branch/capi/src/lxml/etreepublic.pxd + + +Writing external modules in Pyrex +--------------------------------- + +This is the easiest way of extending lxml at the C level. A Pyrex module +should start like this:: + + # import the public functions and classes of lxml.etree + cimport etreepublic as cetree + + # import the lxml.etree module in Python + cdef object etree + from lxml import etree + + # initialize the access to the C-API of lxml.etree + cetree.import_etree(etree) + +From this line on, you can access all public functions of lxml.etree from the +``cetree`` namespace like this:: + + # build a tag name from namespace and element name + py_tag = cetree.namespacedNameFromNsName("http://some/url", "myelement") + +Public lxml classes are easily subclassed. For example, to implement and set +a new default element class, you can write code like the following:: + + from etreepublic cimport ElementBase + cdef class NewElementClass(ElementBase): + def setValue(self, myval): + self.set("my_attribute", myval) + + etree.setDefaultElementClass(NewElementClass) + + +Writing external modules in C +----------------------------- + +If you really feel like it, you can also interface with lxml.etree straight +from C code. 
All you have to do is include the header file for the public +API, import the ``lxml.etree`` module and then call the import function:: + + /* My C extension */ + + /* common includes */ + #include "Python.h" + #include "stdio.h" + #include "string.h" + #include "stdarg.h" + #include "libxml/xmlversion.h" + #include "libxml/encoding.h" + #include "libxml/hash.h" + #include "libxml/tree.h" + #include "libxml/xmlIO.h" + #include "libxml/xmlsave.h" + #include "libxml/globals.h" + #include "libxml/xmlstring.h" + + /* lxml.etree specific includes */ + #include "lxml-version.h" + #include "etree_defs.h" + #include "etree.h" + + /* setup code */ + static PyObject* m_etree; + m_etree = _ADD_YOUR_WAY_TO_IMPORT_A_MODULE_("lxml.etree"); + + import_etree(m_etree); + +Note that including ``etree.h`` does not automatically include the header +files it requires. Note also that the above list of common imports may not be +sufficient. Modified: lxml/trunk/doc/compatibility.txt ============================================================================== --- lxml/trunk/doc/compatibility.txt (original) +++ lxml/trunk/doc/compatibility.txt Tue Aug 8 19:56:33 2006 @@ -100,16 +100,21 @@ have to be touched again by the parser later on. See the lxml API documentation on this. +* ElementTree ignores comments and processing instructions when parsing XML, + while etree will read them in and treat them as Comment or + ProcessingInstruction elements respectively. + * ElementTree has a bug when serializing an empty Comment (no text argument given) to XML, etree serializes this successfully. -* ElementTree ignores comments when parsing XML, while etree will read them in - and treat them as Comment elements. - * ElementTree adds whitespace around comments on serialization, lxml does not. This means that a comment text "text" that ElementTree serializes as "" will become "" in lxml. 
+* ElementTree merges the target of a processing instruction into ``PI.text``, + while lxml.etree puts it into the ``.target`` property and leaves it out of + the ``.text`` property. + * Because etree is built on top of libxml2, which is namespace prefix aware, etree preserves namespaces declarations and prefixes while ElementTree tends to come up with its own prefixes (ns0, ns1, etc). When no namespace prefix Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Tue Aug 8 19:56:33 2006 @@ -92,32 +92,41 @@ Documentation ------------- -lxml follows the ElementTree_ API as much as possible, building it on top of -the native libxml2 tree. See also the `ElementTree compatibility overview`_ -and the `benchmark results`_ comparing lxml to the original ElementTree_ and -cElementTree_ implementations. - -lxml also `extends this API`_ to expose libxml2 and libxslt specific -functionality, such as XPath_, `Relax NG`_, `XML Schema`_, `XSLT`_, and -`c14n`_. Python code can be called from XPath expressions and XSLT stylesheets -through the use of `extension functions`_. +lxml.etree follows the ElementTree_ API as much as possible, building it on +top of the native libxml2 tree. See also the `ElementTree compatibility +overview`_ and the `benchmark results`_ comparing lxml to the original +ElementTree_ and cElementTree_ implementations. + +Right after the ElementTree_ documentation, the most important place to look +is the `lxml.etree API documentation`_. It describes how lxml extends the +ElementTree API to expose libxml2 and libxslt specific functionality, such as +XPath_, `Relax NG`_, `XML Schema`_, `XSLT`_, and `c14n`_. Python code can be +called from XPath expressions and XSLT stylesheets through the use of +`extension functions`_. lxml also offers a `SAX compliant API`_, that works +with the SAX support in the standard library. 
+ +There is a separate module `lxml.objectify`_ that implements a data-binding +API on top of lxml.etree. See the `objectify and etree`_ FAQ entry for a +comparison. In addition to the ElementTree API, lxml also features a sophisticated API for `custom element classes`_. This is a simple way to write arbitrary XML driven -APIs on top of lxml. - -lxml also offers a `SAX compliant API`_, that works with the SAX support -in the standard library. +APIs on top of lxml. As of version 1.1, lxml.etree has a new `C-level API`_ +that can be used to efficiently extend lxml.etree in external C modules, +including custom element class support. .. _ElementTree: http://effbot.org/zone/element-index.htm .. _cElementTree: http://effbot.org/zone/celementtree.htm .. _`benchmark results`: performance.html .. _`ElementTree compatibility overview`: compatibility.html -.. _`extends this API`: api.html +.. _`lxml.etree API documentation`: api.html .. _`extension functions`: extensions.html -.. _`custom element classes`: namespace_extensions.html +.. _`custom element classes`: element_classes.html .. _`SAX compliant API`: sax.html +.. _`C-level API`: capi.html +.. _`lxml.objectify`: objectify.html +.. _`objectify and etree`: FAQ.html#what-is-the-difference-between-lxml-etree-and-lxml-objectify .. _XPath: http://www.w3.org/TR/xpath ..
_`Relax NG`: http://www.relaxng.org/ Modified: lxml/trunk/doc/mkhtml.py ============================================================================== --- lxml/trunk/doc/mkhtml.py (original) +++ lxml/trunk/doc/mkhtml.py Tue Aug 8 19:56:33 2006 @@ -12,8 +12,9 @@ shutil.copy(pubkey, dirname) for name in ['main.txt', 'intro.txt', 'api.txt', 'compatibility.txt', - 'extensions.txt', 'namespace_extensions.txt', 'sax.txt', - 'build.txt', 'FAQ.txt', 'performance.txt', 'resolvers.txt']: + 'extensions.txt', 'element_classes.txt', 'sax.txt', + 'build.txt', 'FAQ.txt', 'performance.txt', 'resolvers.txt', + 'capi.txt', 'objectify.txt']: path = os.path.join(doc_dir, name) outname = os.path.splitext(name)[0] + '.html' outpath = os.path.join(dirname, outname) Deleted: /lxml/trunk/doc/namespace_extensions.txt ============================================================================== --- /lxml/trunk/doc/namespace_extensions.txt Tue Aug 8 19:56:33 2006 +++ (empty file) @@ -1,193 +0,0 @@ -==================================== -Using custom Element classes in lxml -==================================== - -lxml has very sophisticated support for custom Element classes. You can -provide your own classes for Elements and have lxml use them by default or -only for a specific tag name in a specific namespace. - -Custom Elements must inherit from the ``lxml.etree.ElementBase`` class, which -provides the Element interface for subclasses:: - - >>> from lxml import etree - >>> class HonkElement(etree.ElementBase): - ... def honking(self): - ... return self.get('honking') == 'true' - ... honking = property(honking) - -This defines a new Element class ``HonkElement`` with a property ``honking``. - -Note that you cannot (or rather *must not*) instantiate this class yourself. -lxml.etree will do that for you through its normal ElementTree API. - -.. contents:: -.. 
- 1 Changing the default element class - 2 Implementing namespaces - 3 Element initialization - 4 Default implementations - - -Changing the default element class ----------------------------------- - -You can let lxml use your new class for every Element it generates:: - - >>> etree.setDefaultElementClass(HonkElement) - >>> el = etree.Element("myelement") - >>> print isinstance(el, HonkElement) - True - >>> el.honking - False - >>> el = etree.Element("myelement", honking='true') - >>> print etree.tostring(el) - <myelement honking="true"/> - >>> el.honking - True - -To reset lxml.etree to the original element class, pass ``None`` or nothing:: - - >>> etree.setDefaultElementClass() - >>> el = etree.Element("myelement") - >>> print isinstance(el, HonkElement) - False - - -Implementing namespaces ------------------------ - -lxml allows you to implement namespaces, in a rather literal sense. You can -build a new element namespace (or retrieve an existing one) by calling the -Namespace class:: - - >>> namespace = etree.Namespace('http://hui.de/honk') - -and then register the new element type with that namespace, say, under the tag -name ``honk``:: - - >>> namespace['honk'] = HonkElement - -After this, you create and use your XML elements through the normal API of -lxml:: - - >>> xml = '<honk xmlns="http://hui.de/honk" honking="true"/>' - >>> honk_element = etree.XML(xml) - >>> print honk_element.honking - True - -The same works when creating elements by hand:: - - >>> honk_element = etree.Element('{http://hui.de/honk}honk', - ... honking='true') - >>> print honk_element.honking - True - -Essentially, what this allows you to do, is to give elements a custom API -based on their namespace and tag name. - -A somewhat related topic are `extension functions`_ which use a similar -mechanism for registering extension functions in XPath and XSLT. - -.. _`extension functions`: extensions.html - - -Element initialization ----------------------- - -There is one thing to remember.
Element classes *must not* have a -constructor, neither must there be any internal state (except for the data -stored in the underlying XML tree). Element instances are created and garbage -collected at need, so there is no way to predict when and how often a -constructor would be called. Even worse, when the ``__init__`` method is -called, the object may not even be initialized yet to represent the XML tag, -so there is not much use in providing an ``__init__`` method in subclasses. - -However, there is one possible way to do things on element initialization, if -you really need to. ElementBase classes have an ``_init()`` method that can -be overridden. It can be used to modify the XML tree, e.g. to construct -special children or verify and update attributes. - -The semantics of ``_init()`` are as follows: - -* It is called at least once on element instantiation time. That is, when a - Python representation of the element is created by lxml. At that time, the - element object is completely initialized to represent a specific XML element - within the tree. - -* The method has complete access to the XML tree. Modifications can be done - in exactly the same way as anywhere else in the program. - -* Python representations of elements may be created multiple times during the - lifetime of an XML element in the underlying tree. The ``_init()`` code - provided by subclasses must take special care by itself that multiple - executions either are harmless or that they are prevented by some kind of - flag in the XML tree. The latter can be achieved by modifying an attribute - value or by removing or adding a specific child node and then verifying this - before running through the init process. - -* Any exceptions raised in ``_init()`` will be propagated through the API - call that led to the creation of the Element. So be careful with the code - you write here as its exceptions may turn up in various unexpected places.
- - -Default implementations ------------------------ - -In the Namespace example above, we associated the HonkElement class only with -the 'honk' element. If an XML tree contains different elements in the same -namespace, they do not pick up the same implementation:: - - >>> xml = '' - >>> honk_element = etree.XML(xml) - >>> print honk_element.honking - True - >>> print honk_element[0].honking - Traceback (most recent call last): - ... - AttributeError: 'etree._Element' object has no attribute 'honking' - -You can therefore provide one implementation per element name in each -namespace and have lxml select the right one on the fly. If you want one -element implementation per namespace (ignoring the element name) or prefer -having a common class for most elements except a few, you can specify a -default implementation for an entire namespace by registering that class with -the empty element name (None). - -You may consider following an object oriented approach here. If you build a -class hierarchy of element classes, you can also implement a base class for a -namespace that is used if no specific element class is provided. Again, you -can just pass None as an element name:: - - >>> class HonkNSElement(etree.ElementBase): - ... def honk(self): - ... return "HONK" - >>> namespace[None] = HonkNSElement - - >>> class HonkElement(HonkNSElement): - ... def honking(self): - ... return self.get('honking') == 'true' - ... honking = property(honking) - >>> namespace['honk'] = HonkElement - -Now you can rely on lxml to always return objects of type HonkNSElement or its -subclasses for elements of this namespace:: - - >>> xml = '' - >>> honk_element = etree.XML(xml) - - >>> print type(honk_element), type(honk_element[0]) - - - >>> print honk_element.honking - True - >>> print honk_element.honk() - HONK - >>> print honk_element[0].honk() - HONK - >>> print honk_element[0].honking - Traceback (most recent call last): - ... 
- AttributeError: 'HonkNSElement' object has no attribute 'honking' - -Note that you can also combine this with the global default class. Namespace -specific classes will simply override the less specific default. Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Tue Aug 8 19:56:33 2006 @@ -1,5 +1,10 @@ import sys, os, os.path, re +EXT_MODULES = [ + ("etree", "lxml.etree"), + ("objectify", "lxml.objectify") + ] + setup_args = {} ext_args = {} DEFINES = [] @@ -93,16 +98,17 @@ try: from Pyrex.Distutils import build_ext as build_pyx - sources = ["src/lxml/etree.pyx"] + source_extension = ".pyx" setup_args['cmdclass'] = {'build_ext' : build_pyx} except ImportError: print "*NOTE*: Trying to build without Pyrex, needs pre-generated 'src/lxml/etree.c' !" - sources = ["src/lxml/etree.c"] + source_extension = ".c" if '--static' in sys.argv: # use the static setup as configured in setupStaticBuild sys.argv.remove('--static') cflags, xslt_libs = setupStaticBuild() + ext_args['extra_link_args'] = xslt_libs else: cflags = flags('xslt-config --cflags') xslt_libs = flags('xslt-config --libs') @@ -127,13 +133,17 @@ except ValueError: pass -ext_modules = [ Extension( - "lxml.etree", - sources = sources, - extra_compile_args = ['-w'] + cflags, - define_macros = DEFINES, - **ext_args - )] +ext_modules = [] + +for module, package in EXT_MODULES: + ext_modules.append( + Extension( + package, + sources = ["src/lxml/" + module + source_extension], + extra_compile_args = ['-w'] + cflags, + define_macros = DEFINES, + **ext_args + )) # setup ChangeLog entry @@ -192,7 +202,7 @@ ], package_dir = {'': 'src'}, - packages = ['lxml'], + packages = ['lxml', 'lxml.elements'], ext_modules = ext_modules, **setup_args ) Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi 
(original) +++ lxml/trunk/src/lxml/apihelpers.pxi Tue Aug 8 19:56:33 2006 @@ -1,9 +1,9 @@ -# Private helper functions for API functions +# Private/public helper functions for API functions cdef void displayNode(xmlNode* c_node, indent): # to help with debugging cdef xmlNode* c_child - print indent * ' ', c_node + print indent * ' ', c_node c_child = c_node.children while c_child is not NULL: displayNode(c_child, indent + 1) @@ -80,17 +80,61 @@ else: return None +cdef _Element _makeElement(tag, xmlDoc* c_doc, _Document doc, + _BaseParser parser, attrib, nsmap, extra_attrs): + """Create a new element and initialize namespaces and attributes. + + This helper function will reuse as much of the existing document as + possible: + + If 'parser' is None, the parser will be inherited from 'doc' or the + default parser will be used. + + If 'doc' is None, 'c_doc' is used to create a new _Document and the new + element is made its root node. + + If 'c_doc' is also NULL, a new xmlDoc will be created. 
+ """ + cdef xmlNode* c_node + ns_utf, name_utf = _getNsTag(tag) + if doc is not None: + c_doc = doc._c_doc + elif c_doc is NULL: + c_doc = _newDoc() + c_node = _createElement(c_doc, name_utf) + if doc is None: + tree.xmlDocSetRootElement(c_doc, c_node) + doc = _documentFactory(c_doc, parser) + # add namespaces to node if necessary + doc._setNodeNamespaces(c_node, ns_utf, nsmap) + _initNodeAttributes(c_node, doc, attrib, extra_attrs) + return _elementFactory(doc, c_node) + cdef object _attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node): cdef char* value - if c_attrib_node.ns is NULL or c_attrib_node.ns.href is NULL: + cdef char* href + href = _getNs(c_attrib_node) + if href is NULL: value = tree.xmlGetNoNsProp(c_element, c_attrib_node.name) else: - value = tree.xmlGetNsProp(c_element, c_attrib_node.name, - c_attrib_node.ns.href) + value = tree.xmlGetNsProp(c_element, c_attrib_node.name, href) result = funicode(value) tree.xmlFree(value) return result +cdef object _attributeValueFromNsName(xmlNode* c_element, + char* c_href, char* c_name): + cdef char* c_result + if c_href is NULL: + c_result = tree.xmlGetNoNsProp(c_element, c_name) + else: + c_result = tree.xmlGetNsProp(c_element, c_name, c_href) + if c_result is NULL: + return None + result = funicode(c_result) + tree.xmlFree(c_result) + return result + cdef object _getAttributeValue(_NodeBase element, key, default): cdef char* c_result cdef char* c_tag @@ -122,6 +166,30 @@ tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value) return 0 +cdef int _delAttribute(_NodeBase element, key) except -1: + cdef xmlAttr* c_attr + cdef char* c_href + ns, tag = _getNsTag(key) + if ns is None: + c_href = NULL + else: + c_href = _cstr(ns) + if _delAttributeFromNsName(element._c_node, c_href, _cstr(tag)): + raise KeyError, key + return 0 + +cdef int _delAttributeFromNsName(xmlNode* c_node, char* c_href, char* c_name): + cdef xmlAttr* c_attr + if c_href is NULL: + c_attr = tree.xmlHasProp(c_node, c_name) + else: + c_attr 
= tree.xmlHasNsProp(c_node, c_name, c_href) + if c_attr is NULL: + # XXX free namespace that is not in use..? + return -1 + tree.xmlRemoveProp(c_attr) + return 0 + cdef object __RE_XML_ENCODING __RE_XML_ENCODING = re.compile( r'^(\s*<\?\s*xml[^>]+)\s+encoding\s*=\s*"[^"]*"\s*', re.U) @@ -188,7 +256,7 @@ c_node = c_node.next return funicode(result) -cdef _removeText(xmlNode* c_node): +cdef void _removeText(xmlNode* c_node): """Remove all text nodes. Start removing at c_node. @@ -201,6 +269,33 @@ tree.xmlFreeNode(c_node) c_node = c_next +cdef int _setNodeText(xmlNode* c_node, value) except -1: + cdef xmlNode* c_text_node + # remove all text nodes at the start first + _removeText(c_node.children) + if value is None: + return 0 + # now add new text node with value at start + text = _utf8(value) + c_text_node = tree.xmlNewDocText(c_node.doc, _cstr(text)) + if c_node.children is NULL: + tree.xmlAddChild(c_node, c_text_node) + else: + tree.xmlAddPrevSibling(c_node.children, c_text_node) + return 0 + +cdef int _setTailText(xmlNode* c_node, value) except -1: + cdef xmlNode* c_text_node + # remove all text nodes at the start first + _removeText(c_node.next) + if value is None: + return 0 + text = _utf8(value) + c_text_node = tree.xmlNewDocText(c_node.doc, _cstr(text)) + # XXX what if we're the top element? + tree.xmlAddNextSibling(c_node, c_text_node) + return 0 + cdef xmlNode* _findChild(xmlNode* c_node, Py_ssize_t index): if index < 0: return _findChildBackwards(c_node, -index - 1) @@ -268,23 +363,42 @@ return c_node cdef int _tagMatches(xmlNode* c_node, char* c_href, char* c_name): + """Tests if the node matches namespace URI and tag name. + + A node matches if it matches both c_href and c_name. 
+ + A node matches c_href if any of the following is true: + * c_href is NULL + * its namespace is NULL and c_href is the empty string + * its namespace string equals the c_href string + + A node matches c_name if any of the following is true: + * c_name is NULL + * its name string equals the c_name string + """ + cdef char* c_node_href if c_name is NULL: if c_href is NULL: # always match return 1 - elif c_node.ns is NULL or c_node.ns.href is NULL: - return 0 else: - return cstd.strcmp(c_node.ns.href, c_href) == 0 + c_node_href = _getNs(c_node) + if c_node_href is NULL: + return c_href[0] == c'\0' + else: + return cstd.strcmp(c_node_href, c_href) == 0 elif c_href is NULL: - if c_node.ns is not NULL and c_node.ns.href is not NULL: + if _getNs(c_node) is not NULL: return 0 return cstd.strcmp(c_node.name, c_name) == 0 - elif c_node.ns is NULL or c_node.ns.href is NULL: - return 0 + elif cstd.strcmp(c_node.name, c_name) == 0: + c_node_href = _getNs(c_node) + if c_node_href is NULL: + return c_href[0] == c'\0' + else: + return cstd.strcmp(c_node_href, c_href) == 0 else: - return cstd.strcmp(c_node.name, c_name) == 0 and \ - cstd.strcmp(c_node.ns.href, c_href) == 0 + return 0 cdef void _removeNode(xmlNode* c_node): """Unlink and free a node and subnodes if possible. 
@@ -394,16 +508,17 @@ if nslen > 0: ns = python.PyString_FromStringAndSize(c_tag, nslen) tag = python.PyString_FromStringAndSize(c_ns_end+1, taglen) + elif python.PyString_GET_SIZE(tag) == 0: + raise ValueError, "Empty tag name" return ns, tag cdef object _namespacedName(xmlNode* c_node): - cdef char* href - cdef char* name - name = c_node.name - if c_node.ns is NULL or c_node.ns.href is NULL: + return _namespacedNameFromNsName(_getNs(c_node), c_node.name) + +cdef object _namespacedNameFromNsName(char* href, char* name): + if href is NULL: return funicode(name) else: - href = c_node.ns.href s = python.PyString_FromFormat("{%s}%s", href, name) if isutf8(href) or isutf8(name): return python.PyUnicode_FromEncodedObject(s, 'UTF-8', NULL) Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Tue Aug 8 19:56:33 2006 @@ -1,6 +1,6 @@ cimport tree, python -from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement -from python cimport isinstance, issubclass, hasattr, callable +from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement, _getNs +from python cimport isinstance, issubclass, hasattr, getattr, callable from python cimport iter, str, _cstr, _isString, Py_ssize_t cimport xpath cimport xinclude @@ -186,6 +186,7 @@ # forward declaration of _BaseParser, see parser.pxi cdef class _BaseParser + cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: """Internal base class to reference a libxml document. 
@@ -378,7 +379,8 @@ unregisterProxy(self) attemptDeallocation(self._c_node) -cdef class _ElementTree: +cdef public class _ElementTree [ type LxmlElementTreeType, + object LxmlElementTree ]: cdef _Document _doc cdef _NodeBase _context_node @@ -599,8 +601,7 @@ self._assertHasRoot() _tofilelikeC14N(file, self._context_node) -cdef _ElementTree _elementTreeFactory(_Document doc, - _NodeBase context_node): +cdef _ElementTree _elementTreeFactory(_Document doc, _NodeBase context_node): return _newElementTree(doc, context_node, _ElementTree) cdef _ElementTree _newElementTree(_Document doc, _NodeBase context_node, @@ -834,20 +835,7 @@ return _collectText(self._c_node.children) def __set__(self, value): - cdef xmlNode* c_text_node - # remove all text nodes at the start first - _removeText(self._c_node.children) - if value is None: - return - # now add new text node with value at start - text = _utf8(value) - c_text_node = tree.xmlNewDocText(self._doc._c_doc, - _cstr(text)) - if self._c_node.children is NULL: - tree.xmlAddChild(self._c_node, c_text_node) - else: - tree.xmlAddPrevSibling(self._c_node.children, - c_text_node) + _setNodeText(self._c_node, value) property tail: """Text after this element's end tag, but before the next sibling @@ -858,15 +846,7 @@ return _collectText(self._c_node.next) def __set__(self, value): - cdef xmlNode* c_text_node - # remove all text nodes at the start first - _removeText(self._c_node.next) - if value is None: - return - text = _utf8(value) - c_text_node = tree.xmlNewDocText(self._doc._c_doc, _cstr(text)) - # XXX what if we're the top element? - tree.xmlAddNextSibling(self._c_node, c_text_node) + _setTailText(self._c_node, value) # not in ElementTree, read-only property prefix: @@ -1159,18 +1139,7 @@ def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): """Creates a new element associated with the same document. 
""" - # a little code duplication, but less overhead through doc reuse - cdef xmlNode* c_node - cdef xmlDoc* c_doc - cdef _Document doc - ns_utf, name_utf = _getNsTag(_tag) - doc = self._doc - c_doc = doc._c_doc - c_node = _createElement(c_doc, name_utf) - # add namespaces to node if necessary - doc._setNodeNamespaces(c_node, ns_utf, nsmap) - _initNodeAttributes(c_node, doc, attrib, _extra) - return _elementFactory(doc, c_node) + return _makeElement(_tag, NULL, self._doc, None, attrib, nsmap, _extra) def find(self, path): """Finds the first matching subelement, by tag name or path. @@ -1195,22 +1164,13 @@ cdef _Element _elementFactory(_Document doc, xmlNode* c_node): cdef _Element result - cdef char* c_ns_href result = getProxy(c_node) if result is not None: return result if c_node is NULL: return None - if c_node.type == tree.XML_ELEMENT_NODE: - if c_node.ns == NULL: - c_ns_href = NULL - else: - c_ns_href = c_node.ns.href - element_class = _find_element_class(c_ns_href, c_node.name) - elif c_node.type == tree.XML_COMMENT_NODE: - element_class = _Comment - else: - assert 0, "Unknown node type: %s" % c_node.type + element_class = LOOKUP_ELEMENT_CLASS(ELEMENT_CLASS_LOOKUP_STATE, + doc, c_node) result = element_class() result._doc = doc result._c_node = c_node @@ -1218,17 +1178,25 @@ result._init() return result -cdef class _Comment(_Element): +cdef class __ContentOnlyElement(_Element): + cdef int _raiseImmutable(self) except -1: + raise TypeError, "this element does not have children or attributes" + def set(self, key, value): - pass - - def append(self, _Element element): - pass + self._raiseImmutable() + + def append(self, value): + self._raiseImmutable() + + def insert(self, index, value): + self._raiseImmutable() + + def __setitem__(self, index, value): + self._raiseImmutable() + + def __setslice__(self, start, end, value): + self._raiseImmutable() - property tag: - def __get__(self): - return None - property attrib: def __get__(self): return {} @@ -1268,19 
+1236,26 @@ def items(self): return [] - -cdef _Comment _commentFactory(_Document doc, xmlNode* c_node): - cdef _Comment result - result = getProxy(c_node) - if result is not None: - return result - if c_node is NULL: - return None - result = _Comment() - result._doc = doc - result._c_node = c_node - registerProxy(result) - return result + +cdef class _Comment(__ContentOnlyElement): + property tag: + def __get__(self): + return Comment + +cdef class _ProcessingInstruction(__ContentOnlyElement): + property tag: + def __get__(self): + return ProcessingInstruction + + property target: + # not in ElementTree + def __get__(self): + return funicode(self._c_node.name) + + def __set__(self, value): + value = _utf8(value) + c_text = _cstr(value) + tree.xmlNodeSetName(self._c_node, c_text) cdef class _Attrib: cdef _NodeBase _element @@ -1292,21 +1267,8 @@ _setAttributeValue(self._element, key, value) def __delitem__(self, key): - cdef xmlNode* c_node - cdef xmlAttr* c_attr - cdef char* c_tag - ns, tag = _getNsTag(key) - c_tag = _cstr(tag) - c_node = self._element._c_node - if ns is None: - c_attr = tree.xmlHasProp(c_node, c_tag) - else: - c_attr = tree.xmlHasNsProp(c_node, c_tag, _cstr(ns)) - if c_attr is NULL: - # XXX free namespace that is not in use..? 
- raise KeyError, key - tree.xmlRemoveProp(c_attr) - + _delAttribute(self._element, key) + # ACCESSORS def __repr__(self): result = {} @@ -1422,7 +1384,8 @@ ctypedef xmlNode* (*_node_to_node_function)(xmlNode*) -cdef class _ElementTagMatcher: +cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, + type LxmlElementTagMatcherType ]: cdef object _pystrings cdef char* _href cdef char* _name @@ -1440,7 +1403,8 @@ if self._name[0] == c'*' and self._name[1] == c'\0': self._name = NULL -cdef class _ElementIterator(_ElementTagMatcher): +cdef public class _ElementIterator(_ElementTagMatcher) [ + object LxmlElementIterator, type LxmlElementIteratorType ]: # we keep Python references here to control GC cdef _NodeBase _node cdef _node_to_node_function _next_element @@ -1575,6 +1539,11 @@ c_node = tree.xmlNewDocComment(c_doc, text) return c_node +cdef xmlNode* _createPI(xmlDoc* c_doc, char* target, char* text): + cdef xmlNode* c_node + c_node = tree.xmlNewDocPI(c_doc, target, text) + return c_node + cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, extra): cdef xmlNs* c_ns # 'extra' is not checked here (expected to be a keyword dict) @@ -1602,35 +1571,43 @@ def Element(_tag, attrib=None, nsmap=None, **_extra): """Element factory. This function returns an object implementing the Element interface. """ + ### also look at _Element.makeelement() and _BaseParser.makeelement() ### + return _makeElement(_tag, NULL, None, None, attrib, nsmap, _extra) + +def Comment(text=None): + """Comment element factory. This factory function creates a special element that will + be serialized as an XML comment. 
+ """ + cdef _Document doc cdef xmlNode* c_node cdef xmlDoc* c_doc - cdef _Document doc - ns_utf, name_utf = _getNsTag(_tag) + if text is None: + text = '' + else: + text = _utf8(text) c_doc = _newDoc() - c_node = _createElement(c_doc, name_utf) - tree.xmlDocSetRootElement(c_doc, c_node) doc = _documentFactory(c_doc, None) - # add namespaces to node if necessary - doc._setNodeNamespaces(c_node, ns_utf, nsmap) - _initNodeAttributes(c_node, doc, attrib, _extra) + c_node = _createComment(c_doc, _cstr(text)) + tree.xmlAddChild(c_doc, c_node) return _elementFactory(doc, c_node) -def Comment(text=None): +def ProcessingInstruction(target, text=None): """Comment element factory. This factory function creates a special element that will be serialized as an XML comment. """ cdef _Document doc cdef xmlNode* c_node cdef xmlDoc* c_doc + target = _utf8(target) if text is None: text = '' else: text = _utf8(text) c_doc = _newDoc() doc = _documentFactory(c_doc, None) - c_node = _createComment(c_doc, _cstr(text)) + c_node = _createPI(c_doc, _cstr(target), _cstr(text)) tree.xmlAddChild(c_doc, c_node) - return _commentFactory(doc, c_node) + return _elementFactory(doc, c_node) def SubElement(_Element _parent not None, _tag, attrib=None, nsmap=None, **_extra): @@ -1668,20 +1645,24 @@ return _elementTreeFactory(doc, element) -def HTML(text): +def HTML(text, _BaseParser parser=None): """Parses an HTML document from a string constant. This function can be used to embed "HTML literals" in Python code. """ cdef _Document doc - doc = _parseMemoryDocument(text, None, __DEFAULT_HTML_PARSER) + if parser is None: + parser = __DEFAULT_HTML_PARSER + doc = _parseMemoryDocument(text, None, parser) return doc.getroot() -def XML(text): +def XML(text, _BaseParser parser=None): """Parses an XML document from a string constant. This function can be used to embed "XML literals" in Python code. 
""" cdef _Document doc - doc = _parseMemoryDocument(text, None, __DEFAULT_XML_PARSER) + if parser is None: + parser = __DEFAULT_XML_PARSER + doc = _parseMemoryDocument(text, None, parser) return doc.getroot() fromstring = XML @@ -1776,10 +1757,13 @@ return ElementTree(doc.getroot()) -# include submodules +################################################################################ +# Include submodules + include "proxy.pxi" # Proxy handling (element backpointers/memory/etc.) include "apihelpers.pxi" # Private helper functions include "xmlerror.pxi" # Error and log handling +include "classlookup.pxi"# Namespace implementation and registry include "nsclasses.pxi" # Namespace implementation and registry include "docloader.pxi" # Support for custom document loaders include "parser.pxi" # XML Parser @@ -1825,3 +1809,8 @@ include "relaxng.pxi" # RelaxNG include "xmlschema.pxi" # XMLSchema + +################################################################################ +# Public C API + +include "public-api.pxi" Modified: lxml/trunk/src/lxml/etree_defs.h ============================================================================== --- lxml/trunk/src/lxml/etree_defs.h (original) +++ lxml/trunk/src/lxml/etree_defs.h Tue Aug 8 19:56:33 2006 @@ -31,6 +31,7 @@ #define isinstance(o,c) PyObject_IsInstance(o,c) #define issubclass(c,csuper) PyObject_IsSubclass(c,csuper) #define hasattr(o,a) PyObject_HasAttr(o,a) +#define getattr(o,a) PyObject_GetAttr(o,a) #define callable(o) PyCallable_Check(o) #define str(o) PyObject_Str(o) #define iter(o) PyObject_GetIter(o) @@ -39,8 +40,12 @@ #define _isString(obj) PyObject_TypeCheck(obj, &PyBaseString_Type) #define _isElement(c_node) \ - ((c_node)->type == XML_ELEMENT_NODE || \ - (c_node)->type == XML_COMMENT_NODE) + (((c_node)->type == XML_ELEMENT_NODE) || \ + ((c_node)->type == XML_COMMENT_NODE) || \ + ((c_node)->type == XML_PI_NODE)) + +#define _getNs(c_node) \ + (((c_node)->ns == 0) ? 
0 : ((c_node)->ns->href)) /* Macro pair implementation of a depth first tree walker * Modified: lxml/trunk/src/lxml/htmlparser.pxd ============================================================================== --- lxml/trunk/src/lxml/htmlparser.pxd (original) +++ lxml/trunk/src/lxml/htmlparser.pxd Tue Aug 8 19:56:33 2006 @@ -10,7 +10,7 @@ HTML_PARSE_PEDANTIC # pedantic error reporting HTML_PARSE_NOBLANKS # remove blank nodes HTML_PARSE_NONET # Forbid network access - # libxml2 2.6.21+ only: + # libxml2 2.6.21+ only: HTML_PARSE_RECOVER # Relaxed parsing HTML_PARSE_COMPACT # compact small text nodes Modified: lxml/trunk/src/lxml/nsclasses.pxi ============================================================================== --- lxml/trunk/src/lxml/nsclasses.pxi (original) +++ lxml/trunk/src/lxml/nsclasses.pxi Tue Aug 8 19:56:33 2006 @@ -6,36 +6,13 @@ class NamespaceRegistryError(LxmlRegistryError): pass -cdef public class ElementBase(_Element) [ type LxmlElementBaseType, - object LxmlElementBase ]: - """All custom Element classes must inherit from this one. - - Note that subclasses *must not* override __init__ or __new__ as it is - absolutely undefined when these objects will be created or destroyed. All - persistent state of elements must be stored in the underlying XML. If you - really need to initialize the object after creation, you can implement an - ``_init(self)`` method that will be called after object creation. 
- """ - -def setDefaultElementClass(cls=None): - global __DEFAULT_ELEMENT_CLASS - if cls is None: - __DEFAULT_ELEMENT_CLASS = _Element - elif not python.PyType_Check(cls) or not issubclass(cls, ElementBase): - raise LxmlRegistryError, \ - "Registered element classes must be subtypes of ElementBase" - else: - __DEFAULT_ELEMENT_CLASS = cls - -cdef object __DEFAULT_ELEMENT_CLASS -__DEFAULT_ELEMENT_CLASS = _Element - cdef object __NAMESPACE_REGISTRIES __NAMESPACE_REGISTRIES = {} cdef object __FUNCTION_NAMESPACE_REGISTRIES __FUNCTION_NAMESPACE_REGISTRIES = {} + def Namespace(ns_uri): """Retrieve the namespace object associated with the given URI. Creates a new one if it does not yet exist.""" @@ -193,32 +170,37 @@ else: return dict_result -cdef object _find_element_class(char* c_namespace_utf, - char* c_element_name_utf): +cdef object _find_nselement_class(state, _Document doc, xmlNode* c_node): cdef python.PyObject* dict_result cdef _NamespaceRegistry registry + cdef char* c_namespace_utf + if c_node.type != tree.XML_ELEMENT_NODE: + if state is None: + return _lookupDefaultElementClass(None, doc, c_node) + return (state)._callFallback(doc, c_node) + c_namespace_utf = _getNs(c_node) if c_namespace_utf is not NULL: dict_result = python.PyDict_GetItemString( __NAMESPACE_REGISTRIES, c_namespace_utf) else: dict_result = python.PyDict_GetItem( __NAMESPACE_REGISTRIES, None) - if dict_result is NULL: - return __DEFAULT_ELEMENT_CLASS + if dict_result is not NULL: + registry = <_NamespaceRegistry>dict_result + classes = registry._entries - registry = <_NamespaceRegistry>dict_result - classes = registry._entries + if c_node.name is not NULL: + dict_result = python.PyDict_GetItemString( + classes, c_node.name) + else: + dict_result = NULL - if c_element_name_utf is not NULL: - dict_result = python.PyDict_GetItemString( - classes, c_element_name_utf) - else: - dict_result = NULL + if dict_result is NULL: + dict_result = python.PyDict_GetItem(classes, None) - if dict_result is NULL: 
- dict_result = python.PyDict_GetItem(classes, None) + if dict_result is not NULL: + return dict_result - if dict_result is not NULL: - return dict_result - else: - return __DEFAULT_ELEMENT_CLASS + if state is None: + return _lookupDefaultElementClass(None, doc, c_node) + return (state)._callFallback(doc, c_node) Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Tue Aug 8 19:56:33 2006 @@ -344,6 +344,7 @@ cdef _ResolverContext _context cdef LxmlParserType _parser_type cdef xmlParserCtxt* _parser_ctxt + cdef ElementClassLookup _class_lookup cdef object _lockParser cdef object _unlockParser @@ -387,11 +388,15 @@ def __dummy(self): pass + def setElementClassLookup(self, ElementClassLookup lookup not None): + self._class_lookup = lookup + cdef _BaseParser _copy(self): "Create a new parser with the same configuration." cdef _BaseParser parser parser = self.__class__() parser._parse_options = self._parse_options + parser._class_lookup = self._class_lookup parser.resolvers = self.resolvers._copy() parser._context = _ResolverContext(parser.resolvers) parser._parser_ctxt._private = parser._context @@ -401,6 +406,11 @@ "Create a new parser with the same configuration." return self._copy() + def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): + """Creates a new element associated with this parser. + """ + return _makeElement(_tag, NULL, None, self, attrib, nsmap, _extra) + cdef xmlDoc* _parseUnicodeDoc(self, utext, char* c_filename) except NULL: """Parse unicode document, share dictionary if possible. """ @@ -686,7 +696,7 @@ __GLOBAL_PARSER_CONTEXT.setDefaultParser(__DEFAULT_XML_PARSER) -def set_default_parser(_BaseParser parser=None): +def setDefaultParser(_BaseParser parser=None): """Set a default parser for the current thread. 
This parser is used globally whenever no parser is supplied to the various parse functions of the lxml API. If this function is called without a parser (or if it is @@ -700,9 +710,17 @@ parser = __DEFAULT_XML_PARSER __GLOBAL_PARSER_CONTEXT.setDefaultParser(parser) -def get_default_parser(): +def getDefaultParser(): return __GLOBAL_PARSER_CONTEXT.getDefaultParser() +def set_default_parser(parser): + "Deprecated, please use setDefaultParser instead." + setDefaultParser(parser) + +def get_default_parser(): + "Deprecated, please use getDefaultParser instead." + return getDefaultParser() + ############################################################ ## HTML parser ############################################################ @@ -813,6 +831,13 @@ python.PyEval_RestoreThread(state) return result +cdef xmlNode* _copyNodeToDoc(xmlNode* c_node, xmlDoc* c_doc): + "Recursively copy the element into the document. c_doc is not modified." + cdef xmlNode* c_root + c_root = tree.xmlDocCopyNode(c_node, c_doc, 1) # recursive + _copyTail(c_node.next, c_root) + return c_root + cdef void _bugFixURL(xmlDoc* c_source_doc, xmlDoc* c_target_doc): """libxml2 <= 2.6.17 had a bug that prevented it from copying the document URL in xmlDocCopy()""" Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Tue Aug 8 19:56:33 2006 @@ -6,6 +6,7 @@ ctypedef int size_t ctypedef int Py_ssize_t cdef int INT_MAX + cdef int PY_SSIZE_T_MAX cdef void Py_INCREF(object o) @@ -28,14 +29,21 @@ cdef object PyString_FromString(char* s) cdef object PyString_FromFormat(char* format, ...) 
cdef Py_ssize_t PyString_GET_SIZE(object s) + cdef object PyBool_FromLong(long value) cdef object PyNumber_Int(object value) + cdef Py_ssize_t PyInt_AsSsize_t(object value) + + cdef Py_ssize_t PyTuple_GET_SIZE(object t) + cdef object PyTuple_GET_ITEM(object o, Py_ssize_t pos) cdef Py_ssize_t PyList_GET_SIZE(object l) + cdef object PyList_GET_ITEM(object l, Py_ssize_t index) cdef int PyList_Append(object l, object obj) except -1 cdef int PyList_Reverse(object l) except -1 cdef int PyList_Insert(object l, Py_ssize_t index, object o) except -1 - cdef object PyList_GET_ITEM(object l, Py_ssize_t index) + cdef object PyList_AsTuple(object o) + cdef int PyDict_SetItemString(object d, char* key, object value) except -1 cdef int PyDict_SetItem(object d, object key, object value) except -1 cdef PyObject* PyDict_GetItemString(object d, char* key) @@ -44,12 +52,13 @@ cdef void PyDict_Clear(object d) cdef object PyDict_Copy(object d) cdef Py_ssize_t PyDict_Size(object d) - cdef object PyList_AsTuple(object o) + cdef int PyDict_Contains(object d, object key) cdef object PySequence_List(object o) cdef object PySequence_Tuple(object o) - cdef object PyTuple_GET_ITEM(object o, Py_ssize_t pos) cdef int PyDict_Check(object instance) + cdef int PyList_Check(object instance) + cdef int PyTuple_Check(object instance) cdef int PyNumber_Check(object instance) cdef int PyBool_Check(object instance) cdef int PySequence_Check(object instance) @@ -57,6 +66,8 @@ cdef int PyTuple_CheckExact(object instance) cdef int PyObject_SetAttr(object o, object name, object value) + cdef object PyObject_RichCompare(object o1, object o2, int op) + cdef int PyObject_RichCompareBool(object o1, object o2, int op) cdef void* PyMem_Malloc(size_t size) cdef void* PyMem_Realloc(void* p, size_t size) @@ -78,6 +89,7 @@ cdef int isinstance(object instance, object classes) cdef int issubclass(object derived, object superclasses) cdef int hasattr(object obj, object attr) + cdef object getattr(object obj, object attr) 
cdef int callable(object obj) cdef object str(object obj) cdef object iter(object obj) Modified: lxml/trunk/src/lxml/relaxng.pxi ============================================================================== --- lxml/trunk/src/lxml/relaxng.pxi (original) +++ lxml/trunk/src/lxml/relaxng.pxi Tue Aug 8 19:56:33 2006 @@ -23,6 +23,7 @@ cdef _NodeBase root_node cdef xmlNode* c_node cdef xmlDoc* fake_c_doc + cdef char* c_href cdef relaxng.xmlRelaxNGParserCtxt* parser_ctxt self._c_schema = NULL fake_c_doc = NULL @@ -31,8 +32,9 @@ root_node = _rootNodeOrRaise(etree) c_node = root_node._c_node # work around for libxml2 bug if document is not RNG at all - if c_node.ns is NULL or c_node.ns.href is NULL or \ - cstd.strcmp(c_node.ns.href, + c_href = _getNs(c_node) + if c_href is NULL or \ + cstd.strcmp(c_href, 'http://relaxng.org/ns/structure/1.0') != 0: raise RelaxNGParseError, "Document is not Relax NG" fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Tue Aug 8 19:56:33 2006 @@ -442,6 +442,7 @@ ElementTree = self.etree.ElementTree XML = self.etree.XML Comment = self.etree.Comment + ProcessingInstruction = self.etree.ProcessingInstruction el = Element('hoi') self.assert_(iselement(el)) @@ -455,6 +456,9 @@ c = Comment('test') self.assert_(iselement(c)) + + p = ProcessingInstruction("test", "some text") + self.assert_(iselement(p)) def test_iteration(self): XML = self.etree.XML @@ -763,7 +767,7 @@ None, a.tail) self.assertXML('', a) - + def test_comment(self): Element = self.etree.Element SubElement = self.etree.SubElement @@ -771,6 +775,7 @@ a = Element('a') a.append(Comment('foo')) + self.assertEquals(a[0].tag, Comment) self.assertEquals(a[0].text, 'foo') def test_comment_text(self): @@ -806,6 +811,30 @@ for i in c: pass + def 
test_pi(self): + # lxml.etree separates target and text + Element = self.etree.Element + SubElement = self.etree.SubElement + ProcessingInstruction = self.etree.ProcessingInstruction + + a = Element('a') + a.append(ProcessingInstruction('foo', 'some more text')) + self.assertEquals(a[0].tag, ProcessingInstruction) + self.assertXML("", + a) + + def test_pi_nonsense(self): + ProcessingInstruction = self.etree.ProcessingInstruction + pi = ProcessingInstruction('foo') + self.assertEquals({}, pi.attrib) + self.assertEquals([], pi.keys()) + self.assertEquals([], pi.items()) + self.assertEquals(None, pi.get('hoi')) + self.assertEquals(0, len(pi)) + # should not iterate + for i in pi: + pass + def test_setitem(self): Element = self.etree.Element SubElement = self.etree.SubElement Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Tue Aug 8 19:56:33 2006 @@ -28,6 +28,9 @@ self.assert_(etree.__version__.startswith( str(etree.LXML_VERSION[0]))) + def test_c_api(self): + self.assert_(hasattr(self.etree, '_import_c_api')) + def test_element_names(self): Element = self.etree.Element @@ -62,6 +65,33 @@ "valueB", root[0].get("default")) + def test_pi(self): + # lxml.etree separates target and text + Element = self.etree.Element + SubElement = self.etree.SubElement + ProcessingInstruction = self.etree.ProcessingInstruction + + a = Element('a') + a.append(ProcessingInstruction('foo', 'some more text')) + self.assertEquals(a[0].target, 'foo') + self.assertEquals(a[0].text, 'some more text') + + def test_pi_parse(self): + XML = self.etree.XML + root = XML("") + self.assertEquals(root[0].target, "mypi") + self.assertEquals(root[0].text, "my test ") + + def test_attribute_set(self): + # ElementTree accepts arbitrary attribute values + # lxml.etree allows only strings + Element = self.etree.Element + + root = 
Element("root") + root.set("attr", "TEST") + self.assertEquals("TEST", root.get("attr")) + self.assertRaises(TypeError, root.set, "newattr", 5) + def test_parse_error(self): # ET raises ExpatError parse = self.etree.parse @@ -390,6 +420,18 @@ '', tostring(a)) + # does not raise an exception in ElementTree + def test_comment_immutable(self): + Element = self.etree.Element + Comment = self.etree.Comment + + c = Comment() + el = Element('myel') + + self.assertRaises(TypeError, c.append, el) + self.assertRaises(TypeError, c.insert, 0, el) + self.assertRaises(TypeError, c.set, "myattr", "test") + # test weird dictionary interaction leading to segfault previously def test_weird_dict_interaction(self): root = self.etree.Element('root') Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_htmlparser.py (original) +++ lxml/trunk/src/lxml/tests/test_htmlparser.py Tue Aug 8 19:56:33 2006 @@ -20,7 +20,7 @@ uhtml_str = u"test ??\uF8D2

page ??\uF8D2 title

" def tearDown(self): - self.etree.set_default_parser() + self.etree.setDefaultParser() def test_module_HTML(self): element = self.etree.HTML(self.html_str) @@ -92,13 +92,13 @@ self.assertRaises(self.etree.XMLSyntaxError, self.etree.parse, StringIO(self.broken_html_str)) - self.etree.set_default_parser( self.etree.HTMLParser() ) + self.etree.setDefaultParser( self.etree.HTMLParser() ) tree = self.etree.parse(StringIO(self.broken_html_str)) self.assertEqual(self.etree.tostring(tree.getroot()), self.html_str) - self.etree.set_default_parser() + self.etree.setDefaultParser() self.assertRaises(self.etree.XMLSyntaxError, self.etree.parse, StringIO(self.broken_html_str)) Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_nsclasses.py (original) +++ lxml/trunk/src/lxml/tests/test_nsclasses.py Tue Aug 8 19:56:33 2006 @@ -144,31 +144,12 @@ etree.Namespace(None).clear() etree.Namespace(u'ns30').clear() - def test_default_element_class(self): - class local_default_class(etree.ElementBase): - pass - - try: - etree.setDefaultElementClass(local_default_class) - self.assert_(isinstance(etree.Element("test"), - local_default_class)) - self.assert_(isinstance(etree.Element("{http://myns}test"), - local_default_class)) - - etree.setDefaultElementClass() - self.assertFalse(isinstance(etree.Element("test"), - local_default_class)) - self.assertFalse(isinstance(etree.Element("{http://myns}test"), - local_default_class)) - finally: - etree.setDefaultElementClass() - def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeNamespaceClassesTestCase)]) optionflags = doctest.NORMALIZE_WHITESPACE|doctest.ELLIPSIS suite.addTests( - [doctest.DocFileSuite('../../../doc/namespace_extensions.txt', + [doctest.DocFileSuite('../../../doc/element_classes.txt', optionflags=optionflags)], ) return suite Modified: lxml/trunk/src/lxml/tree.pxd 
============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Tue Aug 8 19:56:33 2006 @@ -161,6 +161,7 @@ cdef xmlNode* xmlNewNode(xmlNs* ns, char* name) cdef xmlNode* xmlNewDocText(xmlDoc* doc, char* content) cdef xmlNode* xmlNewDocComment(xmlDoc* doc, char* content) + cdef xmlNode* xmlNewDocPI(xmlDoc* doc, char* name, char* content) cdef xmlNs* xmlNewNs(xmlNode* node, char* href, char* prefix) cdef xmlNode* xmlAddChild(xmlNode* parent, xmlNode* cur) cdef xmlNode* xmlReplaceNode(xmlNode* old, xmlNode* cur) @@ -175,9 +176,10 @@ cdef char* xmlGetNoNsProp(xmlNode* node, char* name) cdef char* xmlGetNsProp(xmlNode* node, char* name, char* nameSpace) cdef void xmlSetNs(xmlNode* node, xmlNs* ns) - cdef void xmlSetProp(xmlNode* node, char* name, char* value) - cdef void xmlSetNsProp(xmlNode* node, xmlNs* ns, char* name, char* value) - cdef void xmlRemoveProp(xmlAttr* cur) + cdef xmlAttr* xmlSetProp(xmlNode* node, char* name, char* value) + cdef xmlAttr* xmlSetNsProp(xmlNode* node, xmlNs* ns, + char* name, char* value) + cdef int xmlRemoveProp(xmlAttr* cur) cdef char* xmlGetNodePath(xmlNode* node) cdef void xmlDocDumpMemory(xmlDoc* cur, char** mem, int* size) cdef void xmlDocDumpMemoryEnc(xmlDoc* cur, char** mem, int* size, @@ -250,6 +252,7 @@ cdef extern from "etree_defs.h": cdef int _isElement(xmlNode* node) + cdef char* _getNs(xmlNode* node) cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* tree_top, xmlNode* start_node, int inclusive) cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* start_node) Modified: lxml/trunk/src/lxml/xmlparser.pxd ============================================================================== --- lxml/trunk/src/lxml/xmlparser.pxd (original) +++ lxml/trunk/src/lxml/xmlparser.pxd Tue Aug 8 19:56:33 2006 @@ -119,7 +119,7 @@ cdef extern from "libxml/parserInternals.h": cdef xmlParserInput* xmlNewStringInputStream(xmlParserCtxt* ctxt, - char* buffer) + char* 
buffer) cdef xmlParserInput* xmlNewInputFromFile(xmlParserCtxt* ctxt, char* filename) cdef void xmlFreeInputStream(xmlParserInput* input) Modified: lxml/trunk/src/lxml/xmlschema.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlschema.pxi (original) +++ lxml/trunk/src/lxml/xmlschema.pxi Tue Aug 8 19:56:33 2006 @@ -22,6 +22,7 @@ cdef _NodeBase root_node cdef xmlDoc* fake_c_doc cdef xmlNode* c_node + cdef char* c_href cdef xmlschema.xmlSchemaParserCtxt* parser_ctxt self._c_schema = NULL if etree is not None: @@ -30,8 +31,9 @@ # work around for libxml2 bug if document is not XML schema at all c_node = root_node._c_node - if c_node.ns is NULL or c_node.ns.href is NULL or \ - cstd.strcmp(c_node.ns.href, 'http://www.w3.org/2001/XMLSchema') != 0: + c_href = _getNs(c_node) + if c_href is NULL or \ + cstd.strcmp(c_href, 'http://www.w3.org/2001/XMLSchema') != 0: raise XMLSchemaParseError, "Document is not XML Schema" fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) Modified: lxml/trunk/src/lxml/xpath.pxd ============================================================================== --- lxml/trunk/src/lxml/xpath.pxd (original) +++ lxml/trunk/src/lxml/xpath.pxd Tue Aug 8 19:56:33 2006 @@ -103,15 +103,15 @@ char* ns_uri, xmlXPathFunction f) cdef void xmlXPathRegisterFuncLookup(xmlXPathContext *ctxt, - xmlXPathFuncLookupFunc f, - void *funcCtxt) + xmlXPathFuncLookupFunc f, + void *funcCtxt) cdef int xmlXPathRegisterVariable(xmlXPathContext *ctxt, - char* name, - xmlXPathObject* value) + char* name, + xmlXPathObject* value) cdef int xmlXPathRegisterVariableNS(xmlXPathContext *ctxt, - char* name, - char* ns_uri, - xmlXPathObject* value) + char* name, + char* ns_uri, + xmlXPathObject* value) cdef void xmlXPathRegisteredVariablesCleanup(xmlXPathContext *ctxt) cdef void xmlXPathRegisteredNsCleanup(xmlXPathContext *ctxt) cdef xmlXPathObject* valuePop (xmlXPathParserContext *ctxt) From scoder at codespeak.net Tue Aug 8 
19:57:43 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 8 Aug 2006 19:57:43 +0200 (CEST) Subject: [Lxml-checkins] r31182 - lxml/trunk Message-ID: <20060808175743.CD7A210069@code0.codespeak.net> Author: scoder Date: Tue Aug 8 19:57:41 2006 New Revision: 31182 Added: lxml/trunk/Pyrex-0.9.4.1-public-api.patch - copied unchanged from r31177, lxml/branch/capi/Pyrex-0.9.4.1-public-api.patch Modified: lxml/trunk/CHANGES.txt lxml/trunk/version.txt Log: prepared release of 1.1beta Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Aug 8 19:57:41 2006 @@ -2,18 +2,19 @@ lxml changelog ============== -current -======= +1.1beta (2006-08-08) +==================== Features added -------------- -* Element.replace(old, new) method to replace a subelement by another one +* Unlock the GIL for deep copying documents and for XPath() -Bugs fixed ----------- +* Support for Python 2.5 beta -* Crash when mixing elements from XSLT results into other trees +* New ``compact`` keyword argument for parsing read-only documents + +* Support for parser options in iterparse() * The ``namespace`` axis is supported in XPath and returns (prefix, URI) tuples @@ -21,7 +22,27 @@ * The XPath expression "/" now returns an empty list instead of raising an exception -* Copying/deepcopying did not work for ElementTree objects +* XML-Object API on top of lxml (lxml.objectify) + +* Customizable Element class lookup: + + * Support for externally provided lookup functions + + * lxml.elements.classlookup module implements different lookup mechanisms + +* Support for processing instructions (ET-like, not compatible) + +* Public C-level API for independent extension modules + +Bugs fixed +---------- + +* XPathSyntaxError now inherits from XPathError + +* Threading race conditions in RelaxNG and XMLSchema + +* Crash when mixing elements from XSLT results into other 
trees, concurrent + XSLT is only allowed when the stylesheet was parsed in the main thread * The EXSLT ``regexp:match`` function now works as defined (except for some differences in the regular expression syntax) @@ -30,10 +51,6 @@ * ``iterparse()`` could crash on long XML files -* Setting an attribute to a non-string value did not raise an exception - -* Element.remove() deleted the tail text from the removed Element - * Creating documents no longer copies the parser for later URL resolving. For performance reasons, only a reference is kept. Resolver updates on the parser will now be reflected by documents that were parsed before the @@ -41,6 +58,26 @@ change from 1.0. +1.0.3 (2006-08-08) +================== + +Features added +-------------- + +* Element.replace(old, new) method to replace a subelement by another one + +Bugs fixed +---------- + +* Crash when mixing elements from XSLT results into other trees + +* Copying/deepcopying did not work for ElementTree objects + +* Setting an attribute to a non-string value did not raise an exception + +* Element.remove() deleted the tail text from the removed Element + + 1.1alpha (2006-06-27) ===================== Modified: lxml/trunk/version.txt ============================================================================== --- lxml/trunk/version.txt (original) +++ lxml/trunk/version.txt Tue Aug 8 19:57:41 2006 @@ -1 +1 @@ -1.1alpha +1.1beta From scoder at codespeak.net Tue Aug 8 20:00:31 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 8 Aug 2006 20:00:31 +0200 (CEST) Subject: [Lxml-checkins] r31183 - lxml/trunk Message-ID: <20060808180031.0440F10069@code0.codespeak.net> Author: scoder Date: Tue Aug 8 20:00:30 2006 New Revision: 31183 Modified: lxml/trunk/CHANGES.txt Log: small fix in ChangeLog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Tue Aug 8 20:00:30 2006 @@ 
-8,10 +8,10 @@ Features added -------------- -* Unlock the GIL for deep copying documents and for XPath() - * Support for Python 2.5 beta +* Unlock the GIL for deep copying documents and for XPath() + * New ``compact`` keyword argument for parsing read-only documents * Support for parser options in iterparse() From scoder at codespeak.net Tue Aug 8 20:01:30 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 8 Aug 2006 20:01:30 +0200 (CEST) Subject: [Lxml-checkins] r31184 - lxml/trunk Message-ID: <20060808180130.494BC10069@code0.codespeak.net> Author: scoder Date: Tue Aug 8 20:01:29 2006 New Revision: 31184 Modified: lxml/trunk/setup.py Log: small fix in setup.py Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Tue Aug 8 20:01:29 2006 @@ -202,7 +202,7 @@ ], package_dir = {'': 'src'}, - packages = ['lxml', 'lxml.elements'], + packages = ['lxml'], ext_modules = ext_modules, **setup_args ) From scoder at codespeak.net Tue Aug 8 20:58:07 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 8 Aug 2006 20:58:07 +0200 (CEST) Subject: [Lxml-checkins] r31185 - lxml/tag/lxml-1.0.3 Message-ID: <20060808185807.D665C10050@code0.codespeak.net> Author: scoder Date: Tue Aug 8 20:58:06 2006 New Revision: 31185 Added: lxml/tag/lxml-1.0.3/ - copied from r31184, lxml/branch/lxml-1.0/ Log: tag for lxml 1.0.3 From scoder at codespeak.net Tue Aug 8 20:58:21 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 8 Aug 2006 20:58:21 +0200 (CEST) Subject: [Lxml-checkins] r31186 - lxml/trunk/src/lxml/tests Message-ID: <20060808185821.DC9C110050@code0.codespeak.net> Author: scoder Date: Tue Aug 8 20:58:20 2006 New Revision: 31186 Added: lxml/trunk/src/lxml/tests/test.dtd Log: added forgotten test file Added: lxml/trunk/src/lxml/tests/test.dtd ==============================================================================
--- (empty file) +++ lxml/trunk/src/lxml/tests/test.dtd Tue Aug 8 20:58:20 2006 @@ -0,0 +1,9 @@ + + + + + From scoder at codespeak.net Tue Aug 8 21:00:18 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 8 Aug 2006 21:00:18 +0200 (CEST) Subject: [Lxml-checkins] r31187 - lxml/trunk Message-ID: <20060808190018.7659910063@code0.codespeak.net> Author: scoder Date: Tue Aug 8 21:00:17 2006 New Revision: 31187 Modified: lxml/trunk/MANIFEST.in Log: added forgotten test file to MANIFEST.in Modified: lxml/trunk/MANIFEST.in ============================================================================== --- lxml/trunk/MANIFEST.in (original) +++ lxml/trunk/MANIFEST.in Tue Aug 8 21:00:17 2006 @@ -4,7 +4,7 @@ include MANIFEST.in version.txt include CHANGES.txt CREDITS.txt INSTALL.txt LICENSES.txt README.txt TODO.txt recursive-include src *.pyx *.pxd *.pxi *.py etree.c etree.h etree_defs.h -recursive-include src/lxml/tests *.rng *.xslt *.xml +recursive-include src/lxml/tests *.rng *.xslt *.xml *.dtd recursive-include benchmark *.py recursive-include doc *.txt *.html *.css *.xml *.mgp pubkey.asc recursive-include doc mkhtml.py rest2html.py From scoder at codespeak.net Tue Aug 8 22:11:05 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 8 Aug 2006 22:11:05 +0200 (CEST) Subject: [Lxml-checkins] r31188 - lxml/tag/lxml-1.1beta Message-ID: <20060808201105.3944610063@code0.codespeak.net> Author: scoder Date: Tue Aug 8 22:11:03 2006 New Revision: 31188 Added: lxml/tag/lxml-1.1beta/ - copied from r31187, lxml/trunk/ Log: tag for 1.1beta release From scoder at codespeak.net Wed Aug 9 17:27:49 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 9 Aug 2006 17:27:49 +0200 (CEST) Subject: [Lxml-checkins] r31205 - in lxml/trunk/src/lxml: .
tests Message-ID: <20060809152749.A51F510072@code0.codespeak.net> Author: scoder Date: Wed Aug 9 17:27:47 2006 New Revision: 31205 Modified: lxml/trunk/src/lxml/objectify.pyx lxml/trunk/src/lxml/tests/test_objectify.py Log: code cleanup in __setattr__ and OP.setattr() to use the same utility functions - fixes some bugs where both behaved differently Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Wed Aug 9 17:27:47 2006 @@ -41,6 +41,11 @@ cdef object False False = __builtin__.False +cdef object AttributeError +AttributeError = __builtin__.AttributeError +cdef object IndexError +IndexError = __builtin__.IndexError + cdef object list list = __builtin__.list cdef object set @@ -247,30 +252,13 @@ ElementBase.tag.__set__(self, value) return - if python.PyList_Check(value) or python.PyTuple_Check(value): - try: - element = _lookupChild(self, tag) - except AttributeError: - for item in value: - _appendValue(self, tag, item) - else: - element.__setslice__(0, python.PY_SSIZE_T_MAX, value) + tag = _buildChildTag(self, tag) + try: + element = _lookupChild(self, tag) + except AttributeError: + _appendValue(self, tag, value) else: - if isinstance(value, _Element): - # deep copy the new element - element = cetree.deepcopyNodeToDocument( - self._doc, (<_Element>value)._c_node) - element.tag = _buildChildTag(self, tag) - else: - element = self.makeelement( _buildChildTag(self, tag) ) - _setElementValue(element, value) - - try: - child = _lookupChild(self, tag) - except AttributeError: - self.append(element) - else: - self.replace(child, element) + _replaceElement(element, value) def __delattr__(self, tag): child = _lookupChild(self, tag) @@ -492,14 +480,19 @@ parent._doc, (<_Element>value)._c_node) new_element.tag = tag parent.append(new_element) + elif python.PyList_Check(value) or python.PyTuple_Check(value): + for item 
in value: + _appendValue(parent, tag, item) else: - new_element = etree.SubElement(parent, tag) + new_element = SubElement(parent, tag) _setElementValue(new_element, value) cdef _setElementValue(_Element element, value): if value is None: cetree.setAttributeValue( element, XML_SCHEMA_INSTANCE_NIL_ATTR, "true") + elif isinstance(value, _Element): + _replaceElement(element, value) else: cetree.delAttributeFromNsName( element._c_node, _XML_SCHEMA_INSTANCE_NS, "nil") @@ -1288,13 +1281,15 @@ elif c_index != 0: raise TypeError, \ "creating indexed path attributes is not supported" + elif c_path_len == 1: + _appendValue(cetree.elementFactory(root._doc, c_node), + cetree.namespacedNameFromNsName(c_href, c_name), + value) + return else: child = SubElement( cetree.elementFactory(root._doc, c_node), cetree.namespacedNameFromNsName(c_href, c_name)) - if c_path_len == 1: - _setElementValue(child, value) - return c_node = child._c_node # if we get here, the entire path was already there @@ -1302,10 +1297,8 @@ element = cetree.elementFactory(root._doc, c_node) _replaceElement(element, value) else: - element = SubElement( - cetree.elementFactory(root._doc, c_node.parent), - cetree.namespacedName(c_node)) - _setElementValue(element, value) + _appendValue(cetree.elementFactory(root._doc, c_node.parent), + cetree.namespacedName(c_node), value) cdef _buildDescendantPaths(tree.xmlNode* c_node, prefix_string): """Returns a list of all descendant paths. 
Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Wed Aug 9 17:27:47 2006 @@ -591,12 +591,28 @@ root = self.etree.XML(xml_str) path = objectify.ObjectPath( "root.c1.c2" ) self.assertEquals(root.c1.c2.text, path.find(root).text) + self.assertEquals("1", root.c1.c2[1].text) new_value = "my new value" path.setattr(root, new_value) self.assertEquals(new_value, root.c1.c2.text) self.assertEquals(new_value, path(root).text) + self.assertEquals("1", root.c1.c2[1].text) + + def test_object_path_set_element(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( "root.c1.c2" ) + self.assertEquals(root.c1.c2.text, path.find(root).text) + self.assertEquals("1", root.c1.c2[1].text) + + new_el = etree.Element("{objectified}test") + etree.SubElement(new_el, "{objectified}sub").a = "TEST" + path.setattr(root, new_el.sub) + + self.assertEquals("TEST", root.c1.c2.a.text) + self.assertEquals("TEST", path(root).a.text) + self.assertEquals("1", root.c1.c2[1].text) def test_object_path_set_create(self): root = self.etree.XML(xml_str) @@ -619,6 +635,20 @@ self.assertEquals(["0", "1", "2", "test"], [el.text for el in root.c1.c2]) + def test_object_path_addattr_element(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( "root.c1.c2" ) + self.assertEquals(3, len(root.c1.c2)) + + new_el = etree.Element("{objectified}test") + etree.SubElement(new_el, "{objectified}sub").a = "TEST" + + path.addattr(root, new_el.sub) + self.assertEquals(4, len(root.c1.c2)) + self.assertEquals("TEST", root.c1.c2[3].a.text) + self.assertEquals(["0", "1", "2"], + [el.text for el in root.c1.c2[:3]]) + def test_object_path_addattr_create(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath( "root.c1.c99" ) @@ -631,6 +661,34 @@ self.assertEquals(new_value, root.c1.c99.text) 
self.assertEquals(new_value, path(root).text) + def test_object_path_addattr_create_element(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( "root.c1.c99" ) + self.assertRaises(AttributeError, path.find, root) + + new_el = etree.Element("{objectified}test") + etree.SubElement(new_el, "{objectified}sub").a = "TEST" + + path.addattr(root, new_el.sub) + self.assertEquals(1, len(root.c1.c99)) + self.assertEquals("TEST", root.c1.c99.a.text) + self.assertEquals("TEST", path(root).a.text) + + def test_object_path_addattr_create_list(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( "root.c1.c99" ) + self.assertRaises(AttributeError, path.find, root) + + new_el = etree.Element("{objectified}test") + new_el.a = ["TEST1", "TEST2"] + + self.assertEquals(2, len(new_el.a)) + + path.addattr(root, list(new_el.a)) + self.assertEquals(2, len(root.c1.c99)) + self.assertEquals("TEST1", root.c1.c99.text) + self.assertEquals("TEST2", path(root)[1].text) + def test_descendant_paths(self): root = self.etree.XML(xml_str) self.assertEquals( From scoder at codespeak.net Wed Aug 9 17:35:35 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 9 Aug 2006 17:35:35 +0200 (CEST) Subject: [Lxml-checkins] r31206 - lxml/trunk/src/lxml Message-ID: <20060809153535.2315210072@code0.codespeak.net> Author: scoder Date: Wed Aug 9 17:35:33 2006 New Revision: 31206 Modified: lxml/trunk/src/lxml/objectify.pyx Log: fix: always use ObjectifiedElement if element is root node Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Wed Aug 9 17:35:33 2006 @@ -983,8 +983,12 @@ cdef object _lookupElementClass(state, _Document doc, tree.xmlNode* c_node): cdef python.PyObject* dict_result + # if element is root node => no data class + if c_node.parent is NULL or not tree._isElement(c_node.parent): + 
return ObjectifiedElement + # if element has children => no data class - if c_node.parent is NULL or cetree.findChildForwards(c_node, 0) is not NULL: + if cetree.findChildForwards(c_node, 0) is not NULL: return ObjectifiedElement # if element is defined as xsi:nil, return NoneElement class From scoder at codespeak.net Wed Aug 9 18:40:53 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 9 Aug 2006 18:40:53 +0200 (CEST) Subject: [Lxml-checkins] r31208 - in lxml/trunk: . src/lxml Message-ID: <20060809164053.173F310069@code0.codespeak.net> Author: scoder Date: Wed Aug 9 18:40:52 2006 New Revision: 31208 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/objectify.pyx Log: fix OE.addattr() also Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Aug 9 18:40:52 2006 @@ -2,6 +2,22 @@ lxml changelog ============== +current +======= + +Features added +-------------- + +Bugs fixed +---------- + +* + +* objectify.ObjectifiedElement.addattr() failed to accept Elements and Lists + +* objectify.ObjectPath.setattr() failed to accept Elements and Lists + + 1.1beta (2006-08-08) ==================== Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Wed Aug 9 18:40:52 2006 @@ -265,8 +265,7 @@ self.remove(child) def addattr(self, tag, value): - element = SubElement(self, _buildChildTag(self, tag)) - _setElementValue(element, value) + _appendValue(self, _buildChildTag(self, tag), value) def __getitem__(self, key): """Return a sibling, counting from the first child of the parent. 
From scoder at codespeak.net Wed Aug 9 18:40:58 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 9 Aug 2006 18:40:58 +0200 (CEST) Subject: [Lxml-checkins] r31209 - lxml/trunk/src/lxml/tests Message-ID: <20060809164058.07D2A10069@code0.codespeak.net> Author: scoder Date: Wed Aug 9 18:40:56 2006 New Revision: 31209 Modified: lxml/trunk/src/lxml/tests/test_objectify.py Log: loads of test cases for last fixes Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Wed Aug 9 18:40:56 2006 @@ -41,6 +41,10 @@ objectify.setPytypeAttributeTag() objectify.unregister() + def test_root(self): + root = self.etree.Element("test") + self.assert_(isinstance(root, objectify.ObjectifiedElement)) + def test_str(self): root = self.etree.Element("test") self.assertEquals('', str(root)) @@ -66,6 +70,30 @@ self.assertEquals(2, len(root.c1)) self.assertEquals("test", root.c1[1].text) + def test_addattr_element(self): + root = self.etree.XML(xml_str) + self.assertEquals(1, len(root.c1)) + + new_el = self.etree.Element("test", myattr="5") + root.addattr("c1", new_el) + self.assertEquals(2, len(root.c1)) + self.assertEquals(None, root.c1[0].get("myattr")) + self.assertEquals("5", root.c1[1].get("myattr")) + + def test_addattr_list(self): + root = self.etree.XML(xml_str) + self.assertEquals(1, len(root.c1)) + + new_el = self.etree.Element("test") + self.etree.SubElement(new_el, "a", myattr="A") + self.etree.SubElement(new_el, "a", myattr="B") + + root.addattr("c1", list(new_el.a)) + self.assertEquals(3, len(root.c1)) + self.assertEquals(None, root.c1[0].get("myattr")) + self.assertEquals("A", root.c1[1].get("myattr")) + self.assertEquals("B", root.c1[2].get("myattr")) + def test_child_addattr(self): root = self.etree.XML(xml_str) self.assertEquals(3, len(root.c1.c2)) @@ -607,9 +635,10 @@ 
self.assertEquals("1", root.c1.c2[1].text) new_el = etree.Element("{objectified}test") - etree.SubElement(new_el, "{objectified}sub").a = "TEST" + etree.SubElement(new_el, "{objectified}sub", myattr="ATTR").a = "TEST" path.setattr(root, new_el.sub) + self.assertEquals("ATTR", root.c1.c2.get("myattr")) self.assertEquals("TEST", root.c1.c2.a.text) self.assertEquals("TEST", path(root).a.text) self.assertEquals("1", root.c1.c2[1].text) @@ -626,6 +655,39 @@ self.assertEquals(new_value, root.c1.c99.text) self.assertEquals(new_value, path(root).text) + def test_object_path_set_create_element(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( "root.c1.c99" ) + self.assertRaises(AttributeError, path.find, root) + + new_el = etree.Element("{objectified}test") + etree.SubElement(new_el, "{objectified}sub", myattr="ATTR").a = "TEST" + path.setattr(root, new_el.sub) + + self.assertEquals(1, len(root.c1.c99)) + self.assertEquals("ATTR", root.c1.c99.get("myattr")) + self.assertEquals("TEST", root.c1.c99.a.text) + self.assertEquals("TEST", path(root).a.text) + + def test_object_path_set_create_list(self): + root = self.etree.XML(xml_str) + path = objectify.ObjectPath( "root.c1.c99" ) + self.assertRaises(AttributeError, path.find, root) + + new_el = etree.Element("{objectified}test") + new_el.a = ["TEST1", "TEST2"] + new_el.a[0].set("myattr", "ATTR1") + new_el.a[1].set("myattr", "ATTR2") + + path.setattr(root, list(new_el.a)) + + self.assertEquals(2, len(root.c1.c99)) + self.assertEquals("ATTR1", root.c1.c99[0].get("myattr")) + self.assertEquals("TEST1", root.c1.c99[0].text) + self.assertEquals("ATTR2", root.c1.c99[1].get("myattr")) + self.assertEquals("TEST2", root.c1.c99[1].text) + self.assertEquals("TEST1", path(root).text) + def test_object_path_addattr(self): root = self.etree.XML(xml_str) path = objectify.ObjectPath( "root.c1.c2" ) @@ -667,12 +729,13 @@ self.assertRaises(AttributeError, path.find, root) new_el = etree.Element("{objectified}test") - 
etree.SubElement(new_el, "{objectified}sub").a = "TEST" + etree.SubElement(new_el, "{objectified}sub", myattr="ATTR").a = "TEST" path.addattr(root, new_el.sub) self.assertEquals(1, len(root.c1.c99)) self.assertEquals("TEST", root.c1.c99.a.text) self.assertEquals("TEST", path(root).a.text) + self.assertEquals("ATTR", root.c1.c99.get("myattr")) def test_object_path_addattr_create_list(self): root = self.etree.XML(xml_str) From scoder at codespeak.net Thu Aug 10 08:20:28 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 10 Aug 2006 08:20:28 +0200 (CEST) Subject: [Lxml-checkins] r31213 - in lxml/trunk: . src/lxml Message-ID: <20060810062028.60B131005A@code0.codespeak.net> Author: scoder Date: Thu Aug 10 08:20:26 2006 New Revision: 31213 Modified: lxml/trunk/setup.py lxml/trunk/src/lxml/etree.pyx Log: support '-dev' version scheme prior to alpha versions Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Thu Aug 10 08:20:26 2006 @@ -75,7 +75,10 @@ svn_entries).group(1) svn_version = version + '-' + revision -if 'alpha' in version: +if 'dev' in version: + svn_version = fix_alphabeta(svn_version, 'dev') + dev_status = 'Development Status :: 3 - Alpha' +elif 'alpha' in version: svn_version = fix_alphabeta(svn_version, 'alpha') dev_status = 'Development Status :: 3 - Alpha' elif 'beta' in version: Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Aug 10 08:20:26 2006 @@ -115,7 +115,9 @@ try: item = int(item) except ValueError: - if item == 'alpha': + if item == 'dev': + item = -3 + elif item == 'alpha': item = -2 elif item == 'beta': item = -1 From scoder at codespeak.net Thu Aug 10 08:20:46 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 10 Aug 2006 08:20:46 +0200 
(CEST) Subject: [Lxml-checkins] r31214 - lxml/trunk Message-ID: <20060810062046.3AF511005A@code0.codespeak.net> Author: scoder Date: Thu Aug 10 08:20:45 2006 New Revision: 31214 Modified: lxml/trunk/CHANGES.txt Log: cleanup Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Aug 10 08:20:45 2006 @@ -11,8 +11,6 @@ Bugs fixed ---------- -* - * objectify.ObjectifiedElement.addattr() failed to accept Elements and Lists * objectify.ObjectPath.setattr() failed to accept Elements and Lists From scoder at codespeak.net Thu Aug 10 08:29:53 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 10 Aug 2006 08:29:53 +0200 (CEST) Subject: [Lxml-checkins] r31217 - lxml/trunk/doc Message-ID: <20060810062953.6157A1005A@code0.codespeak.net> Author: scoder Date: Thu Aug 10 08:29:52 2006 New Revision: 31217 Modified: lxml/trunk/doc/main.txt Log: added version links to main.txt Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Thu Aug 10 08:29:52 2006 @@ -29,6 +29,10 @@ .. _`installation instructions`: installation.html +* `lxml 1.1beta`_, released 2006-08-08 (`changes for 1.1beta`_) + +* `lxml 1.0.3`_, released 2006-08-08 (`changes for 1.0.3`_) + * `lxml 1.0.2`_, released 2006-06-27 (`changes for 1.0.2`_) * `lxml 1.0.1`_, released 2006-06-09 (`changes for 1.0.1`_) @@ -51,6 +55,8 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 1.1beta`: lxml-1.1beta.tgz +.. _`lxml 1.0.3`: lxml-1.0.3.tgz .. _`lxml 1.0.2`: lxml-1.0.2.tgz .. _`lxml 1.0.1`: lxml-1.0.1.tgz .. _`lxml 1.0`: lxml-1.0.tgz @@ -63,6 +69,8 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`CHANGES for 1.1beta`: changes-1.1beta.html +.. _`CHANGES for 1.0.3`: changes-1.0.3.html .. _`CHANGES for 1.0.2`: changes-1.0.2.html .. 
_`CHANGES for 1.0.1`: changes-1.0.1.html .. _`CHANGES for 1.0`: changes-1.0.html From scoder at codespeak.net Thu Aug 10 12:39:17 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 10 Aug 2006 12:39:17 +0200 (CEST) Subject: [Lxml-checkins] r31223 - in lxml/trunk: doc src/lxml Message-ID: <20060810103917.DCFCE1005A@code0.codespeak.net> Author: scoder Date: Thu Aug 10 12:39:16 2006 New Revision: 31223 Modified: lxml/trunk/doc/objectify.txt lxml/trunk/src/lxml/objectify.pyx Log: DataElement factory for one-shot creation of Elements with Python values Modified: lxml/trunk/doc/objectify.txt ============================================================================== --- lxml/trunk/doc/objectify.txt (original) +++ lxml/trunk/doc/objectify.txt Thu Aug 10 12:39:16 2006 @@ -588,6 +588,10 @@ >>> print root.a 25 + +How data types are matched +-------------------------- + Objectify determines data types by trial and error, unless it finds an attribute named ``lxml.objectify.PYTYPE_ATTRIBUTE``, which must contain any of the following string values: int, long, float, str, unicode, none:: @@ -651,6 +655,31 @@ s = '5' [StringElement] * xsi:type = 'string' +For convenience, there is a special factory ``DataElement()`` that supports +creating an Element with a Python value in one step. 
You can pass the +required Python type name or the XSI type name:: + + >>> root = objectify.Element("root") + >>> root.x = objectify.DataElement(5, _pytype="long") + >>> print objectify.dump(root) + root = None [ObjectifiedElement] + x = 5L [LongElement] + * py:pytype = 'long' + + >>> root.x = objectify.DataElement(5, _pytype="str", myattr="someval") + >>> print objectify.dump(root) + root = None [ObjectifiedElement] + x = '5' [StringElement] + * py:pytype = 'str' + * myattr = 'someval' + + >>> root.x = objectify.DataElement(5, _xsi="integer") + >>> print objectify.dump(root) + root = None [ObjectifiedElement] + x = 5 [IntElement] + * py:pytype = 'int' + * xsi:type = 'integer' + Defining additional data classes -------------------------------- Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Thu Aug 10 12:39:16 2006 @@ -98,6 +98,8 @@ cdef object XML_SCHEMA_INSTANCE_NIL_ATTR XML_SCHEMA_INSTANCE_NIL_ATTR = "{%s}nil" % XML_SCHEMA_INSTANCE_NS +cdef object XML_SCHEMA_INSTANCE_TYPE_ATTR +XML_SCHEMA_INSTANCE_TYPE_ATTR = "{%s}type" % XML_SCHEMA_INSTANCE_NS ################################################################################ @@ -126,28 +128,6 @@ else: raise TypeError, "parser must inherit from lxml.etree.XMLParser" -cdef object _fromstring -_fromstring = etree.fromstring - -def fromstring(xml): - """Objectify specific version of the lxml.etree fromstring() function. - - NOTE: requires parser based element class lookup activated in lxml.etree! - """ - return _fromstring(xml, parser) - -XML = fromstring - -cdef object _makeelement -_makeelement = parser.makeelement - -def Element(*args, **kwargs): - """Objectify specific version of the lxml.etree Element() factory. - - NOTE: requires parser based element class lookup activated in lxml.etree! 
- """ - return _makeelement(*args, **kwargs) - ################################################################################ # Element class for the main API @@ -506,6 +486,9 @@ # Data type support in subclasses cdef class ObjectifiedDataElement(ObjectifiedElement): + """This is the base class for all data type Elements. Subclasses should + override the 'pyval' property and possibly the __str__ method. + """ property pyval: def __get__(self): return textOf(self._c_node) @@ -884,23 +867,6 @@ _registerPyTypes() -cdef object _guessElementClass(tree.xmlNode* c_node): - value = textOf(c_node) - if value is None: - # default to ObjectifiedElement class - return ObjectifiedElement - if value == '': - return StringElement - errors = (ValueError, TypeError) - for type_check, pytype in _TYPE_CHECKS: - try: - type_check(value) - return (pytype)._type - except errors: - pass - - return StringElement - def getRegisteredTypes(): """Returns a list of the currently registered PyType objects. @@ -928,6 +894,23 @@ types.append(pytype) return types +cdef object _guessElementClass(tree.xmlNode* c_node): + value = textOf(c_node) + if value is None: + # default to ObjectifiedElement class + return ObjectifiedElement + if value == '': + return StringElement + errors = (ValueError, TypeError) + for type_check, pytype in _TYPE_CHECKS: + try: + type_check(value) + return (pytype)._type + except errors: + pass + + return StringElement + ################################################################################ # Recursive element dumping @@ -1440,6 +1423,71 @@ _cstr(pytype.name)) tree.END_FOR_EACH_ELEMENT_FROM(c_node) +################################################################################ +# Module level factory functions + +cdef object _fromstring +_fromstring = etree.fromstring + +def fromstring(xml): + """Objectify specific version of the lxml.etree fromstring() function. + + NOTE: requires parser based element class lookup activated in lxml.etree! 
+ """ + return _fromstring(xml, parser) + +XML = fromstring + +cdef object _makeelement +_makeelement = parser.makeelement + +def Element(*args, **kwargs): + """Objectify specific version of the lxml.etree Element() factory. + + NOTE: requires parser based element class lookup activated in lxml.etree! + """ + return _makeelement(*args, **kwargs) + +def DataElement(_value, _attrib=None, _pytype=None, _xsi=None, **_attributes): + """Create a new element with a Python value and XML attributes taken from + keyword arguments or a dictionary passed as second argument. + + Automatically adds a 'pytype' attribute for the Python type of the value, + if the type can be identified. If '_pytype' or '_xsi' are among the + keyword arguments, they will be used instead. + """ + cdef _Element element + if _attrib is not None: + if python.PyDict_GetSize(_attributes): + _attrib.update(_attributes) + _attributes = _attrib + if _xsi is not None: + python.PyDict_SetItem(_attributes, XML_SCHEMA_INSTANCE_TYPE_ATTR, _xsi) + if _pytype is None: + _pytype = _SCHEMA_TYPE_DICT[_xsi].name + if _pytype is None: + errors = (ValueError, TypeError) + for type_check, pytype in _TYPE_CHECKS: + try: + type_check(_value) + _pytype = (pytype).name + break + except errors: + pass + if _pytype is None: + if _value is None: + _pytype = "none" + elif python._isString(_value): + _pytype = "str" + if _pytype is not None: + python.PyDict_SetItem(_attributes, PYTYPE_ATTRIBUTE, _pytype) + + element = _makeelement("value", _attributes) + if not python._isString(_value): + _value = str(_value) + cetree.setNodeText(element._c_node, _value) + return element + ################################################################################ # Module setup From scoder at codespeak.net Thu Aug 10 12:41:44 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 10 Aug 2006 12:41:44 +0200 (CEST) Subject: [Lxml-checkins] r31224 - lxml/trunk/src/lxml Message-ID: <20060810104144.9350C1005A@code0.codespeak.net> 
Author: scoder Date: Thu Aug 10 12:41:43 2006 New Revision: 31224 Modified: lxml/trunk/src/lxml/objectify.pyx Log: speedup of objectify.Element (25%) and objectify.DataElement by reusing the same document for new Elements Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Thu Aug 10 12:41:43 2006 @@ -1438,8 +1438,11 @@ XML = fromstring +cdef object DEFAULT_DOCUMENT +DEFAULT_DOCUMENT = parser.makeelement("root") + cdef object _makeelement -_makeelement = parser.makeelement +_makeelement = DEFAULT_DOCUMENT.makeelement def Element(*args, **kwargs): """Objectify specific version of the lxml.etree Element() factory. From scoder at codespeak.net Thu Aug 10 13:04:36 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 10 Aug 2006 13:04:36 +0200 (CEST) Subject: [Lxml-checkins] r31225 - lxml/trunk/src/lxml Message-ID: <20060810110436.E5B671005A@code0.codespeak.net> Author: scoder Date: Thu Aug 10 13:04:35 2006 New Revision: 31225 Modified: lxml/trunk/src/lxml/objectify.pyx Log: cleanup Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Thu Aug 10 13:04:35 2006 @@ -128,6 +128,12 @@ else: raise TypeError, "parser must inherit from lxml.etree.XMLParser" +cdef object DEFAULT_DOCUMENT +DEFAULT_DOCUMENT = parser.makeelement("root") + +cdef object _makeelement +_makeelement = DEFAULT_DOCUMENT.makeelement + ################################################################################ # Element class for the main API @@ -1438,12 +1444,6 @@ XML = fromstring -cdef object DEFAULT_DOCUMENT -DEFAULT_DOCUMENT = parser.makeelement("root") - -cdef object _makeelement -_makeelement = DEFAULT_DOCUMENT.makeelement - def Element(*args, **kwargs): """Objectify 
specific version of the lxml.etree Element() factory. From scoder at codespeak.net Thu Aug 10 13:21:17 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 10 Aug 2006 13:21:17 +0200 (CEST) Subject: [Lxml-checkins] r31226 - in lxml/trunk: . src/lxml Message-ID: <20060810112117.C34551005A@code0.codespeak.net> Author: scoder Date: Thu Aug 10 13:21:16 2006 New Revision: 31226 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/objectify.pyx Log: ignore unknown pyvals Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Aug 10 13:21:16 2006 @@ -11,6 +11,8 @@ Bugs fixed ---------- +* ignore unknown 'pyval' attribute values in objectify + * objectify.ObjectifiedElement.addattr() failed to accept Elements and Lists * objectify.ObjectPath.setattr() failed to accept Elements and Lists Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Thu Aug 10 13:21:16 2006 @@ -991,8 +991,7 @@ dict_result = python.PyDict_GetItem(_PYTYPE_DICT, value) if dict_result is not NULL: return (dict_result)._type - raise ValueError, "Invalid pytype attribute in element '%s'" % \ - cetree.namespacedName(c_node) + # unknown 'pytype' => try to figure it out ourselves, just go on # check for XML Schema type hint value = cetree.attributeValueFromNsName( From scoder at codespeak.net Thu Aug 10 14:29:59 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 10 Aug 2006 14:29:59 +0200 (CEST) Subject: [Lxml-checkins] r31228 - in lxml/trunk: . 
src/lxml Message-ID: <20060810122959.B0A1E10068@code0.codespeak.net> Author: scoder Date: Thu Aug 10 14:29:58 2006 New Revision: 31228 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/objectify.pyx lxml/trunk/src/lxml/python.pxd Log: removed accidental Python 2.4-ism Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Aug 10 14:29:58 2006 @@ -11,6 +11,8 @@ Bugs fixed ---------- +* 1.1beta did not compile under Python 2.3 + * ignore unknown 'pyval' attribute values in objectify * objectify.ObjectifiedElement.addattr() failed to accept Elements and Lists Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Thu Aug 10 14:29:58 2006 @@ -184,7 +184,7 @@ if c_ns is NULL and tree._getNs(child._c_node) is not NULL: continue name = child._c_node.name - if not python.PyDict_Contains(children, name): + if python.PyDict_GetItem(children, name) is NULL: python.PyDict_SetItem(children, name, child) return children Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Thu Aug 10 14:29:58 2006 @@ -52,7 +52,6 @@ cdef void PyDict_Clear(object d) cdef object PyDict_Copy(object d) cdef Py_ssize_t PyDict_Size(object d) - cdef int PyDict_Contains(object d, object key) cdef object PySequence_List(object o) cdef object PySequence_Tuple(object o) From scoder at codespeak.net Thu Aug 10 21:36:38 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 10 Aug 2006 21:36:38 +0200 (CEST) Subject: [Lxml-checkins] r31242 - lxml/trunk Message-ID: <20060810193638.0E8BE10063@code0.codespeak.net> Author: scoder Date: Thu Aug 10 21:36:37 2006 New Revision: 
31242 Modified: lxml/trunk/CHANGES.txt Log: fix in CHANGES.txt Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Aug 10 21:36:37 2006 @@ -44,9 +44,9 @@ * Customizable Element class lookup: - * Support for externally provided lookup functions + * different pre-implemented lookup mechanisms - * lxml.elements.classlookup module implements different lookup mechanisms + * support for externally provided lookup functions * Support for processing instructions (ET-like, not compatible) From scoder at codespeak.net Thu Aug 10 21:39:54 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 10 Aug 2006 21:39:54 +0200 (CEST) Subject: [Lxml-checkins] r31243 - lxml/trunk Message-ID: <20060810193954.A5C5B10063@code0.codespeak.net> Author: scoder Date: Thu Aug 10 21:39:53 2006 New Revision: 31243 Modified: lxml/trunk/setup.py Log: fix for changelog parser in setup.py Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Thu Aug 10 21:39:53 2006 @@ -163,7 +163,7 @@ break if changelog_lines: changelog_lines.append(line) - elif version in line: + elif line.startswith(version): changelog_lines.append(line) if changelog_lines: From scoder at codespeak.net Fri Aug 11 08:33:35 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 11 Aug 2006 08:33:35 +0200 (CEST) Subject: [Lxml-checkins] r31246 - in lxml/trunk: . 
doc src/lxml src/lxml/tests Message-ID: <20060811063335.5BAB51005A@code0.codespeak.net> Author: scoder Date: Fri Aug 11 08:33:27 2006 New Revision: 31246 Modified: lxml/trunk/CHANGES.txt lxml/trunk/doc/element_classes.txt lxml/trunk/src/lxml/classlookup.pxi lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/tests/test_nsclasses.py Log: made parser lookup the default element class lookup scheme, added support in XML()/HTML(), rewrite of related doc sections Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Aug 11 08:33:27 2006 @@ -8,6 +8,10 @@ Features added -------------- +* Parsers are now the preferred (and default) place where element class lookup + schemes should be registered. Namespace lookup is no longer supported by + default. + Bugs fixed ---------- Modified: lxml/trunk/doc/element_classes.txt ============================================================================== --- lxml/trunk/doc/element_classes.txt (original) +++ lxml/trunk/doc/element_classes.txt Fri Aug 11 08:33:27 2006 @@ -26,12 +26,11 @@ 1 Element initialization 2 Setting up a class lookup scheme 2.1 Default class lookup - 2.2 Parser based lookup - 2.3 Namespace class lookup - 2.4 Attribute based lookup - 2.5 Custom element class lookup + 2.2 Namespace class lookup + 2.3 Attribute based lookup + 2.4 Custom element class lookup 3 Implementing namespaces - 4 Resetting the class lookup scheme + Element initialization ---------------------- @@ -76,101 +75,106 @@ -------------------------------- The first thing to do when deploying custom element classes is to register a -class lookup scheme. lxml.etree provides quite a number of different schemes, -that also support class lookup local to a parser or namespace. 
Most lookups -support fallback chaining, which allows the next lookup mechanism to take over -when the previous one fails to find a class. - -For small projects, setting a lookup scheme globally can be satisfactory. To -avoid interfering with other modules, however, it is usually a better idea to -globally register the parser specific scheme, instantiate a dedicated parser -for your module and then register the required lookup scheme only for the -parser. Registering the per-parser lookup can be done repeatedly by many -modules without side effects and the separate parsers will prevent any -interference. +class lookup scheme on a parser. lxml.etree provides quite a number of +different schemes, that also support class lookup based on namespaces or +attribute values. Most lookups support fallback chaining, which allows the +next lookup mechanism to take over when the previous one fails to find a +class. +For example, setting a different default element class for a parser works as +follows:: -Default class lookup -.................... + >>> parser_lookup = etree.ElementDefaultClassLookup(element=HonkElement) + >>> parser = etree.XMLParser() + >>> parser.setElementClassLookup(parser_lookup) -This is the default lookup mechanism. It always returns the default element -class. Consequently, no further fallbacks are supported, but this scheme is a -good fallback for other custom lookup mechanisms. +There is one drawback of the parser based scheme: the ``Element()`` factory +creates a new document that deploys the default parser:: -Usage:: + >>> el = etree.Element("root") + >>> print isinstance(el, HonkElement) + False - >>> lookup = etree.ElementDefaultClassLookup() - >>> etree.setElementClassLookup(lookup) +You should therefore avoid using this function in code that uses custom +classes. 
The ``makeelement()`` method of parsers provides a simple +replacement:: -Or shorter, since it is the default:: + >>> el = parser.makeelement("root") + >>> print isinstance(el, HonkElement) + True - >>> etree.setElementClassLookup() +If you use a parser at the module level, you can easily redirect a module +level ``Element()`` factory to the parser method by adding code like this:: -To change the default element implementation, you can pass your new class to -the constructor. While it accepts classes for ``element``, ``comment`` and -``pi`` nodes, most use cases will only override the element class:: + >>> MODULE_PARSER = etree.XMLParser() + >>> Element = MODULE_PARSER.makeelement - >>> el = etree.Element("myelement") - >>> print isinstance(el, HonkElement) +While the ``XML()`` and ``HTML()`` factories also depend on the default +parser, you can pass them a different parser as second argument:: + + >>> element = etree.XML("") + >>> print isinstance(element, HonkElement) False - >>> lookup = etree.ElementDefaultClassLookup(element=HonkElement) - >>> etree.setElementClassLookup(lookup) + >>> element = etree.XML("", parser) + >>> print isinstance(element, HonkElement) + True - >>> el = etree.Element("myelement") - >>> print isinstance(el, HonkElement) +Whenever you create a document with a parser, it will inherit the lookup +scheme and all subsequent element instantiations for this document will use +it:: + + >>> element = etree.fromstring("", parser) + >>> print isinstance(element, HonkElement) True - >>> el.honking - False - >>> el = etree.Element("myelement", honking='true') - >>> print etree.tostring(el) - - >>> el.honking + >>> el = etree.SubElement(element, "subel") + >>> print isinstance(el, HonkElement) True +For small projects, you may also consider setting a lookup scheme on the +default parser. 
To avoid interfering with other modules, however, it is +usually a better idea to use a dedicated parser for each module (or a parser +pool when using threads) and then register the required lookup scheme only for +this parser. -Parser based lookup -................... - -This is the preferred global lookup scheme for lxml.etree in the case where a -more specific element lookup scheme is required. It delegates the class -request to the original parser of the current document. If no specific lookup -scheme was registered for that parser, the global lookup simply calls its own -fallback mechanism. -You can enable the parser delegation as follows:: +Default class lookup +.................... - >>> lookup = etree.ParserBasedElementClassLookup() - >>> etree.setElementClassLookup(lookup) +This is the simplest lookup mechanism. It always returns the default +element class. Consequently, no further fallbacks are supported, but this +scheme is a good fallback for other custom lookup mechanisms. -To specify a different fallback scheme than the default class lookup, you can -pass it in the constructor:: +Usage:: - >>> fallback = etree.ElementDefaultClassLookup() - >>> lookup = etree.ParserBasedElementClassLookup(fallback) - >>> etree.setElementClassLookup(lookup) + >>> lookup = etree.ElementDefaultClassLookup() + >>> parser = etree.XMLParser() + >>> parser.setElementClassLookup(lookup) -With such a global setup, you can now set a separate lookup mechanism for each -parser you create, without interfering with other parsers:: +Note that the default for new parsers is to use the global fallback, which is +also the default lookup (if not configured otherwise). - >>> parser_lookup = etree.ElementDefaultClassLookup(element=HonkElement) - >>> parser = etree.XMLParser() - >>> parser.setElementClassLookup(parser_lookup) +To change the default element implementation, you can pass your new class to +the constructor. 
While it accepts classes for ``element``, ``comment`` and +``pi`` nodes, most use cases will only override the element class:: - >>> element = etree.XML("") - >>> print isinstance(element, HonkElement) + >>> el = parser.makeelement("myelement") + >>> print isinstance(el, HonkElement) False -Whenever you create a document with this parser, it will inherit the lookup -scheme and all subsequent element instantiations for this document will use -it:: + >>> lookup = etree.ElementDefaultClassLookup(element=HonkElement) + >>> parser.setElementClassLookup(lookup) - >>> element = etree.fromstring("", parser) - >>> print isinstance(element, HonkElement) - True - >>> el = etree.SubElement(element, "subel") + >>> el = parser.makeelement("myelement") >>> print isinstance(el, HonkElement) True + >>> el.honking + False + >>> el = parser.makeelement("myelement", honking='true') + >>> print etree.tostring(el) + + >>> el.honking + True Namespace class lookup @@ -180,7 +184,8 @@ element classes. You can select it by calling:: >>> lookup = etree.ElementNamespaceClassLookup() - >>> etree.setElementClassLookup(lookup) + >>> parser = etree.XMLParser() + >>> parser.setElementClassLookup(lookup) See the separate section on `implementing namespaces`_ below to learn how to make use of it. @@ -194,7 +199,7 @@ >>> fallback = etree.ElementDefaultClassLookup(element=HonkElement) >>> lookup = etree.ElementNamespaceClassLookup(fallback) - >>> etree.setElementClassLookup(lookup) + >>> parser.setElementClassLookup(lookup) Attribute based lookup @@ -206,7 +211,8 @@ >>> id_class_mapping = {} # maps attribute values to element classes >>> lookup = etree.AttributeBasedElementClassLookup('id', id_class_mapping) - >>> etree.setElementClassLookup(lookup) + >>> parser = etree.XMLParser() + >>> parser.setElementClassLookup(lookup) Instead of a global setup of this scheme, you should consider using a per-parser setup. 
@@ -218,7 +224,8 @@ >>> fallback = etree.ElementNamespaceClassLookup() >>> lookup = etree.AttributeBasedElementClassLookup( ... 'id', id_class_mapping, fallback) - >>> etree.setElementClassLookup(lookup) + >>> parser = etree.XMLParser() + >>> parser.setElementClassLookup(lookup) Custom element class lookup @@ -231,7 +238,8 @@ ... def lookup(self, node_type, document, namespace, name): ... return MyElementClass # defined elsewhere - >>> etree.setElementClassLookup( MyLookup() ) + >>> parser = etree.XMLParser() + >>> parser.setElementClassLookup(MyLookup()) The ``lookup()`` method must either return None (which triggers the fallback mechanism) or a subclass of ``lxml.etree.ElementBase``. It can take any @@ -251,7 +259,8 @@ Namespace class:: >>> lookup = etree.ElementNamespaceClassLookup() - >>> etree.setElementClassLookup(lookup) + >>> parser = etree.XMLParser() + >>> parser.setElementClassLookup(lookup) >>> namespace = etree.Namespace('http://hui.de/honk') @@ -264,14 +273,14 @@ lxml:: >>> xml = '' - >>> honk_element = etree.XML(xml) + >>> honk_element = etree.XML(xml, parser) >>> print honk_element.honking True The same works when creating elements by hand:: - >>> honk_element = etree.Element('{http://hui.de/honk}honk', - ... honking='true') + >>> honk_element = parser.makeelement('{http://hui.de/honk}honk', + ... honking='true') >>> print honk_element.honking True @@ -288,7 +297,7 @@ namespace, they do not pick up the same implementation:: >>> xml = '' - >>> honk_element = etree.XML(xml) + >>> honk_element = etree.XML(xml, parser) >>> print honk_element.honking True >>> print honk_element[0].honking @@ -323,7 +332,7 @@ subclasses for elements of this namespace:: >>> xml = '' - >>> honk_element = etree.XML(xml) + >>> honk_element = etree.XML(xml, parser) >>> print type(honk_element), type(honk_element[0]) @@ -338,19 +347,3 @@ Traceback (most recent call last): ... 
AttributeError: 'HonkNSElement' object has no attribute 'honking' - - -Resetting the class lookup scheme ---------------------------------- - -To reset lxml.etree to the original class lookup, simply pass ``None`` or -nothing to the register function:: - - >>> etree.setElementClassLookup() - >>> el = etree.Element("myelement") - >>> print isinstance(el, HonkElement) - False - -Be aware, though, that this does not immediately apply to elements to which -there already is a Python reference. Their Python class will only be changed -after all references are gone and the Python object is garbage collected. Modified: lxml/trunk/src/lxml/classlookup.pxi ============================================================================== --- lxml/trunk/src/lxml/classlookup.pxi (original) +++ lxml/trunk/src/lxml/classlookup.pxi Fri Aug 11 08:33:27 2006 @@ -61,7 +61,7 @@ if fallback is not None: self.setFallback(fallback) else: - self._fallback_function = DEFAULT_ELEMENT_CLASS_LOOKUP + self._fallback_function = _lookupDefaultElementClass def setFallback(self, ElementClassLookup lookup not None): """Sets the fallback scheme for this lookup method. 
@@ -72,32 +72,6 @@ cdef object _callFallback(self, doc, xmlNode* c_node): return self._fallback_function(self.fallback, doc, c_node) -# default lookup: Namespace classes -cdef _element_class_lookup_function DEFAULT_ELEMENT_CLASS_LOOKUP -DEFAULT_ELEMENT_CLASS_LOOKUP = _find_nselement_class - -cdef _element_class_lookup_function LOOKUP_ELEMENT_CLASS -LOOKUP_ELEMENT_CLASS = DEFAULT_ELEMENT_CLASS_LOOKUP - -cdef object ELEMENT_CLASS_LOOKUP_STATE -ELEMENT_CLASS_LOOKUP_STATE = None - -cdef void _setElementClassLookupFunction( - _element_class_lookup_function function, object state): - global LOOKUP_ELEMENT_CLASS, ELEMENT_CLASS_LOOKUP_STATE - if function is NULL: - LOOKUP_ELEMENT_CLASS = DEFAULT_ELEMENT_CLASS_LOOKUP - ELEMENT_CLASS_LOOKUP_STATE = None - else: - LOOKUP_ELEMENT_CLASS = function - ELEMENT_CLASS_LOOKUP_STATE = state - -def setElementClassLookup(ElementClassLookup lookup = None): - if lookup is None or lookup._lookup_function is NULL: - _setElementClassLookupFunction(NULL, None) - else: - _setElementClassLookupFunction(lookup._lookup_function, lookup) - ################################################################################ # Custom Element class lookup schemes @@ -213,13 +187,11 @@ cdef object _parser_class_lookup(state, _Document doc, xmlNode* c_node): cdef FallbackElementClassLookup lookup - cdef ElementClassLookup parser_lookup - lookup = state if c_node.type == tree.XML_ELEMENT_NODE: - parser_lookup = doc._parser._class_lookup - if parser_lookup is not None: - return parser_lookup._lookup_function(parser_lookup, doc, c_node) + if doc._parser._class_lookup is not None: + return doc._parser._class_lookup._lookup_function( + doc._parser._class_lookup, doc, c_node) return lookup._callFallback(doc, c_node) @@ -271,3 +243,32 @@ if cls is not None: return cls return lookup._callFallback(doc, c_node) + + +################################################################################ +# Global setup + +cdef _element_class_lookup_function 
LOOKUP_ELEMENT_CLASS +cdef object ELEMENT_CLASS_LOOKUP_STATE + +cdef void _setElementClassLookupFunction( + _element_class_lookup_function function, object state): + global LOOKUP_ELEMENT_CLASS, ELEMENT_CLASS_LOOKUP_STATE + if function is NULL: + state = DEFAULT_ELEMENT_CLASS_LOOKUP + function = DEFAULT_ELEMENT_CLASS_LOOKUP._lookup_function + + ELEMENT_CLASS_LOOKUP_STATE = state + LOOKUP_ELEMENT_CLASS = function + +def setElementClassLookup(ElementClassLookup lookup = None): + if lookup is None or lookup._lookup_function is NULL: + _setElementClassLookupFunction(NULL, None) + else: + _setElementClassLookupFunction(lookup._lookup_function, lookup) + +# default setup: parser delegation +cdef ParserBasedElementClassLookup DEFAULT_ELEMENT_CLASS_LOOKUP +DEFAULT_ELEMENT_CLASS_LOOKUP = ParserBasedElementClassLookup() + +setElementClassLookup(DEFAULT_ELEMENT_CLASS_LOOKUP) Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Fri Aug 11 08:33:27 2006 @@ -1653,7 +1653,9 @@ """ cdef _Document doc if parser is None: - parser = __DEFAULT_HTML_PARSER + parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() + if not isinstance(parser, HTMLParser): + parser = __DEFAULT_HTML_PARSER doc = _parseMemoryDocument(text, None, parser) return doc.getroot() @@ -1663,7 +1665,9 @@ """ cdef _Document doc if parser is None: - parser = __DEFAULT_XML_PARSER + parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() + if not isinstance(parser, XMLParser): + parser = __DEFAULT_XML_PARSER doc = _parseMemoryDocument(text, None, parser) return doc.getroot() Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Fri Aug 11 08:33:27 2006 @@ -388,7 +388,11 @@ def __dummy(self): pass - def setElementClassLookup(self, 
ElementClassLookup lookup not None): + def setElementClassLookup(self, ElementClassLookup lookup = None): + """Set a lookup scheme for element classes generated from this parser. + + Reset it by passing None or nothing. + """ self._class_lookup = lookup cdef _BaseParser _copy(self): Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_nsclasses.py (original) +++ lxml/trunk/src/lxml/tests/test_nsclasses.py Fri Aug 11 08:33:27 2006 @@ -21,6 +21,15 @@ def bluff(self): return u'bluff' + def setUp(self): + parser = etree.XMLParser() + parser.setElementClassLookup( + etree.ElementNamespaceClassLookup() ) + etree.setDefaultParser(parser) + + def tearDown(self): + etree.setDefaultParser() + def test_registry(self): ns = etree.Namespace(u'ns01') ns[u'maeh'] = self.maeh_class From scoder at codespeak.net Fri Aug 11 09:18:26 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 11 Aug 2006 09:18:26 +0200 (CEST) Subject: [Lxml-checkins] r31247 - lxml/trunk/doc Message-ID: <20060811071826.9FF451005A@code0.codespeak.net> Author: scoder Date: Fri Aug 11 09:18:23 2006 New Revision: 31247 Modified: lxml/trunk/doc/objectify.txt Log: doc fix for objectify.txt after lookup change Modified: lxml/trunk/doc/objectify.txt ============================================================================== --- lxml/trunk/doc/objectify.txt (original) +++ lxml/trunk/doc/objectify.txt Fri Aug 11 09:18:23 2006 @@ -43,15 +43,6 @@ >>> from lxml import etree >>> from lxml import objectify -The normal way to use ``objectify`` is to register it with a dedicated parser. -This requires setting up ``lxml.etree`` to use `parser specific element -classes`_ first:: - - >>> lookup = etree.ParserBasedElementClassLookup() - >>> etree.setElementClassLookup(lookup) - -.. 
_`parser specific element classes`: element_classes.html#parser-based-lookup - The next step is to create a parser that builds objectify documents. The objectify API is meant for data-centric XML (as opposed to document XML with mixed content). Therefore, we configure the parser to let it remove @@ -754,10 +745,9 @@ ... AttributeError: no such child: callSanta -Please read the section on `Resetting the API`_ below to learn about possible -problems. - -.. _`Resetting the API`: #resetting-the-api +Be aware, though, that this does not immediately apply to elements to which +there already is a Python reference. Their Python class will only be changed +after all references are gone and the Python object is garbage collected. Recursive string representation of elements @@ -811,21 +801,3 @@ they rely on the original iteration scheme. This has the disadvantage that they may not be 100% backwards compatible, and the additional advantage that they now support any XPath expression. - - -Resetting the API ------------------ - -As the objectify setup is local to a parser, it does not interfere with the -rest of lxml. However, if you stop using the parser you registered -``objectify`` for, and you can make sure no other module is still using the -parser delegation, you can set the global class lookup mechanism back to the -default one, to disable the per-parser lookup. This is easily achieved by -calling the setup function without arguments:: - - >>> etree.setElementClassLookup() - -Be aware, though, that this does not immediately apply to elements to which -there already is a Python reference. Their Python class will only be changed -after all references are gone and the Python object is garbage collected. The -same applies to registered data classes for elements. From scoder at codespeak.net Fri Aug 11 09:21:35 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 11 Aug 2006 09:21:35 +0200 (CEST) Subject: [Lxml-checkins] r31248 - in lxml/trunk/src/lxml: . 
tests Message-ID: <20060811072135.8DBD81005A@code0.codespeak.net> Author: scoder Date: Fri Aug 11 09:21:32 2006 New Revision: 31248 Modified: lxml/trunk/src/lxml/objectify.pyx lxml/trunk/src/lxml/tests/test_objectify.py Log: removed objectify.register()/unregister(), replaced by parser based setup Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Fri Aug 11 09:21:32 2006 @@ -1489,25 +1489,3 @@ _value = str(_value) cetree.setNodeText(element._c_node, _value) return element - - -################################################################################ -# Module setup - -def register(prefer_nsclasses=True): - """Globally register the objectify element class lookup mechanism. - - By default, namespace specific element classes override this lookup. - Passing False for the ``prefer_nsclasses`` keyword argument will prevent - the namespace lookup. - - Note that this is not the preferred way of using the objectify - module. Consider using a parser specific setup instead. 
- """ - lookup = ObjectifyElementClassLookup() - if prefer_nsclasses: - lookup = etree.ElementNamespaceClassLookup(lookup) - etree.setElementClassLookup(lookup) - -def unregister(): - etree.setElementClassLookup() Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Fri Aug 11 09:21:32 2006 @@ -31,60 +31,68 @@ """ etree = etree + def XML(self, xml): + return self.etree.XML(xml, self.parser) + def setUp(self): - objectify.register() + self.parser = self.etree.XMLParser(remove_blank_text=True) + lookup = etree.ElementNamespaceClassLookup( + objectify.ObjectifyElementClassLookup() ) + self.parser.setElementClassLookup(lookup) + + self.Element = self.parser.makeelement + ns = self.etree.Namespace("otherNS") ns[None] = self.etree.ElementBase def tearDown(self): self.etree.Namespace("otherNS").clear() objectify.setPytypeAttributeTag() - objectify.unregister() def test_root(self): - root = self.etree.Element("test") + root = self.Element("test") self.assert_(isinstance(root, objectify.ObjectifiedElement)) def test_str(self): - root = self.etree.Element("test") + root = self.Element("test") self.assertEquals('', str(root)) def test_child(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) self.assertEquals("0", root.c1.c2.text) def test_child_getattr(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) self.assertEquals("0", getattr(root.c1, "{objectified}c2").text) self.assertEquals("3", getattr(root.c1, "{otherNS}c2").text) def test_child_nonexistant(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) self.assertRaises(AttributeError, getattr, root.c1, "NOT_THERE") self.assertRaises(AttributeError, getattr, root.c1, "{unknownNS}c2") def test_addattr(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) self.assertEquals(1, 
len(root.c1)) root.addattr("c1", "test") self.assertEquals(2, len(root.c1)) self.assertEquals("test", root.c1[1].text) def test_addattr_element(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) self.assertEquals(1, len(root.c1)) - new_el = self.etree.Element("test", myattr="5") + new_el = self.Element("test", myattr="5") root.addattr("c1", new_el) self.assertEquals(2, len(root.c1)) self.assertEquals(None, root.c1[0].get("myattr")) self.assertEquals("5", root.c1[1].get("myattr")) def test_addattr_list(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) self.assertEquals(1, len(root.c1)) - new_el = self.etree.Element("test") + new_el = self.Element("test") self.etree.SubElement(new_el, "a", myattr="A") self.etree.SubElement(new_el, "a", myattr="B") @@ -95,21 +103,21 @@ self.assertEquals("B", root.c1[2].get("myattr")) def test_child_addattr(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) self.assertEquals(3, len(root.c1.c2)) root.c1.addattr("c2", 3) self.assertEquals(4, len(root.c1.c2)) self.assertEquals("3", root.c1.c2[3].text) def test_child_index(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) self.assertEquals("0", root.c1.c2[0].text) self.assertEquals("1", root.c1.c2[1].text) self.assertEquals("2", root.c1.c2[2].text) self.assertRaises(IndexError, operator.itemgetter(3), root.c1.c2) def test_child_index_neg(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) self.assertEquals("0", root.c1.c2[0].text) self.assertEquals("0", root.c1.c2[-3].text) self.assertEquals("1", root.c1.c2[-2].text) @@ -117,13 +125,13 @@ self.assertRaises(IndexError, operator.itemgetter(-4), root.c1.c2) def test_child_len(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) self.assertEquals(1, len(root)) self.assertEquals(1, len(root.c1)) self.assertEquals(3, len(root.c1.c2)) def test_child_iter(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) self.assertEquals([root], 
list(iter(root))) self.assertEquals([root.c1], @@ -132,13 +140,13 @@ list(iter((root.c1.c2)))) def test_class_lookup(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) self.assert_(isinstance(root.c1.c2, objectify.ObjectifiedElement)) self.assertFalse(isinstance(getattr(root.c1, "{otherNS}c2"), objectify.ObjectifiedElement)) def test_dir(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) dir_c1 = dir(objectify.ObjectifiedElement) + ['c1'] dir_c1.sort() dir_c2 = dir(objectify.ObjectifiedElement) + ['c2'] @@ -148,17 +156,17 @@ self.assertEquals(dir_c2, dir(root.c1)) def test_vars(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) self.assertEquals({'c1' : root.c1}, vars(root)) self.assertEquals({'c2' : root.c1.c2}, vars(root.c1)) def test_child_set_ro(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) self.assertRaises(TypeError, setattr, root.c1.c2, 'text', "test") self.assertRaises(TypeError, setattr, root.c1.c2, 'pyval', "test") def test_setslice(self): - Element = self.etree.Element + Element = self.Element SubElement = self.etree.SubElement root = Element("root") root.c = ["c1", "c2"] @@ -187,7 +195,7 @@ def test_set_string(self): # make sure strings are not handled as sequences - Element = self.etree.Element + Element = self.Element SubElement = self.etree.SubElement root = Element("root") root.c = "TEST" @@ -195,7 +203,7 @@ [ c.text for c in root.c ]) def test_findall(self): - XML = self.etree.XML + XML = self.XML root = XML('') self.assertEquals(1, len(root.findall("c"))) self.assertEquals(2, len(root.findall(".//c"))) @@ -204,14 +212,14 @@ root.getchildren()[:2]) def test_findall_ns(self): - XML = self.etree.XML + XML = self.XML root = XML('') self.assertEquals(2, len(root.findall(".//{X}b"))) self.assertEquals(3, len(root.findall(".//b"))) self.assertEquals(2, len(root.findall("b"))) def test_build_tree(self): - root = self.etree.Element('root') + root = self.Element('root') root.a = 5 root.b = 
6 self.assert_(isinstance(root, objectify.ObjectifiedElement)) @@ -219,7 +227,7 @@ self.assert_(isinstance(root.b, objectify.IntElement)) def test_type_none(self): - Element = self.etree.Element + Element = self.Element SubElement = self.etree.SubElement nil_attr = "{http://www.w3.org/2001/XMLSchema-instance}nil" @@ -233,35 +241,35 @@ self.assertFalse(root.none[1]) def test_type_bool(self): - Element = self.etree.Element + Element = self.Element SubElement = self.etree.SubElement root = Element("{objectified}root") root.none = 'true' self.assert_(isinstance(root.none, objectify.BoolElement)) def test_type_str(self): - Element = self.etree.Element + Element = self.Element SubElement = self.etree.SubElement root = Element("{objectified}root") root.none = "test" self.assert_(isinstance(root.none, objectify.StringElement)) def test_type_int(self): - Element = self.etree.Element + Element = self.Element SubElement = self.etree.SubElement root = Element("{objectified}root") root.none = 5 self.assert_(isinstance(root.none, objectify.IntElement)) def test_type_float(self): - Element = self.etree.Element + Element = self.Element SubElement = self.etree.SubElement root = Element("{objectified}root") root.none = 5.5 self.assert_(isinstance(root.none, objectify.FloatElement)) def test_schema_types(self): - XML = self.etree.XML + XML = self.XML root = XML('''\ 5 @@ -280,14 +288,14 @@ self.assertEquals(5.0, root.a[2]) def test_type_str_sequence(self): - XML = self.etree.XML + XML = self.XML root = XML(u'whytry') strs = [ str(s) for s in root.b ] self.assertEquals(["why", "try"], strs) def test_type_str_cmp(self): - XML = self.etree.XML + XML = self.XML root = XML(u'testtaste') self.assertFalse(root.b[0] < root.b[1]) self.assertFalse(root.b[0] <= root.b[1]) @@ -308,7 +316,7 @@ self.assertFalse(root.b) def test_type_int_cmp(self): - XML = self.etree.XML + XML = self.XML root = XML(u'56') self.assert_(root.b[0] < root.b[1]) self.assert_(root.b[0] <= root.b[1]) @@ -329,7 +337,7 @@ 
self.assertFalse(root.b) def test_type_bool_cmp(self): - XML = self.etree.XML + XML = self.XML root = XML(u'falsetrue') self.assert_(root.b[0] < root.b[1]) self.assert_(root.b[0] <= root.b[1]) @@ -353,7 +361,7 @@ self.assertFalse(root.b) def test_type_annotation(self): - XML = self.etree.XML + XML = self.XML root = XML(u'''\ 5 @@ -380,7 +388,7 @@ self.assertEquals("float", child_types[7]) def test_change_pytype_attribute(self): - XML = self.etree.XML + XML = self.XML xml = u'''\ @@ -457,25 +465,25 @@ pytype.register() def test_object_path(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath( "root.c1.c2" ) self.assertEquals(root.c1.c2.text, path.find(root).text) self.assertEquals(root.c1.c2.text, path(root).text) def test_object_path_list(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath( ['root', 'c1', 'c2'] ) self.assertEquals(root.c1.c2.text, path.find(root).text) self.assertEquals(root.c1.c2.text, path(root).text) def test_object_path_fail(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath( "root.c1.c99" ) self.assertRaises(AttributeError, path, root) self.assertEquals(None, path(root, None)) def test_object_path_syntax(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath("root . {objectified}c1. 
c2") self.assertEquals(root.c1.c2.text, path(root).text) @@ -483,7 +491,7 @@ self.assertEquals(root.c1.c2.text, path(root).text) def test_object_path_hasattr(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath( "root" ) self.assert_(path.hasattr(root)) path = objectify.ObjectPath( "root.c1" ) @@ -502,17 +510,17 @@ self.assertFalse(path.hasattr(root)) def test_object_path_dot_root(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath( ".c1.c2" ) self.assertEquals(root.c1.c2.text, path(root).text) def test_object_path_dot_root_list(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath( ['', 'c1', 'c2'] ) self.assertEquals(root.c1.c2.text, path(root).text) def test_object_path_index(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath( "root.c1[0].c2[0]" ) self.assertEquals(root.c1.c2.text, path(root).text) @@ -532,7 +540,7 @@ self.assertEquals(root.c1.c2[-3].text, path(root).text) def test_object_path_index_list(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath( ['root', 'c1[0]', 'c2[0]'] ) self.assertEquals(root.c1.c2.text, path(root).text) @@ -567,7 +575,7 @@ ['', '', '']) def test_object_path_index_fail_lookup(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath("root.c1[9999].c2") self.assertRaises(AttributeError, path, root) @@ -584,7 +592,7 @@ self.assertRaises(AttributeError, path, root) def test_object_path_ns(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath( "{objectified}root.c1.c2" ) self.assertEquals(root.c1.c2.text, path.find(root).text) path = objectify.ObjectPath( "{objectified}root.{objectified}c1.c2" ) @@ -598,7 +606,7 @@ path.find(root).text) def test_object_path_ns_list(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = 
objectify.ObjectPath( ['{objectified}root', 'c1', 'c2'] ) self.assertEquals(root.c1.c2.text, path.find(root).text) path = objectify.ObjectPath( ['{objectified}root', '{objectified}c1', 'c2'] ) @@ -616,7 +624,7 @@ path.find(root).text) def test_object_path_set(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath( "root.c1.c2" ) self.assertEquals(root.c1.c2.text, path.find(root).text) self.assertEquals("1", root.c1.c2[1].text) @@ -629,12 +637,12 @@ self.assertEquals("1", root.c1.c2[1].text) def test_object_path_set_element(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath( "root.c1.c2" ) self.assertEquals(root.c1.c2.text, path.find(root).text) self.assertEquals("1", root.c1.c2[1].text) - new_el = etree.Element("{objectified}test") + new_el = self.Element("{objectified}test") etree.SubElement(new_el, "{objectified}sub", myattr="ATTR").a = "TEST" path.setattr(root, new_el.sub) @@ -644,7 +652,7 @@ self.assertEquals("1", root.c1.c2[1].text) def test_object_path_set_create(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath( "root.c1.c99" ) self.assertRaises(AttributeError, path.find, root) @@ -656,11 +664,11 @@ self.assertEquals(new_value, path(root).text) def test_object_path_set_create_element(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath( "root.c1.c99" ) self.assertRaises(AttributeError, path.find, root) - new_el = etree.Element("{objectified}test") + new_el = self.Element("{objectified}test") etree.SubElement(new_el, "{objectified}sub", myattr="ATTR").a = "TEST" path.setattr(root, new_el.sub) @@ -670,11 +678,11 @@ self.assertEquals("TEST", path(root).a.text) def test_object_path_set_create_list(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath( "root.c1.c99" ) self.assertRaises(AttributeError, path.find, root) - new_el = 
etree.Element("{objectified}test") + new_el = self.Element("{objectified}test") new_el.a = ["TEST1", "TEST2"] new_el.a[0].set("myattr", "ATTR1") new_el.a[1].set("myattr", "ATTR2") @@ -689,7 +697,7 @@ self.assertEquals("TEST1", path(root).text) def test_object_path_addattr(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath( "root.c1.c2" ) self.assertEquals(3, len(root.c1.c2)) path.addattr(root, "test") @@ -698,11 +706,11 @@ [el.text for el in root.c1.c2]) def test_object_path_addattr_element(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath( "root.c1.c2" ) self.assertEquals(3, len(root.c1.c2)) - new_el = etree.Element("{objectified}test") + new_el = self.Element("{objectified}test") etree.SubElement(new_el, "{objectified}sub").a = "TEST" path.addattr(root, new_el.sub) @@ -712,7 +720,7 @@ [el.text for el in root.c1.c2[:3]]) def test_object_path_addattr_create(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath( "root.c1.c99" ) self.assertRaises(AttributeError, path.find, root) @@ -724,11 +732,11 @@ self.assertEquals(new_value, path(root).text) def test_object_path_addattr_create_element(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath( "root.c1.c99" ) self.assertRaises(AttributeError, path.find, root) - new_el = etree.Element("{objectified}test") + new_el = self.Element("{objectified}test") etree.SubElement(new_el, "{objectified}sub", myattr="ATTR").a = "TEST" path.addattr(root, new_el.sub) @@ -738,11 +746,11 @@ self.assertEquals("ATTR", root.c1.c99.get("myattr")) def test_object_path_addattr_create_list(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) path = objectify.ObjectPath( "root.c1.c99" ) self.assertRaises(AttributeError, path.find, root) - new_el = etree.Element("{objectified}test") + new_el = self.Element("{objectified}test") new_el.a = ["TEST1", "TEST2"] 
self.assertEquals(2, len(new_el.a)) @@ -753,7 +761,7 @@ self.assertEquals("TEST2", path(root)[1].text) def test_descendant_paths(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) self.assertEquals( ['{objectified}root', '{objectified}root.c1', '{objectified}root.c1.c2', @@ -762,7 +770,7 @@ root.descendantpaths()) def test_descendant_paths_child(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) self.assertEquals( ['{objectified}c1', '{objectified}c1.c2', '{objectified}c1.c2[1]', '{objectified}c1.c2[2]', @@ -770,7 +778,7 @@ root.c1.descendantpaths()) def test_descendant_paths_prefix(self): - root = self.etree.XML(xml_str) + root = self.XML(xml_str) self.assertEquals( ['root.{objectified}c1', 'root.{objectified}c1.c2', 'root.{objectified}c1.c2[1]', 'root.{objectified}c1.c2[2]', From scoder at codespeak.net Fri Aug 11 12:11:33 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 11 Aug 2006 12:11:33 +0200 (CEST) Subject: [Lxml-checkins] r31253 - lxml/trunk/src/lxml Message-ID: <20060811101133.A233A1006E@code0.codespeak.net> Author: scoder Date: Fri Aug 11 12:11:32 2006 New Revision: 31253 Modified: lxml/trunk/src/lxml/objectify.pyx Log: reverted premature optimisation Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Fri Aug 11 12:11:32 2006 @@ -128,11 +128,8 @@ else: raise TypeError, "parser must inherit from lxml.etree.XMLParser" -cdef object DEFAULT_DOCUMENT -DEFAULT_DOCUMENT = parser.makeelement("root") - cdef object _makeelement -_makeelement = DEFAULT_DOCUMENT.makeelement +_makeelement = parser.makeelement ################################################################################ From scoder at codespeak.net Fri Aug 11 13:36:58 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 11 Aug 2006 13:36:58 +0200 (CEST) Subject: 
[Lxml-checkins] r31258 - in lxml/trunk/src/lxml: . tests Message-ID: <20060811113658.BB91710060@code0.codespeak.net> Author: scoder Date: Fri Aug 11 13:36:56 2006 New Revision: 31258 Modified: lxml/trunk/src/lxml/objectify.pyx lxml/trunk/src/lxml/tests/test_objectify.py Log: fix: let DataElement() determine Python types for bool/float values correctly, instantiate the right Element class from DataElement() Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Fri Aug 11 13:36:56 2006 @@ -968,10 +968,6 @@ cdef object _lookupElementClass(state, _Document doc, tree.xmlNode* c_node): cdef python.PyObject* dict_result - # if element is root node => no data class - if c_node.parent is NULL or not tree._isElement(c_node.parent): - return ObjectifiedElement - # if element has children => no data class if cetree.findChildForwards(c_node, 0) is not NULL: return ObjectifiedElement @@ -1004,6 +1000,10 @@ if el_class is not None: return el_class + # if element is root node => no data class + if c_node.parent is NULL or not tree._isElement(c_node.parent): + return ObjectifiedElement + # default to string element class if type attribute is not exploitable return _StringElement @@ -1464,11 +1464,22 @@ python.PyDict_SetItem(_attributes, XML_SCHEMA_INSTANCE_TYPE_ATTR, _xsi) if _pytype is None: _pytype = _SCHEMA_TYPE_DICT[_xsi].name + + if python._isString(_value): + strval = _value + elif python.PyBool_Check(_value): + if _value: + strval = "true" + else: + strval = "false" + else: + strval = str(_value) + if _pytype is None: errors = (ValueError, TypeError) for type_check, pytype in _TYPE_CHECKS: try: - type_check(_value) + type_check(strval) _pytype = (pytype).name break except errors: @@ -1482,7 +1493,5 @@ python.PyDict_SetItem(_attributes, PYTYPE_ATTRIBUTE, _pytype) element = _makeelement("value", _attributes) - if not 
python._isString(_value): - _value = str(_value) - cetree.setNodeText(element._c_node, _value) + cetree.setNodeText(element._c_node, strval) return element Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Fri Aug 11 13:36:56 2006 @@ -240,6 +240,11 @@ self.assertEquals(root.none[1], None) self.assertFalse(root.none[1]) + def test_data_element_none(self): + value = objectify.DataElement(None) + self.assert_(isinstance(value, objectify.NoneElement)) + self.assertEquals(value, None) + def test_type_bool(self): Element = self.Element SubElement = self.etree.SubElement @@ -247,6 +252,15 @@ root.none = 'true' self.assert_(isinstance(root.none, objectify.BoolElement)) + def test_data_element_bool(self): + value = objectify.DataElement(True) + self.assert_(isinstance(value, objectify.BoolElement)) + self.assertEquals(value, True) + + value = objectify.DataElement(False) + self.assert_(isinstance(value, objectify.BoolElement)) + self.assertEquals(value, False) + def test_type_str(self): Element = self.Element SubElement = self.etree.SubElement @@ -254,6 +268,11 @@ root.none = "test" self.assert_(isinstance(root.none, objectify.StringElement)) + def test_data_element_str(self): + value = objectify.DataElement("test") + self.assert_(isinstance(value, objectify.StringElement)) + self.assertEquals(value, "test") + def test_type_int(self): Element = self.Element SubElement = self.etree.SubElement @@ -261,6 +280,11 @@ root.none = 5 self.assert_(isinstance(root.none, objectify.IntElement)) + def test_data_element_int(self): + value = objectify.DataElement(5) + self.assert_(isinstance(value, objectify.IntElement)) + self.assertEquals(value, 5) + def test_type_float(self): Element = self.Element SubElement = self.etree.SubElement @@ -268,6 +292,11 @@ root.none = 5.5 
self.assert_(isinstance(root.none, objectify.FloatElement)) + def test_data_element_float(self): + value = objectify.DataElement(5.5) + self.assert_(isinstance(value, objectify.FloatElement)) + self.assertEquals(value, 5.5) + def test_schema_types(self): XML = self.XML root = XML('''\ From scoder at codespeak.net Fri Aug 11 13:45:04 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 11 Aug 2006 13:45:04 +0200 (CEST) Subject: [Lxml-checkins] r31259 - lxml/trunk/src/lxml Message-ID: <20060811114504.BEBA510060@code0.codespeak.net> Author: scoder Date: Fri Aug 11 13:45:02 2006 New Revision: 31259 Modified: lxml/trunk/src/lxml/objectify.pyx Log: let _guessElementClass default to StringElement for empty non root-nodes Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Fri Aug 11 13:45:02 2006 @@ -900,6 +900,9 @@ cdef object _guessElementClass(tree.xmlNode* c_node): value = textOf(c_node) if value is None: + # if element is not a root node => default to string node + if c_node.parent is not NULL and tree._isElement(c_node.parent): + return StringElement # default to ObjectifiedElement class return ObjectifiedElement if value == '': From ianb at codespeak.net Mon Aug 14 18:37:46 2006 From: ianb at codespeak.net (ianb at codespeak.net) Date: Mon, 14 Aug 2006 18:37:46 +0200 (CEST) Subject: [Lxml-checkins] r31299 - lxml/trunk/doc Message-ID: <20060814163746.C8D6E10071@code0.codespeak.net> Author: ianb Date: Mon Aug 14 18:37:44 2006 New Revision: 31299 Modified: lxml/trunk/doc/build.txt Log: Added note about python setup.py build_ext -I Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Mon Aug 14 18:37:44 2006 @@ -75,6 +75,14 @@ make +If you get errors about missing 
header files (e.g., +``libxml/xmlversion.h``) then you need to add the location of that +file to the include path like:: + + python setup.py build_ext -i -I /usr/include/libxml2 + +where the file is in ``/usr/include/libxml2/libxml/xmlversion.h`` + If you then place lxml's ``src`` directory on your PYTHONPATH somehow, you can import ``lxml.etree`` and play with it. From scoder at codespeak.net Fri Aug 25 22:21:26 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 25 Aug 2006 22:21:26 +0200 (CEST) Subject: [Lxml-checkins] r31660 - lxml/trunk Message-ID: <20060825202126.632E91007B@code0.codespeak.net> Author: scoder Date: Fri Aug 25 22:21:24 2006 New Revision: 31660 Modified: lxml/trunk/MANIFEST.in Log: ship objectify.c with distribution Modified: lxml/trunk/MANIFEST.in ============================================================================== --- lxml/trunk/MANIFEST.in (original) +++ lxml/trunk/MANIFEST.in Fri Aug 25 22:21:24 2006 @@ -3,7 +3,8 @@ include update-error-constants.py include MANIFEST.in version.txt include CHANGES.txt CREDITS.txt INSTALL.txt LICENSES.txt README.txt TODO.txt -recursive-include src *.pyx *.pxd *.pxi *.py etree.c etree.h etree_defs.h +recursive-include src *.pyx *.pxd *.pxi *.py +recursive-include src/lxml etree.c objectify.c etree.h etree_defs.h recursive-include src/lxml/tests *.rng *.xslt *.xml *.dtd recursive-include benchmark *.py recursive-include doc *.txt *.html *.css *.xml *.mgp pubkey.asc From scoder at codespeak.net Fri Aug 25 22:32:53 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 25 Aug 2006 22:32:53 +0200 (CEST) Subject: [Lxml-checkins] r31661 - in lxml/trunk: . 
src/lxml Message-ID: <20060825203253.7F7E81007B@code0.codespeak.net> Author: scoder Date: Fri Aug 25 22:32:51 2006 New Revision: 31661 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/etree.pyx Log: fix for replace() crash found by John Krukoff Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Aug 25 22:32:51 2006 @@ -15,6 +15,8 @@ Bugs fixed ---------- +* Crash in tail handling in ``Element.replace()`` + * 1.1beta did not compile under Python 2.3 * ignore unknown 'pyval' attribute values in objectify Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Fri Aug 25 22:32:51 2006 @@ -797,9 +797,9 @@ c_new_node = new_element._c_node c_new_next = c_new_node.next tree.xmlReplaceNode(c_old_node, c_new_node) - moveNodeToDocument(new_element, self._doc) _moveTail(c_new_next, c_new_node) _moveTail(c_old_next, c_old_node) + moveNodeToDocument(new_element, self._doc) # PROPERTIES property tag: From scoder at codespeak.net Fri Aug 25 22:36:44 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 25 Aug 2006 22:36:44 +0200 (CEST) Subject: [Lxml-checkins] r31662 - in lxml/branch/lxml-1.0: . 
src/lxml Message-ID: <20060825203644.BAE011007B@code0.codespeak.net> Author: scoder Date: Fri Aug 25 22:36:43 2006 New Revision: 31662 Modified: lxml/branch/lxml-1.0/CHANGES.txt lxml/branch/lxml-1.0/src/lxml/etree.pyx Log: fix for replace() crash found by John Krukoff Modified: lxml/branch/lxml-1.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.0/CHANGES.txt (original) +++ lxml/branch/lxml-1.0/CHANGES.txt Fri Aug 25 22:36:43 2006 @@ -2,6 +2,18 @@ lxml changelog ============== +current +======= + +Features added +-------------- + +Bugs fixed +---------- + +* Crash in tail handling in ``Element.replace()`` + + 1.0.3 (2006-08-08) ================== Modified: lxml/branch/lxml-1.0/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-1.0/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-1.0/src/lxml/etree.pyx Fri Aug 25 22:36:43 2006 @@ -758,9 +758,9 @@ c_new_node = new_element._c_node c_new_next = c_new_node.next tree.xmlReplaceNode(c_old_node, c_new_node) - moveNodeToDocument(new_element, self._doc) _moveTail(c_new_next, c_new_node) _moveTail(c_old_next, c_old_node) + moveNodeToDocument(new_element, self._doc) # PROPERTIES property tag: From scoder at codespeak.net Fri Aug 25 22:47:28 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 25 Aug 2006 22:47:28 +0200 (CEST) Subject: [Lxml-checkins] r31663 - lxml/trunk/src/lxml/tests Message-ID: <20060825204728.0FD1E1007B@code0.codespeak.net> Author: scoder Date: Fri Aug 25 22:47:27 2006 New Revision: 31663 Modified: lxml/trunk/src/lxml/tests/test_etree.py Log: extended replace() test case Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Fri Aug 25 22:47:27 2006 @@ -984,11 +984,19 @@ 
etree.SubElement(e, 'a%s' % i) new_element = etree.Element("test") + new_element.text = "TESTTEXT" + new_element.tail = "TESTTAIL" child1 = e[1] e.replace(e[0], new_element) self.assertEquals( new_element, e[0]) self.assertEquals( + "TESTTEXT", + e[0].text) + self.assertEquals( + "TESTTAIL", + e[0].tail) + self.assertEquals( child1, e[1]) def test_docinfo_public(self): From scoder at codespeak.net Fri Aug 25 22:48:14 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 25 Aug 2006 22:48:14 +0200 (CEST) Subject: [Lxml-checkins] r31664 - lxml/branch/lxml-1.0/src/lxml/tests Message-ID: <20060825204814.435671007B@code0.codespeak.net> Author: scoder Date: Fri Aug 25 22:48:13 2006 New Revision: 31664 Modified: lxml/branch/lxml-1.0/src/lxml/tests/test_etree.py Log: extended replace() test case Modified: lxml/branch/lxml-1.0/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/lxml-1.0/src/lxml/tests/test_etree.py (original) +++ lxml/branch/lxml-1.0/src/lxml/tests/test_etree.py Fri Aug 25 22:48:13 2006 @@ -559,11 +559,19 @@ etree.SubElement(e, 'a%s' % i) new_element = etree.Element("test") + new_element.text = "TESTTEXT" + new_element.tail = "TESTTAIL" child1 = e[1] e.replace(e[0], new_element) self.assertEquals( new_element, e[0]) self.assertEquals( + "TESTTEXT", + e[0].text) + self.assertEquals( + "TESTTAIL", + e[0].tail) + self.assertEquals( child1, e[1]) def test_docinfo_public(self): From scoder at codespeak.net Mon Aug 28 18:12:07 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 28 Aug 2006 18:12:07 +0200 (CEST) Subject: [Lxml-checkins] r31763 - in lxml/trunk/src/lxml: . 
tests Message-ID: <20060828161207.EFB2F1007F@code0.codespeak.net> Author: scoder Date: Mon Aug 28 18:12:05 2006 New Revision: 31763 Modified: lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tests/test_etree.py Log: Element.extend() Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Mon Aug 28 18:12:05 2006 @@ -449,6 +449,23 @@ c_node = c_next return c_node +cdef void _appendChild(_Element parent, _Element child): + """Append a new child to a parent element. + """ + cdef xmlNode* c_next + cdef xmlNode* c_node + c_node = child._c_node + # store possible text node + c_next = c_node.next + # XXX what if element is coming from a different document? + tree.xmlUnlinkNode(c_node) + # move node itself + tree.xmlAddChild(parent._c_node, c_node) + _moveTail(c_next, c_node) + # uh oh, elements may be pointing to different doc when + # parent element has moved; change them too.. 
+ moveNodeToDocument(child, parent._doc) + cdef int isutf8(char* s): cdef char c c = s[0] Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Mon Aug 28 18:12:05 2006 @@ -668,15 +668,17 @@ cdef xmlNode* c_next cdef _Element mynode # first, find start of slice - c_node = _findChild(self._c_node, start) - # now delete the slice - if start != stop: - c_node = _deleteSlice(c_node, start, stop) + if start == python.PY_SSIZE_T_MAX: + c_node = NULL + else: + c_node = _findChild(self._c_node, start) + # now delete the slice + if start != stop: + c_node = _deleteSlice(c_node, start, stop) # if the insertion point is at the end, append there if c_node is NULL: - append = self.append - for node in value: - append(node) + for element in value: + _appendChild(self, element) return # if the next element is in the list, insert before it for mynode in value: @@ -708,22 +710,15 @@ _setAttributeValue(self, key, value) def append(self, _Element element not None): + """Adds a subelement to the end of this element. """ - Adds a subelement to the end of this element. + _appendChild(self, element) + + def extend(self, elements): + """Extends the current children by the elements in the iterable. """ - cdef xmlNode* c_next - cdef xmlNode* c_node - c_node = element._c_node - # store possible text node - c_next = c_node.next - # XXX what if element is coming from a different document? - tree.xmlUnlinkNode(c_node) - # move node itself - tree.xmlAddChild(self._c_node, c_node) - _moveTail(c_next, c_node) - # uh oh, elements may be pointing to different doc when - # parent element has moved; change them too.. - moveNodeToDocument(element, self._doc) + for element in elements: + _appendChild(self, element) def clear(self): """Resets an element. 
This function removes all subelements, @@ -761,7 +756,7 @@ cdef xmlNode* c_next c_node = _findChild(self._c_node, index) if c_node is NULL: - self.append(element) + _appendChild(self, element) return c_next = element._c_node.next tree.xmlAddPrevSibling(c_node, element._c_node) Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Mon Aug 28 18:12:05 2006 @@ -999,6 +999,33 @@ self.assertEquals( child1, e[1]) + def test_extend(self): + etree = self.etree + root = etree.Element('foo') + for i in range(3): + element = etree.SubElement(root, 'a%s' % i) + element.text = "text%d" % i + element.tail = "tail%d" % i + + elements = [] + for i in range(3): + new_element = etree.Element("test%s" % i) + new_element.text = "TEXT%s" % i + new_element.tail = "TAIL%s" % i + elements.append(new_element) + + root.extend(elements) + + self.assertEquals( + ["a0", "a1", "a2", "test0", "test1", "test2"], + [ el.tag for el in root ]) + self.assertEquals( + ["text0", "text1", "text2", "TEXT0", "TEXT1", "TEXT2"], + [ el.text for el in root ]) + self.assertEquals( + ["tail0", "tail1", "tail2", "TAIL0", "TAIL1", "TAIL2"], + [ el.tail for el in root ]) + def test_docinfo_public(self): etree = self.etree xml_header = '' From scoder at codespeak.net Mon Aug 28 18:15:09 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 28 Aug 2006 18:15:09 +0200 (CEST) Subject: [Lxml-checkins] r31764 - lxml/trunk/src/lxml Message-ID: <20060828161509.388CD1007F@code0.codespeak.net> Author: scoder Date: Mon Aug 28 18:15:07 2006 New Revision: 31764 Modified: lxml/trunk/src/lxml/etreepublic.pxd lxml/trunk/src/lxml/objectify.pyx lxml/trunk/src/lxml/public-api.pxi Log: public C function appendChild(parent, child) Modified: lxml/trunk/src/lxml/etreepublic.pxd 
============================================================================== --- lxml/trunk/src/lxml/etreepublic.pxd (original) +++ lxml/trunk/src/lxml/etreepublic.pxd Mon Aug 28 18:15:07 2006 @@ -185,6 +185,9 @@ # set the tail text value of an element cdef int setTailText(tree.xmlNode* c_node, text) except -1 + # append an element to the children of a parent element + cdef void appendChild(_Element parent, _Element child) + # recursively lookup a namespace in element or ancestors, or create it cdef tree.xmlNs* findOrBuildNodeNs(_Document doc, tree.xmlNode* c_node, char* href) Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Mon Aug 28 18:15:07 2006 @@ -461,7 +461,7 @@ new_element = cetree.deepcopyNodeToDocument( parent._doc, (<_Element>value)._c_node) new_element.tag = tag - parent.append(new_element) + cetree.appendChild(parent, new_element) elif python.PyList_Check(value) or python.PyTuple_Check(value): for item in value: _appendValue(parent, tag, item) Modified: lxml/trunk/src/lxml/public-api.pxi ============================================================================== --- lxml/trunk/src/lxml/public-api.pxi (original) +++ lxml/trunk/src/lxml/public-api.pxi Mon Aug 28 18:15:07 2006 @@ -101,6 +101,9 @@ cdef public xmlNode* previousElement(xmlNode* c_node): return _previousElement(c_node) +cdef public void appendChild(_Element parent, _Element child): + _appendChild(parent, child) + cdef public object pyunicode(char* s): if s is NULL: raise TypeError From scoder at codespeak.net Mon Aug 28 18:41:26 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 28 Aug 2006 18:41:26 +0200 (CEST) Subject: [Lxml-checkins] r31765 - in lxml/trunk: . 
src/lxml src/lxml/tests Message-ID: <20060828164126.965961007F@code0.codespeak.net> Author: scoder Date: Mon Aug 28 18:41:25 2006 New Revision: 31765 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tests/test_xpathevaluator.py Log: let repr() return "<!--text-->" for comments and "<?target text?>" for PIs Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Aug 28 18:41:25 2006 @@ -8,6 +8,11 @@ Features added -------------- +* List-like ``Element.extend()`` method + +* Comments and processing instructions return '<!--text-->' and + '<?target text?>' for repr() + * Parsers are now the preferred (and default) place where element class lookup schemes should be registered. Namespace lookup is no longer supported by default. Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Mon Aug 28 18:41:25 2006 @@ -1216,9 +1216,6 @@ tree.xmlNodeSetContent(self._c_node, c_text) # ACCESSORS - def __repr__(self): - return "" % self.text - def __getitem__(self, n): raise IndexError @@ -1239,6 +1236,9 @@ def __get__(self): return Comment + def __repr__(self): + return "<!--%s-->" % self.text + cdef class _ProcessingInstruction(__ContentOnlyElement): property tag: def __get__(self): @@ -1254,6 +1254,13 @@ c_text = _cstr(value) tree.xmlNodeSetName(self._c_node, c_text) + def __repr__(self): + text = self.text + if text: + return "<?%s %s?>" % (self.target, text) + else: + return "<?%s?>" % self.target + cdef class _Attrib: cdef _NodeBase _element def __init__(self, _NodeBase element not None): Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Mon Aug 28
18:41:25 2006 @@ -74,7 +74,7 @@ def test_xpath_list_comment(self): tree = self.parse('') - self.assertEquals([''], + self.assertEquals([''], map(repr, tree.xpath('/a/node()'))) def test_rel_xpath_boolean(self): From scoder at codespeak.net Mon Aug 28 18:45:35 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 28 Aug 2006 18:45:35 +0200 (CEST) Subject: [Lxml-checkins] r31766 - lxml/trunk/doc Message-ID: <20060828164535.6994110082@code0.codespeak.net> Author: scoder Date: Mon Aug 28 18:45:34 2006 New Revision: 31766 Modified: lxml/trunk/doc/build.txt Log: doc cleanup Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Mon Aug 28 18:45:34 2006 @@ -75,16 +75,16 @@ make -If you get errors about missing header files (e.g., -``libxml/xmlversion.h``) then you need to add the location of that -file to the include path like:: +If you get errors about missing header files (e.g., ``libxml/xmlversion.h``) +then you need to add the location of that file to the include path like:: python setup.py build_ext -i -I /usr/include/libxml2 where the file is in ``/usr/include/libxml2/libxml/xmlversion.h`` -If you then place lxml's ``src`` directory on your PYTHONPATH somehow, you can -import ``lxml.etree`` and play with it. +To use lxml.etree in-place, you can place lxml's ``src`` directory on your +Python module search path (PYTHONPATH) and then import ``lxml.etree`` to play +with it. To recompile after changes, note that you may have to run ``make clean`` or delete the file ``src/lxml/etree.c``. Distutils do not automatically pick up From scoder at codespeak.net Mon Aug 28 19:12:13 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 28 Aug 2006 19:12:13 +0200 (CEST) Subject: [Lxml-checkins] r31767 - in lxml/branch/lxml-1.0: . 
src/lxml src/lxml/tests Message-ID: <20060828171213.CBC0810086@code0.codespeak.net> Author: scoder Date: Mon Aug 28 19:12:11 2006 New Revision: 31767 Modified: lxml/branch/lxml-1.0/CHANGES.txt lxml/branch/lxml-1.0/src/lxml/apihelpers.pxi lxml/branch/lxml-1.0/src/lxml/etree.pyx lxml/branch/lxml-1.0/src/lxml/python.pxd lxml/branch/lxml-1.0/src/lxml/tests/test_etree.py Log: Element.extend() Modified: lxml/branch/lxml-1.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.0/CHANGES.txt (original) +++ lxml/branch/lxml-1.0/CHANGES.txt Mon Aug 28 19:12:11 2006 @@ -8,6 +8,8 @@ Features added -------------- +* List-like ``Element.extend()`` method + Bugs fixed ---------- Modified: lxml/branch/lxml-1.0/src/lxml/apihelpers.pxi ============================================================================== --- lxml/branch/lxml-1.0/src/lxml/apihelpers.pxi (original) +++ lxml/branch/lxml-1.0/src/lxml/apihelpers.pxi Mon Aug 28 19:12:11 2006 @@ -323,6 +323,23 @@ c_node = c_next return c_node +cdef void _appendChild(_Element parent, _Element child): + """Append a new child to a parent element. + """ + cdef xmlNode* c_next + cdef xmlNode* c_node + c_node = child._c_node + # store possible text node + c_next = c_node.next + # XXX what if element is coming from a different document? + tree.xmlUnlinkNode(c_node) + # move node itself + tree.xmlAddChild(parent._c_node, c_node) + _moveTail(c_next, c_node) + # uh oh, elements may be pointing to different doc when + # parent element has moved; change them too.. 
+ moveNodeToDocument(child, parent._doc) + cdef int isutf8(char* s): cdef char c c = s[0] Modified: lxml/branch/lxml-1.0/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-1.0/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-1.0/src/lxml/etree.pyx Mon Aug 28 19:12:11 2006 @@ -627,15 +627,17 @@ cdef xmlNode* c_next cdef _Element mynode # first, find start of slice - c_node = _findChild(self._c_node, start) - # now delete the slice - if start != stop: - c_node = _deleteSlice(c_node, start, stop) + if start == python.PY_SSIZE_T_MAX: + c_node = NULL + else: + c_node = _findChild(self._c_node, start) + # now delete the slice + if start != stop: + c_node = _deleteSlice(c_node, start, stop) # if the insertion point is at the end, append there if c_node is NULL: - append = self.append - for node in value: - append(node) + for element in value: + _appendChild(self, element) return # if the next element is in the list, insert before it for mynode in value: @@ -669,22 +671,15 @@ _setAttributeValue(self, key, value) def append(self, _Element element not None): + """Adds a subelement to the end of this element. """ - Adds a subelement to the end of this element. + _appendChild(self, element) + + def extend(self, elements): + """Extends the current children by the elements in the iterable. """ - cdef xmlNode* c_next - cdef xmlNode* c_node - c_node = element._c_node - # store possible text node - c_next = c_node.next - # XXX what if element is coming from a different document? - tree.xmlUnlinkNode(c_node) - # move node itself - tree.xmlAddChild(self._c_node, c_node) - _moveTail(c_next, c_node) - # uh oh, elements may be pointing to different doc when - # parent element has moved; change them too.. - moveNodeToDocument(element, self._doc) + for element in elements: + _appendChild(self, element) def clear(self): """Resets an element. 
This function removes all subelements, @@ -722,7 +717,7 @@ cdef xmlNode* c_next c_node = _findChild(self._c_node, index) if c_node is NULL: - self.append(element) + _appendChild(self, element) return c_next = element._c_node.next tree.xmlAddPrevSibling(c_node, element._c_node) Modified: lxml/branch/lxml-1.0/src/lxml/python.pxd ============================================================================== --- lxml/branch/lxml-1.0/src/lxml/python.pxd (original) +++ lxml/branch/lxml-1.0/src/lxml/python.pxd Mon Aug 28 19:12:11 2006 @@ -5,6 +5,7 @@ ctypedef int size_t ctypedef int Py_ssize_t cdef int INT_MAX + cdef Py_ssize_t PY_SSIZE_T_MAX cdef FILE* PyFile_AsFile(object p) cdef int PyFile_Check(object p) Modified: lxml/branch/lxml-1.0/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/lxml-1.0/src/lxml/tests/test_etree.py (original) +++ lxml/branch/lxml-1.0/src/lxml/tests/test_etree.py Mon Aug 28 19:12:11 2006 @@ -574,6 +574,33 @@ self.assertEquals( child1, e[1]) + def test_extend(self): + etree = self.etree + root = etree.Element('foo') + for i in range(3): + element = etree.SubElement(root, 'a%s' % i) + element.text = "text%d" % i + element.tail = "tail%d" % i + + elements = [] + for i in range(3): + new_element = etree.Element("test%s" % i) + new_element.text = "TEXT%s" % i + new_element.tail = "TAIL%s" % i + elements.append(new_element) + + root.extend(elements) + + self.assertEquals( + ["a0", "a1", "a2", "test0", "test1", "test2"], + [ el.tag for el in root ]) + self.assertEquals( + ["text0", "text1", "text2", "TEXT0", "TEXT1", "TEXT2"], + [ el.text for el in root ]) + self.assertEquals( + ["tail0", "tail1", "tail2", "TAIL0", "TAIL1", "TAIL2"], + [ el.tail for el in root ]) + def test_docinfo_public(self): etree = self.etree xml_header = ''