From scoder at codespeak.net Sat Jul 1 15:55:39 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 1 Jul 2006 15:55:39 +0200 (CEST) Subject: [Lxml-checkins] r29565 - lxml/trunk/src/lxml Message-ID: <20060701135539.696581007F@code0.codespeak.net> Author: scoder Date: Sat Jul 1 15:55:37 2006 New Revision: 29565 Added: lxml/trunk/src/lxml/etree_defs.h - copied, changed from r29391, lxml/trunk/src/lxml/etree.h Removed: lxml/trunk/src/lxml/etree.h Modified: lxml/trunk/src/lxml/cstd.pxd lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/tree.pxd Log: renamed etree.h to etree_defs.h to move it out of the way: public types in Pyrex will generate an etree.h Modified: lxml/trunk/src/lxml/cstd.pxd ============================================================================== --- lxml/trunk/src/lxml/cstd.pxd (original) +++ lxml/trunk/src/lxml/cstd.pxd Sat Jul 1 15:55:37 2006 @@ -17,6 +17,6 @@ void va_start(va_list ap, void *last) void va_end(va_list ap) -cdef extern from "etree.h": +cdef extern from "etree_defs.h": cdef int va_int(va_list ap) cdef char *va_charptr(va_list ap) Deleted: /lxml/trunk/src/lxml/etree.h ============================================================================== --- /lxml/trunk/src/lxml/etree.h Sat Jul 1 15:55:37 2006 +++ (empty file) @@ -1,133 +0,0 @@ -#ifndef HAS_ETREE_H -#define HAS_ETREE_H - -/* v_arg functions */ -#define va_int(ap) va_arg(ap, int) -#define va_charptr(ap) va_arg(ap, char *) - -/* Py_ssize_t support was added in Python 2.5 */ -#if PY_VERSION_HEX < 0x02050000 -#ifndef PY_SSIZE_T_MAX /* patched Pyrex? 
*/ - typedef int Py_ssize_t; - #define PY_SSIZE_T_MAX INT_MAX - #define PY_SSIZE_T_MIN INT_MIN - #define PyInt_FromSsize_t(z) PyInt_FromLong(z) - #define PyInt_AsSsize_t(o) PyInt_AsLong(o) -#endif -#endif - -/* XML_PARSE_COMPACT was added in libxml2 2.6.21 */ -/* -#include "libxml/xmlversion.h" -#if LIBXML_VERSION < 20621 -#define XML_PARSE_COMPACT 0 -#define HTML_PARSE_COMPACT 0 -#endif -*/ - -/* Redefinition of some Python builtins as C functions */ -#define isinstance(o,c) PyObject_IsInstance(o,c) -#define issubclass(c,csuper) PyObject_IsSubclass(c,csuper) -#define hasattr(o,a) PyObject_HasAttr(o,a) -#define callable(o) PyCallable_Check(o) -#define str(o) PyObject_Str(o) -#define iter(o) PyObject_GetIter(o) -#define _cstr(s) PyString_AS_STRING(s) - -#define _isString(obj) PyObject_TypeCheck(obj, &PyBaseString_Type) - -#define _isElement(c_node) \ - ((c_node)->type == XML_ELEMENT_NODE || \ - (c_node)->type == XML_COMMENT_NODE) - -/* Macro pair implementation of a depth first tree walker - * - * Calls the code block between the BEGIN and END macros for all elements - * below c_tree_top (exclusively), starting at c_node (inclusively iff - * 'inclusive' is 1). - * - * To traverse the node and all of its children and siblings in Pyrex, call - * cdef xmlNode* some_node - * BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 1) - * # do something with some_node - * END_FOR_EACH_ELEMENT_FROM(some_node) - * - * To traverse only the children and siblings of a node, call - * cdef xmlNode* some_node - * BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 0) - * # do something with some_node - * END_FOR_EACH_ELEMENT_FROM(some_node) - * - * To traverse only the children, do: - * cdef xmlNode* some_node - * some_node = parent_node.children - * BEGIN_FOR_EACH_ELEMENT_FROM(parent_node, some_node, 1) - * # do something with some_node - * END_FOR_EACH_ELEMENT_FROM(some_node) - * - * NOTE: 'some_node' MUST be a plain 'xmlNode*' ! 
- * - * NOTE: parent modification during the walk can divert the iterator, but - * should not segfault ! - */ - -#define _ADVANCE_TO_NEXT_ELEMENT(c_node) \ - while ((c_node != 0) && (!_isElement(c_node))) \ - c_node = c_node->next; - -#define _TRAVERSE_TO_NEXT_ELEMENT(c_stop_node, c_node) \ -{ \ - /* walk through children first */ \ - xmlNode* ___next = c_node->children; \ - _ADVANCE_TO_NEXT_ELEMENT(___next) \ - if ((___next == 0) && (c_node != c_stop_node)) { \ - /* try siblings */ \ - ___next = c_node->next; \ - _ADVANCE_TO_NEXT_ELEMENT(___next) \ - /* back off through parents */ \ - while (___next == 0) { \ - c_node = c_node->parent; \ - if (c_node == 0) \ - break; \ - if (c_node == c_stop_node) \ - break; \ - if (!_isElement(c_node)) \ - break; \ - /* we already traversed the parents -> siblings */ \ - ___next = c_node->next; \ - _ADVANCE_TO_NEXT_ELEMENT(___next) \ - } \ - } \ - c_node = ___next; \ -} - -#define BEGIN_FOR_EACH_ELEMENT_FROM(c_tree_top, c_node, inclusive) \ -{ \ - if (c_node != 0) { \ - const xmlNode* ___tree_top = (c_tree_top); \ - /* make sure we start at an element */ \ - if (!_isElement(c_node)) { \ - /* we skip the node, so 'inclusive' is irrelevant */ \ - if (c_node == ___tree_top) \ - c_node = 0; /* nothing to traverse */ \ - else { \ - c_node = c_node->next; \ - _ADVANCE_TO_NEXT_ELEMENT(c_node) \ - } \ - } else if (! 
(inclusive)) { \ - /* skip the first node */ \ - _TRAVERSE_TO_NEXT_ELEMENT(___tree_top, c_node) \ - } \ - \ - /* now run the user code on the elements we find */ \ - while (c_node != 0) { \ - /* here goes the code to be run for each element */ - -#define END_FOR_EACH_ELEMENT_FROM(c_node) \ - _TRAVERSE_TO_NEXT_ELEMENT(___tree_top, c_node) \ - } \ - } \ -} - - -#endif /*HAS_ETREE_H*/ Copied: lxml/trunk/src/lxml/etree_defs.h (from r29391, lxml/trunk/src/lxml/etree.h) ============================================================================== --- lxml/trunk/src/lxml/etree.h (original) +++ lxml/trunk/src/lxml/etree_defs.h Sat Jul 1 15:55:37 2006 @@ -1,5 +1,5 @@ -#ifndef HAS_ETREE_H -#define HAS_ETREE_H +#ifndef HAS_ETREE_DEFS_H +#define HAS_ETREE_DEFS_H /* v_arg functions */ #define va_int(ap) va_arg(ap, int) @@ -130,4 +130,4 @@ } -#endif /*HAS_ETREE_H*/ +#endif /* HAS_ETREE_DEFS_H */ Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Sat Jul 1 15:55:37 2006 @@ -72,7 +72,7 @@ cdef void PyEval_RestoreThread(PyThreadState* state) cdef PyObject* PyThreadState_GetDict() -cdef extern from "etree.h": # redefines some functions as macros +cdef extern from "etree_defs.h": # redefines some functions as macros cdef int _isString(object obj) cdef int isinstance(object instance, object classes) cdef int issubclass(object derived, object superclasses) Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Sat Jul 1 15:55:37 2006 @@ -248,7 +248,7 @@ cdef extern from "libxml/xmlstring.h": cdef char* xmlStrdup(char* cur) -cdef extern from "etree.h": +cdef extern from "etree_defs.h": cdef int _isElement(xmlNode* node) cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* tree_top, xmlNode* start_node, int 
inclusive) From scoder at codespeak.net Sat Jul 1 19:05:14 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 1 Jul 2006 19:05:14 +0200 (CEST) Subject: [Lxml-checkins] r29570 - lxml/trunk/src/lxml Message-ID: <20060701170514.3C45F10077@code0.codespeak.net> Author: scoder Date: Sat Jul 1 19:05:12 2006 New Revision: 29570 Modified: lxml/trunk/src/lxml/docloader.pxi lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/parser.pxi Log: C-ifications Modified: lxml/trunk/src/lxml/docloader.pxi ============================================================================== --- lxml/trunk/src/lxml/docloader.pxi (original) +++ lxml/trunk/src/lxml/docloader.pxi Sat Jul 1 19:05:12 2006 @@ -53,11 +53,7 @@ cdef object _resolvers cdef Resolver _default_resolver def __init__(self, Resolver default_resolver=None): - try: - self._resolvers = set() - except NameError: - from sets import Set - self._resolvers = Set() + self._resolvers = set() self._default_resolver = default_resolver def add(self, Resolver resolver not None): @@ -74,12 +70,15 @@ def remove(self, resolver): self._resolvers.discard(resolver) - def copy(self): + cdef _ResolverRegistry _copy(self): cdef _ResolverRegistry registry registry = _ResolverRegistry(self._default_resolver) registry._resolvers = self._resolvers.copy() return registry + def copy(self): + return self._copy() + def resolve(self, system_url, public_id, context): for resolver in self._resolvers: result = resolver.resolve(system_url, public_id, context) Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sat Jul 1 19:05:12 2006 @@ -12,6 +12,13 @@ cdef object False True = __builtin__.True False = __builtin__.False + +cdef object set +try: + set = __builtin__.set +except AttributeError: + from sets import Set as set + del __builtin__ cdef object _elementpath @@ -297,7 +304,7 @@ 
result._ns_counter = 0 if parser is None: parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() - result._parser = parser.copy() + result._parser = parser._copy() return result cdef class DocInfo: @@ -655,11 +662,9 @@ def __copy__(self): cdef xmlDoc* c_doc - cdef _Document doc cdef _Document new_doc - doc = self._doc - c_doc = _copyDocRoot(doc._c_doc, self._c_node) # recursive - new_doc = _documentFactory(c_doc, doc._parser.copy()) + c_doc = _copyDocRoot(self._doc._c_doc, self._c_node) # recursive + new_doc = _documentFactory(c_doc, self._doc._parser._copy()) return new_doc.getroot() def set(self, key, value): Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Sat Jul 1 19:05:12 2006 @@ -68,8 +68,9 @@ context = self._findThreadParserContext() if context._default_parser is None: if self._default_parser is None: - self._default_parser = __DEFAULT_XML_PARSER.copy() - context._default_parser = self._default_parser.copy() + self._default_parser = __DEFAULT_XML_PARSER._copy() + if context is not self: + context._default_parser = self._default_parser._copy() return context._default_parser cdef xmlDict* _getThreadDict(self, xmlDict* default): @@ -329,7 +330,7 @@ cdef class _BaseParser: cdef int _parse_options cdef _ErrorLog _error_log - cdef readonly object resolvers + cdef readonly _ResolverRegistry resolvers cdef _ResolverContext _context cdef LxmlParserType _parser_type cdef xmlParserCtxt* _parser_ctxt @@ -379,16 +380,20 @@ def __dummy(self): pass - def copy(self): + cdef _BaseParser _copy(self): "Create a new parser with the same configuration." 
cdef _BaseParser parser parser = self.__class__() parser._parse_options = self._parse_options - parser.resolvers = self.resolvers.copy() + parser.resolvers = self.resolvers._copy() parser._context = _ResolverContext(parser.resolvers) parser._parser_ctxt._private = parser._context return parser + def copy(self): + "Create a new parser with the same configuration." + return self._copy() + cdef xmlDoc* _parseUnicodeDoc(self, utext, char* c_filename) except NULL: """Parse unicode document, share dictionary if possible. """ From scoder at codespeak.net Sun Jul 2 14:17:14 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 2 Jul 2006 14:17:14 +0200 (CEST) Subject: [Lxml-checkins] r29583 - in lxml/trunk: . doc src/lxml Message-ID: <20060702121714.F3A1F1007E@code0.codespeak.net> Author: scoder Date: Sun Jul 2 14:17:11 2006 New Revision: 29583 Modified: lxml/trunk/CHANGES.txt lxml/trunk/doc/resolvers.txt lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/parser.pxi Log: do not copy parser on document creation, only keep a reference Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Jul 2 14:17:11 2006 @@ -2,6 +2,22 @@ lxml changelog ============== +current +======= + +Features added +-------------- + +Bugs fixed +---------- + +* Creating documents no longer copies the parser for later URL resolving. For + performance reasons, only a reference is kept. Resolver updates on the + parser will now be reflected by documents that were parsed before the + change. Although this should rarely become visible, it is a behavioral + change from 1.0. + + 1.1alpha (2006-06-27) ===================== Modified: lxml/trunk/doc/resolvers.txt ============================================================================== --- lxml/trunk/doc/resolvers.txt (original) +++ lxml/trunk/doc/resolvers.txt Sun Jul 2 14:17:11 2006 @@ -147,23 +147,10 @@ [...] 
XSLTApplyError: Cannot resolve URI hoi:test -This can only be solved by adding a ``hoi`` resolver to the parser. Note that -adding it after parsing the XSL document will not work as parsed documents -remember the state of the parser at the time of their creation:: +This can only be solved by adding a ``hoi`` resolver to the original parser:: >>> honk_parser.resolvers.add( PrefixResolver("hoi") ) >>> result = transform(honk_doc) - Traceback (most recent call last): - [...] - XSLTApplyError: Cannot resolve URI hoi:test - -You have to parse the document again with this updated parser to copy the -resolver state to the stylesheet:: - - >>> honk_doc = etree.parse(StringIO(xml_text), honk_parser) - >>> transform = etree.XSLT(honk_doc) - Resolving url honk:test as prefix honk ... done - >>> result = transform(honk_doc) Resolving url hoi:test as prefix honk ... failed Resolving url hoi:test as prefix hoi ... done >>> print str(result), Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Jul 2 14:17:11 2006 @@ -304,7 +304,7 @@ result._ns_counter = 0 if parser is None: parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() - result._parser = parser._copy() + result._parser = parser return result cdef class DocInfo: @@ -664,7 +664,7 @@ cdef xmlDoc* c_doc cdef _Document new_doc c_doc = _copyDocRoot(self._doc._c_doc, self._c_node) # recursive - new_doc = _documentFactory(c_doc, self._doc._parser._copy()) + new_doc = _documentFactory(c_doc, self._doc._parser) return new_doc.getroot() def set(self, key, value): Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Sun Jul 2 14:17:11 2006 @@ -364,9 +364,9 @@ self._error_log = _ErrorLog() self.resolvers = _ResolverRegistry() if 
self._parser_type == LXML_ITERPARSE_PARSER: - self._context = _IterparseResolverContext(self.resolvers) + self._context = _IterparseResolverContext(self.resolvers) else: - self._context = _ResolverContext(self.resolvers) + self._context = _ResolverContext(self.resolvers) pctxt._private = self._context def __dealloc__(self): From scoder at codespeak.net Sun Jul 2 16:20:52 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 2 Jul 2006 16:20:52 +0200 (CEST) Subject: [Lxml-checkins] r29589 - lxml/trunk/src/lxml Message-ID: <20060702142052.665DE1007E@code0.codespeak.net> Author: scoder Date: Sun Jul 2 16:20:50 2006 New Revision: 29589 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/nsclasses.pxi Log: made ElementBase (and related) classes public to make them accessible from external modules Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Jul 2 16:20:50 2006 @@ -163,9 +163,10 @@ raise type, value, traceback -cdef class _BaseParser # forward declaration +# forward declaration of _BaseParser, see parser.pxi +cdef class _BaseParser -cdef class _Document: +cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: """Internal base class to reference a libxml document. When instances of this class are garbage collected, the libxml @@ -340,7 +341,8 @@ else: return "" -cdef class _NodeBase: +cdef public class _NodeBase [ type LxmlNodeBaseType, + object LxmlNodeBase ]: """Base class to reference a document object and a libxml node. 
By pointing to a Document instance, a reference is kept to @@ -583,7 +585,8 @@ result._context_node = context_node return result -cdef class _Element(_NodeBase): +cdef public class _Element(_NodeBase) [ type LxmlElementType, + object LxmlElement ]: cdef object _tag cdef object _attrib def _init(self): Modified: lxml/trunk/src/lxml/nsclasses.pxi ============================================================================== --- lxml/trunk/src/lxml/nsclasses.pxi (original) +++ lxml/trunk/src/lxml/nsclasses.pxi Sun Jul 2 16:20:50 2006 @@ -6,7 +6,8 @@ class NamespaceRegistryError(LxmlRegistryError): pass -cdef class ElementBase(_Element): +cdef public class ElementBase(_Element) [ type LxmlElementBaseType, + object LxmlElementBase ]: """All custom Element classes must inherit from this one. Note that subclasses *must not* override __init__ or __new__ as it is From scoder at codespeak.net Sun Jul 2 16:21:16 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 2 Jul 2006 16:21:16 +0200 (CEST) Subject: [Lxml-checkins] r29590 - lxml/trunk/src/lxml Message-ID: <20060702142116.780391007E@code0.codespeak.net> Author: scoder Date: Sun Jul 2 16:21:15 2006 New Revision: 29590 Modified: lxml/trunk/src/lxml/etree.pyx Log: small C-ification Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Jul 2 16:21:15 2006 @@ -19,6 +19,9 @@ except AttributeError: from sets import Set as set +cdef object id +id = __builtin__.id + del __builtin__ cdef object _elementpath From scoder at codespeak.net Mon Jul 3 08:58:08 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 3 Jul 2006 08:58:08 +0200 (CEST) Subject: [Lxml-checkins] r29605 - lxml/trunk Message-ID: <20060703065808.0014410080@code0.codespeak.net> Author: scoder Date: Mon Jul 3 08:58:06 2006 New Revision: 29605 Modified: lxml/trunk/MANIFEST.in Log: 
MANIFEST.in: include etree_defs.h instead of etree.h (was renamed) Modified: lxml/trunk/MANIFEST.in ============================================================================== --- lxml/trunk/MANIFEST.in (original) +++ lxml/trunk/MANIFEST.in Mon Jul 3 08:58:06 2006 @@ -3,7 +3,7 @@ include update-error-constants.py include MANIFEST.in version.txt include CHANGES.txt CREDITS.txt INSTALL.txt LICENSES.txt README.txt TODO.txt -recursive-include src *.pyx *.pxd *.pxi *.py etree.c etree.h +recursive-include src *.pyx *.pxd *.pxi *.py etree.c etree_defs.h recursive-include src/lxml/tests *.rng *.xslt *.xml recursive-include doc *.txt *.html *.css *.xml *.mgp pubkey.asc recursive-include doc mkhtml.py rest2html.py From scoder at codespeak.net Fri Jul 14 17:57:47 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 14 Jul 2006 17:57:47 +0200 (CEST) Subject: [Lxml-checkins] r30046 - lxml/branch/capi Message-ID: <20060714155747.EA5F410092@code0.codespeak.net> Author: scoder Date: Fri Jul 14 17:57:46 2006 New Revision: 30046 Added: lxml/branch/capi/ - copied from r30045, lxml/trunk/ Log: New branch for a public C API and modules that use it From scoder at codespeak.net Fri Jul 14 18:00:44 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 14 Jul 2006 18:00:44 +0200 (CEST) Subject: [Lxml-checkins] r30047 - in lxml/branch/capi: . 
src/lxml src/lxml/tests Message-ID: <20060714160044.2720E10092@code0.codespeak.net> Author: scoder Date: Fri Jul 14 18:00:39 2006 New Revision: 30047 Added: lxml/branch/capi/Pyrex-0.9.4.1-public-api.patch lxml/branch/capi/src/lxml/public-api.pxi lxml/branch/capi/src/lxml/public.pxd Modified: lxml/branch/capi/CHANGES.txt lxml/branch/capi/MANIFEST.in lxml/branch/capi/src/lxml/apihelpers.pxi lxml/branch/capi/src/lxml/etree.pyx lxml/branch/capi/src/lxml/tests/test_etree.py Log: public C API patch, including Pyrex patch Modified: lxml/branch/capi/CHANGES.txt ============================================================================== --- lxml/branch/capi/CHANGES.txt (original) +++ lxml/branch/capi/CHANGES.txt Fri Jul 14 18:00:39 2006 @@ -8,6 +8,8 @@ Features added -------------- +* Public C-level API for independent extension modules + Bugs fixed ---------- Modified: lxml/branch/capi/MANIFEST.in ============================================================================== --- lxml/branch/capi/MANIFEST.in (original) +++ lxml/branch/capi/MANIFEST.in Fri Jul 14 18:00:39 2006 @@ -3,8 +3,8 @@ include update-error-constants.py include MANIFEST.in version.txt include CHANGES.txt CREDITS.txt INSTALL.txt LICENSES.txt README.txt TODO.txt -recursive-include src *.pyx *.pxd *.pxi *.py etree.c etree_defs.h +recursive-include src *.pyx *.pxd *.pxi *.py etree.c etree.h etree_defs.h recursive-include src/lxml/tests *.rng *.xslt *.xml recursive-include doc *.txt *.html *.css *.xml *.mgp pubkey.asc recursive-include doc mkhtml.py rest2html.py -exclude doc/pyrex.txt +exclude doc/pyrex.txt src/lxml/etree.pxi Added: lxml/branch/capi/Pyrex-0.9.4.1-public-api.patch ============================================================================== --- (empty file) +++ lxml/branch/capi/Pyrex-0.9.4.1-public-api.patch Fri Jul 14 18:00:39 2006 @@ -0,0 +1,239 @@ +Index: Pyrex/Compiler/Nodes.py +=================================================================== +--- Pyrex/Compiler/Nodes.py (Revision 
151) ++++ Pyrex/Compiler/Nodes.py (Arbeitskopie) +@@ -114,24 +114,28 @@ + self.generate_h_code(env, result) + + def generate_h_code(self, env, result): +- public_vars_and_funcs = [] ++ public_vars = [] ++ public_funcs = [] + public_extension_types = [] + for entry in env.var_entries: + if entry.visibility == 'public': +- public_vars_and_funcs.append(entry) ++ public_vars.append(entry) + for entry in env.cfunc_entries: + if entry.visibility == 'public': +- public_vars_and_funcs.append(entry) ++ public_funcs.append(entry) + for entry in env.c_class_entries: + if entry.visibility == 'public': + public_extension_types.append(entry) +- if public_vars_and_funcs or public_extension_types: ++ if public_vars or public_funcs or public_extension_types: + result.h_file = replace_suffix(result.c_file, ".h") + result.i_file = replace_suffix(result.c_file, ".pxi") + h_code = Code.CCodeWriter(result.h_file) + i_code = Code.PyrexCodeWriter(result.i_file) ++ header_barrier = "__HAS_PYX_" + env.module_name ++ h_code.putln("#ifndef %s" % header_barrier) ++ h_code.putln("#define %s" % header_barrier) + self.generate_extern_c_macro_definition(h_code) +- for entry in public_vars_and_funcs: ++ for entry in public_vars: + h_code.putln("%s %s;" % ( + Naming.extern_c_macro, + entry.type.declaration_code( +@@ -141,7 +145,23 @@ + for entry in public_extension_types: + self.generate_cclass_header_code(entry.type, h_code) + self.generate_cclass_include_code(entry.type, i_code) ++ if public_funcs: ++ for entry in public_funcs: ++ h_code.putln( ++ 'static %s;' % ++ entry.type.declaration_code("(*%s)" % entry.cname)) ++ i_code.putln("cdef extern %s" % ++ entry.type.declaration_code(entry.cname, pyrex = 1)) ++ h_code.putln( ++ "static struct {char *s; void **p;} _%s_API[] = {" % ++ env.module_name) ++ for entry in public_funcs: ++ h_code.putln('{"%s", &%s},' % (entry.cname, entry.cname)) ++ h_code.putln("{0, 0}") ++ h_code.putln("};") ++ self.generate_c_api_import_code(env, h_code) + 
h_code.putln("PyMODINIT_FUNC init%s(void);" % env.module_name) ++ h_code.putln("#endif /* %s */" % header_barrier) + + def generate_cclass_header_code(self, type, h_code): + #h_code.putln("extern DL_IMPORT(PyTypeObject) %s;" % type.typeobj_cname) +@@ -180,6 +200,7 @@ + self.body.generate_function_definitions(env, code) + self.generate_interned_name_table(env, code) + self.generate_py_string_table(env, code) ++ self.generate_c_api_table(env, code) + self.generate_typeobj_definitions(env, code) + self.generate_method_table(env, code) + self.generate_filename_init_prototype(code) +@@ -437,10 +458,12 @@ + dll_linkage = None + header = entry.type.declaration_code(entry.cname, + dll_linkage = dll_linkage) +- if entry.visibility <> 'private': ++ if entry.visibility == 'private': ++ storage_class = "static " ++ elif entry.visibility == 'extern': + storage_class = "%s " % Naming.extern_c_macro + else: +- storage_class = "static " ++ storage_class = "" + code.putln("%s%s; /*proto*/" % ( + storage_class, + header)) +@@ -1090,6 +1113,63 @@ + code.putln( + "};") + ++ def generate_c_api_table(self, env, code): ++ public_funcs = [] ++ for entry in env.cfunc_entries: ++ if entry.visibility == 'public': ++ public_funcs.append(entry.cname) ++ if public_funcs: ++ env.use_utility_code(c_api_import_code); ++ code.putln( ++ "static __Pyx_CApiTabEntry %s[] = {" % ++ Naming.c_api_tab_cname) ++ public_funcs.sort() ++ for entry_cname in public_funcs: ++ code.putln('{"%s", %s},' % (entry_cname, entry_cname)) ++ code.putln( ++ "{0, 0}") ++ code.putln( ++ "};") ++ ++ def generate_c_api_import_code(self, env, h_code): ++ # this is written to the header file! ++ h_code.put(""" ++ /* Return -1 and set exception on error, 0 on success. 
*/ ++ static int ++ import_%(name)s(PyObject *module) ++ { ++ if (module != NULL) { ++ PyObject *c_api_init = PyObject_GetAttrString( ++ module, "_import_c_api"); ++ if (!c_api_init) ++ return -1; ++ if (PyCObject_Check(c_api_init)) ++ { ++ int (*init)(struct {const char *s; const void **p;}*) = ++ PyCObject_AsVoidPtr(c_api_init); ++ if (!init) { ++ PyErr_SetString(PyExc_RuntimeError, ++ "module returns NULL pointer for C API call"); ++ return -1; ++ } ++ init(_%(name)s_API); ++ } ++ Py_DECREF(c_api_init); ++ } ++ return 0; ++ } ++ """.replace('\n ', '\n') % {'name' : env.module_name}) ++ ++ def generate_c_api_init_code(self, env, code): ++ public_funcs = [] ++ for entry in env.cfunc_entries: ++ if entry.visibility == 'public': ++ public_funcs.append(entry) ++ if public_funcs: ++ code.putln('if (__Pyx_InitCApi(%s) < 0) %s' % ( ++ Naming.module_cname, ++ code.error_goto(self.pos))) ++ + def generate_filename_init_prototype(self, code): + code.putln(""); + code.putln("static void %s(void); /*proto*/" % Naming.fileinit_cname) +@@ -1109,6 +1189,8 @@ + self.generate_intern_code(env, code) + #code.putln("/*--- String init code ---*/") + self.generate_string_init_code(env, code) ++ #code.putln("/*--- External C API setup code ---*/") ++ self.generate_c_api_init_code(env, code) + #code.putln("/*--- Global init code ---*/") + self.generate_global_init_code(env, code) + #code.putln("/*--- Type import code ---*/") +@@ -1862,10 +1944,12 @@ + dll_linkage = None + header = self.return_type.declaration_code(entity, + dll_linkage = dll_linkage) +- if self.visibility <> 'private': ++ if self.visibility == 'private': ++ storage_class = "static " ++ elif self.visibility == 'extern': + storage_class = "%s " % Naming.extern_c_macro + else: +- storage_class = "static " ++ storage_class = "" + code.putln("%s%s {" % ( + storage_class, + header)) +@@ -3550,6 +3634,7 @@ + + utility_function_predeclarations = \ + """ ++typedef struct {const char *s; const void **p;} __Pyx_CApiTabEntry; 
/*proto*/ + typedef struct {PyObject **p; char *s;} __Pyx_InternTabEntry; /*proto*/ + typedef struct {PyObject **p; char *s; long n;} __Pyx_StringTabEntry; /*proto*/ + static PyObject *__Pyx_UnpackItem(PyObject *, Py_ssize_t); /*proto*/ +@@ -3572,6 +3657,8 @@ + static PyObject *__Pyx_CreateClass(PyObject *bases, PyObject *dict, PyObject *name, char *modname); /*proto*/ + static int __Pyx_InternStrings(__Pyx_InternTabEntry *t); /*proto*/ + static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/ ++static int __Pyx_InitCApi(PyObject *module); /*proto*/ ++static int __Pyx_ImportModuleCApi(__Pyx_CApiTabEntry *t); /*proto*/ + """ + + get_name_predeclaration = \ +@@ -4056,3 +4143,37 @@ + """; + + #------------------------------------------------------------------------------------ ++ ++c_api_import_code = \ ++""" ++static int __Pyx_ImportModuleCApi(__Pyx_CApiTabEntry *t) { ++ __Pyx_CApiTabEntry *api_t; ++ while (t->s) { ++ if (*t->s == '\0') ++ continue; /* shortcut for erased string entries */ ++ api_t = %(API_TAB)s; ++ while ((api_t->s) && (strcmp(api_t->s, t->s) < 0)) ++ ++api_t; ++ if ((!api_t->p) || (strcmp(api_t->s, t->s) != 0)) { ++ PyErr_Format(PyExc_ValueError, ++ "Unknown function name in C API: %%s", t->s); ++ return -1; ++ } ++ *t->p = api_t->p; ++ ++t; ++ } ++ return 0; ++} ++ ++static int __Pyx_InitCApi(PyObject *module) { ++ int result; ++ PyObject* cobj = PyCObject_FromVoidPtr(&__Pyx_ImportModuleCApi, NULL); ++ if (!cobj) ++ return -1; ++ ++ result = PyObject_SetAttrString(module, "_import_c_api", cobj); ++ Py_DECREF(cobj); ++ return result; ++} ++""" % {'API_TAB' : Naming.c_api_tab_cname} ++#------------------------------------------------------------------------------------ +Index: Pyrex/Compiler/Naming.py +=================================================================== +--- Pyrex/Compiler/Naming.py (Revision 151) ++++ Pyrex/Compiler/Naming.py (Arbeitskopie) +@@ -50,5 +50,6 @@ + self_cname = pyrex_prefix + "self" + stringtab_cname = 
pyrex_prefix + "string_tab" + vtabslot_cname = pyrex_prefix + "vtab" ++c_api_tab_cname = pyrex_prefix + "c_api_tab" + + extern_c_macro = pyrex_prefix.upper() + "EXTERN_C" Modified: lxml/branch/capi/src/lxml/apihelpers.pxi ============================================================================== --- lxml/branch/capi/src/lxml/apihelpers.pxi (original) +++ lxml/branch/capi/src/lxml/apihelpers.pxi Fri Jul 14 18:00:39 2006 @@ -1,9 +1,9 @@ -# Private helper functions for API functions +# Private/public helper functions for API functions cdef void displayNode(xmlNode* c_node, indent): # to help with debugging cdef xmlNode* c_child - print indent * ' ', c_node + print indent * ' ', c_node c_child = c_node.children while c_child is not NULL: displayNode(c_child, indent + 1) Modified: lxml/branch/capi/src/lxml/etree.pyx ============================================================================== --- lxml/branch/capi/src/lxml/etree.pyx (original) +++ lxml/branch/capi/src/lxml/etree.pyx Fri Jul 14 18:00:39 2006 @@ -361,7 +361,8 @@ unregisterProxy(self) attemptDeallocation(self._c_node) -cdef class _ElementTree: +cdef public class _ElementTree [ type LxmlElementTreeType, + object LxmlElementTree ]: cdef _Document _doc cdef _NodeBase _context_node @@ -573,8 +574,7 @@ self._assertHasRoot() _tofilelikeC14N(file, self._context_node) -cdef _ElementTree _elementTreeFactory(_Document doc, - _NodeBase context_node): +cdef _ElementTree _elementTreeFactory(_Document doc, _NodeBase context_node): return _newElementTree(doc, context_node, _ElementTree) cdef _ElementTree _newElementTree(_Document doc, _NodeBase context_node, @@ -1778,3 +1778,8 @@ include "relaxng.pxi" # RelaxNG include "xmlschema.pxi" # XMLSchema + +################################################################################ +# Public C API + +include "public-api.pxi" Added: lxml/branch/capi/src/lxml/public-api.pxi ============================================================================== --- (empty file) +++ 
lxml/branch/capi/src/lxml/public-api.pxi Fri Jul 14 18:00:39 2006 @@ -0,0 +1,62 @@ +# Public C API for lxml.etree + +cdef public _ElementTree elementTreeFactory(_NodeBase context_node): + return newElementTree(context_node, _ElementTree) + +cdef public _ElementTree newElementTree(_NodeBase context_node, + object subclass): + if context_node is NULL or context_node is None: + raise TypeError + + return _newElementTree(context_node._doc, context_node, subclass) + +cdef public _Element elementFactory(_Document doc, xmlNode* c_node): + if c_node is NULL or doc is None: + raise TypeError + return _elementFactory(doc, c_node) + +cdef public int tagMatches(xmlNode* c_node, char* c_href, char* c_name): + if c_node is NULL: + return -1 + return _tagMatches(c_node, c_href, c_name) + +cdef public _Document documentOrRaise(object input): + return _documentOrRaise(input) + +cdef public _NodeBase rootNodeOrRaise(object input): + return _rootNodeOrRaise(input) + +cdef public object attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node): + return _attributeValue(c_element, c_attrib_node) + +cdef public object getAttributeValue(_NodeBase element, key, default): + return _getAttributeValue(element, key, default) + +cdef public xmlNode* findChild(xmlNode* c_node, Py_ssize_t index): + return _findChild(c_node, index) + +cdef public xmlNode* findChildForwards(xmlNode* c_node, Py_ssize_t index): + return _findChildForwards(c_node, index) + +cdef public xmlNode* findChildBackwards(xmlNode* c_node, Py_ssize_t index): + return _findChildBackwards(c_node, index) + +cdef public xmlNode* nextElement(xmlNode* c_node): + return _nextElement(c_node) + +cdef public xmlNode* previousElement(xmlNode* c_node): + return _previousElement(c_node) + +cdef public object pyunicode(char* s): + if s is NULL: + raise TypeError + return funicode(s) + +cdef public object utf8(object s): + return _utf8(s) + +cdef public object getNsTag(object tag): + return _getNsTag(tag) + +cdef public object 
namespacedName(xmlNode* c_node): + return _namespacedName(c_node) Added: lxml/branch/capi/src/lxml/public.pxd ============================================================================== --- (empty file) +++ lxml/branch/capi/src/lxml/public.pxd Fri Jul 14 18:00:39 2006 @@ -0,0 +1,80 @@ +# public Pyrex/C interface to lxml.etree + +cimport tree +cimport python + +cdef extern from "etree.h": + ########################################################################## + # public classes + + cdef class lxml.etree._Document [ object LxmlDocument ]: + cdef tree.xmlDoc* _c_doc + + cdef class lxml.etree._NodeBase [ object LxmlNodeBase ]: + cdef _Document _doc + cdef tree.xmlNode* _c_node + + cdef class lxml.etree._Element(_NodeBase) [ object LxmlElement ]: + pass + + cdef class lxml.etree.ElementBase(_Element) [ object LxmlElementBase ]: + pass + + cdef class lxml.etree._ElementTree [ object LxmlElementTree ]: + cdef _Document _doc + cdef _Element _element + + # First function to call! + cdef int import_etree(etree_module) except -1 + + # create an Element for a C-node in the Document + cdef _Element elementFactory(_Document doc, tree.xmlNode* c_node) + + # create an ElementTree for an Element + cdef _ElementTree elementTreeFactory(_NodeBase context_node) + + # create an ElementTree subclass for an Element + cdef _ElementTree newElementTree(_NodeBase context_node, + object subclass) + + # check if a C node matches a tag name and namespace + # (NULL allowed for both) + cdef int tagMatches(tree.xmlNode* c_node, char* c_href, char* c_name) + + # convert a UTF-8 char* to a Python string or unicode string + cdef object pyunicode(char* s) + + # convert the string to UTF-8 using the normal lxml.etree semantics + cdef object utf8(object s) + + # split a tag into a (URI, name) tuple + cdef object getNsTag(object tag) + + # get the "{ns}tag" string for a C node + cdef object namespacedName(tree.xmlNode* c_node) + + # find the Document of an Element, ElementTree or Document 
(itself!) + cdef _Document documentOrRaise(object input) + + # find the root Element of an Element (itself!), ElementTree or Document + cdef _NodeBase rootNodeOrRaise(object input) + + # return an attribute value for a C attribute on a C element node + cdef object attributeValue(tree.xmlNode* c_element, + tree.xmlAttr* c_attrib_node) + + # find child element number 'index' (supports negative indexes) + cdef tree.xmlNode* findChild(tree.xmlNode* c_node, + python.Py_ssize_t index) + + # find child element number 'index' starting at first one + cdef tree.xmlNode* findChildForwards(tree.xmlNode* c_node, + python.Py_ssize_t index) + + # find child element number 'index' starting at last one + cdef tree.xmlNode* findChildBackwards(tree.xmlNode* c_node, + python.Py_ssize_t index) + + # return next/previous sibling element of the node + cdef tree.xmlNode* nextElement(tree.xmlNode* c_node) + cdef tree.xmlNode* previousElement(tree.xmlNode* c_node) Modified: lxml/branch/capi/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_etree.py (original) +++ lxml/branch/capi/src/lxml/tests/test_etree.py Fri Jul 14 18:00:39 2006 @@ -28,6 +28,9 @@ self.assert_(etree.__version__.startswith( str(etree.LXML_VERSION[0]))) + def test_c_api(self): + self.assert_(hasattr(self.etree, '_import_c_api')) + def test_element_names(self): Element = self.etree.Element From scoder at codespeak.net Fri Jul 14 20:53:31 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 14 Jul 2006 20:53:31 +0200 (CEST) Subject: [Lxml-checkins] r30053 - lxml/pyrex/Pyrex/Compiler Message-ID: <20060714185331.BE73910092@code0.codespeak.net> Author: scoder Date: Fri Jul 14 20:53:29 2006 New Revision: 30053 Modified: lxml/pyrex/Pyrex/Compiler/Symtab.py Log: fix: mistyped attribute name Modified: lxml/pyrex/Pyrex/Compiler/Symtab.py ============================================================================== 
--- lxml/pyrex/Pyrex/Compiler/Symtab.py (original) +++ lxml/pyrex/Pyrex/Compiler/Symtab.py Fri Jul 14 20:53:29 2006 @@ -686,7 +686,7 @@ entry.pos = pos # the .pyx file and not the .pxd file if entry.visibility <> visibility: error(pos, "Declaration of '%s' as '%s' conflicts with previous " - "declaration as '%s'" % (class_name, visibility, entry.visibility)) + "declaration as '%s'" % (name, visibility, entry.visibility)) if objstruct_cname: if type.objstruct_cname and type.objstruct_cname <> objstruct_cname: error(pos, "Object struct name differs from previous declaration") From scoder at codespeak.net Fri Jul 14 20:57:18 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 14 Jul 2006 20:57:18 +0200 (CEST) Subject: [Lxml-checkins] r30054 - in lxml/pyrex/Pyrex: Compiler PC Message-ID: <20060714185718.C8B0310092@code0.codespeak.net> Author: scoder Date: Fri Jul 14 20:57:15 2006 New Revision: 30054 Added: lxml/pyrex/Pyrex/PC/ Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py lxml/pyrex/Pyrex/Compiler/Lexicon.pickle lxml/pyrex/Pyrex/Compiler/Naming.py lxml/pyrex/Pyrex/Compiler/Nodes.py lxml/pyrex/Pyrex/Compiler/Parsing.py Log: Python 2.5 fixes, public C API support, "#define PYREX_WITHOUT_ASSERTIONS" support Modified: lxml/pyrex/Pyrex/Compiler/ExprNodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/ExprNodes.py (original) +++ lxml/pyrex/Pyrex/Compiler/ExprNodes.py Fri Jul 14 20:57:15 2006 @@ -1157,7 +1157,7 @@ if self.stop: return self.stop.result_code else: - return "0x7fffffff" + return "PY_SSIZE_T_MAX" def calculate_result_code(self): # self.result_code is not used, but this method must exist Modified: lxml/pyrex/Pyrex/Compiler/Lexicon.pickle ============================================================================== Binary files. No diff available. 
Modified: lxml/pyrex/Pyrex/Compiler/Naming.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Naming.py (original) +++ lxml/pyrex/Pyrex/Compiler/Naming.py Fri Jul 14 20:57:15 2006 @@ -50,5 +50,6 @@ self_cname = pyrex_prefix + "self" stringtab_cname = pyrex_prefix + "string_tab" vtabslot_cname = pyrex_prefix + "vtab" +c_api_tab_cname = pyrex_prefix + "c_api_tab" extern_c_macro = pyrex_prefix.upper() + "EXTERN_C" Modified: lxml/pyrex/Pyrex/Compiler/Nodes.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Nodes.py (original) +++ lxml/pyrex/Pyrex/Compiler/Nodes.py Fri Jul 14 20:57:15 2006 @@ -114,24 +114,28 @@ self.generate_h_code(env, result) def generate_h_code(self, env, result): - public_vars_and_funcs = [] + public_vars = [] + public_funcs = [] public_extension_types = [] for entry in env.var_entries: if entry.visibility == 'public': - public_vars_and_funcs.append(entry) + public_vars.append(entry) for entry in env.cfunc_entries: if entry.visibility == 'public': - public_vars_and_funcs.append(entry) + public_funcs.append(entry) for entry in env.c_class_entries: if entry.visibility == 'public': public_extension_types.append(entry) - if public_vars_and_funcs or public_extension_types: + if public_vars or public_funcs or public_extension_types: result.h_file = replace_suffix(result.c_file, ".h") result.i_file = replace_suffix(result.c_file, ".pxi") h_code = Code.CCodeWriter(result.h_file) i_code = Code.PyrexCodeWriter(result.i_file) + header_barrier = "__HAS_PYX_" + env.module_name + h_code.putln("#ifndef %s" % header_barrier) + h_code.putln("#define %s" % header_barrier) self.generate_extern_c_macro_definition(h_code) - for entry in public_vars_and_funcs: + for entry in public_vars: h_code.putln("%s %s;" % ( Naming.extern_c_macro, entry.type.declaration_code( @@ -141,7 +145,23 @@ for entry in public_extension_types: 
self.generate_cclass_header_code(entry.type, h_code) self.generate_cclass_include_code(entry.type, i_code) + if public_funcs: + for entry in public_funcs: + h_code.putln( + 'static %s;' % + entry.type.declaration_code("(*%s)" % entry.cname)) + i_code.putln("cdef extern %s" % + entry.type.declaration_code(entry.cname, pyrex = 1)) + h_code.putln( + "static struct {char *s; void **p;} _%s_API[] = {" % + env.module_name) + for entry in public_funcs: + h_code.putln('{"%s", &%s},' % (entry.cname, entry.cname)) + h_code.putln("{0, 0}") + h_code.putln("};") + self.generate_c_api_import_code(env, h_code) h_code.putln("PyMODINIT_FUNC init%s(void);" % env.module_name) + h_code.putln("#endif /* %s */" % header_barrier) def generate_cclass_header_code(self, type, h_code): #h_code.putln("extern DL_IMPORT(PyTypeObject) %s;" % type.typeobj_cname) @@ -180,6 +200,7 @@ self.body.generate_function_definitions(env, code) self.generate_interned_name_table(env, code) self.generate_py_string_table(env, code) + self.generate_c_api_table(env, code) self.generate_typeobj_definitions(env, code) self.generate_method_table(env, code) self.generate_filename_init_prototype(code) @@ -437,10 +458,12 @@ dll_linkage = None header = entry.type.declaration_code(entry.cname, dll_linkage = dll_linkage) - if entry.visibility <> 'private': + if entry.visibility == 'private': + storage_class = "static " + elif entry.visibility == 'extern': storage_class = "%s " % Naming.extern_c_macro else: - storage_class = "static " + storage_class = "" code.putln("%s%s; /*proto*/" % ( storage_class, header)) @@ -1090,6 +1113,63 @@ code.putln( "};") + def generate_c_api_table(self, env, code): + public_funcs = [] + for entry in env.cfunc_entries: + if entry.visibility == 'public': + public_funcs.append(entry.cname) + if public_funcs: + env.use_utility_code(c_api_import_code); + code.putln( + "static __Pyx_CApiTabEntry %s[] = {" % + Naming.c_api_tab_cname) + public_funcs.sort() + for entry_cname in public_funcs: + 
code.putln('{"%s", %s},' % (entry_cname, entry_cname)) + code.putln( + "{0, 0}") + code.putln( + "};") + + def generate_c_api_import_code(self, env, h_code): + # this is written to the header file! + h_code.put(""" + /* Return -1 and set exception on error, 0 on success. */ + static int + import_%(name)s(PyObject *module) + { + if (module != NULL) { + PyObject *c_api_init = PyObject_GetAttrString( + module, "_import_c_api"); + if (!c_api_init) + return -1; + if (PyCObject_Check(c_api_init)) + { + int (*init)(struct {const char *s; const void **p;}*) = + PyCObject_AsVoidPtr(c_api_init); + if (!init) { + PyErr_SetString(PyExc_RuntimeError, + "module returns NULL pointer for C API call"); + return -1; + } + init(_%(name)s_API); + } + Py_DECREF(c_api_init); + } + return 0; + } + """.replace('\n ', '\n') % {'name' : env.module_name}) + + def generate_c_api_init_code(self, env, code): + public_funcs = [] + for entry in env.cfunc_entries: + if entry.visibility == 'public': + public_funcs.append(entry) + if public_funcs: + code.putln('if (__Pyx_InitCApi(%s) < 0) %s' % ( + Naming.module_cname, + code.error_goto(self.pos))) + def generate_filename_init_prototype(self, code): code.putln(""); code.putln("static void %s(void); /*proto*/" % Naming.fileinit_cname) @@ -1109,6 +1189,8 @@ self.generate_intern_code(env, code) #code.putln("/*--- String init code ---*/") self.generate_string_init_code(env, code) + #code.putln("/*--- External C API setup code ---*/") + self.generate_c_api_init_code(env, code) #code.putln("/*--- Global init code ---*/") self.generate_global_init_code(env, code) #code.putln("/*--- Type import code ---*/") @@ -1862,10 +1944,12 @@ dll_linkage = None header = self.return_type.declaration_code(entity, dll_linkage = dll_linkage) - if self.visibility <> 'private': + if self.visibility == 'private': + storage_class = "static " + elif self.visibility == 'extern': storage_class = "%s " % Naming.extern_c_macro else: - storage_class = "static " + storage_class = "" 
code.putln("%s%s {" % ( storage_class, header)) @@ -2896,6 +2980,7 @@ #env.recycle_pending_temps() # TEMPORARY def generate_execution_code(self, code): + code.putln("#ifndef PYREX_WITHOUT_ASSERTIONS") self.cond.generate_evaluation_code(code) if self.value: self.value.generate_evaluation_code(code) @@ -2916,6 +3001,7 @@ self.cond.generate_disposal_code(code) if self.value: self.value.generate_disposal_code(code) + code.putln("#endif") class IfStatNode(StatNode): @@ -3548,6 +3634,7 @@ utility_function_predeclarations = \ """ +typedef struct {const char *s; const void **p;} __Pyx_CApiTabEntry; /*proto*/ typedef struct {PyObject **p; char *s;} __Pyx_InternTabEntry; /*proto*/ typedef struct {PyObject **p; char *s; long n;} __Pyx_StringTabEntry; /*proto*/ static PyObject *__Pyx_UnpackItem(PyObject *, Py_ssize_t); /*proto*/ @@ -3570,6 +3657,8 @@ static PyObject *__Pyx_CreateClass(PyObject *bases, PyObject *dict, PyObject *name, char *modname); /*proto*/ static int __Pyx_InternStrings(__Pyx_InternTabEntry *t); /*proto*/ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/ +static int __Pyx_InitCApi(PyObject *module); /*proto*/ +static int __Pyx_ImportModuleCApi(__Pyx_CApiTabEntry *t); /*proto*/ """ get_name_predeclaration = \ @@ -3656,10 +3745,16 @@ Py_INCREF(type); Py_DECREF(tmp); } - if (PyString_Check(type)) - ; + if (PyString_CheckExact(type)) { + /* Raising builtin string is deprecated but still allowed -- + * do nothing. Raising an instance of a new-style str + * subclass is right out. */ + if (PyErr_Warn(PyExc_DeprecationWarning, + "raising a string exception is deprecated")) + goto raise_error; + } else if (PyType_Check(type) || PyClass_Check(type)) - ; /*PyErr_NormalizeException(&type, &value, &tb);*/ + PyErr_NormalizeException(&type, &value, &tb); else if (PyInstance_Check(type)) { /* Raising an instance. The value should be a dummy. 
*/ + if (value != Py_None) { @@ -3675,12 +3770,29 @@ Py_INCREF(type); } } + else if (PyType_IsSubtype(type->ob_type, (PyTypeObject*)PyExc_Exception)) { + /* Raising a new-style object (in Py2.5). + The value should be a dummy. */ + if (value != Py_None) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto raise_error; + } + else { + /* Normalize to raise <class>, <instance> */ + Py_DECREF(value); + value = type; + type = type->ob_type; + Py_INCREF(type); + } + } else { /* Not something you can raise. You get an exception anyway, just not what you specified :-) */ PyErr_Format(PyExc_TypeError, - "exceptions must be strings, classes, or " - "instances, not %s", type->ob_type->tp_name); + "exceptions must be classes, instances, or " + "strings (deprecated), not %s", + type->ob_type->tp_name); goto raise_error; } PyErr_Restore(type, value, tb); @@ -4031,3 +4143,37 @@ """; #------------------------------------------------------------------------------------ + +c_api_import_code = \ +""" +static int __Pyx_ImportModuleCApi(__Pyx_CApiTabEntry *t) { + __Pyx_CApiTabEntry *api_t; + while (t->s) { + if (*t->s == '\0') + continue; /* shortcut for erased string entries */ + api_t = %(API_TAB)s; + while ((api_t->s) && (strcmp(api_t->s, t->s) < 0)) + ++api_t; + if ((!api_t->p) || (strcmp(api_t->s, t->s) != 0)) { + PyErr_Format(PyExc_ValueError, + "Unknown function name in C API: %%s", t->s); + return -1; + } + *t->p = api_t->p; + ++t; + } + return 0; +} + +static int __Pyx_InitCApi(PyObject *module) { + int result; + PyObject* cobj = PyCObject_FromVoidPtr(&__Pyx_ImportModuleCApi, NULL); + if (!cobj) + return -1; + + result = PyObject_SetAttrString(module, "_import_c_api", cobj); + Py_DECREF(cobj); + return result; +} +""" % {'API_TAB' : Naming.c_api_tab_cname} +#------------------------------------------------------------------------------------ Modified: lxml/pyrex/Pyrex/Compiler/Parsing.py
============================================================================== --- lxml/pyrex/Pyrex/Compiler/Parsing.py (original) +++ lxml/pyrex/Pyrex/Compiler/Parsing.py Fri Jul 14 20:57:15 2006 @@ -650,6 +650,10 @@ expr_list.append(p_expr(s)) if len(expr_list) == 1: expr = expr_list[0] + #return Nodes.ExprStatNode(expr.pos, expr = expr) + if isinstance(expr, ExprNodes.StringNode): + return Nodes.PassStatNode(expr.pos) + else: return Nodes.ExprStatNode(expr.pos, expr = expr) else: expr_list_list = [] From scoder at codespeak.net Fri Jul 14 21:02:47 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 14 Jul 2006 21:02:47 +0200 (CEST) Subject: [Lxml-checkins] r30055 - lxml/pyrex/Pyrex/Compiler Message-ID: <20060714190247.2343F10092@code0.codespeak.net> Author: scoder Date: Fri Jul 14 21:02:46 2006 New Revision: 30055 Modified: lxml/pyrex/Pyrex/Compiler/Parsing.py Log: indentation fix for last checkin Modified: lxml/pyrex/Pyrex/Compiler/Parsing.py ============================================================================== --- lxml/pyrex/Pyrex/Compiler/Parsing.py (original) +++ lxml/pyrex/Pyrex/Compiler/Parsing.py Fri Jul 14 21:02:46 2006 @@ -654,7 +654,7 @@ if isinstance(expr, ExprNodes.StringNode): return Nodes.PassStatNode(expr.pos) else: - return Nodes.ExprStatNode(expr.pos, expr = expr) + return Nodes.ExprStatNode(expr.pos, expr = expr) else: expr_list_list = [] flatten_parallel_assignments(expr_list, expr_list_list) From scoder at codespeak.net Sat Jul 15 07:43:28 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 15 Jul 2006 07:43:28 +0200 (CEST) Subject: [Lxml-checkins] r30056 - lxml/branch/capi/src/lxml Message-ID: <20060715054328.DFDD310092@code0.codespeak.net> Author: scoder Date: Sat Jul 15 07:43:26 2006 New Revision: 30056 Modified: lxml/branch/capi/src/lxml/apihelpers.pxi Log: raise ValueError on empty tag name in getNsTag - also if NS is empty Modified: lxml/branch/capi/src/lxml/apihelpers.pxi 
============================================================================== --- lxml/branch/capi/src/lxml/apihelpers.pxi (original) +++ lxml/branch/capi/src/lxml/apihelpers.pxi Sat Jul 15 07:43:26 2006 @@ -390,6 +390,8 @@ if nslen > 0: ns = python.PyString_FromStringAndSize(c_tag, nslen) tag = python.PyString_FromStringAndSize(c_ns_end+1, taglen) + elif python.PyString_GET_SIZE(tag) == 0: + raise ValueError, "Empty tag name" return ns, tag cdef object _namespacedName(xmlNode* c_node): From scoder at codespeak.net Sat Jul 15 08:24:41 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 15 Jul 2006 08:24:41 +0200 (CEST) Subject: [Lxml-checkins] r30057 - lxml/branch/capi/src/lxml Message-ID: <20060715062441.D063A10092@code0.codespeak.net> Author: scoder Date: Sat Jul 15 08:24:38 2006 New Revision: 30057 Modified: lxml/branch/capi/src/lxml/apihelpers.pxi lxml/branch/capi/src/lxml/etree.pyx lxml/branch/capi/src/lxml/etree_defs.h lxml/branch/capi/src/lxml/relaxng.pxi lxml/branch/capi/src/lxml/tree.pxd lxml/branch/capi/src/lxml/xmlschema.pxi Log: new _getNs C-macro to find ns URI of a c_node (or NULL) Modified: lxml/branch/capi/src/lxml/apihelpers.pxi ============================================================================== --- lxml/branch/capi/src/lxml/apihelpers.pxi (original) +++ lxml/branch/capi/src/lxml/apihelpers.pxi Sat Jul 15 08:24:38 2006 @@ -82,11 +82,12 @@ cdef object _attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node): cdef char* value - if c_attrib_node.ns is NULL or c_attrib_node.ns.href is NULL: + cdef char* href + href = _getNs(c_attrib_node) + if href is NULL: value = tree.xmlGetNoNsProp(c_element, c_attrib_node.name) else: - value = tree.xmlGetNsProp(c_element, c_attrib_node.name, - c_attrib_node.ns.href) + value = tree.xmlGetNsProp(c_element, c_attrib_node.name, href) result = funicode(value) tree.xmlFree(value) return result @@ -264,23 +265,28 @@ return c_node cdef int _tagMatches(xmlNode* c_node, char* c_href, char* 
c_name): + cdef char* c_node_href if c_name is NULL: if c_href is NULL: # always match return 1 - elif c_node.ns is NULL or c_node.ns.href is NULL: - return 0 else: - return cstd.strcmp(c_node.ns.href, c_href) == 0 + c_node_href = _getNs(c_node) + if c_node_href is NULL: + return 0 + else: + return cstd.strcmp(c_node_href, c_href) == 0 elif c_href is NULL: - if c_node.ns is not NULL and c_node.ns.href is not NULL: + if _getNs(c_node) is not NULL: return 0 return cstd.strcmp(c_node.name, c_name) == 0 - elif c_node.ns is NULL or c_node.ns.href is NULL: - return 0 else: - return cstd.strcmp(c_node.name, c_name) == 0 and \ - cstd.strcmp(c_node.ns.href, c_href) == 0 + c_node_href = _getNs(c_node) + if c_node_href is NULL: + return 0 + else: + return cstd.strcmp(c_node.name, c_name) == 0 and \ + cstd.strcmp(c_node_href, c_href) == 0 cdef void _removeNode(xmlNode* c_node): """Unlink and free a node and subnodes if possible. @@ -398,10 +404,10 @@ cdef char* href cdef char* name name = c_node.name - if c_node.ns is NULL or c_node.ns.href is NULL: + href = _getNs(c_node) + if href is NULL: return funicode(name) else: - href = c_node.ns.href s = python.PyString_FromFormat("{%s}%s", href, name) if isutf8(href) or isutf8(name): return python.PyUnicode_FromEncodedObject(s, 'UTF-8', NULL) Modified: lxml/branch/capi/src/lxml/etree.pyx ============================================================================== --- lxml/branch/capi/src/lxml/etree.pyx (original) +++ lxml/branch/capi/src/lxml/etree.pyx Sat Jul 15 08:24:38 2006 @@ -1,5 +1,5 @@ cimport tree, python -from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement +from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement, _getNs from python cimport isinstance, issubclass, hasattr, callable from python cimport iter, str, _cstr, _isString, Py_ssize_t cimport xpath @@ -1148,18 +1148,13 @@ cdef _Element _elementFactory(_Document doc, xmlNode* c_node): cdef _Element result - cdef char* c_ns_href result = getProxy(c_node) 
if result is not None: return result if c_node is NULL: return None if c_node.type == tree.XML_ELEMENT_NODE: - if c_node.ns == NULL: - c_ns_href = NULL - else: - c_ns_href = c_node.ns.href - element_class = _find_element_class(c_ns_href, c_node.name) + element_class = _find_element_class(_getNs(c_node), c_node.name) elif c_node.type == tree.XML_COMMENT_NODE: element_class = _Comment else: Modified: lxml/branch/capi/src/lxml/etree_defs.h ============================================================================== --- lxml/branch/capi/src/lxml/etree_defs.h (original) +++ lxml/branch/capi/src/lxml/etree_defs.h Sat Jul 15 08:24:38 2006 @@ -40,6 +40,9 @@ ((c_node)->type == XML_ELEMENT_NODE || \ (c_node)->type == XML_COMMENT_NODE) +#define _getNs(c_node) \ + (((c_node)->ns == 0) ? 0 : ((c_node)->ns->href)) + /* Macro pair implementation of a depth first tree walker * * Calls the code block between the BEGIN and END macros for all elements Modified: lxml/branch/capi/src/lxml/relaxng.pxi ============================================================================== --- lxml/branch/capi/src/lxml/relaxng.pxi (original) +++ lxml/branch/capi/src/lxml/relaxng.pxi Sat Jul 15 08:24:38 2006 @@ -23,6 +23,7 @@ cdef _NodeBase root_node cdef xmlNode* c_node cdef xmlDoc* fake_c_doc + cdef char* c_href cdef relaxng.xmlRelaxNGParserCtxt* parser_ctxt self._c_schema = NULL fake_c_doc = NULL @@ -31,8 +32,9 @@ root_node = _rootNodeOrRaise(etree) c_node = root_node._c_node # work around for libxml2 bug if document is not RNG at all - if c_node.ns is NULL or c_node.ns.href is NULL or \ - cstd.strcmp(c_node.ns.href, + c_href = _getNs(c_node) + if c_href is NULL or \ + cstd.strcmp(c_href, 'http://relaxng.org/ns/structure/1.0') != 0: raise RelaxNGParseError, "Document is not Relax NG" fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) Modified: lxml/branch/capi/src/lxml/tree.pxd ============================================================================== --- 
lxml/branch/capi/src/lxml/tree.pxd (original) +++ lxml/branch/capi/src/lxml/tree.pxd Sat Jul 15 08:24:38 2006 @@ -250,6 +250,7 @@ cdef extern from "etree_defs.h": cdef int _isElement(xmlNode* node) + cdef char* _getNs(xmlNode* node) cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* tree_top, xmlNode* start_node, int inclusive) cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* start_node) Modified: lxml/branch/capi/src/lxml/xmlschema.pxi ============================================================================== --- lxml/branch/capi/src/lxml/xmlschema.pxi (original) +++ lxml/branch/capi/src/lxml/xmlschema.pxi Sat Jul 15 08:24:38 2006 @@ -22,6 +22,7 @@ cdef _NodeBase root_node cdef xmlDoc* fake_c_doc cdef xmlNode* c_node + cdef char* c_href cdef xmlschema.xmlSchemaParserCtxt* parser_ctxt self._c_schema = NULL if etree is not None: @@ -30,8 +31,9 @@ # work around for libxml2 bug if document is not XML schema at all c_node = root_node._c_node - if c_node.ns is NULL or c_node.ns.href is NULL or \ - cstd.strcmp(c_node.ns.href, 'http://www.w3.org/2001/XMLSchema') != 0: + c_href = _getNs(c_node) + if c_href is NULL or \ + cstd.strcmp(c_href, 'http://www.w3.org/2001/XMLSchema') != 0: raise XMLSchemaParseError, "Document is not XML Schema" fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) From scoder at codespeak.net Sat Jul 15 16:55:20 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 15 Jul 2006 16:55:20 +0200 (CEST) Subject: [Lxml-checkins] r30065 - lxml/branch/capi/src/lxml Message-ID: <20060715145520.C982010092@code0.codespeak.net> Author: scoder Date: Sat Jul 15 16:55:18 2006 New Revision: 30065 Modified: lxml/branch/capi/src/lxml/apihelpers.pxi lxml/branch/capi/src/lxml/htmlparser.pxd lxml/branch/capi/src/lxml/xmlparser.pxd lxml/branch/capi/src/lxml/xpath.pxd Log: tab/space indentation fixes Modified: lxml/branch/capi/src/lxml/apihelpers.pxi ============================================================================== --- 
lxml/branch/capi/src/lxml/apihelpers.pxi (original) +++ lxml/branch/capi/src/lxml/apihelpers.pxi Sat Jul 15 16:55:18 2006 @@ -286,7 +286,7 @@ return 0 else: return cstd.strcmp(c_node.name, c_name) == 0 and \ - cstd.strcmp(c_node_href, c_href) == 0 + cstd.strcmp(c_node_href, c_href) == 0 cdef void _removeNode(xmlNode* c_node): """Unlink and free a node and subnodes if possible. Modified: lxml/branch/capi/src/lxml/htmlparser.pxd ============================================================================== --- lxml/branch/capi/src/lxml/htmlparser.pxd (original) +++ lxml/branch/capi/src/lxml/htmlparser.pxd Sat Jul 15 16:55:18 2006 @@ -10,7 +10,7 @@ HTML_PARSE_PEDANTIC # pedantic error reporting HTML_PARSE_NOBLANKS # remove blank nodes HTML_PARSE_NONET # Forbid network access - # libxml2 2.6.21+ only: + # libxml2 2.6.21+ only: HTML_PARSE_RECOVER # Relaxed parsing HTML_PARSE_COMPACT # compact small text nodes Modified: lxml/branch/capi/src/lxml/xmlparser.pxd ============================================================================== --- lxml/branch/capi/src/lxml/xmlparser.pxd (original) +++ lxml/branch/capi/src/lxml/xmlparser.pxd Sat Jul 15 16:55:18 2006 @@ -119,7 +119,7 @@ cdef extern from "libxml/parserInternals.h": cdef xmlParserInput* xmlNewStringInputStream(xmlParserCtxt* ctxt, - char* buffer) + char* buffer) cdef xmlParserInput* xmlNewInputFromFile(xmlParserCtxt* ctxt, char* filename) cdef void xmlFreeInputStream(xmlParserInput* input) Modified: lxml/branch/capi/src/lxml/xpath.pxd ============================================================================== --- lxml/branch/capi/src/lxml/xpath.pxd (original) +++ lxml/branch/capi/src/lxml/xpath.pxd Sat Jul 15 16:55:18 2006 @@ -103,15 +103,15 @@ char* ns_uri, xmlXPathFunction f) cdef void xmlXPathRegisterFuncLookup(xmlXPathContext *ctxt, - xmlXPathFuncLookupFunc f, - void *funcCtxt) + xmlXPathFuncLookupFunc f, + void *funcCtxt) cdef int xmlXPathRegisterVariable(xmlXPathContext *ctxt, - char* name, - 
xmlXPathObject* value) + char* name, + xmlXPathObject* value) cdef int xmlXPathRegisterVariableNS(xmlXPathContext *ctxt, - char* name, - char* ns_uri, - xmlXPathObject* value) + char* name, + char* ns_uri, + xmlXPathObject* value) cdef void xmlXPathRegisteredVariablesCleanup(xmlXPathContext *ctxt) cdef void xmlXPathRegisteredNsCleanup(xmlXPathContext *ctxt) cdef xmlXPathObject* valuePop (xmlXPathParserContext *ctxt) From scoder at codespeak.net Sat Jul 15 17:13:12 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 15 Jul 2006 17:13:12 +0200 (CEST) Subject: [Lxml-checkins] r30066 - lxml/branch/capi/src/lxml Message-ID: <20060715151312.EDFB510092@code0.codespeak.net> Author: scoder Date: Sat Jul 15 17:13:11 2006 New Revision: 30066 Modified: lxml/branch/capi/src/lxml/etree_defs.h lxml/branch/capi/src/lxml/python.pxd Log: C-ification of getattr() Modified: lxml/branch/capi/src/lxml/etree_defs.h ============================================================================== --- lxml/branch/capi/src/lxml/etree_defs.h (original) +++ lxml/branch/capi/src/lxml/etree_defs.h Sat Jul 15 17:13:11 2006 @@ -29,6 +29,7 @@ #define isinstance(o,c) PyObject_IsInstance(o,c) #define issubclass(c,csuper) PyObject_IsSubclass(c,csuper) #define hasattr(o,a) PyObject_HasAttr(o,a) +#define getattr(o,a) PyObject_GetAttr(o,a) #define callable(o) PyCallable_Check(o) #define str(o) PyObject_Str(o) #define iter(o) PyObject_GetIter(o) Modified: lxml/branch/capi/src/lxml/python.pxd ============================================================================== --- lxml/branch/capi/src/lxml/python.pxd (original) +++ lxml/branch/capi/src/lxml/python.pxd Sat Jul 15 17:13:11 2006 @@ -77,6 +77,7 @@ cdef int isinstance(object instance, object classes) cdef int issubclass(object derived, object superclasses) cdef int hasattr(object obj, object attr) + cdef object getattr(object obj, object attr) cdef int callable(object obj) cdef object str(object obj) cdef object iter(object obj) From 
scoder at codespeak.net Sat Jul 15 17:20:33 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 15 Jul 2006 17:20:33 +0200 (CEST) Subject: [Lxml-checkins] r30067 - lxml/branch/capi/src/lxml Message-ID: <20060715152033.125D210092@code0.codespeak.net> Author: scoder Date: Sat Jul 15 17:20:31 2006 New Revision: 30067 Added: lxml/branch/capi/src/lxml/etreepublic.pxd - copied unchanged from r30047, lxml/branch/capi/src/lxml/public.pxd Removed: lxml/branch/capi/src/lxml/public.pxd Log: renamed public include file: public.pxd -> etreepublic.pxd Deleted: /lxml/branch/capi/src/lxml/public.pxd ============================================================================== --- /lxml/branch/capi/src/lxml/public.pxd Sat Jul 15 17:20:31 2006 +++ (empty file) @@ -1,80 +0,0 @@ -# public Pyrex/C interface to lxml.etree - -cimport tree -cimport python - -cdef extern from "etree.h": - ########################################################################## - # public classes - - cdef class lxml.etree._Document [ object LxmlDocument ]: - cdef tree.xmlDoc* _c_doc - - cdef class lxml.etree._NodeBase [ object LxmlNodeBase ]: - cdef _Document _doc - cdef tree.xmlNode* _c_node - - cdef class lxml.etree._Element(_NodeBase) [ object LxmlElement ]: - pass - - cdef class lxml.etree.ElementBase(_Element) [ object LxmlElementBase ]: - pass - - cdef class lxml.etree._ElementTree [ object LxmlElementTree ]: - cdef _Document _doc - cdef _Element _element - - # First function to call! 
- cdef int import_etree(etree_module) except -1 - - # create an Element for a C-node in the Document - cdef _Element elementFactory(_Document doc, tree.xmlNode* c_node) - - # create an ElementTree for an Element - cdef _ElementTree elementTreeFactory(_NodeBase context_node) - - # create an ElementTree subclass for an Element - cdef _ElementTree newElementTree(_NodeBase context_node, - object subclass) - - # check if a C node matches a tag name and namespace - # (NULL allowed for both) - cdef int tagMatches(tree.xmlNode* c_node, char* c_href, char* c_name) - - # convert a UTF-8 char* to a Python string or unicode string - cdef object pyunicode(char* s) - - # convert the string to UTF-8 using the normal lxml.etree semantics - cdef object utf8(object s) - - # split a tag into a (URI, name) tuple - cdef object getNsTag(object tag) - - # get the "{ns}tag" string for a C node - cdef object namespacedName(tree.xmlNode* c_node) - - # find the Document of an Element, ElementTree or Document (itself!) 
- cdef _Document documentOrRaise(object input) - - # find the root Element of an Element (itself!), ElementTree or Document - cdef _NodeBase rootNodeOrRaise(object input) - - # return an attribute value for a C attribute on a C element node - cdef object attributeValue(tree.xmlNode* c_element, - tree.xmlAttr* c_attrib_node) - - # find child element number 'index' (supports negative indexes) - cdef tree.xmlNode* findChild(tree.xmlNode* c_node, - python.Py_ssize_t index) - - # find child element number 'index' starting at first one - cdef tree.xmlNode* findChildForwards(tree.xmlNode* c_node, - python.Py_ssize_t index) - - # find child element number 'index' starting at last one - cdef tree.xmlNode* findChildBackwards(tree.xmlNode* c_node, - python.Py_ssize_t index) - - # return next/previous sibling element of the node - cdef tree.xmlNode* nextElement(tree.xmlNode* c_node) - cdef tree.xmlNode* previousElement(tree.xmlNode* c_node) From scoder at codespeak.net Sat Jul 15 17:22:38 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 15 Jul 2006 17:22:38 +0200 (CEST) Subject: [Lxml-checkins] r30068 - in lxml/branch/capi: . 
src/lxml src/lxml/tests Message-ID: <20060715152238.15D8110092@code0.codespeak.net> Author: scoder Date: Sat Jul 15 17:22:36 2006 New Revision: 30068 Added: lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Modified: lxml/branch/capi/setup.py Log: new module lxml.elementlib.objectify Modified: lxml/branch/capi/setup.py ============================================================================== --- lxml/branch/capi/setup.py (original) +++ lxml/branch/capi/setup.py Sat Jul 15 17:22:36 2006 @@ -1,5 +1,10 @@ import sys, os, os.path, re +EXT_MODULES = [ + ("etree", "lxml.etree"), + ("objectify", "lxml.elementlib.objectify") + ] + setup_args = {} ext_args = {} DEFINES = [] @@ -93,11 +98,11 @@ try: from Pyrex.Distutils import build_ext as build_pyx - sources = ["src/lxml/etree.pyx"] + source_extension = ".pyx" setup_args['cmdclass'] = {'build_ext' : build_pyx} except ImportError: print "*NOTE*: Trying to build without Pyrex, needs pre-generated 'src/lxml/etree.c' !" 
- sources = ["src/lxml/etree.c"] + source_extension = ".c" if '--static' in sys.argv: # use the static setup as configured in setupStaticBuild @@ -127,13 +132,17 @@ except ValueError: pass -ext_modules = [ Extension( - "lxml.etree", - sources = sources, - extra_compile_args = ['-w'] + cflags, - define_macros = DEFINES, - **ext_args - )] +ext_modules = [] + +for module, package in EXT_MODULES: + ext_modules.append( + Extension( + package, + sources = ["src/lxml/" + module + source_extension], + extra_compile_args = ['-w'] + cflags, + define_macros = DEFINES, + **ext_args + )) # setup ChangeLog entry @@ -192,7 +201,7 @@ ], package_dir = {'': 'src'}, - packages = ['lxml'], + packages = ['lxml', 'lxml.elementlib'], ext_modules = ext_modules, **setup_args ) Added: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- (empty file) +++ lxml/branch/capi/src/lxml/objectify.pyx Sat Jul 15 17:22:36 2006 @@ -0,0 +1,65 @@ +from etreepublic cimport _Element, ElementBase, elementFactory, import_etree +from python cimport isinstance, getattr, _cstr, Py_ssize_t +cimport etreepublic as cetree +cimport python +cimport tree + +from lxml import etree +# initialize C-API of lxml.etree +import_etree(etree) + +cdef class ObjectifiedElement(ElementBase): + """Element class with an Amara-like API. + + Element children are accessed as object attributes. Multiple children + with the same name are available through a list index. 
Example: + + >>> root = etree.XML("01") + >>> second_c2 = root.c1.c2[1] + """ + def __getattr__(self, tag): + cdef tree.xmlNode* c_node + cdef char* c_href + cdef char* c_tag + ns, tag = cetree.getNsTag(tag) + if ns is not None: + c_href = _cstr(ns) + else: + c_href = tree._getNs(self._c_node) + c_tag = _cstr(tag) + c_node = _findFollowingSibling(self._c_node.children, c_href, c_tag, 0) + if c_node is NULL: + raise AttributeError, "no such child: %s" % tag + return elementFactory(self._doc, c_node) + + def __getitem__(self, key): + cdef tree.xmlNode* c_node + cdef tree.xmlNode* c_parent + if python._isString(key): + return getattr(self, key) + c_node = self._c_node + c_parent = c_node.parent + if c_parent is NULL: + if key == 0: + return self + else: + raise IndexError, key + c_node = _findFollowingSibling( + c_parent.children, tree._getNs(c_parent), c_node.name, key) + if c_node is NULL: + raise IndexError, key + return elementFactory(self._doc, c_node) + + +cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node, + char* href, char* name, + Py_ssize_t index): + while c_node is not NULL: + if tree._isElement(c_node) and cetree.tagMatches(c_node, href, name): + index = index - 1 + if index < 0: + return c_node + c_node = c_node.next + return NULL + +#etree.setDefaultElementClass(MyElement) Added: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- (empty file) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Sat Jul 15 17:22:36 2006 @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- + +""" +Tests specific to the extended etree API + +Tests that apply to the general ElementTree API should go into +test_elementtree +""" + + +import unittest, doctest, operator + +from common_imports import etree, StringIO, HelperTestCase, fileInTestDir +from common_imports import SillyFileLike, canonicalize + +from lxml.elementlib import objectify + +xml_str = '''\ + + + 0 + 1 + +''' + +class 
ObjectifyTestCase(HelperTestCase): + """Test cases for lxml.elementlib.objectify + """ + etree = etree + + def setUp(self): + ns = etree.Namespace("objectified") + ns[None] = objectify.ObjectifiedElement + + def tearDown(self): + ns = etree.Namespace("objectified") + ns.clear() + + def test_child_attr(self): + root = etree.XML(xml_str) + self.assertEqual(root.c1.c2.text, "0") + + def test_child_attr_nonexistant(self): + root = etree.XML(xml_str) + self.assertRaises(AttributeError, getattr, root.c1, "NOT_THERE") + + def test_child_attr_index(self): + root = etree.XML(xml_str) + self.assertEqual(root.c1.c2[0].text, "0") + self.assertEqual(root.c1.c2[1].text, "1") + self.assertRaises(IndexError, operator.itemgetter(2), root.c1.c2) + +def test_suite(): + suite = unittest.TestSuite() + suite.addTests([unittest.makeSuite(ObjectifyTestCase)]) + suite.addTests( + [doctest.DocFileSuite('../../../doc/objectify.txt')]) + return suite + +if __name__ == '__main__': + unittest.main() From scoder at codespeak.net Sun Jul 16 13:29:11 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 16 Jul 2006 13:29:11 +0200 (CEST) Subject: [Lxml-checkins] r30071 - lxml/branch/capi/src/lxml Message-ID: <20060716112911.867FF1009D@code0.codespeak.net> Author: scoder Date: Sun Jul 16 13:29:09 2006 New Revision: 30071 Modified: lxml/branch/capi/src/lxml/etree.pyx lxml/branch/capi/src/lxml/etreepublic.pxd lxml/branch/capi/src/lxml/public-api.pxi Log: made _ElementIterator public Modified: lxml/branch/capi/src/lxml/etree.pyx ============================================================================== --- lxml/branch/capi/src/lxml/etree.pyx (original) +++ lxml/branch/capi/src/lxml/etree.pyx Sun Jul 16 13:29:09 2006 @@ -1370,7 +1370,8 @@ ctypedef xmlNode* (*_node_to_node_function)(xmlNode*) -cdef class _ElementTagMatcher: +cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, + type LxmlElementTagMatcherType ]: cdef object _pystrings cdef char* _href cdef char* 
_name @@ -1388,7 +1389,8 @@ if self._name[0] == c'*' and self._name[1] == c'\0': self._name = NULL -cdef class _ElementIterator(_ElementTagMatcher): +cdef public class _ElementIterator(_ElementTagMatcher) [ + object LxmlElementIterator, type LxmlElementIteratorType ]: # we keep Python references here to control GC cdef _NodeBase _node cdef _node_to_node_function _next_element Modified: lxml/branch/capi/src/lxml/etreepublic.pxd ============================================================================== --- lxml/branch/capi/src/lxml/etreepublic.pxd (original) +++ lxml/branch/capi/src/lxml/etreepublic.pxd Sun Jul 16 13:29:09 2006 @@ -5,7 +5,7 @@ cdef extern from "etree.h": ########################################################################## - # public classes + # public ElementTree API classes cdef class lxml.etree._Document [ object LxmlDocument ]: cdef tree.xmlDoc* _c_doc @@ -24,6 +24,9 @@ cdef _Document _doc cdef _Element _element + ########################################################################## + # public helper functions + # First function to call! cdef int import_etree(etree_module) except -1 @@ -78,3 +81,24 @@ # return next/previous sibling element of the node cdef tree.xmlNode* nextElement(tree.xmlNode* c_node) cdef tree.xmlNode* previousElement(tree.xmlNode* c_node) + + ########################################################################## + # iterators + + cdef class lxml.etree._ElementTagMatcher [ object LxmlElementTagMatcher ]: + cdef char* _href + cdef char* _name + + # store "{ns}tag" (or None) filter for this matcher or element iterator + # ** unless _href *and* _name are set up 'by hand', this function *must* + # ** be called when subclassing the iterator below! 
+ cdef void initTagMatch(_ElementTagMatcher matcher, tag) + + cdef class lxml.etree._ElementIterator(_ElementTagMatcher) [ + object LxmlElementIterator ]: + cdef _NodeBase _node + cdef tree.xmlNode* (*_next_element)(tree.xmlNode*) + + # store the initial node of the iterator if it matches the required tag + # or its next matching sibling if not + cdef void iteratorStoreNext(_ElementIterator iterator, _NodeBase node) Modified: lxml/branch/capi/src/lxml/public-api.pxi ============================================================================== --- lxml/branch/capi/src/lxml/public-api.pxi (original) +++ lxml/branch/capi/src/lxml/public-api.pxi Sun Jul 16 13:29:09 2006 @@ -60,3 +60,9 @@ cdef public object namespacedName(xmlNode* c_node): return _namespacedName(c_node) + +cdef public void iteratorStoreNext(_ElementIterator iterator, _NodeBase node): + iterator._storeNext(node) + +cdef public void initTagMatch(_ElementTagMatcher matcher, tag): + matcher._initTagMatch(tag) From scoder at codespeak.net Sun Jul 16 13:43:17 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 16 Jul 2006 13:43:17 +0200 (CEST) Subject: [Lxml-checkins] r30072 - in lxml/branch/capi/src/lxml: . 
tests Message-ID: <20060716114317.1DF021009D@code0.codespeak.net> Author: scoder Date: Sun Jul 16 13:43:15 2006 New Revision: 30072 Modified: lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: implemented iter() and len() for elements, some cleanup in tests Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Sun Jul 16 13:43:15 2006 @@ -1,4 +1,5 @@ -from etreepublic cimport _Element, ElementBase, elementFactory, import_etree +from etreepublic cimport _Element, ElementBase, _ElementIterator +from etreepublic cimport elementFactory, import_etree from python cimport isinstance, getattr, _cstr, Py_ssize_t cimport etreepublic as cetree cimport python @@ -17,17 +18,49 @@ >>> root = etree.XML("01") >>> second_c2 = root.c1.c2[1] """ + def __iter__(self): + parent = self.getparent() + if parent is None: + return iter([self]) + return etree.ElementChildIterator(parent, tag=self.tag) + + def __len__(self): + cdef tree.xmlNode* c_self_node + cdef tree.xmlNode* c_node + cdef char* c_href + cdef char* c_tag + cdef Py_ssize_t count + c_self_node = self._c_node + c_tag = c_self_node.name + c_href = tree._getNs(c_self_node) + count = 1 + c_node = c_self_node.next + while c_node is not NULL: + if tree._isElement(c_node) and \ + cetree.tagMatches(c_node, c_href, c_tag): + count = count + 1 + c_node = c_node.next + c_node = c_self_node.prev + while c_node is not NULL: + if tree._isElement(c_node) and \ + cetree.tagMatches(c_node, c_href, c_tag): + count = count + 1 + c_node = c_node.prev + return count + def __getattr__(self, tag): + cdef tree.xmlNode* c_self_node cdef tree.xmlNode* c_node cdef char* c_href cdef char* c_tag ns, tag = cetree.getNsTag(tag) + c_self_node = self._c_node if ns is not None: c_href = _cstr(ns) else: - c_href = tree._getNs(self._c_node) + 
c_href = tree._getNs(c_self_node) c_tag = _cstr(tag) - c_node = _findFollowingSibling(self._c_node.children, c_href, c_tag, 0) + c_node = _findFollowingSibling(c_self_node.children, c_href, c_tag, 0) if c_node is NULL: raise AttributeError, "no such child: %s" % tag return elementFactory(self._doc, c_node) Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Sun Jul 16 13:43:15 2006 @@ -36,20 +36,39 @@ ns = etree.Namespace("objectified") ns.clear() - def test_child_attr(self): + def test_child(self): root = etree.XML(xml_str) - self.assertEqual(root.c1.c2.text, "0") + self.assertEquals("0", root.c1.c2.text) - def test_child_attr_nonexistant(self): + def test_child_getattr(self): + root = etree.XML(xml_str) + self.assertEquals("0", getattr(root.c1, "{objectified}c2").text) + + def test_child_nonexistant(self): root = etree.XML(xml_str) self.assertRaises(AttributeError, getattr, root.c1, "NOT_THERE") - def test_child_attr_index(self): + def test_child_index(self): root = etree.XML(xml_str) - self.assertEqual(root.c1.c2[0].text, "0") - self.assertEqual(root.c1.c2[1].text, "1") + self.assertEquals("0", root.c1.c2[0].text) + self.assertEquals("1", root.c1.c2[1].text) self.assertRaises(IndexError, operator.itemgetter(2), root.c1.c2) + def test_child_len(self): + root = etree.XML(xml_str) + self.assertEquals(1, len(root)) + self.assertEquals(1, len(root.c1)) + self.assertEquals(2, len(root.c1.c2)) + + def test_child_iter(self): + root = etree.XML(xml_str) + self.assertEquals([root], + list(iter(root))) + self.assertEquals([root.c1], + list(iter(root.c1))) + self.assertEquals([root.c1.c2[0], root.c1.c2[1]], + list(iter((root.c1.c2)))) + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ObjectifyTestCase)]) From scoder at codespeak.net Sun 
Jul 16 14:37:40 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 16 Jul 2006 14:37:40 +0200 (CEST) Subject: [Lxml-checkins] r30073 - lxml/branch/capi/src/lxml Message-ID: <20060716123740.C08441009F@code0.codespeak.net> Author: scoder Date: Sun Jul 16 14:37:38 2006 New Revision: 30073 Modified: lxml/branch/capi/src/lxml/apihelpers.pxi lxml/branch/capi/src/lxml/etreepublic.pxd lxml/branch/capi/src/lxml/public-api.pxi Log: new public function getAttributeValueFromNsName() Modified: lxml/branch/capi/src/lxml/apihelpers.pxi ============================================================================== --- lxml/branch/capi/src/lxml/apihelpers.pxi (original) +++ lxml/branch/capi/src/lxml/apihelpers.pxi Sun Jul 16 14:37:38 2006 @@ -108,6 +108,19 @@ tree.xmlFree(c_result) return result +cdef object _getAttributeValueFromNsName(_NodeBase element, + char* c_href, char* c_name): + cdef char* c_result + if c_href is NULL: + c_result = tree.xmlGetNoNsProp(element._c_node, c_name) + else: + c_result = tree.xmlGetNsProp(element._c_node, c_name, c_href) + if c_result is NULL: + return None + result = funicode(c_result) + tree.xmlFree(c_result) + return result + cdef void _setAttributeValue(_NodeBase element, key, value): cdef xmlNs* c_ns cdef char* c_value Modified: lxml/branch/capi/src/lxml/etreepublic.pxd ============================================================================== --- lxml/branch/capi/src/lxml/etreepublic.pxd (original) +++ lxml/branch/capi/src/lxml/etreepublic.pxd Sun Jul 16 14:37:38 2006 @@ -66,6 +66,13 @@ cdef object attributeValue(tree.xmlNode* c_element, tree.xmlAttr* c_attrib_node) + # return the value of attribute "{ns}name", or the default value + cdef object getAttributeValue(_NodeBase element, key, default) + + # return the value of the attribute with 'ns' and 'name' (or None) + cdef object getAttributeValueFromNsName(_NodeBase element, + char* ns, char* name): + # find child element number 'index' (supports negative indexes) 
cdef tree.xmlNode* findChild(tree.xmlNode* c_node, python.Py_ssize_t index) Modified: lxml/branch/capi/src/lxml/public-api.pxi ============================================================================== --- lxml/branch/capi/src/lxml/public-api.pxi (original) +++ lxml/branch/capi/src/lxml/public-api.pxi Sun Jul 16 14:37:38 2006 @@ -32,6 +32,10 @@ cdef public object getAttributeValue(_NodeBase element, key, default): return _getAttributeValue(element, key, default) +cdef public object getAttributeValueFromNsName(_NodeBase element, + char* ns, char* name): + return _getAttributeValueFromNsName(element, ns, name) + cdef public xmlNode* findChild(xmlNode* c_node, Py_ssize_t index): return _findChild(c_node, index) From scoder at codespeak.net Sun Jul 16 15:17:26 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 16 Jul 2006 15:17:26 +0200 (CEST) Subject: [Lxml-checkins] r30074 - lxml/branch/capi/src/lxml Message-ID: <20060716131726.60F631009F@code0.codespeak.net> Author: scoder Date: Sun Jul 16 15:17:23 2006 New Revision: 30074 Modified: lxml/branch/capi/src/lxml/apihelpers.pxi lxml/branch/capi/src/lxml/etreepublic.pxd lxml/branch/capi/src/lxml/public-api.pxi Log: renamed new public function: getAttributeValueFromNsName -> attributeValueFromNsName (now takes xmlNode* instead of _NodeBase argument) Modified: lxml/branch/capi/src/lxml/apihelpers.pxi ============================================================================== --- lxml/branch/capi/src/lxml/apihelpers.pxi (original) +++ lxml/branch/capi/src/lxml/apihelpers.pxi Sun Jul 16 15:17:23 2006 @@ -92,6 +92,19 @@ tree.xmlFree(value) return result +cdef object _attributeValueFromNsName(xmlNode* c_element, + char* c_href, char* c_name): + cdef char* c_result + if c_href is NULL: + c_result = tree.xmlGetNoNsProp(c_element, c_name) + else: + c_result = tree.xmlGetNsProp(c_element, c_name, c_href) + if c_result is NULL: + return None + result = funicode(c_result) + tree.xmlFree(c_result) + return
result + cdef object _getAttributeValue(_NodeBase element, key, default): cdef char* c_result cdef char* c_tag @@ -108,19 +121,6 @@ tree.xmlFree(c_result) return result -cdef object _getAttributeValueFromNsName(_NodeBase element, - char* c_href, char* c_name): - cdef char* c_result - if c_href is NULL: - c_result = tree.xmlGetNoNsProp(element._c_node, c_name) - else: - c_result = tree.xmlGetNsProp(element._c_node, c_name, c_href) - if c_result is NULL: - return None - result = funicode(c_result) - tree.xmlFree(c_result) - return result - cdef void _setAttributeValue(_NodeBase element, key, value): cdef xmlNs* c_ns cdef char* c_value Modified: lxml/branch/capi/src/lxml/etreepublic.pxd ============================================================================== --- lxml/branch/capi/src/lxml/etreepublic.pxd (original) +++ lxml/branch/capi/src/lxml/etreepublic.pxd Sun Jul 16 15:17:23 2006 @@ -66,13 +66,13 @@ cdef object attributeValue(tree.xmlNode* c_element, tree.xmlAttr* c_attrib_node) + # return the value of the attribute with 'ns' and 'name' (or None) + cdef object attributeValueFromNsName(tree.xmlNode* c_element, + char* c_ns, char* c_name) + # return the value of attribute "{ns}name", or the default value cdef object getAttributeValue(_NodeBase element, key, default) - # return the value of the attribute with 'ns' and 'name' (or None) - cdef object getAttributeValueFromNsName(_NodeBase element, - char* ns, char* name): - # find child element number 'index' (supports negative indexes) cdef tree.xmlNode* findChild(tree.xmlNode* c_node, python.Py_ssize_t index) Modified: lxml/branch/capi/src/lxml/public-api.pxi ============================================================================== --- lxml/branch/capi/src/lxml/public-api.pxi (original) +++ lxml/branch/capi/src/lxml/public-api.pxi Sun Jul 16 15:17:23 2006 @@ -29,13 +29,13 @@ cdef public object attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node): return _attributeValue(c_element, c_attrib_node) +cdef 
public object attributeValueFromNsName(xmlNode* c_element, + char* ns, char* name): + return _attributeValueFromNsName(c_element, ns, name) + cdef public object getAttributeValue(_NodeBase element, key, default): return _getAttributeValue(element, key, default) -cdef public object getAttributeValueFromNsName(_NodeBase element, - char* ns, char* name): - return _getAttributeValueFromNsName(element, ns, name) - cdef public xmlNode* findChild(xmlNode* c_node, Py_ssize_t index): return _findChild(c_node, index) From scoder at codespeak.net Sun Jul 16 15:19:40 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 16 Jul 2006 15:19:40 +0200 (CEST) Subject: [Lxml-checkins] r30075 - lxml/branch/capi/src/lxml Message-ID: <20060716131940.4BF901009F@code0.codespeak.net> Author: scoder Date: Sun Jul 16 15:19:39 2006 New Revision: 30075 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: implemented attribute access, doc-strings Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Sun Jul 16 15:19:39 2006 @@ -19,12 +19,16 @@ >>> second_c2 = root.c1.c2[1] """ def __iter__(self): + """Iterate over self and all siblings with the same tag. + """ parent = self.getparent() if parent is None: return iter([self]) return etree.ElementChildIterator(parent, tag=self.tag) def __len__(self): + """Count self and siblings with the same tag. 
+ """ cdef tree.xmlNode* c_self_node cdef tree.xmlNode* c_node cdef char* c_href @@ -49,40 +53,38 @@ return count def __getattr__(self, tag): - cdef tree.xmlNode* c_self_node - cdef tree.xmlNode* c_node - cdef char* c_href - cdef char* c_tag - ns, tag = cetree.getNsTag(tag) - c_self_node = self._c_node - if ns is not None: - c_href = _cstr(ns) - else: - c_href = tree._getNs(c_self_node) - c_tag = _cstr(tag) - c_node = _findFollowingSibling(c_self_node.children, c_href, c_tag, 0) - if c_node is NULL: - raise AttributeError, "no such child: %s" % tag - return elementFactory(self._doc, c_node) + """Return the (first) child with the given tag name. If no namespace + is provided, the child will be looked up in the same one as self. + """ + return _lookupAttribute(self, tag) def __getitem__(self, key): - cdef tree.xmlNode* c_node + """Return a sibling or attribute. + * If argument is an integer, returns the following sibling + at that position. + * If argument is a string, does the same as getattr(). 
+ """ + cdef tree.xmlNode* c_self_node cdef tree.xmlNode* c_parent + cdef tree.xmlNode* c_node if python._isString(key): - return getattr(self, key) - c_node = self._c_node - c_parent = c_node.parent + return _lookupAttribute(self, key) + c_self_node = self._c_node + c_parent = c_self_node.parent if c_parent is NULL: if key == 0: return self else: raise IndexError, key c_node = _findFollowingSibling( - c_parent.children, tree._getNs(c_parent), c_node.name, key) + c_parent.children, tree._getNs(c_self_node), c_self_node.name, key) if c_node is NULL: raise IndexError, key return elementFactory(self._doc, c_node) +## def xml_set_attribute(self, name, value): +## ElementBase.set(self, name, value) + cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node, char* href, char* name, @@ -95,4 +97,27 @@ c_node = c_node.next return NULL +cdef object _lookupAttribute(_Element element, tag): + cdef tree.xmlNode* c_result + cdef tree.xmlNode* c_node + cdef char* c_href + cdef char* c_tag + ns, tag = cetree.getNsTag(tag) + c_tag = _cstr(tag) + c_node = element._c_node + if ns is None: + attrval = cetree.attributeValueFromNsName(c_node, NULL, c_tag) + if attrval is not None: + return attrval + c_href = tree._getNs(c_node) + else: + c_href = _cstr(ns) + attrval = cetree.attributeValueFromNsName(c_node, c_href, c_tag) + if attrval is not None: + return attrval + c_result = _findFollowingSibling(c_node.children, c_href, c_tag, 0) + if c_result is NULL: + raise AttributeError, "no such child: %s" % tag + return elementFactory(element._doc, c_result) + #etree.setDefaultElementClass(MyElement) From scoder at codespeak.net Sun Jul 16 15:21:40 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 16 Jul 2006 15:21:40 +0200 (CEST) Subject: [Lxml-checkins] r30076 - lxml/branch/capi/src/lxml/tests Message-ID: <20060716132140.18B661009F@code0.codespeak.net> Author: scoder Date: Sun Jul 16 15:21:38 2006 New Revision: 30076 Modified: 
lxml/branch/capi/src/lxml/tests/test_objectify.py Log: more tests for namespace handling, test case for attribute access Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Sun Jul 16 15:21:38 2006 @@ -16,10 +16,11 @@ from lxml.elementlib import objectify xml_str = '''\ - - + + 0 1 + 2 ''' @@ -43,6 +44,7 @@ def test_child_getattr(self): root = etree.XML(xml_str) self.assertEquals("0", getattr(root.c1, "{objectified}c2").text) + self.assertEquals("2", getattr(root.c1, "{otherNS}c2").text) def test_child_nonexistant(self): root = etree.XML(xml_str) @@ -69,6 +71,13 @@ self.assertEquals([root.c1.c2[0], root.c1.c2[1]], list(iter((root.c1.c2)))) + def test_attr(self): + root = etree.XML(xml_str) + self.assertEquals("A1", root.c1.a1) + self.assertEquals("A2", root.c1.a2) + self.assertRaises(AttributeError, getattr, root.c1, "a3") + self.assertEquals("A3", root.c1["{otherNS}a3"]) + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ObjectifyTestCase)]) From scoder at codespeak.net Sun Jul 16 15:26:17 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 16 Jul 2006 15:26:17 +0200 (CEST) Subject: [Lxml-checkins] r30077 - lxml/branch/capi/src/lxml/tests Message-ID: <20060716132617.490641009F@code0.codespeak.net> Author: scoder Date: Sun Jul 16 15:26:16 2006 New Revision: 30077 Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py Log: one more test for namespace handling Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Sun Jul 16 15:26:16 2006 @@ -49,6 +49,7 @@ def test_child_nonexistant(self): root = 
etree.XML(xml_str) self.assertRaises(AttributeError, getattr, root.c1, "NOT_THERE") + self.assertRaises(AttributeError, getattr, root.c1, "{unknownNS}c2") def test_child_index(self): root = etree.XML(xml_str) From scoder at codespeak.net Mon Jul 17 06:41:21 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 17 Jul 2006 06:41:21 +0200 (CEST) Subject: [Lxml-checkins] r30092 - lxml/branch/capi/src/lxml Message-ID: <20060717044121.4F73B1009B@code0.codespeak.net> Author: scoder Date: Mon Jul 17 06:41:20 2006 New Revision: 30092 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: prefer attributes for item access, children for object attribute access Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Mon Jul 17 06:41:20 2006 @@ -56,7 +56,7 @@ """Return the (first) child with the given tag name. If no namespace is provided, the child will be looked up in the same one as self. """ - return _lookupAttribute(self, tag) + return _lookupAttribute(self, tag, 0) def __getitem__(self, key): """Return a sibling or attribute. 
@@ -68,7 +68,7 @@ cdef tree.xmlNode* c_parent cdef tree.xmlNode* c_node if python._isString(key): - return _lookupAttribute(self, key) + return _lookupAttribute(self, key, 1) c_self_node = self._c_node c_parent = c_self_node.parent if c_parent is NULL: @@ -97,7 +97,7 @@ c_node = c_node.next return NULL -cdef object _lookupAttribute(_Element element, tag): +cdef object _lookupAttribute(_Element element, tag, int prefer_attributes): cdef tree.xmlNode* c_result cdef tree.xmlNode* c_node cdef char* c_href @@ -107,17 +107,21 @@ c_node = element._c_node if ns is None: attrval = cetree.attributeValueFromNsName(c_node, NULL, c_tag) - if attrval is not None: + if attrval is not None and prefer_attributes: return attrval c_href = tree._getNs(c_node) else: c_href = _cstr(ns) - attrval = cetree.attributeValueFromNsName(c_node, c_href, c_tag) - if attrval is not None: - return attrval + if attrval is None: + attrval = cetree.attributeValueFromNsName(c_node, c_href, c_tag) + if attrval is not None and prefer_attributes: + return attrval c_result = _findFollowingSibling(c_node.children, c_href, c_tag, 0) if c_result is NULL: - raise AttributeError, "no such child: %s" % tag + if attrval is not None: + return attrval + else: + raise AttributeError, "no such child: %s" % tag return elementFactory(element._doc, c_result) #etree.setDefaultElementClass(MyElement) From scoder at codespeak.net Mon Jul 17 06:41:49 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 17 Jul 2006 06:41:49 +0200 (CEST) Subject: [Lxml-checkins] r30093 - lxml/branch/capi/doc Message-ID: <20060717044149.5411B1009B@code0.codespeak.net> Author: scoder Date: Mon Jul 17 06:41:48 2006 New Revision: 30093 Added: lxml/branch/capi/doc/objectify.txt Log: doctest for lxml.elementlib.objectify Added: lxml/branch/capi/doc/objectify.txt ============================================================================== --- (empty file) +++ lxml/branch/capi/doc/objectify.txt Mon Jul 17 06:41:48 2006 @@ -0,0 
+1,102 @@ +========================= +lxml.elementlib.objectify +========================= + +lxml supports an alternative element API similar to the Amara_ bindery through +a custom Element implementation. This API is very different from the +ElementTree API. If it is used, it should be used exclusively to avoid common +pitfalls. + +.. _Amara: http://uche.ogbuji.net/tech/4suite/amara/ + +You can replace the original implementation by the ``objectify`` element class +like this:: + + >>> from lxml import etree + >>> from lxml.elementlib.objectify import ObjectifiedElement + >>> etree.setDefaultElementClass(ObjectifiedElement) + + >>> el = etree.Element("test") + >>> print isinstance(el, ObjectifiedElement) + True + +Note that `namespace specific classes`_ can override this default. If +``objectify`` is in use, it is therefore advisable to let other custom element +classes inherit from the ObjectifiedElement class to make sure that all +element classes provide the same API. + +.. _`namespace specific classes`: namespace_extensions.html + + +Element access through object attributes +---------------------------------------- + +The main idea behind the ``objectify`` API is to hide XML element access +behind the usual object attribute access pattern:: + + >>> root = etree.Element("root") + >>> b = etree.SubElement(root, "b") + >>> print root.b.tag + b + +Attributes are accessed exactly the same way, but element children take precedence:: + + >>> c = etree.SubElement(root, "c", myattr="someval") + >>> print root.c.myattr + someval + + >>> root.set("c", "oh-oh") + >>> print root.c.tag + c + +Attributes can also be accessed through a mapping interface, which is handy in +cases where a child has the same name as an attribute:: + + >>> print root["c"] + oh-oh + + +Namespace handling +------------------ + +Namespaces are handled mostly behind the scenes. 
If you access an attribute +of an Element without specifying a namespace, the lookup will use the +namespace of the parent:: + + >>> root = etree.Element("{ns}root") + >>> b = etree.SubElement(root, "{ns}b") + >>> c = etree.SubElement(root, "{other}c") + + >>> print root.b.tag + {ns}b + >>> print root.c + Traceback (most recent call last): + ... + AttributeError: no such child: c + +You can access elements with different namespaces via ``getattr()``:: + + >>> print getattr(root, "{other}c").tag + {other}c + +The quick way through item access is also available:: + + >>> print root["{other}c"].tag + {other}c + + +Resetting the API +----------------- + +You can reset the API to the original ElementTree API by resetting the element +class. Be aware, though, that this does not immediately apply to elements to +which there is a Python reference. Their Python class will only be changed +after all references are gone and the Python object is garbage collected. +When you access an element for which there is not currently a Python +representation, it will be created with the currently registered element +class:: + + >>> etree.setDefaultElementClass() + >>> el = etree.Element("test") + >>> print isinstance(el, ObjectifiedElement) + False From scoder at codespeak.net Mon Jul 17 07:14:10 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 17 Jul 2006 07:14:10 +0200 (CEST) Subject: [Lxml-checkins] r30094 - lxml/branch/capi/src/lxml Message-ID: <20060717051410.C11231009B@code0.codespeak.net> Author: scoder Date: Mon Jul 17 07:14:09 2006 New Revision: 30094 Modified: lxml/branch/capi/src/lxml/apihelpers.pxi lxml/branch/capi/src/lxml/etreepublic.pxd lxml/branch/capi/src/lxml/public-api.pxi Log: new public function namespacedNameFromNsName(c_ns, c_tag) Modified: lxml/branch/capi/src/lxml/apihelpers.pxi ============================================================================== --- lxml/branch/capi/src/lxml/apihelpers.pxi (original) +++ 
lxml/branch/capi/src/lxml/apihelpers.pxi Mon Jul 17 07:14:09 2006 @@ -414,10 +414,9 @@ return ns, tag cdef object _namespacedName(xmlNode* c_node): - cdef char* href - cdef char* name - name = c_node.name - href = _getNs(c_node) + return _namespacedNameFromNsName(_getNs(c_node), c_node.name) + +cdef object _namespacedNameFromNsName(char* href, char* name): if href is NULL: return funicode(name) else: Modified: lxml/branch/capi/src/lxml/etreepublic.pxd ============================================================================== --- lxml/branch/capi/src/lxml/etreepublic.pxd (original) +++ lxml/branch/capi/src/lxml/etreepublic.pxd Mon Jul 17 07:14:09 2006 @@ -56,6 +56,9 @@ # get the "{ns}tag" string for a C node cdef object namespacedName(tree.xmlNode* c_node) + # get the "{ns}tag" string for a href/tagname pair (c_ns may be NULL) + cdef object namespacedNameFromNsName(char* c_ns, char* c_tag) + # find the Document of an Element, ElementTree or Document (itself!) cdef _Document documentOrRaise(object input) Modified: lxml/branch/capi/src/lxml/public-api.pxi ============================================================================== --- lxml/branch/capi/src/lxml/public-api.pxi (original) +++ lxml/branch/capi/src/lxml/public-api.pxi Mon Jul 17 07:14:09 2006 @@ -65,6 +65,9 @@ cdef public object namespacedName(xmlNode* c_node): return _namespacedName(c_node) +cdef public object namespacedNameFromNsName(char* href, char* name): + return _namespacedNameFromNsName(href, name) + cdef public void iteratorStoreNext(_ElementIterator iterator, _NodeBase node): iterator._storeNext(node) From scoder at codespeak.net Mon Jul 17 07:19:29 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 17 Jul 2006 07:19:29 +0200 (CEST) Subject: [Lxml-checkins] r30095 - in lxml/branch/capi: doc src/lxml Message-ID: <20060717051929.C6AF41009B@code0.codespeak.net> Author: scoder Date: Mon Jul 17 07:19:28 2006 New Revision: 30095 Modified: lxml/branch/capi/doc/objectify.txt 
lxml/branch/capi/src/lxml/objectify.pyx Log: show namespace in 'child or attribute not found' exception Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Mon Jul 17 07:19:28 2006 @@ -72,18 +72,25 @@ >>> print root.c Traceback (most recent call last): ... - AttributeError: no such child: c + AttributeError: no such child or attribute: {ns}c You can access elements with different namespaces via ``getattr()``:: >>> print getattr(root, "{other}c").tag {other}c -The quick way through item access is also available:: +The quick way through item access is also available, but as before, XML +attributes take precedence:: >>> print root["{other}c"].tag {other}c + >>> root.set("{other}c", "test") + >>> print root["{other}c"] + test + >>> print getattr(root, "{other}c").tag # FIXME: this looks wrong ... + {other}c + Resetting the API ----------------- Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Mon Jul 17 07:19:28 2006 @@ -121,7 +121,8 @@ if attrval is not None: return attrval else: - raise AttributeError, "no such child: %s" % tag + raise AttributeError, "no such child or attribute: %s" % \ + cetree.namespacedNameFromNsName(c_href, c_tag) return elementFactory(element._doc, c_result) #etree.setDefaultElementClass(MyElement) From scoder at codespeak.net Mon Jul 17 09:24:34 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 17 Jul 2006 09:24:34 +0200 (CEST) Subject: [Lxml-checkins] r30100 - lxml/branch/capi/src/lxml Message-ID: <20060717072434.ED2EF10093@code0.codespeak.net> Author: scoder Date: Mon Jul 17 09:24:33 2006 New Revision: 30100 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: small C-ification 
Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Mon Jul 17 09:24:33 2006 @@ -5,6 +5,7 @@ cimport python cimport tree +cdef object etree from lxml import etree # initialize C-API of lxml.etree import_etree(etree) From scoder at codespeak.net Mon Jul 17 09:48:52 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 17 Jul 2006 09:48:52 +0200 (CEST) Subject: [Lxml-checkins] r30101 - lxml/branch/capi/doc Message-ID: <20060717074852.24A7D10093@code0.codespeak.net> Author: scoder Date: Mon Jul 17 09:48:51 2006 New Revision: 30101 Added: lxml/branch/capi/doc/capi.txt Log: documentation on using the external C API Added: lxml/branch/capi/doc/capi.txt ============================================================================== --- (empty file) +++ lxml/branch/capi/doc/capi.txt Mon Jul 17 09:48:51 2006 @@ -0,0 +1,83 @@ +============================== +The public C-API of lxml.etree +============================== + +As of version 1.1, lxml.etree provides a public C-API. This allows external +C extensions to efficiently access public functions and classes of lxml, +without going through the Python API. + +The API is described in the file `etreepublic.pxd`_, which is directly +c-importable by Pyrex modules. + +.. _`etreepublic.pxd`: http://codespeak.net/svn/lxml/branch/capi/src/lxml/etreepublic.pxd + + +Writing external modules in Pyrex +--------------------------------- + +This is the easiest way of extending lxml at the C level. 
A Pyrex module +should start like this:: + + # import the public functions and classes of lxml.etree + cimport etreepublic as cetree + + # import the lxml.etree module in Python + cdef object etree + from lxml import etree + + # initialize the access to the C-API of lxml.etree + cetree.import_etree(etree) + +From this line on, you can access all public functions of lxml.etree from the +``cetree`` namespace like this:: + + # build a tag name from namespace and element name + py_tag = cetree.namespacedNameFromNsName("http://some/url", "myelement") + +lxml classes are easily subclassed. For example, to implement and set a new +default element class, you can write code like the following:: + + cdef class NewElementClass(cetree.ElementBase): + def myNewMethod(self, myarg): + self.set("new_attr_myarg", myarg) + + etree.setDefaultElementClass(NewElementClass) + + +Writing external modules in C +----------------------------- + +If you really feel like it, you can also interface with lxml.etree straight +from C code. All you have to do is include the header file for the public +API, import the ``lxml.etree`` module and then call the import function:: + + /* My C extension */ + + /* common includes */ + #include "Python.h" + #include "stdio.h" + #include "string.h" + #include "stdarg.h" + #include "libxml/xmlversion.h" + #include "libxml/encoding.h" + #include "libxml/hash.h" + #include "libxml/tree.h" + #include "libxml/xmlIO.h" + #include "libxml/xmlsave.h" + #include "libxml/globals.h" + #include "libxml/xmlstring.h" + + /* lxml.etree specific includes */ + #include "lxml-version.h" + #include "etree_defs.h" + #include "etree.h" + + /* setup code */ + static PyObject* m_etree; + m_etree = _ADD_YOUR_WAY_TO_IMPORT_A_MODULE_("lxml.etree"); + + import_etree(m_etree); + +Note that including ``etree.h`` does not automatically include the header +files it requires. Note also that the above list of common imports may not be +sufficient. 
From scoder at codespeak.net Mon Jul 17 10:16:21 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 17 Jul 2006 10:16:21 +0200 (CEST) Subject: [Lxml-checkins] r30102 - lxml/branch/capi/doc Message-ID: <20060717081621.B49FA10093@code0.codespeak.net> Author: scoder Date: Mon Jul 17 10:16:20 2006 New Revision: 30102 Modified: lxml/branch/capi/doc/api.txt lxml/branch/capi/doc/main.txt lxml/branch/capi/doc/mkhtml.py Log: linked to new documentation files from api.txt and main.txt, included in html generation Modified: lxml/branch/capi/doc/api.txt ============================================================================== --- lxml/branch/capi/doc/api.txt (original) +++ lxml/branch/capi/doc/api.txt Mon Jul 17 10:16:20 2006 @@ -8,18 +8,20 @@ .. contents:: .. 1 lxml.etree - 2 Trees and Documents - 3 Iteration - 4 Parsers - 5 iterparse and iterwalk - 6 Error handling on exceptions - 7 Python unicode strings - 8 XPath - 9 XSLT - 10 RelaxNG - 11 XMLSchema - 12 xinclude - 13 write_c14n on ElementTree + 2 Other Element APIs + 3 Trees and Documents + 4 Iteration + 5 Parsers + 6 iterparse and iterwalk + 7 Error handling on exceptions + 8 Python unicode strings + 9 XPath + 10 XSLT + 11 RelaxNG + 12 XMLSchema + 13 xinclude + 14 write_c14n on ElementTree + lxml.etree ---------- @@ -43,6 +45,20 @@ >>> from StringIO import StringIO +Other Element APIs +------------------ + +While lxml.etree itself uses the ElementTree API, it is possible to replace +the Element implementation by `custom element subclasses`_. This has been +used to implement well-known XML APIs on top of lxml. The ``lxml.elementlib`` +package contains these APIs. Currently, there is an data-binding +implementation called `objectify`_, which is similar to the `Amara bindery`_. + +.. _`custom element subclasses`: namespace_extensions.html +.. _`objectify`: objectify.html +.. 
_`Amara bindery`: http://uche.ogbuji.net/tech/4suite/amara/ + + Trees and Documents ------------------- Modified: lxml/branch/capi/doc/main.txt ============================================================================== --- lxml/branch/capi/doc/main.txt (original) +++ lxml/branch/capi/doc/main.txt Mon Jul 17 10:16:20 2006 @@ -100,14 +100,16 @@ lxml also `extends this API`_ to expose libxml2 and libxslt specific functionality, such as XPath_, `Relax NG`_, `XML Schema`_, `XSLT`_, and `c14n`_. Python code can be called from XPath expressions and XSLT stylesheets -through the use of `extension functions`_. +through the use of `extension functions`_. lxml also offers a `SAX compliant +API`_, that works with the SAX support in the standard library. In addition to the ElementTree API, lxml also features a sophisticated API for `custom element classes`_. This is a simple way to write arbitrary XML driven -APIs on top of lxml. - -lxml also offers a `SAX compliant API`_, that works with the SAX support -in the standard library. +APIs on top of lxml. Some common XML APIs are implemented in the +``lxml.elementlib`` module, as described in the lxml.etree API documentation. +As of version 1.1, lxml.etree features a new `C-level API`_ that can be used +to efficiently extend lxml.etree in external C modules, including custom +element class support. .. _ElementTree: http://effbot.org/zone/element-index.htm .. _cElementTree: http://effbot.org/zone/celementtree.htm @@ -118,6 +120,7 @@ .. _`extension functions`: extensions.html .. _`custom element classes`: namespace_extensions.html .. _`SAX compliant API`: sax.html +.. _`C-level API`: capi.html .. _XPath: http://www.w3.org/TR/xpath .. 
_`Relax NG`: http://www.relaxng.org/ Modified: lxml/branch/capi/doc/mkhtml.py ============================================================================== --- lxml/branch/capi/doc/mkhtml.py (original) +++ lxml/branch/capi/doc/mkhtml.py Mon Jul 17 10:16:20 2006 @@ -13,7 +13,8 @@ for name in ['main.txt', 'intro.txt', 'api.txt', 'compatibility.txt', 'extensions.txt', 'namespace_extensions.txt', 'sax.txt', - 'build.txt', 'FAQ.txt', 'performance.txt', 'resolvers.txt']: + 'build.txt', 'FAQ.txt', 'performance.txt', 'resolvers.txt', + 'capi.txt', 'objectify.txt']: path = os.path.join(doc_dir, name) outname = os.path.splitext(name)[0] + '.html' outpath = os.path.join(dirname, outname) From scoder at codespeak.net Mon Jul 17 10:17:00 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 17 Jul 2006 10:17:00 +0200 (CEST) Subject: [Lxml-checkins] r30103 - lxml/branch/capi/doc Message-ID: <20060717081700.0C61910093@code0.codespeak.net> Author: scoder Date: Mon Jul 17 10:16:59 2006 New Revision: 30103 Modified: lxml/branch/capi/doc/capi.txt Log: clarification in capi.txt Modified: lxml/branch/capi/doc/capi.txt ============================================================================== --- lxml/branch/capi/doc/capi.txt (original) +++ lxml/branch/capi/doc/capi.txt Mon Jul 17 10:16:59 2006 @@ -34,12 +34,12 @@ # build a tag name from namespace and element name py_tag = cetree.namespacedNameFromNsName("http://some/url", "myelement") -lxml classes are easily subclassed. For example, to implement and set a new -default element class, you can write code like the following:: +Public lxml classes are easily subclassed. 
For example, to implement and set +a new default element class, you can write code like the following:: cdef class NewElementClass(cetree.ElementBase): - def myNewMethod(self, myarg): - self.set("new_attr_myarg", myarg) + def setValue(self, myval): + self.set("my_attribute", myval) etree.setDefaultElementClass(NewElementClass) From scoder at codespeak.net Mon Jul 17 10:27:15 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 17 Jul 2006 10:27:15 +0200 (CEST) Subject: [Lxml-checkins] r30104 - in lxml/branch/capi: doc src/lxml src/lxml/tests Message-ID: <20060717082715.5F22510093@code0.codespeak.net> Author: scoder Date: Mon Jul 17 10:27:13 2006 New Revision: 30104 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: attribute access through normal ET API Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Mon Jul 17 10:27:13 2006 @@ -39,20 +39,14 @@ >>> print root.b.tag b -Attributes are accessed exactly the same way, but element children take precedence:: +Attributes are accessed as in the normal ElementTree API:: >>> c = etree.SubElement(root, "c", myattr="someval") - >>> print root.c.myattr + >>> print root.c.get("myattr") someval - >>> root.set("c", "oh-oh") - >>> print root.c.tag - c - -Attributes can also be accessed through a mapping interface, which is handy in -cases where a child has the same name as an attribute:: - - >>> print root["c"] + >>> root.c.set("c", "oh-oh") + >>> print root.c.get("c") oh-oh @@ -72,25 +66,18 @@ >>> print root.c Traceback (most recent call last): ... 
- AttributeError: no such child or attribute: {ns}c + AttributeError: no such child: {ns}c You can access elements with different namespaces via ``getattr()``:: >>> print getattr(root, "{other}c").tag {other}c -The quick way through item access is also available, but as before, XML -attributes take precedence:: +For convenience, there is also a quick way through item access:: >>> print root["{other}c"].tag {other}c - >>> root.set("{other}c", "test") - >>> print root["{other}c"] - test - >>> print getattr(root, "{other}c").tag # FIXME: this looks wrong ... - {other}c - Resetting the API ----------------- Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Mon Jul 17 10:27:13 2006 @@ -57,7 +57,7 @@ """Return the (first) child with the given tag name. If no namespace is provided, the child will be looked up in the same one as self. """ - return _lookupAttribute(self, tag, 0) + return _lookupChild(self, tag) def __getitem__(self, key): """Return a sibling or attribute. 
@@ -69,7 +69,7 @@ cdef tree.xmlNode* c_parent cdef tree.xmlNode* c_node if python._isString(key): - return _lookupAttribute(self, key, 1) + return _lookupChild(self, key) c_self_node = self._c_node c_parent = c_self_node.parent if c_parent is NULL: @@ -98,7 +98,25 @@ c_node = c_node.next return NULL -cdef object _lookupAttribute(_Element element, tag, int prefer_attributes): +cdef object _lookupChild(_Element element, tag): + cdef tree.xmlNode* c_result + cdef tree.xmlNode* c_node + cdef char* c_href + cdef char* c_tag + ns, tag = cetree.getNsTag(tag) + c_tag = _cstr(tag) + c_node = element._c_node + if ns is None: + c_href = tree._getNs(c_node) + else: + c_href = _cstr(ns) + c_result = _findFollowingSibling(c_node.children, c_href, c_tag, 0) + if c_result is NULL: + raise AttributeError, "no such child: %s" % \ + cetree.namespacedNameFromNsName(c_href, c_tag) + return elementFactory(element._doc, c_result) + +cdef object _lookupAttribute(_Element element, tag): cdef tree.xmlNode* c_result cdef tree.xmlNode* c_node cdef char* c_href @@ -108,14 +126,14 @@ c_node = element._c_node if ns is None: attrval = cetree.attributeValueFromNsName(c_node, NULL, c_tag) - if attrval is not None and prefer_attributes: + if attrval is not None: return attrval c_href = tree._getNs(c_node) else: c_href = _cstr(ns) if attrval is None: attrval = cetree.attributeValueFromNsName(c_node, c_href, c_tag) - if attrval is not None and prefer_attributes: + if attrval is not None: return attrval c_result = _findFollowingSibling(c_node.children, c_href, c_tag, 0) if c_result is NULL: Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Mon Jul 17 10:27:13 2006 @@ -72,13 +72,6 @@ self.assertEquals([root.c1.c2[0], root.c1.c2[1]], list(iter((root.c1.c2)))) - def test_attr(self): - root = 
etree.XML(xml_str) - self.assertEquals("A1", root.c1.a1) - self.assertEquals("A2", root.c1.a2) - self.assertRaises(AttributeError, getattr, root.c1, "a3") - self.assertEquals("A3", root.c1["{otherNS}a3"]) - def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ObjectifyTestCase)]) From scoder at codespeak.net Mon Jul 17 12:25:42 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 17 Jul 2006 12:25:42 +0200 (CEST) Subject: [Lxml-checkins] r30111 - lxml/branch/capi/doc Message-ID: <20060717102542.DF6DD1008F@code0.codespeak.net> Author: scoder Date: Mon Jul 17 12:25:40 2006 New Revision: 30111 Modified: lxml/branch/capi/doc/api.txt lxml/branch/capi/doc/objectify.txt Log: small doc fixes Modified: lxml/branch/capi/doc/api.txt ============================================================================== --- lxml/branch/capi/doc/api.txt (original) +++ lxml/branch/capi/doc/api.txt Mon Jul 17 12:25:40 2006 @@ -51,7 +51,7 @@ While lxml.etree itself uses the ElementTree API, it is possible to replace the Element implementation by `custom element subclasses`_. This has been used to implement well-known XML APIs on top of lxml. The ``lxml.elementlib`` -package contains these APIs. Currently, there is an data-binding +package contains these APIs. Currently, there is a data-binding implementation called `objectify`_, which is similar to the `Amara bindery`_. .. _`custom element subclasses`: namespace_extensions.html Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Mon Jul 17 12:25:40 2006 @@ -4,8 +4,8 @@ lxml supports an alternative element API similar to the Amara_ bindery through a custom Element implementation. This API is very different from the -ElementTree API. If it is used, it should be used exclusively to avoid common -pitfalls. +ElementTree API. 
If it is used, it should be used *exclusively*, to avoid +common pitfalls when mixing element implementations. .. _Amara: http://uche.ogbuji.net/tech/4suite/amara/ From scoder at codespeak.net Tue Jul 18 15:19:35 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Jul 2006 15:19:35 +0200 (CEST) Subject: [Lxml-checkins] r30164 - lxml/branch/capi/doc Message-ID: <20060718131935.192771007B@code0.codespeak.net> Author: scoder Date: Tue Jul 18 15:19:33 2006 New Revision: 30164 Modified: lxml/branch/capi/doc/objectify.txt Log: doc fix Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Tue Jul 18 15:19:33 2006 @@ -22,7 +22,7 @@ Note that `namespace specific classes`_ can override this default. If ``objectify`` is in use, it is therefore advisable to let other custom element -classes inherit from the ObjectifiedElement class to make sure that all +classes inherit from the ``ObjectifiedElement`` class to make sure that all element classes provide the same API. .. _`namespace specific classes`: namespace_extensions.html From scoder at codespeak.net Tue Jul 18 15:22:43 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Jul 2006 15:22:43 +0200 (CEST) Subject: [Lxml-checkins] r30165 - in lxml/branch/capi: . 
src/lxml Message-ID: <20060718132243.7CE8B1007B@code0.codespeak.net> Author: scoder Date: Tue Jul 18 15:22:40 2006 New Revision: 30165 Added: lxml/branch/capi/src/lxml/classlookup.pyx Modified: lxml/branch/capi/setup.py lxml/branch/capi/src/lxml/etreepublic.pxd lxml/branch/capi/src/lxml/nsclasses.pxi lxml/branch/capi/src/lxml/public-api.pxi Log: support for different Element class lookup functions, lxml.elementlib.classlookup module with different implementations, major cleanup in etreepublic.pxd to make it more readable Modified: lxml/branch/capi/setup.py ============================================================================== --- lxml/branch/capi/setup.py (original) +++ lxml/branch/capi/setup.py Tue Jul 18 15:22:40 2006 @@ -1,8 +1,9 @@ import sys, os, os.path, re EXT_MODULES = [ - ("etree", "lxml.etree"), - ("objectify", "lxml.elementlib.objectify") + ("etree", "lxml.etree"), + ("objectify", "lxml.elementlib.objectify"), + ("classlookup", "lxml.elementlib.classlookup") ] setup_args = {} Added: lxml/branch/capi/src/lxml/classlookup.pyx ============================================================================== --- (empty file) +++ lxml/branch/capi/src/lxml/classlookup.pyx Tue Jul 18 15:22:40 2006 @@ -0,0 +1,77 @@ +# Configurable Element class lookup + +from python cimport isinstance, getattr, _cstr, Py_ssize_t +cimport etreepublic as cetree +cimport python +cimport tree + +cdef object etree +from lxml import etree +# initialize C-API of lxml.etree +cetree.import_etree(etree) + +cdef class ElementClassLookup: + """Superclass of Element class lookups. + """ + cdef object (*_lookup_function)(object, tree.xmlNode*) + def __init__(self): + self._lookup_function = NULL # use default lookup + + +cdef class ElementNamespaceClassLookup(ElementClassLookup): + """Looks up Element class in the Namespace registry. + """ + # uses default lookup + + +cdef class DefaultElementClassLookup(ElementClassLookup): + """Always returns the default Element class. 
+ """ + def __init__(self): + self._lookup_function = cetree.lookupDefaultElementClass + + +cdef class AttributeBasedElementClassLookup(ElementClassLookup): + """Checks an attribute of an Element and looks up the value in a class + dictionary. + + Arguments: + * attribute name ('{ns}name' style string) + * class mapping (Python dict mapping attribute values to Element classes) + + A None value in the class mapping will be checked if the attribute is missing. + """ + cdef object _class_mapping + cdef object _default_class + cdef object _pytag + cdef char* _c_ns + cdef char* _c_name + def __init__(self, attribute_name, class_mapping): + self._pytag = cetree.getNsTag(attribute_name) + ns, name = self._pytag + if ns is None: + self._c_ns = NULL + else: + self._c_ns = python._cstr(ns) + self._c_name = python._cstr(name) + self._class_mapping = dict(_class_mapping) + self._lookup_function = _attribute_lookup + +cdef object _attribute_lookup(object state, tree.xmlNode* c_node): + cdef AttributeBasedElementClassLookup lookup + cdef python.PyObject* dict_result + lookup = state + value = cetree.attributeValueFromNsName( + c_node, lookup._c_ns, lookup._c_name) + dict_result = python.PyDict_GetItemString(lookup._class_mapping, value) + if dict_result is NULL: + return cetree.lookupDefaultElementClass(None, c_node) + else: + return dict_result + + +def setElementClassLookup(ElementClassLookup lookup = None): + if lookup is None or lookup._lookup_function is NULL: + cetree.setElementClassLookupFunction(NULL, None) + else: + cetree.setElementClassLookupFunction(lookup._lookup_function, lookup) Modified: lxml/branch/capi/src/lxml/etreepublic.pxd ============================================================================== --- lxml/branch/capi/src/lxml/etreepublic.pxd (original) +++ lxml/branch/capi/src/lxml/etreepublic.pxd Tue Jul 18 15:22:40 2006 @@ -3,112 +3,136 @@ cimport tree cimport python +cdef extern from "etree_defs.h": + # + cdef int _isElement(xmlNode* node) + cdef 
char* _getNs(xmlNode* node) + cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* tree_top, + xmlNode* start_node, int inclusive) + cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* start_node) + cdef extern from "etree.h": - ########################################################################## - # public ElementTree API classes + ########################################################################## + # public ElementTree API classes + + cdef class lxml.etree._Document [ object LxmlDocument ]: + cdef tree.xmlDoc* _c_doc - cdef class lxml.etree._Document [ object LxmlDocument ]: - cdef tree.xmlDoc* _c_doc + cdef class lxml.etree._NodeBase [ object LxmlNodeBase ]: + cdef _Document _doc + cdef tree.xmlNode* _c_node - cdef class lxml.etree._NodeBase [ object LxmlNodeBase ]: - cdef _Document _doc - cdef tree.xmlNode* _c_node + cdef class lxml.etree._Element(_NodeBase) [ object LxmlElement ]: + pass - cdef class lxml.etree._Element(_NodeBase) [ object LxmlElement ]: - pass + cdef class lxml.etree.ElementBase(_Element) [ object LxmlElementBase ]: + pass - cdef class lxml.etree.ElementBase(_Element) [ object LxmlElementBase ]: - pass + cdef class lxml.etree._ElementTree [ object LxmlElementTree ]: + cdef _Document _doc + cdef _Element _element - cdef class lxml.etree._ElementTree [ object LxmlElementTree ]: - cdef _Document _doc - cdef _Element _element + ########################################################################## + # creating Element objects - ########################################################################## - # public helper functions + # First function to call! + cdef int import_etree(etree_module) except -1 - # First function to call! 
- cdef int import_etree(etree_module) except -1 + # create an Element for a C-node in the Document + cdef _Element elementFactory(_Document doc, tree.xmlNode* c_node) - # create an Element for a C-node in the Document - cdef _Element elementFactory(_Document doc, tree.xmlNode* c_node) + # create an ElementTree for an Element + cdef _ElementTree elementTreeFactory(_NodeBase context_node) - # create an ElementTree for an Element - cdef _ElementTree elementTreeFactory(_NodeBase context_node) + # create an ElementTree subclass for an Element + cdef _ElementTree newElementTree(_NodeBase context_node, object subclass) - # create an ElementTree subclass for an Element - cdef _ElementTree newElementTree(_NodeBase context_node, - object subclass) + # set the internal lookup function for Element classes + # use setElementClassLookupFunction(NULL, None) to reset it + cdef void setElementClassLookupFunction( + object (*function)(object, tree.xmlNode*), object state) - # check if a C node matches a tag name and namespace - # (NULL allowed for both) - cdef int tagMatches(tree.xmlNode* c_node, char* c_href, char* c_name) + # lookup function that always returns the default Element class + cdef object lookupDefaultElementClass(object _, tree.xmlNode* c_node) - # convert a UTF-8 char* to a Python string or unicode string - cdef object pyunicode(char* s) + ########################################################################## + # XML attribute access - # convert the string to UTF-8 using the normal lxml.etree semantics - cdef object utf8(object s) + # return an attribute value for a C attribute on a C element node + cdef object attributeValue(tree.xmlNode* c_element, + tree.xmlAttr* c_attrib_node) - # split a tag into a (URI, name) tuple - cdef object getNsTag(object tag) + # return the value of the attribute with 'ns' and 'name' (or None) + cdef object attributeValueFromNsName(tree.xmlNode* c_element, + char* c_ns, char* c_name) - # get the "{ns}tag" string for a C node - cdef 
object namespacedName(tree.xmlNode* c_node) + # return the value of attribute "{ns}name", or the default value + cdef object getAttributeValue(_NodeBase element, key, default) - # get the "{ns}tag" string for a href/tagname pair (c_ns may be NULL) - cdef object namespacedNameFromNsName(char* c_ns, char* c_tag) + ########################################################################## + # XML node helper functions - # find the Document of an Element, ElementTree or Document (itself!) - cdef _Document documentOrRaise(object input) + # find child element number 'index' (supports negative indexes) + cdef tree.xmlNode* findChild(tree.xmlNode* c_node, + python.Py_ssize_t index) - # find the root Element of an Element (itself!), ElementTree or Document - cdef _NodeBase rootNodeOrRaise(object input) + # find child element number 'index' starting at first one + cdef tree.xmlNode* findChildForwards(tree.xmlNode* c_node, + python.Py_ssize_t index) - # return an attribute value for a C attribute on a C element node - cdef object attributeValue(tree.xmlNode* c_element, - tree.xmlAttr* c_attrib_node) + # find child element number 'index' starting at last one + cdef tree.xmlNode* findChildBackwards(tree.xmlNode* c_node, + python.Py_ssize_t index) - # return the value of the attribute with 'ns' and 'name' (or None) - cdef object attributeValueFromNsName(tree.xmlNode* c_element, - char* c_ns, char* c_name) + # return next/previous sibling element of the node + cdef tree.xmlNode* nextElement(tree.xmlNode* c_node) + cdef tree.xmlNode* previousElement(tree.xmlNode* c_node) - # return the value of attribute "{ns}name", or the default value - cdef object getAttributeValue(_NodeBase element, key, default) + ########################################################################## + # iterators - # find child element number 'index' (supports negative indexes) - cdef tree.xmlNode* findChild(tree.xmlNode* c_node, - python.Py_ssize_t index) + cdef class lxml.etree._ElementTagMatcher [ 
object LxmlElementTagMatcher ]: + cdef char* _href + cdef char* _name - # find child element number 'index' starting at first one - cdef tree.xmlNode* findChildForwards(tree.xmlNode* c_node, - python.Py_ssize_t index) + # store "{ns}tag" (or None) filter for this matcher or element iterator + # ** unless _href *and* _name are set up 'by hand', this function *must* + # ** be called when subclassing the iterator below! + cdef void initTagMatch(_ElementTagMatcher matcher, tag) + + cdef class lxml.etree._ElementIterator(_ElementTagMatcher) [ + object LxmlElementIterator ]: + cdef _NodeBase _node + cdef tree.xmlNode* (*_next_element)(tree.xmlNode*) + + # store the initial node of the iterator if it matches the required tag + # or its next matching sibling if not + cdef void iteratorStoreNext(_ElementIterator iterator, _NodeBase node) + + ########################################################################## + # other helper functions + + # check if a C node matches a tag name and namespace + # (NULL allowed for each => always matches) + cdef int tagMatches(tree.xmlNode* c_node, char* c_href, char* c_name) + + # convert a UTF-8 char* to a Python string or unicode string + cdef object pyunicode(char* s) + + # convert the string to UTF-8 using the normal lxml.etree semantics + cdef object utf8(object s) + + # split a tag into a (URI, name) tuple + cdef object getNsTag(object tag) + + # get the "{ns}tag" string for a C node + cdef object namespacedName(tree.xmlNode* c_node) + + # get the "{ns}tag" string for a href/tagname pair (c_ns may be NULL) + cdef object namespacedNameFromNsName(char* c_ns, char* c_tag) + + # find the Document of an Element, ElementTree or Document (itself!) 
+ cdef _Document documentOrRaise(object input) - # find child element number 'index' starting at last one - cdef tree.xmlNode* findChildBackwards(tree.xmlNode* c_node, - python.Py_ssize_t index) - - # return next/previous sibling element of the node - cdef tree.xmlNode* nextElement(tree.xmlNode* c_node) - cdef tree.xmlNode* previousElement(tree.xmlNode* c_node) - - ########################################################################## - # iterators - - cdef class lxml.etree._ElementTagMatcher [ object LxmlElementTagMatcher ]: - cdef char* _href - cdef char* _name - - # store "{ns}tag" (or None) filter for this matcher or element iterator - # ** unless _href *and* _name are set up 'by hand', this function *must* - # ** be called when subclassing the iterator below! - cdef void initTagMatch(_ElementTagMatcher matcher, tag) - - cdef class lxml.etree._ElementIterator(_ElementTagMatcher) [ - object LxmlElementIterator ]: - cdef _NodeBase _node - cdef tree.xmlNode* (*_next_element)(tree.xmlNode*) - - # store the initial node of the iterator if it matches the required tag - # or its next matching sibling if not - cdef void iteratorStoreNext(_ElementIterator iterator, _NodeBase node) + # find the root Element of an Element (itself!), ElementTree or Document + cdef _NodeBase rootNodeOrRaise(object input) Modified: lxml/branch/capi/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/capi/src/lxml/nsclasses.pxi (original) +++ lxml/branch/capi/src/lxml/nsclasses.pxi Tue Jul 18 15:22:40 2006 @@ -27,6 +27,11 @@ else: __DEFAULT_ELEMENT_CLASS = cls +cdef object _lookupDefaultElementClass(_, xmlNode* c_node): + "Trivial class lookup function that always returns the default class." 
+ return __DEFAULT_ELEMENT_CLASS + + cdef object __DEFAULT_ELEMENT_CLASS __DEFAULT_ELEMENT_CLASS = _Element @@ -36,6 +41,7 @@ cdef object __FUNCTION_NAMESPACE_REGISTRIES __FUNCTION_NAMESPACE_REGISTRIES = {} + def Namespace(ns_uri): """Retrieve the namespace object associated with the given URI. Creates a new one if it does not yet exist.""" @@ -193,10 +199,11 @@ else: return dict_result -cdef object _find_element_class(char* c_namespace_utf, - char* c_element_name_utf): +cdef object _find_nselement_class(_, xmlNode* c_node): cdef python.PyObject* dict_result cdef _NamespaceRegistry registry + cdef char* c_namespace_utf + c_namespace_utf = _getNs(c_node) if c_namespace_utf is not NULL: dict_result = python.PyDict_GetItemString( __NAMESPACE_REGISTRIES, c_namespace_utf) @@ -209,9 +216,9 @@ registry = <_NamespaceRegistry>dict_result classes = registry._entries - if c_element_name_utf is not NULL: + if c_node.name is not NULL: dict_result = python.PyDict_GetItemString( - classes, c_element_name_utf) + classes, c_node.name) else: dict_result = NULL Modified: lxml/branch/capi/src/lxml/public-api.pxi ============================================================================== --- lxml/branch/capi/src/lxml/public-api.pxi (original) +++ lxml/branch/capi/src/lxml/public-api.pxi Tue Jul 18 15:22:40 2006 @@ -15,6 +15,13 @@ raise TypeError return _elementFactory(doc, c_node) +cdef public void setElementClassLookupFunction( + _element_class_lookup_function function, state): + _setElementClassLookupFunction(function, state) + +cdef public object lookupDefaultElementClass(state, xmlNode* c_node): + return _lookupDefaultElementClass(state, c_node) + cdef public int tagMatches(xmlNode* c_node, char* c_href, char* c_name): if c_node is NULL: return -1 From scoder at codespeak.net Tue Jul 18 15:23:01 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Jul 2006 15:23:01 +0200 (CEST) Subject: [Lxml-checkins] r30166 - lxml/branch/capi/src/lxml Message-ID: 
<20060718132301.928DA10082@code0.codespeak.net> Author: scoder Date: Tue Jul 18 15:23:00 2006 New Revision: 30166 Modified: lxml/branch/capi/src/lxml/etree.pyx Log: support for different Element class lookup functions, lxml.elementlib.classlookup module with different implementations, major cleanup in etreepublic.pxd to make it more readable Modified: lxml/branch/capi/src/lxml/etree.pyx ============================================================================== --- lxml/branch/capi/src/lxml/etree.pyx (original) +++ lxml/branch/capi/src/lxml/etree.pyx Tue Jul 18 15:23:00 2006 @@ -1154,7 +1154,7 @@ if c_node is NULL: return None if c_node.type == tree.XML_ELEMENT_NODE: - element_class = _find_element_class(_getNs(c_node), c_node.name) + element_class = LOOKUP_ELEMENT_CLASS(ELEMENT_CLASS_LOOKUP_STATE, c_node) elif c_node.type == tree.XML_COMMENT_NODE: element_class = _Comment else: @@ -1777,6 +1777,31 @@ include "xmlschema.pxi" # XMLSchema ################################################################################ +# Element class lookup + +ctypedef object (*_element_class_lookup_function)(object, xmlNode*) + +# default: Namespace classes +cdef _element_class_lookup_function DEFAULT_ELEMENT_CLASS_LOOKUP +DEFAULT_ELEMENT_CLASS_LOOKUP = _find_nselement_class + +cdef _element_class_lookup_function LOOKUP_ELEMENT_CLASS +LOOKUP_ELEMENT_CLASS = DEFAULT_ELEMENT_CLASS_LOOKUP + +cdef object ELEMENT_CLASS_LOOKUP_STATE + +cdef void _setElementClassLookupFunction( + _element_class_lookup_function function, object state): + global LOOKUP_ELEMENT_CLASS + if function is NULL: + LOOKUP_ELEMENT_CLASS = DEFAULT_ELEMENT_CLASS_LOOKUP + ELEMENT_CLASS_LOOKUP_STATE = None + else: + LOOKUP_ELEMENT_CLASS = function + ELEMENT_CLASS_LOOKUP_STATE = state + + +################################################################################ # Public C API include "public-api.pxi" From scoder at codespeak.net Tue Jul 18 15:30:17 2006 From: scoder at codespeak.net (scoder at codespeak.net) 
Date: Tue, 18 Jul 2006 15:30:17 +0200 (CEST) Subject: [Lxml-checkins] r30168 - lxml/branch/capi/src/lxml Message-ID: <20060718133017.4847210082@code0.codespeak.net> Author: scoder Date: Tue Jul 18 15:30:16 2006 New Revision: 30168 Modified: lxml/branch/capi/src/lxml/etreepublic.pxd Log: fixes for last checkins, some doc updates Modified: lxml/branch/capi/src/lxml/etreepublic.pxd ============================================================================== --- lxml/branch/capi/src/lxml/etreepublic.pxd (original) +++ lxml/branch/capi/src/lxml/etreepublic.pxd Tue Jul 18 15:30:16 2006 @@ -4,12 +4,17 @@ cimport python cdef extern from "etree_defs.h": - # - cdef int _isElement(xmlNode* node) - cdef char* _getNs(xmlNode* node) - cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* tree_top, - xmlNode* start_node, int inclusive) - cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* start_node) + # test if c_node is considered an Element (i.e. Element or Comment) + cdef int _isElement(tree.xmlNode* c_node) + + # return the namespace URI of the node or NULL + cdef char* _getNs(tree.xmlNode* node) + + # pair of macros for tree traversal + cdef void BEGIN_FOR_EACH_ELEMENT_FROM(tree.xmlNode* tree_top, + tree.xmlNode* start_node, + int inclusive) + cdef void END_FOR_EACH_ELEMENT_FROM(tree.xmlNode* start_node) cdef extern from "etree.h": ########################################################################## From scoder at codespeak.net Tue Jul 18 15:31:36 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Jul 2006 15:31:36 +0200 (CEST) Subject: [Lxml-checkins] r30169 - lxml/branch/capi/src/lxml Message-ID: <20060718133136.B4E5F10082@code0.codespeak.net> Author: scoder Date: Tue Jul 18 15:31:35 2006 New Revision: 30169 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: removed unused C function _lookupAttribute from objectify.pyx Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- 
lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Jul 18 15:31:35 2006 @@ -116,32 +116,4 @@ cetree.namespacedNameFromNsName(c_href, c_tag) return elementFactory(element._doc, c_result) -cdef object _lookupAttribute(_Element element, tag): - cdef tree.xmlNode* c_result - cdef tree.xmlNode* c_node - cdef char* c_href - cdef char* c_tag - ns, tag = cetree.getNsTag(tag) - c_tag = _cstr(tag) - c_node = element._c_node - if ns is None: - attrval = cetree.attributeValueFromNsName(c_node, NULL, c_tag) - if attrval is not None: - return attrval - c_href = tree._getNs(c_node) - else: - c_href = _cstr(ns) - if attrval is None: - attrval = cetree.attributeValueFromNsName(c_node, c_href, c_tag) - if attrval is not None: - return attrval - c_result = _findFollowingSibling(c_node.children, c_href, c_tag, 0) - if c_result is NULL: - if attrval is not None: - return attrval - else: - raise AttributeError, "no such child or attribute: %s" % \ - cetree.namespacedNameFromNsName(c_href, c_tag) - return elementFactory(element._doc, c_result) - #etree.setDefaultElementClass(MyElement) From scoder at codespeak.net Tue Jul 18 16:03:48 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Jul 2006 16:03:48 +0200 (CEST) Subject: [Lxml-checkins] r30172 - in lxml/branch/capi/src/lxml: . 
tests Message-ID: <20060718140348.F2F9C1007D@code0.codespeak.net> Author: scoder Date: Tue Jul 18 16:03:46 2006 New Revision: 30172 Added: lxml/branch/capi/src/lxml/tests/test_classlookup.py Modified: lxml/branch/capi/src/lxml/classlookup.pyx lxml/branch/capi/src/lxml/etree.pyx Log: test cases and fixes for classlookup module Modified: lxml/branch/capi/src/lxml/classlookup.pyx ============================================================================== --- lxml/branch/capi/src/lxml/classlookup.pyx (original) +++ lxml/branch/capi/src/lxml/classlookup.pyx Tue Jul 18 16:03:46 2006 @@ -24,7 +24,7 @@ # uses default lookup -cdef class DefaultElementClassLookup(ElementClassLookup): +cdef class ElementDefaultClassLookup(ElementClassLookup): """Always returns the default Element class. """ def __init__(self): @@ -39,10 +39,9 @@ * attribute name ('{ns}name' style string) * class mapping (Python dict mapping attribute values to Element classes) - A None value in the class mapping will be checked if the attribute is missing. + A None key in the class mapping will be checked if the attribute is missing. 
""" cdef object _class_mapping - cdef object _default_class cdef object _pytag cdef char* _c_ns cdef char* _c_name @@ -52,9 +51,9 @@ if ns is None: self._c_ns = NULL else: - self._c_ns = python._cstr(ns) - self._c_name = python._cstr(name) - self._class_mapping = dict(_class_mapping) + self._c_ns = _cstr(ns) + self._c_name = _cstr(name) + self._class_mapping = dict(class_mapping) self._lookup_function = _attribute_lookup cdef object _attribute_lookup(object state, tree.xmlNode* c_node): @@ -63,7 +62,7 @@ lookup = state value = cetree.attributeValueFromNsName( c_node, lookup._c_ns, lookup._c_name) - dict_result = python.PyDict_GetItemString(lookup._class_mapping, value) + dict_result = python.PyDict_GetItem(lookup._class_mapping, value) if dict_result is NULL: return cetree.lookupDefaultElementClass(None, c_node) else: Modified: lxml/branch/capi/src/lxml/etree.pyx ============================================================================== --- lxml/branch/capi/src/lxml/etree.pyx (original) +++ lxml/branch/capi/src/lxml/etree.pyx Tue Jul 18 16:03:46 2006 @@ -1792,7 +1792,7 @@ cdef void _setElementClassLookupFunction( _element_class_lookup_function function, object state): - global LOOKUP_ELEMENT_CLASS + global LOOKUP_ELEMENT_CLASS, ELEMENT_CLASS_LOOKUP_STATE if function is NULL: LOOKUP_ELEMENT_CLASS = DEFAULT_ELEMENT_CLASS_LOOKUP ELEMENT_CLASS_LOOKUP_STATE = None Added: lxml/branch/capi/src/lxml/tests/test_classlookup.py ============================================================================== --- (empty file) +++ lxml/branch/capi/src/lxml/tests/test_classlookup.py Tue Jul 18 16:03:46 2006 @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- + +""" +Tests specific to the extended etree API + +Tests that apply to the general ElementTree API should go into +test_elementtree +""" + + +import unittest, doctest, operator + +from common_imports import etree, StringIO, HelperTestCase, fileInTestDir +from common_imports import SillyFileLike, canonicalize + +from lxml.elementlib 
import classlookup + +xml_str = '''\ + + + 0 + 1 + 2 + +''' + +class ClassLookupTestCase(HelperTestCase): + """Test cases for lxml.elementlib.classlookup + """ + etree = etree + + def tearDown(self): + classlookup.setElementClassLookup() + ns = etree.Namespace("myNS") + ns.clear() + + def test_namespace_lookup(self): + class TestElement(etree.ElementBase): + FIND_ME = "namespace class" + + ns = etree.Namespace("myNS") + ns[None] = TestElement + + lookup = classlookup.ElementNamespaceClassLookup() + classlookup.setElementClassLookup(lookup) + + root = etree.XML(xml_str) + self.assertEquals(root.FIND_ME, + TestElement.FIND_ME) + self.assertEquals(root[0].FIND_ME, + TestElement.FIND_ME) + self.assertFalse(hasattr(root[0][-1], 'FIND_ME')) + + def test_default_class_lookup(self): + class TestElement(etree.ElementBase): + FIND_ME = "namespace class" + + ns = etree.Namespace("myNS") + ns[None] = TestElement + + lookup = classlookup.ElementDefaultClassLookup() + classlookup.setElementClassLookup(lookup) + + root = etree.XML(xml_str) + self.assertFalse(hasattr(root, 'FIND_ME')) + self.assertFalse(hasattr(root[0][-1], 'FIND_ME')) + + def test_attribute_based_lookup(self): + class TestElement(etree.ElementBase): + FIND_ME = "attribute_based" + + class_dict = {"A1" : TestElement} + + lookup = classlookup.AttributeBasedElementClassLookup( + "a1", class_dict) + classlookup.setElementClassLookup(lookup) + + root = etree.XML(xml_str) + self.assertFalse(hasattr(root, 'FIND_ME')) + self.assertEquals(root[0].FIND_ME, + TestElement.FIND_ME) + self.assertFalse(hasattr(root[0][0], 'FIND_ME')) + + +def test_suite(): + suite = unittest.TestSuite() + suite.addTests([unittest.makeSuite(ClassLookupTestCase)]) + return suite + +if __name__ == '__main__': + unittest.main() From scoder at codespeak.net Tue Jul 18 17:57:47 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Jul 2006 17:57:47 +0200 (CEST) Subject: [Lxml-checkins] r30176 - lxml/branch/capi/src/lxml 
Message-ID: <20060718155747.0953A1009C@code0.codespeak.net> Author: scoder Date: Tue Jul 18 17:57:45 2006 New Revision: 30176 Modified: lxml/branch/capi/src/lxml/etreepublic.pxd lxml/branch/capi/src/lxml/public-api.pxi Log: new public C functions to get text and tail Modified: lxml/branch/capi/src/lxml/etreepublic.pxd ============================================================================== --- lxml/branch/capi/src/lxml/etreepublic.pxd (original) +++ lxml/branch/capi/src/lxml/etreepublic.pxd Tue Jul 18 17:57:45 2006 @@ -136,6 +136,12 @@ # get the "{ns}tag" string for a href/tagname pair (c_ns may be NULL) cdef object namespacedNameFromNsName(char* c_ns, char* c_tag) + # get the text content of an element (or None) + cdef object textOf(tree.xmlNode* c_node) + + # get the tail content of an element (or None) + cdef object tailOf(tree.xmlNode* c_node) + # find the Document of an Element, ElementTree or Document (itself!) cdef _Document documentOrRaise(object input) Modified: lxml/branch/capi/src/lxml/public-api.pxi ============================================================================== --- lxml/branch/capi/src/lxml/public-api.pxi (original) +++ lxml/branch/capi/src/lxml/public-api.pxi Tue Jul 18 17:57:45 2006 @@ -33,6 +33,16 @@ cdef public _NodeBase rootNodeOrRaise(object input): return _rootNodeOrRaise(input) +cdef public object textOf(xmlNode* c_node): + if c_node is NULL: + return None + return _collectText(c_node.children) + +cdef public object tailOf(xmlNode* c_node): + if c_node is NULL: + return None + return _collectText(c_node.next) + cdef public object attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node): return _attributeValue(c_element, c_attrib_node) From scoder at codespeak.net Tue Jul 18 17:59:58 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Jul 2006 17:59:58 +0200 (CEST) Subject: [Lxml-checkins] r30177 - in lxml/branch/capi: doc src/lxml Message-ID: <20060718155958.5911E1009C@code0.codespeak.net> Author: 
scoder Date: Tue Jul 18 17:59:56 2006 New Revision: 30177 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: first shot on data type handling, changed objectify module setup to require calling register()/unregister() functions (needed to register/unregister element lookup function) Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Tue Jul 18 17:59:56 2006 @@ -4,20 +4,20 @@ lxml supports an alternative element API similar to the Amara_ bindery through a custom Element implementation. This API is very different from the -ElementTree API. If it is used, it should be used *exclusively*, to avoid +ElementTree API. If it is used, it can only be used *exclusively*, to avoid common pitfalls when mixing element implementations. .. _Amara: http://uche.ogbuji.net/tech/4suite/amara/ You can replace the original implementation by the ``objectify`` element class -like this:: +by simply importing the module and calling the ``register`` function:: >>> from lxml import etree - >>> from lxml.elementlib.objectify import ObjectifiedElement - >>> etree.setDefaultElementClass(ObjectifiedElement) + >>> from lxml.elementlib import objectify + >>> objectify.register() >>> el = etree.Element("test") - >>> print isinstance(el, ObjectifiedElement) + >>> print isinstance(el, objectify.ObjectifiedElement) True Note that `namespace specific classes`_ can override this default. If @@ -79,18 +79,34 @@ {other}c +Python data types +----------------- + +The objectify module knows about Python data types and tries its best to let +element content behave like them. 
For example, this works: + + >>> root = etree.XML("<root><a>5</a><b>11</b></root>") + >>> root.a + root.b + 16 + +Objectify determines data types by trial and error, unless it finds an +attribute ``pytype`` in the namespace URI given by +``lxml.elementlib.objectify.PYTYPE_NAMESPACE``, which must contain any of the +following string values: int, float, str, unicode. + + Resetting the API ----------------- -You can reset the API to the original ElementTree API by resetting the element -class. Be aware, though, that this does not immediately apply to elements to -which there is a Python reference. Their Python class will only be changed -after all references are gone and the Python object is garbage collected. -When you access an element for which there is not currently a Python -representation, it will be created with the currently registered element -class:: +You can reset the API to the original ElementTree API by calling the +``unregister()`` function. Be aware, though, that this does not immediately +apply to elements to which there is a Python reference. Their Python class +will only be changed after all references are gone and the Python object is +garbage collected. 
When you access an element for which there is not +currently a Python representation, it will be created with the currently +registered element class:: - >>> etree.setDefaultElementClass() + >>> objectify.unregister() >>> el = etree.Element("test") - >>> print isinstance(el, ObjectifiedElement) + >>> print isinstance(el, objectify.ObjectifiedElement) False Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Jul 18 17:59:56 2006 @@ -1,5 +1,5 @@ from etreepublic cimport _Element, ElementBase, _ElementIterator -from etreepublic cimport elementFactory, import_etree +from etreepublic cimport elementFactory, import_etree, textOf from python cimport isinstance, getattr, _cstr, Py_ssize_t cimport etreepublic as cetree cimport python @@ -10,6 +10,21 @@ # initialize C-API of lxml.etree import_etree(etree) +cdef object __builtin__ +import __builtin__ +cdef object _int +_int = __builtin__.int +cdef object _float +_float = __builtin__.float +cdef object _str +_str = __builtin__.str + +# namespace for "pytype" hint attribute +PYTYPE_NAMESPACE = "http://codespeak.net/lxml/objectify/pytype" +cdef object _PYTYPE_NAMESPACE +_PYTYPE_NAMESPACE = _cstr(PYTYPE_NAMESPACE) + + cdef class ObjectifiedElement(ElementBase): """Element class with an Amara-like API. @@ -59,6 +74,20 @@ """ return _lookupChild(self, tag) +## def __setattr__(self, tag, value): +## """Set the value of (first) child with the given tag name. If no +## namespace is provided, the child will be looked up in the same one as +## self. +## """ +## element = _lookupChild(self, tag) +## if isinstance(value, ObjectifiedElement): +## # FIXME? +## raise NotImplementedError +## elif python._isString(value): +## element.text = value +## else: +## element.text = str(value) + def __getitem__(self, key): """Return a sibling or attribute. 
* If argument is an integer, returns the following sibling @@ -116,4 +145,98 @@ cetree.namespacedNameFromNsName(c_href, c_tag) return elementFactory(element._doc, c_result) -#etree.setDefaultElementClass(MyElement) +################################################################################ +# Data type support in subclasses + +cdef class _NumberElement(ObjectifiedElement): + cdef object _type + cdef _checkType(self, value): + if not isinstance(value, self._type): + self._type = type(value) + + cdef _value(self): + return self._type(textOf(self._c_node)) + + def value(self): + return self._value() + + def __int__(self): + return int(textOf(self._c_node)) + + def __float__(self): + return float(textOf(self._c_node)) + + def __add__(self, other): + if isinstance(other, _NumberElement): + other = (<_NumberElement>other)._value() + return (<_NumberElement>self)._value() + other + + def __iadd__(self, other): + if isinstance(other, _NumberElement): + other = (<_NumberElement>other)._value() + result = self._value() + other + self._checkType(result) + self.text = _str(result) + +cdef class _IntElement(_NumberElement): + def _init(self): + self._type = int + +cdef class _FloatElement(_NumberElement): + def _init(self): + self._type = float + +cdef class _StringElement(ObjectifiedElement): + pass + +cdef object _TYPE_DICT +_TYPE_DICT = { + 'int' : _IntElement, + 'float' : _FloatElement, + 'str' : _StringElement, + 'unicode' : _StringElement + } + +cdef object _PY_NUMBER_TYPES +_PY_NUMBER_TYPES = (int, float) + +cdef object _guessElementClass(tree.xmlNode* c_node): + value = textOf(c_node) + if python.PyUnicode_Check(value): + # values containing unicode characters cannot be numbers + return _StringElement + for pytype in _PY_NUMBER_TYPES: + try: + pytype(value) + except: + pass + else: + return _TYPE_DICT[pytype.__name__] + return _StringElement + +################################################################################ +# Element class lookup + +cdef object 
_lookupElementClass(_, tree.xmlNode* c_node): + cdef python.PyObject* dict_result + value = cetree.attributeValueFromNsName( + c_node, _PYTYPE_NAMESPACE, "pytype") + if value is None: + el_class = _guessElementClass(c_node) + if el_class is not None: + return el_class + else: + dict_result = python.PyDict_GetItem(_TYPE_DICT, value) + if dict_result is not NULL: + return dict_result + # FIXME: is it right to raise an Exception based on data? + raise ValueError, "Invalid type attribute in element '%s'" % \ + cetree.namespacedNameFromNsName(tree._getNs(c_node), c_node.name) + +def register(): + etree.setDefaultElementClass(ObjectifiedElement) + cetree.setElementClassLookupFunction(_lookupElementClass, None) + +def unregister(): + etree.setDefaultElementClass() + cetree.setElementClassLookupFunction(NULL, None) From scoder at codespeak.net Tue Jul 18 18:35:59 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Jul 2006 18:35:59 +0200 (CEST) Subject: [Lxml-checkins] r30178 - lxml/branch/capi/src/lxml Message-ID: <20060718163559.D176610094@code0.codespeak.net> Author: scoder Date: Tue Jul 18 18:35:58 2006 New Revision: 30178 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: element class lookup: shortcut to use standard class if node has children (no data leaf) Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Jul 18 18:35:58 2006 @@ -202,6 +202,9 @@ cdef object _guessElementClass(tree.xmlNode* c_node): value = textOf(c_node) + if value is None: + # FIXME: how do we get rid of the class if it was not the right one? 
+ return ObjectifiedElement if python.PyUnicode_Check(value): # values containing unicode characters cannot be numbers return _StringElement @@ -219,6 +222,9 @@ cdef object _lookupElementClass(_, tree.xmlNode* c_node): cdef python.PyObject* dict_result + if cetree.findChildForwards(c_node, 0): + # element has children => no data class + return ObjectifiedElement value = cetree.attributeValueFromNsName( c_node, _PYTYPE_NAMESPACE, "pytype") if value is None: From scoder at codespeak.net Tue Jul 18 19:38:58 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Jul 2006 19:38:58 +0200 (CEST) Subject: [Lxml-checkins] r30180 - lxml/branch/capi/src/lxml Message-ID: <20060718173858.35AA11009C@code0.codespeak.net> Author: scoder Date: Tue Jul 18 19:38:56 2006 New Revision: 30180 Modified: lxml/branch/capi/src/lxml/apihelpers.pxi lxml/branch/capi/src/lxml/etree.pyx lxml/branch/capi/src/lxml/etreepublic.pxd lxml/branch/capi/src/lxml/public-api.pxi Log: public functions to set element text and tail Modified: lxml/branch/capi/src/lxml/apihelpers.pxi ============================================================================== --- lxml/branch/capi/src/lxml/apihelpers.pxi (original) +++ lxml/branch/capi/src/lxml/apihelpers.pxi Tue Jul 18 19:38:56 2006 @@ -198,7 +198,7 @@ c_node = c_node.next return funicode(result) -cdef _removeText(xmlNode* c_node): +cdef void _removeText(xmlNode* c_node): """Remove all text nodes. Start removing at c_node. 
@@ -211,6 +211,31 @@ tree.xmlFreeNode(c_node) c_node = c_next +cdef _setNodeText(xmlNode* c_node, value): + cdef xmlNode* c_text_node + # remove all text nodes at the start first + _removeText(c_node.children) + if value is None: + return + # now add new text node with value at start + text = _utf8(value) + c_text_node = tree.xmlNewDocText(c_node.doc, _cstr(text)) + if c_node.children is NULL: + tree.xmlAddChild(c_node, c_text_node) + else: + tree.xmlAddPrevSibling(c_node.children, c_text_node) + +cdef _setTailText(xmlNode* c_node, value): + cdef xmlNode* c_text_node + # remove all text nodes at the start first + _removeText(c_node.next) + if value is None: + return + text = _utf8(value) + c_text_node = tree.xmlNewDocText(c_node.doc, _cstr(text)) + # XXX what if we're the top element? + tree.xmlAddNextSibling(c_node, c_text_node) + cdef xmlNode* _findChild(xmlNode* c_node, Py_ssize_t index): if index < 0: return _findChildBackwards(c_node, -index - 1) Modified: lxml/branch/capi/src/lxml/etree.pyx ============================================================================== --- lxml/branch/capi/src/lxml/etree.pyx (original) +++ lxml/branch/capi/src/lxml/etree.pyx Tue Jul 18 19:38:56 2006 @@ -787,20 +787,7 @@ return _collectText(self._c_node.children) def __set__(self, value): - cdef xmlNode* c_text_node - # remove all text nodes at the start first - _removeText(self._c_node.children) - if value is None: - return - # now add new text node with value at start - text = _utf8(value) - c_text_node = tree.xmlNewDocText(self._doc._c_doc, - _cstr(text)) - if self._c_node.children is NULL: - tree.xmlAddChild(self._c_node, c_text_node) - else: - tree.xmlAddPrevSibling(self._c_node.children, - c_text_node) + _setNodeText(self._c_node, value) property tail: """Text after this element's end tag, but before the next sibling @@ -811,15 +798,7 @@ return _collectText(self._c_node.next) def __set__(self, value): - cdef xmlNode* c_text_node - # remove all text nodes at the start 
first - _removeText(self._c_node.next) - if value is None: - return - text = _utf8(value) - c_text_node = tree.xmlNewDocText(self._doc._c_doc, _cstr(text)) - # XXX what if we're the top element? - tree.xmlAddNextSibling(self._c_node, c_text_node) + _setTailText(self._c_node, value) # not in ElementTree, read-only property prefix: Modified: lxml/branch/capi/src/lxml/etreepublic.pxd ============================================================================== --- lxml/branch/capi/src/lxml/etreepublic.pxd (original) +++ lxml/branch/capi/src/lxml/etreepublic.pxd Tue Jul 18 19:38:56 2006 @@ -142,6 +142,12 @@ # get the tail content of an element (or None) cdef object tailOf(tree.xmlNode* c_node) + # set the text value of an element + cdef object setNodeText(tree.xmlNode* c_node, text) + + # set the tail text value of an element + cdef object setTailText(tree.xmlNode* c_node, text) + # find the Document of an Element, ElementTree or Document (itself!) cdef _Document documentOrRaise(object input) Modified: lxml/branch/capi/src/lxml/public-api.pxi ============================================================================== --- lxml/branch/capi/src/lxml/public-api.pxi (original) +++ lxml/branch/capi/src/lxml/public-api.pxi Tue Jul 18 19:38:56 2006 @@ -43,6 +43,16 @@ return None return _collectText(c_node.next) +cdef public object setNodeText(xmlNode* c_node, text): + if c_node is NULL: + raise ValueError + _setNodeText(c_node, text) + +cdef public object setTailText(xmlNode* c_node, text): + if c_node is NULL: + raise ValueError + _setTailText(c_node, text) + cdef public object attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node): return _attributeValue(c_element, c_attrib_node) From scoder at codespeak.net Tue Jul 18 20:51:13 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 18 Jul 2006 20:51:13 +0200 (CEST) Subject: [Lxml-checkins] r30182 - in lxml/branch/capi: doc src/lxml Message-ID: <20060718185113.19A06100A2@code0.codespeak.net> Author: 
scoder Date: Tue Jul 18 20:51:11 2006 New Revision: 30182 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: more complete implementations of standard data types, in-place operations work Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Tue Jul 18 20:51:11 2006 @@ -88,12 +88,32 @@ >>> root = etree.XML("511") >>> root.a + root.b 16 + >>> root.a += root.b + >>> print root.a + 16 + >>> root.a = 2 + >>> print root.a + 2 + 4 + >>> print 1 + root.a + 3 Objectify determines data types by trial and error, unless it finds an attribute ``pytype`` in the namespace URI given by ``lxml.elementlib.objectify.PYTYPE_NAMESPACE``, which must contain any of the following string values: int, float, str, unicode. + >>> pytype_attr = "{%s}%s" % (objectify.PYTYPE_NAMESPACE, "pytype") + + >>> el = etree.Element("test", {pytype_attr : "int"}) + >>> el.text = "5" + >>> el + 10 + 15 + + >>> el = etree.Element("test", {pytype_attr : "str"}) + >>> el.text = "5" + >>> print el + "10" + 510 + Resetting the API ----------------- Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Jul 18 20:51:11 2006 @@ -14,10 +14,18 @@ import __builtin__ cdef object _int _int = __builtin__.int +cdef object _long +_long = __builtin__.long cdef object _float _float = __builtin__.float cdef object _str _str = __builtin__.str +cdef object _pow +_pow = __builtin__.pow +cdef object _abs +_abs = __builtin__.abs +cdef object _len +_len = __builtin__.len # namespace for "pytype" hint attribute PYTYPE_NAMESPACE = "http://codespeak.net/lxml/objectify/pytype" @@ -42,6 +50,9 @@ return iter([self]) return etree.ElementChildIterator(parent, 
tag=self.tag) + def __str__(self): + return textOf(self._c_node) + def __len__(self): """Count self and siblings with the same tag. """ @@ -74,19 +85,27 @@ """ return _lookupChild(self, tag) -## def __setattr__(self, tag, value): -## """Set the value of (first) child with the given tag name. If no -## namespace is provided, the child will be looked up in the same one as -## self. -## """ -## element = _lookupChild(self, tag) -## if isinstance(value, ObjectifiedElement): -## # FIXME? -## raise NotImplementedError -## elif python._isString(value): -## element.text = value -## else: -## element.text = str(value) + def __setattr__(self, tag, value): + """Set the value of the (first) child with the given tag name. If no + namespace is provided, the child will be looked up in the same one as + self. + """ + cdef _Element element + if tag == 'text': + cetree.setNodeText(self._c_node, value) + elif tag == 'tail': + cetree.setTailText(self._c_node, value) + elif tag == 'tag': + # FIXME? + raise NotImplementedError + else: + element = _lookupChild(self, tag) + if isinstance(value, ObjectifiedElement): + # FIXME? + raise NotImplementedError + if not python._isString(value): + value = str(value) + cetree.setNodeText(element._c_node, value) def __getitem__(self, key): """Return a sibling or attribute. 
@@ -161,37 +180,135 @@ return self._value() def __int__(self): - return int(textOf(self._c_node)) + return _int(textOf(self._c_node)) + + def __long__(self): + return _long(textOf(self._c_node)) def __float__(self): - return float(textOf(self._c_node)) + return _float(textOf(self._c_node)) + +# def __oct__(self): +# def __hex__(self): def __add__(self, other): - if isinstance(other, _NumberElement): - other = (<_NumberElement>other)._value() - return (<_NumberElement>self)._value() + other - - def __iadd__(self, other): - if isinstance(other, _NumberElement): - other = (<_NumberElement>other)._value() - result = self._value() + other - self._checkType(result) - self.text = _str(result) + return _numericValueOf(self) + _numericValueOf(other) + + def __sub__(self, other): + return _numericValueOf(self) - _numericValueOf(other) + + def __mul__(self, other): + return _numericValueOf(self) * _numericValueOf(other) + + def __div__(self, other): + return _numericValueOf(self) / _numericValueOf(other) + + def __truediv__(self, other): + return _numericValueOf(self) / _numericValueOf(other) + + def __mod__(self, other): + return _numericValueOf(self) % _numericValueOf(other) + + def __pow__(self, other, modulo): + if modulo is None: + return _numericValueOf(self) ** _numericValueOf(other) + else: + return _pow(_numericValueOf(self), _numericValueOf(other), modulo) + + def __neg__(self): + return - _numericValueOf(self) + + def __pos__(self): + return + _numericValueOf(self) + + def __abs__(self): + return _abs( _numericValueOf(self) ) + + def __nonzero__(self): + return _numericValueOf(self) + + def __invert__(self): + return ~ _numericValueOf(self) + + def __lshift__(self, other): + return _numericValueOf(self) << _numericValueOf(other) + + def __rshift__(self, other): + return _numericValueOf(self) >> _numericValueOf(other) + + def __and__(self, other): + return _numericValueOf(self) & _numericValueOf(other) + + def __or__(self, other): + return _numericValueOf(self) | 
_numericValueOf(other) + + def __xor__(self, other): + return _numericValueOf(self) ^ _numericValueOf(other) + +## def __iadd__(self, other): +## if isinstance(other, _NumberElement): +## other = (<_NumberElement>other)._value() +## result = self._value() + other +## self._checkType(result) +## cetree.setNodeText(self._c_node, _str(result)) +## return self cdef class _IntElement(_NumberElement): def _init(self): self._type = int +cdef class _LongElement(_NumberElement): + def _init(self): + self._type = long + cdef class _FloatElement(_NumberElement): def _init(self): self._type = float cdef class _StringElement(ObjectifiedElement): - pass + def __len__(self): + return _len(_strValueOf(self)) + + def __add__(self, other): + text = _strValueOf(self) + other = _strValueOf(other) + if text is None: + return other + if other is None: + return text + return text + other + + def __mul__(self, other): + if isinstance(self, _StringElement): + return textOf((<_StringElement>self)._c_node) * _numericValueOf(other) + else: + raise TypeError, "invalid types for * operator" + + def __getitem__(self, index): + return textOf(self._c_node)[index] + + def __contains__(self, text): + return text in textOf(self._c_node) + +cdef object _strValueOf(obj): + if python._isString(obj): + return obj + if isinstance(obj, _StringElement): + return textOf((<_StringElement>obj)._c_node) + if obj is None: + return '' + return str(obj) + +cdef object _numericValueOf(obj): + if isinstance(obj, _NumberElement): + return (<_NumberElement>obj)._type( + textOf((<_NumberElement>obj)._c_node)) + return obj cdef object _TYPE_DICT _TYPE_DICT = { 'int' : _IntElement, + 'long' : _LongElement, 'float' : _FloatElement, 'str' : _StringElement, 'unicode' : _StringElement From scoder at codespeak.net Wed Jul 19 08:41:13 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Jul 2006 08:41:13 +0200 (CEST) Subject: [Lxml-checkins] r30200 - lxml/trunk/doc Message-ID: 
<20060719064113.CB1F91009D@code0.codespeak.net> Author: scoder Date: Wed Jul 19 08:41:10 2006 New Revision: 30200 Modified: lxml/trunk/doc/FAQ.txt lxml/trunk/doc/api.txt Log: clarification in api.txt/FAQ.txt: do not delete parents in iterparse()! Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Wed Jul 19 08:41:10 2006 @@ -89,6 +89,18 @@ b) do not terminate threads while the trees they parsed are still in use +#) Why can't I just delete parents or clear the root node in iterparse()? + + The ``iterparse()`` implementation is based on the libxml2 parser. It + requires the tree to be intact to finish parsing. If you delete or modify + parents of the current node, chances are you modify the structure in a way + that breaks the parser. Normally, this will result in a segfault. Please + refer to the `iterparse section`_ of the lxml API documentation to find out + what you can do and what you can't do. + + .. _`iterparse section`: api.html#iterparse-and-iterwalk + + #) Why doesn't the ``pretty_print`` option reformat my XML output? Pretty printing (or formatting) an XML document means adding white space to Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Wed Jul 19 08:41:10 2006 @@ -276,10 +276,27 @@ >>> context.root.getchildren() [] -During the 'start' event, the descendants and following siblings are not yet -available. Note that you should not modify or move the ancestors or siblings -of the element during either of the two events. You should also avoid moving -the element itself. +**WARNING**: During the 'start' event, the descendants and following siblings +are not yet available and should not be accessed. During the 'end' event, the +following siblings of the element should not be accessed.
During either of +the two events, you **must not** modify or move the ancestors (parents) of the +current element. You should also avoid moving or discarding the element +itself. + +If you want to save more memory without deleting parents, you can clean up the +preceding siblings of the current element:: + + >>> for event, element in etree.iterparse(StringIO(xml)): + ... # ... do something with the element + ... element.clear() # clean up children + ... if element.getprevious(): # clean up preceding siblings + ... del element.getparent()[0] + +You can use ``while`` instead of ``if`` if you skipped siblings using the +``tag`` keyword argument. The more selective your tag is, however, the more +thoughts you will have to put into cleaning up the elements that were skipped. +Therefore, it is sometimes easier to traverse all elements and do the tag +selection by hand in the event handler code. The 'start-ns' and 'end-ns' events notify about namespace declarations and generate tuples ``(prefix, URI)``:: From scoder at codespeak.net Wed Jul 19 08:57:41 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Jul 2006 08:57:41 +0200 (CEST) Subject: [Lxml-checkins] r30203 - lxml/trunk/doc Message-ID: <20060719065741.3138010094@code0.codespeak.net> Author: scoder Date: Wed Jul 19 08:57:38 2006 New Revision: 30203 Modified: lxml/trunk/doc/api.txt lxml/trunk/doc/compatibility.txt Log: note on different iterparse() cleanup in compatibility.txt, more clarifications in api.txt Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Wed Jul 19 08:57:38 2006 @@ -281,10 +281,12 @@ following siblings of the element should not be accessed. During either of the two events, you **must not** modify or move the ancestors (parents) of the current element. You should also avoid moving or discarding the element -itself. +itself. 
The golden rule is: do not touch anything that will have to be +touched again by the parser later on. -If you want to save more memory without deleting parents, you can clean up the -preceding siblings of the current element:: +If you have elements with a long list of children in your XML file and want to +save more memory during parsing, you can clean up the preceding siblings of +the current element:: >>> for event, element in etree.iterparse(StringIO(xml)): ... # ... do something with the element @@ -294,9 +296,9 @@ You can use ``while`` instead of ``if`` if you skipped siblings using the ``tag`` keyword argument. The more selective your tag is, however, the more -thoughts you will have to put into cleaning up the elements that were skipped. -Therefore, it is sometimes easier to traverse all elements and do the tag -selection by hand in the event handler code. +thought you will have to put into finding the right way to clean up the +elements that were skipped. Therefore, it is sometimes easier to traverse all +elements and do the tag selection by hand in the event handler code. The 'start-ns' and 'end-ns' events notify about namespace declarations and generate tuples ``(prefix, URI)``:: Modified: lxml/trunk/doc/compatibility.txt ============================================================================== --- lxml/trunk/doc/compatibility.txt (original) +++ lxml/trunk/doc/compatibility.txt Wed Jul 19 08:57:38 2006 @@ -91,6 +91,15 @@ instead of a SyntaxError. lxml.etree follows the other parts of the parser API and raises an (XML)SyntaxError. +* The ``iterparse()`` function in lxml is implemented based on the libxml2 + parser. This means that modifications of the document root or the ancestors + of the current element during parsing can irritate the parser and even + segfault. While this is not a problem in the Python object structure used + by ElementTree, the C tree underlying lxml suffers from it. 
The golden rule + for ``iterparse()`` on lxml therefore is: do not touch anything that will + have to be touched again by the parser later on. See the lxml API + documentation on this. + * ElementTree has a bug when serializing an empty Comment (no text argument given) to XML, etree serializes this successfully. From scoder at codespeak.net Wed Jul 19 09:01:47 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Jul 2006 09:01:47 +0200 (CEST) Subject: [Lxml-checkins] r30204 - lxml/trunk/doc Message-ID: <20060719070147.ECDF910094@code0.codespeak.net> Author: scoder Date: Wed Jul 19 09:01:46 2006 New Revision: 30204 Modified: lxml/trunk/doc/api.txt Log: more clarifications on iterparse() in api.txt Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Wed Jul 19 09:01:46 2006 @@ -278,11 +278,11 @@ **WARNING**: During the 'start' event, the descendants and following siblings are not yet available and should not be accessed. During the 'end' event, the -following siblings of the element should not be accessed. During either of -the two events, you **must not** modify or move the ancestors (parents) of the -current element. You should also avoid moving or discarding the element -itself. The golden rule is: do not touch anything that will have to be -touched again by the parser later on. +descendants of the element can be freely modified, but its following siblings +should not be accessed. During either of the two events, you **must not** +modify or move the ancestors (parents) of the current element. You should +also avoid moving or discarding the element itself. The golden rule is: do +not touch anything that will have to be touched again by the parser later on. 
If you have elements with a long list of children in your XML file and want to save more memory during parsing, you can clean up the preceding siblings of From scoder at codespeak.net Wed Jul 19 09:10:36 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Jul 2006 09:10:36 +0200 (CEST) Subject: [Lxml-checkins] r30205 - lxml/trunk/doc Message-ID: <20060719071036.AEF991009D@code0.codespeak.net> Author: scoder Date: Wed Jul 19 09:10:35 2006 New Revision: 30205 Modified: lxml/trunk/doc/api.txt Log: one more little clarification on iterparse() in api.txt Modified: lxml/trunk/doc/api.txt ============================================================================== --- lxml/trunk/doc/api.txt (original) +++ lxml/trunk/doc/api.txt Wed Jul 19 09:10:35 2006 @@ -278,7 +278,7 @@ **WARNING**: During the 'start' event, the descendants and following siblings are not yet available and should not be accessed. During the 'end' event, the -descendants of the element can be freely modified, but its following siblings +element and its descendants can be freely modified, but its following siblings should not be accessed. During either of the two events, you **must not** modify or move the ancestors (parents) of the current element. You should also avoid moving or discarding the element itself. 
The golden rule is: do From scoder at codespeak.net Wed Jul 19 09:33:25 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Jul 2006 09:33:25 +0200 (CEST) Subject: [Lxml-checkins] r30208 - in lxml/branch/capi: doc src/lxml Message-ID: <20060719073325.22685100A5@code0.codespeak.net> Author: scoder Date: Wed Jul 19 09:33:23 2006 New Revision: 30208 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/python.pxd Log: made '%' work for strings Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Wed Jul 19 09:33:23 2006 @@ -113,6 +113,9 @@ >>> el.text = "5" >>> print el + "10" 510 + >>> el.text = "%s - %s" + >>> print el % (1234, 12345) + 1234 - 12345 Resetting the API Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Wed Jul 19 09:33:23 2006 @@ -284,6 +284,16 @@ else: raise TypeError, "invalid types for * operator" + def __mod__(self, other): + if python.PyTuple_Check(other): + l = [] + for item in other: + python.PyList_Append(l, _strValueOf(item)) + other = tuple(l) + else: + other = _strValueOf(other) + return _strValueOf(self) % other + def __getitem__(self, index): return textOf(self._c_node)[index] Modified: lxml/branch/capi/src/lxml/python.pxd ============================================================================== --- lxml/branch/capi/src/lxml/python.pxd (original) +++ lxml/branch/capi/src/lxml/python.pxd Wed Jul 19 09:33:23 2006 @@ -50,6 +50,7 @@ cdef object PyTuple_GET_ITEM(object o, Py_ssize_t pos) cdef int PyDict_Check(object instance) + cdef int PyTuple_Check(object instance) cdef int PyNumber_Check(object instance) cdef int 
PyBool_Check(object instance) cdef int PySequence_Check(object instance) From scoder at codespeak.net Wed Jul 19 15:08:46 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Jul 2006 15:08:46 +0200 (CEST) Subject: [Lxml-checkins] r30226 - lxml/branch/capi/src/lxml/elementlib Message-ID: <20060719130846.ED75E100A5@code0.codespeak.net> Author: scoder Date: Wed Jul 19 15:08:45 2006 New Revision: 30226 Added: lxml/branch/capi/src/lxml/elementlib/ lxml/branch/capi/src/lxml/elementlib/__init__.py Log: added src/lxml/elementlib to SVN repository Added: lxml/branch/capi/src/lxml/elementlib/__init__.py ============================================================================== From scoder at codespeak.net Wed Jul 19 16:19:52 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Jul 2006 16:19:52 +0200 (CEST) Subject: [Lxml-checkins] r30231 - lxml/branch/capi/src/lxml Message-ID: <20060719141952.2ACAA10087@code0.codespeak.net> Author: scoder Date: Wed Jul 19 16:19:51 2006 New Revision: 30231 Modified: lxml/branch/capi/src/lxml/etreepublic.pxd lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/public-api.pxi Log: default to namespace class lookup for leaf elements Modified: lxml/branch/capi/src/lxml/etreepublic.pxd ============================================================================== --- lxml/branch/capi/src/lxml/etreepublic.pxd (original) +++ lxml/branch/capi/src/lxml/etreepublic.pxd Wed Jul 19 16:19:51 2006 @@ -60,6 +60,9 @@ # lookup function that always returns the default Element class cdef object lookupDefaultElementClass(object _, tree.xmlNode* c_node) + # lookup function for namespace/tag specific Element classes + cdef object lookupNamespaceElementClass(object _, tree.xmlNode* c_node) + ########################################################################## # XML attribute access Modified: lxml/branch/capi/src/lxml/objectify.pyx 
============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Wed Jul 19 16:19:51 2006 @@ -347,11 +347,18 @@ ################################################################################ # Element class lookup -cdef object _lookupElementClass(_, tree.xmlNode* c_node): +cdef object _lookupElementClass(state, tree.xmlNode* c_node): cdef python.PyObject* dict_result + # if element has children => no data class if cetree.findChildForwards(c_node, 0): - # element has children => no data class return ObjectifiedElement + + # default to namespace specific classes + nsclass = cetree.lookupNamespaceElementClass(state, c_node) + if nsclass is not ObjectifiedElement: + return nsclass + + # otherwise determine class based on text content type value = cetree.attributeValueFromNsName( c_node, _PYTYPE_NAMESPACE, "pytype") if value is None: @@ -362,6 +369,7 @@ dict_result = python.PyDict_GetItem(_TYPE_DICT, value) if dict_result is not NULL: return dict_result + # FIXME: is it right to raise an Exception based on data? 
raise ValueError, "Invalid type attribute in element '%s'" % \ cetree.namespacedNameFromNsName(tree._getNs(c_node), c_node.name) Modified: lxml/branch/capi/src/lxml/public-api.pxi ============================================================================== --- lxml/branch/capi/src/lxml/public-api.pxi (original) +++ lxml/branch/capi/src/lxml/public-api.pxi Wed Jul 19 16:19:51 2006 @@ -22,6 +22,9 @@ cdef public object lookupDefaultElementClass(state, xmlNode* c_node): return _lookupDefaultElementClass(state, c_node) +cdef public object lookupNamespaceElementClass(state, xmlNode* c_node): + return _find_nselement_class(state, c_node) + cdef public int tagMatches(xmlNode* c_node, char* c_href, char* c_name): if c_node is NULL: return -1 From scoder at codespeak.net Wed Jul 19 17:33:41 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Jul 2006 17:33:41 +0200 (CEST) Subject: [Lxml-checkins] r30234 - in lxml/branch/capi: . src/lxml src/lxml/tests Message-ID: <20060719153341.5A702100A3@code0.codespeak.net> Author: scoder Date: Wed Jul 19 17:33:39 2006 New Revision: 30234 Modified: lxml/branch/capi/CHANGES.txt lxml/branch/capi/src/lxml/etree.pyx lxml/branch/capi/src/lxml/tests/test_etree.py Log: new API method Element.replace(old_element, new_element) to replace a subelement Modified: lxml/branch/capi/CHANGES.txt ============================================================================== --- lxml/branch/capi/CHANGES.txt (original) +++ lxml/branch/capi/CHANGES.txt Wed Jul 19 17:33:39 2006 @@ -8,6 +8,8 @@ Features added -------------- +* Element.replace(old, new) method to replace a subelement by another one + * Public C-level API for independent extension modules Bugs fixed Modified: lxml/branch/capi/src/lxml/etree.pyx ============================================================================== --- lxml/branch/capi/src/lxml/etree.pyx (original) +++ lxml/branch/capi/src/lxml/etree.pyx Wed Jul 19 17:33:39 2006 @@ -750,6 +750,21 @@ raise 
ValueError, "Element is not a child of this node." _removeText(c_node.next) tree.xmlUnlinkNode(c_node) + + def replace(self, _Element old_element not None, + _Element new_element not None): + """Replaces a subelement with the element passed as second argument. + """ + cdef xmlNode* c_node + cdef xmlNode* c_next + c_node = old_element._c_node + if c_node.parent is not self._c_node: + raise ValueError, "Element is not a child of this node." + _removeText(c_node.next) + c_next = new_element._c_node.next + tree.xmlReplaceNode(c_node, new_element._c_node) + _moveTail(c_next, new_element._c_node) + moveNodeToDocument(new_element, self._doc) # PROPERTIES property tag: Modified: lxml/branch/capi/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_etree.py (original) +++ lxml/branch/capi/src/lxml/tests/test_etree.py Wed Jul 19 17:33:39 2006 @@ -853,6 +853,53 @@ self.assertEquals( 0, e.index(e[0], -12, -1)) + def test_replace(self): + etree = self.etree + e = etree.Element('foo') + for i in range(10): + el = etree.SubElement(e, 'a%s' % i) + el.text = "text%d" % i + el.tail = "tail%d" % i + + child1 = e[1] + child2 = e[2] + + e.replace(e[0], e[1]) + self.assertEquals( + 9, len(e)) + self.assertEquals( + child1, e[0]) + self.assertEquals( + child1.text, "text1") + self.assertEquals( + child1.tail, "tail1") + self.assertEquals( + child2, e[1]) + + e.replace(e[-1], e[0]) + self.assertEquals( + child1, e[-1]) + self.assertEquals( + child1.text, "text1") + self.assertEquals( + child1.tail, "tail1") + self.assertEquals( + child2, e[0]) + + def test_replace_new(self): + etree = self.etree + e = etree.Element('foo') + for i in range(10): + etree.SubElement(e, 'a%s' % i) + + new_element = etree.Element("test") + child1 = e[1] + e.replace(e[0], new_element) + self.assertEquals( + new_element, e[0]) + self.assertEquals( + child1, e[1]) + def test_docinfo_public(self): etree = self.etree 
xml_header = '' From scoder at codespeak.net Wed Jul 19 17:50:17 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Jul 2006 17:50:17 +0200 (CEST) Subject: [Lxml-checkins] r30235 - in lxml/branch/capi/src/lxml: . tests Message-ID: <20060719155017.78113100AC@code0.codespeak.net> Author: scoder Date: Wed Jul 19 17:50:15 2006 New Revision: 30235 Modified: lxml/branch/capi/src/lxml/etree.pyx lxml/branch/capi/src/lxml/tests/test_elementtree.py lxml/branch/capi/src/lxml/tests/test_etree.py Log: fix tail handling in Element.remove() and Element.replace() Modified: lxml/branch/capi/src/lxml/etree.pyx ============================================================================== --- lxml/branch/capi/src/lxml/etree.pyx (original) +++ lxml/branch/capi/src/lxml/etree.pyx Wed Jul 19 17:50:15 2006 @@ -736,8 +736,8 @@ return c_next = element._c_node.next tree.xmlAddPrevSibling(c_node, element._c_node) - _moveTail(c_next, element._c_node) moveNodeToDocument(element, self._doc) + _moveTail(c_next, element._c_node) def remove(self, _Element element not None): """Removes a matching subelement. Unlike the find methods, this @@ -745,26 +745,32 @@ or contents. """ cdef xmlNode* c_node + cdef xmlNode* c_next c_node = element._c_node if c_node.parent is not self._c_node: raise ValueError, "Element is not a child of this node." - _removeText(c_node.next) + c_next = element._c_node.next tree.xmlUnlinkNode(c_node) + _moveTail(c_next, c_node) def replace(self, _Element old_element not None, _Element new_element not None): """Replaces a subelement with the element passed as second argument. """ - cdef xmlNode* c_node - cdef xmlNode* c_next - c_node = old_element._c_node - if c_node.parent is not self._c_node: + cdef xmlNode* c_old_node + cdef xmlNode* c_old_next + cdef xmlNode* c_new_node + cdef xmlNode* c_new_next + c_old_node = old_element._c_node + if c_old_node.parent is not self._c_node: raise ValueError, "Element is not a child of this node." 
- _removeText(c_node.next) - c_next = new_element._c_node.next - tree.xmlReplaceNode(c_node, new_element._c_node) - _moveTail(c_next, new_element._c_node) + c_old_next = c_old_node.next + c_new_node = new_element._c_node + c_new_next = c_new_node.next + tree.xmlReplaceNode(c_old_node, c_new_node) moveNodeToDocument(new_element, self._doc) + _moveTail(c_new_next, c_new_node) + _moveTail(c_old_next, c_old_node) # PROPERTIES property tag: Modified: lxml/branch/capi/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/capi/src/lxml/tests/test_elementtree.py Wed Jul 19 17:50:15 2006 @@ -1154,6 +1154,7 @@ self.assertXML( '', a) + self.assertEquals('b2', b.tail) def test_getchildren(self): Element = self.etree.Element Modified: lxml/branch/capi/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_etree.py (original) +++ lxml/branch/capi/src/lxml/tests/test_etree.py Wed Jul 19 17:50:15 2006 @@ -861,6 +861,7 @@ el.text = "text%d" % i el.tail = "tail%d" % i + child0 = e[0] child1 = e[1] child2 = e[2] @@ -874,6 +875,8 @@ self.assertEquals( child1.tail, "tail1") self.assertEquals( + child0.tail, "tail0") + self.assertEquals( child2, e[1]) e.replace(e[-1], e[0]) From scoder at codespeak.net Wed Jul 19 18:08:47 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Jul 2006 18:08:47 +0200 (CEST) Subject: [Lxml-checkins] r30236 - in lxml/trunk: . 
src/lxml src/lxml/tests Message-ID: <20060719160847.4DDDD100A5@code0.codespeak.net> Author: scoder Date: Wed Jul 19 18:08:44 2006 New Revision: 30236 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_etree.py Log: merged in new Element.replace() method and tail fixes for Element.remove() Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Jul 19 18:08:44 2006 @@ -8,9 +8,13 @@ Features added -------------- +* Element.replace(old, new) method to replace a subelement by another one + Bugs fixed ---------- +* Element.remove() deleted the tail text from the removed Element + * Creating documents no longer copies the parser for later URL resolving. For performance reasons, only a reference is kept. Resolver updates on the parser will now be reflected by documents that were parsed before the @@ -26,7 +30,8 @@ * Module level ``iterwalk()`` function as 'iterparse' for trees -* ElementTree compatible ``iterparse()`` module function +* Module level ``iterparse()`` function similar to ElementTree (see + documentation for differences) * Element.nsmap property returns a mapping of all namespace prefixes known at the Element to their namespace URI Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Jul 19 18:08:44 2006 @@ -745,11 +745,32 @@ or contents. """ cdef xmlNode* c_node + cdef xmlNode* c_next c_node = element._c_node if c_node.parent is not self._c_node: raise ValueError, "Element is not a child of this node." 
- _removeText(c_node.next) + c_next = element._c_node.next tree.xmlUnlinkNode(c_node) + _moveTail(c_next, c_node) + + def replace(self, _Element old_element not None, + _Element new_element not None): + """Replaces a subelement with the element passed as second argument. + """ + cdef xmlNode* c_old_node + cdef xmlNode* c_old_next + cdef xmlNode* c_new_node + cdef xmlNode* c_new_next + c_old_node = old_element._c_node + if c_old_node.parent is not self._c_node: + raise ValueError, "Element is not a child of this node." + c_old_next = c_old_node.next + c_new_node = new_element._c_node + c_new_next = c_new_node.next + tree.xmlReplaceNode(c_old_node, c_new_node) + moveNodeToDocument(new_element, self._doc) + _moveTail(c_new_next, c_new_node) + _moveTail(c_old_next, c_old_node) # PROPERTIES property tag: Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Wed Jul 19 18:08:44 2006 @@ -1154,6 +1154,7 @@ self.assertXML( '', a) + self.assertEquals('b2', b.tail) def test_getchildren(self): Element = self.etree.Element Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Wed Jul 19 18:08:44 2006 @@ -850,6 +850,56 @@ self.assertEquals( 0, e.index(e[0], -12, -1)) + def test_replace(self): + etree = self.etree + e = etree.Element('foo') + for i in range(10): + el = etree.SubElement(e, 'a%s' % i) + el.text = "text%d" % i + el.tail = "tail%d" % i + + child0 = e[0] + child1 = e[1] + child2 = e[2] + + e.replace(e[0], e[1]) + self.assertEquals( + 9, len(e)) + self.assertEquals( + child1, e[0]) + self.assertEquals( + child1.text, "text1") + self.assertEquals( + child1.tail, "tail1") + self.assertEquals( + child0.tail, 
"tail0") + self.assertEquals( + child2, e[1]) + + e.replace(e[-1], e[0]) + self.assertEquals( + child1, e[-1]) + self.assertEquals( + child1.text, "text1") + self.assertEquals( + child1.tail, "tail1") + self.assertEquals( + child2, e[0]) + + def test_replace_new(self): + etree = self.etree + e = etree.Element('foo') + for i in range(10): + etree.SubElement(e, 'a%s' % i) + + new_element = etree.Element("test") + child1 = e[1] + e.replace(e[0], new_element) + self.assertEquals( + new_element, e[0]) + self.assertEquals( + child1, e[1]) + def test_docinfo_public(self): etree = self.etree xml_header = '' From scoder at codespeak.net Wed Jul 19 18:09:55 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Jul 2006 18:09:55 +0200 (CEST) Subject: [Lxml-checkins] r30237 - lxml/branch/capi/src/lxml Message-ID: <20060719160955.69574100A5@code0.codespeak.net> Author: scoder Date: Wed Jul 19 18:09:54 2006 New Revision: 30237 Modified: lxml/branch/capi/src/lxml/etree.pyx Log: fix for accidental change in last commit Modified: lxml/branch/capi/src/lxml/etree.pyx ============================================================================== --- lxml/branch/capi/src/lxml/etree.pyx (original) +++ lxml/branch/capi/src/lxml/etree.pyx Wed Jul 19 18:09:54 2006 @@ -736,8 +736,8 @@ return c_next = element._c_node.next tree.xmlAddPrevSibling(c_node, element._c_node) - moveNodeToDocument(element, self._doc) _moveTail(c_next, element._c_node) + moveNodeToDocument(element, self._doc) def remove(self, _Element element not None): """Removes a matching subelement. 
Unlike the find methods, this From scoder at codespeak.net Wed Jul 19 18:12:18 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Jul 2006 18:12:18 +0200 (CEST) Subject: [Lxml-checkins] r30239 - lxml/branch/capi Message-ID: <20060719161218.6952F100A5@code0.codespeak.net> Author: scoder Date: Wed Jul 19 18:12:17 2006 New Revision: 30239 Modified: lxml/branch/capi/CHANGES.txt Log: mark Element.remove() tail bug fixed Modified: lxml/branch/capi/CHANGES.txt ============================================================================== --- lxml/branch/capi/CHANGES.txt (original) +++ lxml/branch/capi/CHANGES.txt Wed Jul 19 18:12:17 2006 @@ -15,6 +15,8 @@ Bugs fixed ---------- +* Element.remove() deleted the tail text from the removed Element + * Creating documents no longer copies the parser for later URL resolving. For performance reasons, only a reference is kept. Resolver updates on the parser will now be reflected by documents that were parsed before the From scoder at codespeak.net Wed Jul 19 18:45:46 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Jul 2006 18:45:46 +0200 (CEST) Subject: [Lxml-checkins] r30240 - lxml/branch/capi/src/lxml Message-ID: <20060719164546.A4DC510093@code0.codespeak.net> Author: scoder Date: Wed Jul 19 18:45:44 2006 New Revision: 30240 Modified: lxml/branch/capi/src/lxml/etreepublic.pxd lxml/branch/capi/src/lxml/parser.pxi lxml/branch/capi/src/lxml/public-api.pxi Log: public C function to deep copy a node directly into a document Modified: lxml/branch/capi/src/lxml/etreepublic.pxd ============================================================================== --- lxml/branch/capi/src/lxml/etreepublic.pxd (original) +++ lxml/branch/capi/src/lxml/etreepublic.pxd Wed Jul 19 18:45:44 2006 @@ -52,6 +52,9 @@ # create an ElementTree subclass for an Element cdef _ElementTree newElementTree(_NodeBase context_node, object subclass) + # deep copy a node to include in in the Document + cdef _Element 
deepcopyNodeToDocument(_Document doc, tree.xmlNode* c_root) + # set the internal lookup function for Element classes # use setElementClassLookupFunction(NULL, None) to reset it cdef void setElementClassLookupFunction( Modified: lxml/branch/capi/src/lxml/parser.pxi ============================================================================== --- lxml/branch/capi/src/lxml/parser.pxi (original) +++ lxml/branch/capi/src/lxml/parser.pxi Wed Jul 19 18:45:44 2006 @@ -784,6 +784,13 @@ _copyTail(c_new_root.next, c_node) return result +cdef xmlNode* _copyNodeToDoc(xmlNode* c_node, xmlDoc* c_doc): + "Recursively copy the element into the document. c_doc is not modified." + cdef xmlNode* c_root + c_root = tree.xmlDocCopyNode(c_node, c_doc, 1) # recursive + _copyTail(c_node.next, c_root) + return c_root + cdef void _bugFixURL(xmlDoc* c_source_doc, xmlDoc* c_target_doc): """libxml2 <= 2.6.17 had a bug that prevented them from copying the document URL in xmlDocCopy()""" Modified: lxml/branch/capi/src/lxml/public-api.pxi ============================================================================== --- lxml/branch/capi/src/lxml/public-api.pxi (original) +++ lxml/branch/capi/src/lxml/public-api.pxi Wed Jul 19 18:45:44 2006 @@ -1,5 +1,11 @@ # Public C API for lxml.etree +cdef public _Element deepcopyNodeToDocument(_Document doc, xmlNode* c_root): + "Recursively copy the element into the document. doc is not modified." 
+ cdef xmlNode* c_node + c_node = _copyNodeToDoc(c_root, doc._c_doc) + return _elementFactory(doc, c_node) + cdef public _ElementTree elementTreeFactory(_NodeBase context_node): return newElementTree(context_node, _ElementTree) From scoder at codespeak.net Wed Jul 19 19:07:39 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 19 Jul 2006 19:07:39 +0200 (CEST) Subject: [Lxml-checkins] r30241 - in lxml/branch/capi: doc src/lxml Message-ID: <20060719170739.B4D4B100A0@code0.codespeak.net> Author: scoder Date: Wed Jul 19 19:07:37 2006 New Revision: 30241 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: support appending elements via attribute assignment Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Wed Jul 19 19:07:37 2006 @@ -49,6 +49,12 @@ >>> print root.c.get("c") oh-oh +You can append elements to the tree by assigning them to an attribute:: + + >>> root.c = etree.Element("new") + >>> print root.c.new.tag + new + Namespace handling ------------------ Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Wed Jul 19 19:07:37 2006 @@ -91,18 +91,25 @@ self. """ cdef _Element element + # properties are looked up /after/ __setattr__, so we must emulate them if tag == 'text': cetree.setNodeText(self._c_node, value) + return elif tag == 'tail': cetree.setTailText(self._c_node, value) + return elif tag == 'tag': # FIXME? 
raise NotImplementedError + + element = _lookupChild(self, tag) + if isinstance(value, _Element): + # deep copy the element and make it the only child + del element[:] + element.append( + cetree.deepcopyNodeToDocument( + self._doc, (value)._c_node)) else: - element = _lookupChild(self, tag) - if isinstance(value, ObjectifiedElement): - # FIXME? - raise NotImplementedError if not python._isString(value): value = str(value) cetree.setNodeText(element._c_node, value) From scoder at codespeak.net Thu Jul 20 07:36:35 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Jul 2006 07:36:35 +0200 (CEST) Subject: [Lxml-checkins] r30250 - lxml/trunk/src/lxml/tests Message-ID: <20060720053635.3EF31100AD@code0.codespeak.net> Author: scoder Date: Thu Jul 20 07:36:31 2006 New Revision: 30250 Modified: lxml/trunk/src/lxml/tests/test_elementtree.py Log: test cases for using str subclasses instead of str Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Thu Jul 20 07:36:31 2006 @@ -212,6 +212,17 @@ self.assertEquals('<>&', tostring(a)) + def test_text_str_subclass(self): + Element = self.etree.Element + + class strTest(str): + pass + + a = Element("a") + a.text = strTest("text") + self.assertXML('text', + a) + def test_tail(self): ElementTree = self.etree.ElementTree @@ -224,6 +235,18 @@ self.assertEquals('mixed', root[0].text) self.assertEquals(' content.', root[0].tail) + def test_tail_str_subclass(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + class strTest(str): + pass + + a = Element("a") + SubElement(a, "t").tail = strTest("tail") + self.assertXML('tail', + a) + def test_ElementTree(self): Element = self.etree.Element ElementTree = self.etree.ElementTree @@ -888,6 +911,17 @@ self.assertEquals('c', a.tag) self.assertEquals('', + a) 
+ def test_delitem(self): Element = self.etree.Element SubElement = self.etree.SubElement From scoder at codespeak.net Thu Jul 20 09:08:03 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Jul 2006 09:08:03 +0200 (CEST) Subject: [Lxml-checkins] r30252 - in lxml/branch/capi: doc src/lxml Message-ID: <20060720070803.AFCD5100AD@code0.codespeak.net> Author: scoder Date: Thu Jul 20 09:08:00 2006 New Revision: 30252 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: support for changing element tag name Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Jul 20 09:08:00 2006 @@ -55,6 +55,18 @@ >>> print root.c.new.tag new +Note that special care must be taken when changing the tag name of an element:: + + >>> print root.b.tag + b + >>> root.b.tag = "notB" + >>> root.b + Traceback (most recent call last): + ... + AttributeError: no such child: b + >>> print root.notB.tag + notB + Namespace handling ------------------ Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Jul 20 09:08:00 2006 @@ -99,8 +99,8 @@ cetree.setTailText(self._c_node, value) return elif tag == 'tag': - # FIXME? 
- raise NotImplementedError + ElementBase.tag.__set__(self, value) + return element = _lookupChild(self, tag) if isinstance(value, _Element): From scoder at codespeak.net Thu Jul 20 10:57:07 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Jul 2006 10:57:07 +0200 (CEST) Subject: [Lxml-checkins] r30258 - in lxml/branch/capi: doc src/lxml Message-ID: <20060720085707.69BE0100BE@code0.codespeak.net> Author: scoder Date: Thu Jul 20 10:57:05 2006 New Revision: 30258 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: implemented __delattr__, let __setattr__ create children on request Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Jul 20 10:57:05 2006 @@ -32,12 +32,26 @@ ---------------------------------------- The main idea behind the ``objectify`` API is to hide XML element access -behind the usual object attribute access pattern:: +behind the usual object attribute access pattern. 
Asking an element for an +attribute will return the sequence of children with corresponding tag names:: >>> root = etree.Element("root") >>> b = etree.SubElement(root, "b") + >>> print root.b[0].tag + b + >>> b = etree.SubElement(root, "b") + >>> print root.b[0].tag + b + >>> print root.b[1].tag + b + +For convenience, you can omit the index '0' to access the first child:: + >>> print root.b.tag b + >>> root.index(root.b) + 0 + >>> del root.b Attributes are accessed as in the normal ElementTree API:: @@ -49,11 +63,11 @@ >>> print root.c.get("c") oh-oh -You can append elements to the tree by assigning them to an attribute:: +New children are created by attribute assignment:: - >>> root.c = etree.Element("new") - >>> print root.c.new.tag - new + >>> root.new_child = etree.Element("new_child") + >>> print root.new_child.tag + new_child Note that special care must be taken when changing the tag name of an element:: @@ -67,6 +81,16 @@ >>> print root.notB.tag notB +The same applies to replacing elements in the tree via attribute assignment:: + + >>> root.c = etree.Element("new") + >>> root.c + Traceback (most recent call last): + ... 
+ AttributeError: no such child: c + >>> print root.new.tag + new + Namespace handling ------------------ Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Jul 20 10:57:05 2006 @@ -102,18 +102,29 @@ ElementBase.tag.__set__(self, value) return - element = _lookupChild(self, tag) if isinstance(value, _Element): - # deep copy the element and make it the only child - del element[:] - element.append( - cetree.deepcopyNodeToDocument( - self._doc, (value)._c_node)) + # deep copy the new element + element = cetree.deepcopyNodeToDocument( + self._doc, (<_Element>value)._c_node) + try: + child = _lookupChild(self, tag) + except AttributeError: + self.append(element) + else: + self.replace(child, element) else: + try: + element = _lookupChild(self, tag) + except AttributeError: + element = etree.SubElement(self, tag) if not python._isString(value): value = str(value) cetree.setNodeText(element._c_node, value) + def __delattr__(self, tag): + child = _lookupChild(self, tag) + self.remove(child) + def __getitem__(self, key): """Return a sibling or attribute. 
* If argument is an integer, returns the following sibling From scoder at codespeak.net Thu Jul 20 10:59:14 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Jul 2006 10:59:14 +0200 (CEST) Subject: [Lxml-checkins] r30259 - lxml/branch/capi/doc Message-ID: <20060720085914.B3E6F100BE@code0.codespeak.net> Author: scoder Date: Thu Jul 20 10:59:13 2006 New Revision: 30259 Modified: lxml/branch/capi/doc/objectify.txt Log: small clarification in docs Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Jul 20 10:59:13 2006 @@ -39,11 +39,15 @@ >>> b = etree.SubElement(root, "b") >>> print root.b[0].tag b + >>> root.index(root.b[0]) + 0 >>> b = etree.SubElement(root, "b") >>> print root.b[0].tag b >>> print root.b[1].tag b + >>> root.index(root.b[1]) + 1 For convenience, you can omit the index '0' to access the first child:: From scoder at codespeak.net Thu Jul 20 11:04:08 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Jul 2006 11:04:08 +0200 (CEST) Subject: [Lxml-checkins] r30260 - lxml/branch/capi/doc Message-ID: <20060720090408.26EA6100BC@code0.codespeak.net> Author: scoder Date: Thu Jul 20 11:04:06 2006 New Revision: 30260 Modified: lxml/branch/capi/doc/objectify.txt Log: more doc tests for adding subtrees to trees Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Jul 20 11:04:06 2006 @@ -67,12 +67,21 @@ >>> print root.c.get("c") oh-oh -New children are created by attribute assignment:: +New children are created by assigning them to attributes or through the +ElementTree API:: >>> root.new_child = etree.Element("new_child") >>> print root.new_child.tag new_child + >>> root.append( 
etree.Element("another_child") ) + >>> print root.another_child.tag + another_child + + >>> el = etree.SubElement(root, "and_another_child") + >>> print root.and_another_child.tag + and_another_child + Note that special care must be taken when changing the tag name of an element:: >>> print root.b.tag @@ -87,6 +96,8 @@ The same applies to replacing elements in the tree via attribute assignment:: + >>> print root.c.tag + c >>> root.c = etree.Element("new") >>> root.c Traceback (most recent call last): From scoder at codespeak.net Thu Jul 20 11:20:48 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Jul 2006 11:20:48 +0200 (CEST) Subject: [Lxml-checkins] r30261 - in lxml/branch/capi: doc src/lxml Message-ID: <20060720092048.0C273100AA@code0.codespeak.net> Author: scoder Date: Thu Jul 20 11:20:46 2006 New Revision: 30261 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: assigning subtrees to attributes adapts the tag name after deep copying Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Jul 20 11:20:46 2006 @@ -67,20 +67,26 @@ >>> print root.c.get("c") oh-oh -New children are created by assigning them to attributes or through the -ElementTree API:: +New children are normally added to elements through the ElementTree API:: - >>> root.new_child = etree.Element("new_child") + >>> root.append( etree.Element("new_child") ) >>> print root.new_child.tag new_child - >>> root.append( etree.Element("another_child") ) + >>> el = etree.SubElement(root, "another_child") >>> print root.another_child.tag another_child - >>> el = etree.SubElement(root, "and_another_child") - >>> print root.and_another_child.tag - and_another_child +Additionally, subtrees can be added by assigned to attributes. 
In this case, +the subtree is automatically deep copied and the tag name of its root is +updated to match the attribute name:: + + >>> el = etree.Element("yet_another_child") + >>> root.new_child = el + >>> print root.new_child.tag + new_child + >>> print el.tag + yet_another_child Note that special care must be taken when changing the tag name of an element:: @@ -94,18 +100,6 @@ >>> print root.notB.tag notB -The same applies to replacing elements in the tree via attribute assignment:: - - >>> print root.c.tag - c - >>> root.c = etree.Element("new") - >>> root.c - Traceback (most recent call last): - ... - AttributeError: no such child: c - >>> print root.new.tag - new - Namespace handling ------------------ Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Jul 20 11:20:46 2006 @@ -106,6 +106,7 @@ # deep copy the new element element = cetree.deepcopyNodeToDocument( self._doc, (<_Element>value)._c_node) + element.tag = _buildChildTag(self, tag) try: child = _lookupChild(self, tag) except AttributeError: @@ -164,14 +165,14 @@ c_node = c_node.next return NULL -cdef object _lookupChild(_Element element, tag): +cdef object _lookupChild(_Element parent, tag): cdef tree.xmlNode* c_result cdef tree.xmlNode* c_node cdef char* c_href cdef char* c_tag ns, tag = cetree.getNsTag(tag) c_tag = _cstr(tag) - c_node = element._c_node + c_node = parent._c_node if ns is None: c_href = tree._getNs(c_node) else: @@ -180,7 +181,18 @@ if c_result is NULL: raise AttributeError, "no such child: %s" % \ cetree.namespacedNameFromNsName(c_href, c_tag) - return elementFactory(element._doc, c_result) + return elementFactory(parent._doc, c_result) + +cdef object _buildChildTag(_Element parent, tag): + cdef char* c_href + cdef char* c_tag + ns, tag = cetree.getNsTag(tag) + c_tag = _cstr(tag) + if ns is None: + c_href = 
tree._getNs(parent._c_node) + else: + c_href = _cstr(ns) + return cetree.namespacedNameFromNsName(c_href, c_tag) ################################################################################ # Data type support in subclasses From scoder at codespeak.net Thu Jul 20 11:28:11 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Jul 2006 11:28:11 +0200 (CEST) Subject: [Lxml-checkins] r30262 - lxml/branch/capi/src/lxml Message-ID: <20060720092811.C0A32100BE@code0.codespeak.net> Author: scoder Date: Thu Jul 20 11:28:10 2006 New Revision: 30262 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: small C-ification Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Jul 20 11:28:10 2006 @@ -10,6 +10,9 @@ # initialize C-API of lxml.etree import_etree(etree) +cdef object SubElement +SubElement = etree.SubElement + cdef object __builtin__ import __builtin__ cdef object _int @@ -117,7 +120,7 @@ try: element = _lookupChild(self, tag) except AttributeError: - element = etree.SubElement(self, tag) + element = SubElement(self, tag) if not python._isString(value): value = str(value) cetree.setNodeText(element._c_node, value) From scoder at codespeak.net Thu Jul 20 11:30:42 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Jul 2006 11:30:42 +0200 (CEST) Subject: [Lxml-checkins] r30263 - lxml/branch/capi/src/lxml Message-ID: <20060720093042.5EA2F100BE@code0.codespeak.net> Author: scoder Date: Thu Jul 20 11:30:41 2006 New Revision: 30263 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: fixed variable type Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Jul 20 11:30:41 
2006 @@ -32,7 +32,7 @@ # namespace for "pytype" hint attribute PYTYPE_NAMESPACE = "http://codespeak.net/lxml/objectify/pytype" -cdef object _PYTYPE_NAMESPACE +cdef char* _PYTYPE_NAMESPACE _PYTYPE_NAMESPACE = _cstr(PYTYPE_NAMESPACE) From scoder at codespeak.net Thu Jul 20 11:40:05 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Jul 2006 11:40:05 +0200 (CEST) Subject: [Lxml-checkins] r30264 - lxml/branch/capi/doc Message-ID: <20060720094005.84E4C100BE@code0.codespeak.net> Author: scoder Date: Thu Jul 20 11:40:04 2006 New Revision: 30264 Modified: lxml/branch/capi/doc/objectify.txt Log: typo Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Jul 20 11:40:04 2006 @@ -77,8 +77,8 @@ >>> print root.another_child.tag another_child -Additionally, subtrees can be added by assigned to attributes. In this case, -the subtree is automatically deep copied and the tag name of its root is +Additionally, subtrees can be added by assigning them to attributes. 
In this +case, the subtree is automatically deep copied and the tag name of its root is updated to match the attribute name:: >>> el = etree.Element("yet_another_child") From scoder at codespeak.net Thu Jul 20 12:15:56 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Jul 2006 12:15:56 +0200 (CEST) Subject: [Lxml-checkins] r30265 - in lxml/branch/capi: doc src/lxml Message-ID: <20060720101556.54424100BE@code0.codespeak.net> Author: scoder Date: Thu Jul 20 12:15:55 2006 New Revision: 30265 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: Element.__setitem__ for indexed attribute assignment Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Jul 20 12:15:55 2006 @@ -88,6 +88,23 @@ >>> print el.tag yet_another_child +You can also replace children that way:: + + >>> child1 = etree.SubElement(root, "child") + >>> child2 = etree.SubElement(root, "child") + >>> child3 = etree.SubElement(root, "child") + + >>> el = etree.Element("new_child") + >>> subel = etree.SubElement(el, "sub") + + >>> root.child = el + >>> print root.child.sub.tag + sub + + >>> root.child[2] = el + >>> print root.child[2].sub.tag + sub + Note that special care must be taken when changing the tag name of an element:: >>> print root.b.tag Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Jul 20 12:15:55 2006 @@ -130,10 +130,12 @@ self.remove(child) def __getitem__(self, key): - """Return a sibling or attribute. - * If argument is an integer, returns the following sibling - at that position. - * If argument is a string, does the same as getattr(). 
+ """Return a sibling, counting from the first child of the parent. + + * If argument is an integer, returns the sibling at that position. + + * If argument is a string, does the same as getattr(). This is used + to provide namespaces for element lookup. """ cdef tree.xmlNode* c_self_node cdef tree.xmlNode* c_parent @@ -153,6 +155,45 @@ raise IndexError, key return elementFactory(self._doc, c_node) + def __setitem__(self, key, value): + """Set the value of a sibling, counting from the first child of the + parent. + + * If argument is an integer, sets the sibling at that position. + + * If argument is a string, does the same as setattr(). This is used + to provide namespaces for element lookup. + """ + cdef _Element element + cdef _Element new_element + cdef tree.xmlNode* c_self_node + cdef tree.xmlNode* c_parent + cdef tree.xmlNode* c_node + if python._isString(key): + self.__setattr__(key, value) + + c_self_node = self._c_node + c_parent = c_self_node.parent + if c_parent is NULL: + # the 'root[i] = ...' 
case + raise TypeError, "index assignment to root element is invalid" + c_node = _findFollowingSibling( + c_parent.children, + tree._getNs(c_self_node), c_self_node.name, key) + if c_node is NULL: + raise IndexError, key + element = elementFactory(self._doc, c_node) + + if isinstance(value, _Element): + # deep copy the new element + new_element = cetree.deepcopyNodeToDocument( + self._doc, (<_Element>value)._c_node) + new_element.tag = self.tag + self.getparent().replace(element, new_element) + else: + cetree.setNodeText(element._c_node, value) + + ## def xml_set_attribute(self, name, value): ## ElementBase.set(self, name, value) From scoder at codespeak.net Thu Jul 20 15:50:18 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Jul 2006 15:50:18 +0200 (CEST) Subject: [Lxml-checkins] r30270 - lxml/branch/capi/src/lxml Message-ID: <20060720135018.5A9D0100C0@code0.codespeak.net> Author: scoder Date: Thu Jul 20 15:50:17 2006 New Revision: 30270 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: allow namespace classes inside the tree (not only leaves) Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Jul 20 15:50:17 2006 @@ -194,10 +194,6 @@ cetree.setNodeText(element._c_node, value) -## def xml_set_attribute(self, name, value): -## ElementBase.set(self, name, value) - - cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node, char* href, char* name, Py_ssize_t index): @@ -423,15 +419,15 @@ cdef object _lookupElementClass(state, tree.xmlNode* c_node): cdef python.PyObject* dict_result - # if element has children => no data class - if cetree.findChildForwards(c_node, 0): - return ObjectifiedElement - # default to namespace specific classes nsclass = cetree.lookupNamespaceElementClass(state, c_node) if nsclass is not ObjectifiedElement: return nsclass + # if 
element has children => no data class + if cetree.findChildForwards(c_node, 0): + return ObjectifiedElement + # otherwise determine class based on text content type value = cetree.attributeValueFromNsName( c_node, _PYTYPE_NAMESPACE, "pytype") From scoder at codespeak.net Thu Jul 20 17:27:41 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Jul 2006 17:27:41 +0200 (CEST) Subject: [Lxml-checkins] r30274 - lxml/branch/capi/src/lxml Message-ID: <20060720152741.E0796100C4@code0.codespeak.net> Author: scoder Date: Thu Jul 20 17:27:40 2006 New Revision: 30274 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: implementation of Element.find*(), None type Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Jul 20 17:27:40 2006 @@ -193,6 +193,30 @@ else: cetree.setNodeText(element._c_node, value) + def findall(self, path): + # Reimplementation of Element.findall() to make it work without child + # iteration. + xpath = etree.ETXPath(path) + return xpath(self) + + def find(self, path): + # Reimplementation of Element.find() to make it work without child + # iteration. + result = self.findall(path) + if isinstance(result, list) and len(result): + return result[0] + else: + return None + + def findtext(self, path, default=None): + # Reimplementation of Element.findtext() to make it work without child + # iteration. 
+ result = self.find(path) + if isinstance(result, _Element): + return result.text or "" + else: + return default + cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node, char* href, char* name, @@ -370,6 +394,9 @@ def __contains__(self, text): return text in textOf(self._c_node) +cdef class _NoneElement(ObjectifiedElement): + pass + cdef object _strValueOf(obj): if python._isString(obj): return obj @@ -401,7 +428,7 @@ value = textOf(c_node) if value is None: # FIXME: how do we get rid of the class if it was not the right one? - return ObjectifiedElement + return _NoneElement if python.PyUnicode_Check(value): # values containing unicode characters cannot be numbers return _StringElement From scoder at codespeak.net Thu Jul 20 17:27:50 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Jul 2006 17:27:50 +0200 (CEST) Subject: [Lxml-checkins] r30275 - lxml/branch/capi/src/lxml/tests Message-ID: <20060720152750.9EDA4100C6@code0.codespeak.net> Author: scoder Date: Thu Jul 20 17:27:48 2006 New Revision: 30275 Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py Log: test cases: findall, data types Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Thu Jul 20 17:27:48 2006 @@ -30,12 +30,13 @@ etree = etree def setUp(self): - ns = etree.Namespace("objectified") - ns[None] = objectify.ObjectifiedElement + objectify.register() + ns = etree.Namespace("otherNs") + ns[None] = etree.ElementBase def tearDown(self): - ns = etree.Namespace("objectified") - ns.clear() + etree.Namespace("otherNs").clear() + objectify.unregister() def test_child(self): root = etree.XML(xml_str) @@ -72,6 +73,51 @@ self.assertEquals([root.c1.c2[0], root.c1.c2[1]], list(iter((root.c1.c2)))) + def test_findall(self): + XML = self.etree.XML + root = XML('') + 
self.assertEquals(len(root.findall("c")), 1) + self.assertEquals(len(root.findall(".//c")), 2) + self.assertEquals(len(root.findall(".//b")), 3) + self.assertEquals(root.findall(".//b")[:2], + root.getchildren()[:2]) + + def test_findall_ns(self): + XML = self.etree.XML + root = XML('') + self.assertEquals(len(root.findall(".//{X}b")), 2) + self.assertEquals(len(root.findall(".//b")), 3) + self.assertEquals(len(root.findall("b")), 2) + + def test_type_none(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + root = Element("{objectified}root") + SubElement(root, "{objectified}none") + self.assert_(isinstance(root.none, objectify._NoneElement)) + + def test_type_str(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + root = Element("{objectified}root") + SubElement(root, "{objectified}none").text = "test" + self.assert_(isinstance(root.none, objectify._StringElement)) + + def test_type_int(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + root = Element("{objectified}root") + SubElement(root, "{objectified}none").text = "5" + self.assert_(isinstance(root.none, objectify._IntElement)) + + def test_type_float(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + root = Element("{objectified}root") + SubElement(root, "{objectified}none").text = "5.5" + self.assert_(isinstance(root.none, objectify._FloatElement)) + + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ObjectifyTestCase)]) From scoder at codespeak.net Thu Jul 20 17:41:39 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Jul 2006 17:41:39 +0200 (CEST) Subject: [Lxml-checkins] r30276 - lxml/branch/capi/doc Message-ID: <20060720154139.84021100C4@code0.codespeak.net> Author: scoder Date: Thu Jul 20 17:41:38 2006 New Revision: 30276 Modified: lxml/branch/capi/doc/objectify.txt Log: docs: what is different from ET?, TODO Modified: 
lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Jul 20 17:41:38 2006 @@ -27,6 +27,14 @@ .. _`namespace specific classes`: namespace_extensions.html +.. contents:: +.. + 1 Element access through object attributes + 2 Namespace handling + 3 Python data types + 4 What is different from ElementTree? + 5 Resetting the API + Element access through object attributes ---------------------------------------- @@ -186,6 +194,66 @@ 1234 - 12345 +What is different from ElementTree? +----------------------------------- + +Such a different Element API obviously implies some side effects to the normal +behaviour of the rest of the API. + +* Iteration over elements does not yield the children, but the siblings. You + can access all children with the ``iterchildren()`` method on elements or + retrieve a list by calling the ``getchildren()`` method. + +* The find, findall and findtext methods use a different implementation as + they rely on the original iteration scheme. This has the disadvantage that + they may not be 100% backwards compatible, and the additional advantage that + they now support any XPath expression. + + +TODO: +----- + +Objectify elements support slicing:: + + >>> root = etree.fromstring(""" + ... + ... 1 + ... 1.2 + ... 1 + ... 3.2 + ... what? + ... + ... """) + >>> for elt in root.a[:]: print elt + ... + 1 + 1.2 + >>> + +Printing an objectify element outputs a pretty-print representation:: + + >>> root = etree.fromstring(""" + ... + ... 1 + ... 1.2 + ... 1 + ... 3.2 + ... what? + ... + ... """) + >>> print root + root = None [ObjectifiedElement] + { + a = 1 [_IntElement] + |---attr1 = foo + |---attr2 = bar + a = 1.2 [_FloatElement] + b = 1 [_IntElement] + b = 3.2 [_FloatElement] + c = 'what?' 
[_StringElement] + } + + Resetting the API ----------------- From scoder at codespeak.net Thu Jul 20 17:52:54 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Jul 2006 17:52:54 +0200 (CEST) Subject: [Lxml-checkins] r30277 - in lxml/branch/capi: doc src/lxml Message-ID: <20060720155254.4986C100C4@code0.codespeak.net> Author: scoder Date: Thu Jul 20 17:52:52 2006 New Revision: 30277 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: __getslice__ support, some doc updates Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Jul 20 17:52:52 2006 @@ -65,6 +65,28 @@ 0 >>> del root.b +Iteration and slicing also obey the requested tag:: + + >>> x1 = etree.SubElement(root, "x") + >>> x2 = etree.SubElement(root, "x") + >>> x3 = etree.SubElement(root, "x") + + >>> [ el.tag for el in root.x ] + ['x', 'x', 'x'] + + >>> [ el.tag for el in root.x[1:3] ] + ['x', 'x'] + +If you want to iterate over all children or need to provide a specific +namespace for the tag, use the ``iterchildren()`` method. Like the other +methods for iteration, it supports an optional tag keyword argument:: + + >>> [ el.tag for el in root.iterchildren() ] + ['b', 'x', 'x', 'x'] + + >>> [ el.tag for el in root.iterchildren(tag='b') ] + ['b'] + Attributes are accessed as in the normal ElementTree API:: >>> c = etree.SubElement(root, "c", myattr="someval") @@ -75,19 +97,10 @@ >>> print root.c.get("c") oh-oh -New children are normally added to elements through the ElementTree API:: - - >>> root.append( etree.Element("new_child") ) - >>> print root.new_child.tag - new_child - - >>> el = etree.SubElement(root, "another_child") - >>> print root.another_child.tag - another_child - -Additionally, subtrees can be added by assigning them to attributes. 
In this -case, the subtree is automatically deep copied and the tag name of its root is -updated to match the attribute name:: +In addition to the normal ElementTree API for appending elements to trees, +subtrees can also be added by assigning them to attributes. In this case, the +subtree is automatically deep copied and the tag name of its root is updated +to match the attribute name:: >>> el = etree.Element("yet_another_child") >>> root.new_child = el @@ -213,23 +226,6 @@ TODO: ----- -Objectify elements support slicing:: - - >>> root = etree.fromstring(""" - ... - ... 1 - ... 1.2 - ... 1 - ... 3.2 - ... what? - ... - ... """) - >>> for elt in root.a[:]: print elt - ... - 1 - 1.2 - >>> - Printing an objectify element outputs a pretty-print representation:: >>> root = etree.fromstring(""" Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Jul 20 17:52:52 2006 @@ -13,6 +13,9 @@ cdef object SubElement SubElement = etree.SubElement +cdef object islice +from itertools import islice + cdef object __builtin__ import __builtin__ cdef object _int @@ -193,6 +196,9 @@ else: cetree.setNodeText(element._c_node, value) + def __getslice__(self, Py_ssize_t start, Py_ssize_t end): + return islice(self, start, end) + def findall(self, path): # Reimplementation of Element.findall() to make it work without child # iteration. 
@@ -205,6 +211,8 @@ result = self.findall(path) if isinstance(result, list) and len(result): return result[0] + elif isinstance(result, _Element): + return result else: return None From scoder at codespeak.net Thu Jul 20 18:02:08 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Jul 2006 18:02:08 +0200 (CEST) Subject: [Lxml-checkins] r30280 - in lxml/branch/capi: doc src/lxml Message-ID: <20060720160208.ECD92100BF@code0.codespeak.net> Author: scoder Date: Thu Jul 20 18:02:07 2006 New Revision: 30280 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: support for deleting slices, fix for __getslice__ to return list (not iterator) Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Jul 20 18:02:07 2006 @@ -77,16 +77,26 @@ >>> [ el.tag for el in root.x[1:3] ] ['x', 'x'] + >>> [ el.tag for el in root.x[-1:] ] + ['x'] + + >>> del root.x[1:2] + >>> [ el.tag for el in root.x ] + ['x', 'x'] + If you want to iterate over all children or need to provide a specific namespace for the tag, use the ``iterchildren()`` method. 
Like the other methods for iteration, it supports an optional tag keyword argument:: >>> [ el.tag for el in root.iterchildren() ] - ['b', 'x', 'x', 'x'] + ['b', 'x', 'x'] >>> [ el.tag for el in root.iterchildren(tag='b') ] ['b'] + >>> [ el.tag for el in root.b ] + ['b'] + Attributes are accessed as in the normal ElementTree API:: >>> c = etree.SubElement(root, "c", myattr="someval") Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Jul 20 18:02:07 2006 @@ -13,9 +13,6 @@ cdef object SubElement SubElement = etree.SubElement -cdef object islice -from itertools import islice - cdef object __builtin__ import __builtin__ cdef object _int @@ -33,6 +30,13 @@ cdef object _len _len = __builtin__.len +cdef object list +list = __builtin__.list + +cdef object islice +from itertools import islice + + # namespace for "pytype" hint attribute PYTYPE_NAMESPACE = "http://codespeak.net/lxml/objectify/pytype" cdef char* _PYTYPE_NAMESPACE @@ -197,7 +201,15 @@ cetree.setNodeText(element._c_node, value) def __getslice__(self, Py_ssize_t start, Py_ssize_t end): - return islice(self, start, end) + return list(islice(self, start, end)) + + def __delslice__(self, Py_ssize_t start, Py_ssize_t end): + parent = self.getparent() + if parent is None: + raise TypeError, "deleting slices of root element not supported" + remove = parent.remove + for el in self.__getslice__(start, end): + remove(el) def findall(self, path): # Reimplementation of Element.findall() to make it work without child From scoder at codespeak.net Thu Jul 20 21:24:26 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 20 Jul 2006 21:24:26 +0200 (CEST) Subject: [Lxml-checkins] r30298 - lxml/branch/capi/doc Message-ID: <20060720192426.BC769100C0@code0.codespeak.net> Author: scoder Date: Thu Jul 20 21:24:25 2006 New Revision: 30298 
Modified: lxml/branch/capi/doc/objectify.txt Log: small doc fixes Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Jul 20 21:24:25 2006 @@ -182,7 +182,7 @@ ----------------- The objectify module knows about Python data types and tries its best to let -element content behave like them. For example, this works: +element content behave like them. For example, this works:: >>> root = etree.XML("511") >>> root.a + root.b @@ -197,9 +197,9 @@ 3 Objectify determines data types by trial and error, unless it finds an -attribute ``pytype`` in the namespace URI given by +attribute ``pytype`` in the namespace given by the URI in ``lxml.elementlib.objectify.PYTYPE_NAMESPACE``, which must contain any of the -following string values: int, float, str, unicode. +following string values: int, float, str, unicode:: >>> pytype_attr = "{%s}%s" % (objectify.PYTYPE_NAMESPACE, "pytype") From scoder at codespeak.net Fri Jul 21 21:41:32 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 21 Jul 2006 21:41:32 +0200 (CEST) Subject: [Lxml-checkins] r30333 - in lxml/trunk: . 
src/lxml src/lxml/tests Message-ID: <20060721194132.177AE100C8@code0.codespeak.net> Author: scoder Date: Fri Jul 21 21:41:29 2006 New Revision: 30333 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_etree.py Log: fix: raise exception on non-string attribute values Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Jul 21 21:41:29 2006 @@ -13,6 +13,8 @@ Bugs fixed ---------- +* Setting an attribute to a non-string value did not raise an exception + * Element.remove() deleted the tail text from the removed Element * Creating documents no longer copies the parser for later URL resolving. For Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Fri Jul 21 21:41:29 2006 @@ -107,7 +107,7 @@ tree.xmlFree(c_result) return result -cdef void _setAttributeValue(_NodeBase element, key, value): +cdef int _setAttributeValue(_NodeBase element, key, value) except -1: cdef xmlNs* c_ns cdef char* c_value cdef char* c_tag @@ -120,6 +120,7 @@ else: c_ns = element._doc._findOrBuildNodeNs(element._c_node, _cstr(ns)) tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value) + return 0 cdef object __RE_XML_ENCODING __RE_XML_ENCODING = re.compile( Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Fri Jul 21 21:41:29 2006 @@ -391,6 +391,13 @@ True, '{http://ns.codespeak.net/test}baz' in root.attrib) + def test_attribute_set(self): + Element = self.etree.Element + + root = Element("root") + root.set("attr", "TEST") + 
self.assertEquals("TEST", root.get("attr")) + def test_XML(self): XML = self.etree.XML Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Fri Jul 21 21:41:29 2006 @@ -38,6 +38,16 @@ self.assertRaises(ValueError, Element, '{test}') self.assertRaises(ValueError, setattr, el, 'tag', '{test}') + def test_attribute_set(self): + # ElementTree accepts arbitrary attribute values + # lxml.etree allows only strings + Element = self.etree.Element + + root = Element("root") + root.set("attr", "TEST") + self.assertEquals("TEST", root.get("attr")) + self.assertRaises(TypeError, root.set, "newattr", 5) + def test_parse_error(self): parse = self.etree.parse # from StringIO From scoder at codespeak.net Fri Jul 21 21:57:15 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 21 Jul 2006 21:57:15 +0200 (CEST) Subject: [Lxml-checkins] r30334 - in lxml/branch/lxml-1.0: . 
src/lxml src/lxml/tests Message-ID: <20060721195715.09B96100CA@code0.codespeak.net> Author: scoder Date: Fri Jul 21 21:57:13 2006 New Revision: 30334 Modified: lxml/branch/lxml-1.0/CHANGES.txt lxml/branch/lxml-1.0/src/lxml/apihelpers.pxi lxml/branch/lxml-1.0/src/lxml/etree.pyx lxml/branch/lxml-1.0/src/lxml/tests/test_elementtree.py lxml/branch/lxml-1.0/src/lxml/tests/test_etree.py Log: merges from trunk: Element.replace() function, bug fixes for Element.remove() and Element.set(attr, NonStringValue) Modified: lxml/branch/lxml-1.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.0/CHANGES.txt (original) +++ lxml/branch/lxml-1.0/CHANGES.txt Fri Jul 21 21:57:13 2006 @@ -2,6 +2,23 @@ lxml changelog ============== +======= +current +======= + +Features added +-------------- + +* Element.replace(old, new) method to replace a subelement by another one + +Bugs fixed +---------- + +* Setting an attribute to a non-string value did not raise an exception + +* Element.remove() deleted the tail text from the removed Element + + 1.0.2 (2006-06-27) ================== Modified: lxml/branch/lxml-1.0/src/lxml/apihelpers.pxi ============================================================================== --- lxml/branch/lxml-1.0/src/lxml/apihelpers.pxi (original) +++ lxml/branch/lxml-1.0/src/lxml/apihelpers.pxi Fri Jul 21 21:57:13 2006 @@ -107,7 +107,7 @@ tree.xmlFree(c_result) return result -cdef void _setAttributeValue(_NodeBase element, key, value): +cdef int _setAttributeValue(_NodeBase element, key, value) except -1: cdef xmlNs* c_ns cdef char* c_value cdef char* c_tag @@ -120,6 +120,7 @@ else: c_ns = element._doc._findOrBuildNodeNs(element._c_node, _cstr(ns)) tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value) + return 0 cdef object __RE_XML_ENCODING __RE_XML_ENCODING = re.compile( Modified: lxml/branch/lxml-1.0/src/lxml/etree.pyx ============================================================================== 
--- lxml/branch/lxml-1.0/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-1.0/src/lxml/etree.pyx Fri Jul 21 21:57:13 2006 @@ -726,11 +726,32 @@ or contents. """ cdef xmlNode* c_node + cdef xmlNode* c_next c_node = element._c_node if c_node.parent is not self._c_node: raise ValueError, "Element is not a child of this node." - _removeText(c_node.next) + c_next = element._c_node.next tree.xmlUnlinkNode(c_node) + _moveTail(c_next, c_node) + + def replace(self, _Element old_element not None, + _Element new_element not None): + """Replaces a subelement with the element passed as second argument. + """ + cdef xmlNode* c_old_node + cdef xmlNode* c_old_next + cdef xmlNode* c_new_node + cdef xmlNode* c_new_next + c_old_node = old_element._c_node + if c_old_node.parent is not self._c_node: + raise ValueError, "Element is not a child of this node." + c_old_next = c_old_node.next + c_new_node = new_element._c_node + c_new_next = c_new_node.next + tree.xmlReplaceNode(c_old_node, c_new_node) + moveNodeToDocument(new_element, self._doc) + _moveTail(c_new_next, c_new_node) + _moveTail(c_old_next, c_old_node) # PROPERTIES property tag: Modified: lxml/branch/lxml-1.0/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/lxml-1.0/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/lxml-1.0/src/lxml/tests/test_elementtree.py Fri Jul 21 21:57:13 2006 @@ -212,6 +212,17 @@ self.assertEquals('<>&', tostring(a)) + def test_text_str_subclass(self): + Element = self.etree.Element + + class strTest(str): + pass + + a = Element("a") + a.text = strTest("text") + self.assertXML('text', + a) + def test_tail(self): ElementTree = self.etree.ElementTree @@ -224,6 +235,18 @@ self.assertEquals('mixed', root[0].text) self.assertEquals(' content.', root[0].tail) + def test_tail_str_subclass(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + class strTest(str): + pass + + a = Element("a") + 
SubElement(a, "t").tail = strTest("tail") + self.assertXML('tail', + a) + def test_ElementTree(self): Element = self.etree.Element ElementTree = self.etree.ElementTree @@ -368,6 +391,13 @@ True, '{http://ns.codespeak.net/test}baz' in root.attrib) + def test_attribute_set(self): + Element = self.etree.Element + + root = Element("root") + root.set("attr", "TEST") + self.assertEquals("TEST", root.get("attr")) + def test_XML(self): XML = self.etree.XML @@ -888,6 +918,17 @@ self.assertEquals('c', a.tag) self.assertEquals('', + a) + def test_delitem(self): Element = self.etree.Element SubElement = self.etree.SubElement @@ -1154,6 +1195,7 @@ self.assertXML( '', a) + self.assertEquals('b2', b.tail) def test_getchildren(self): Element = self.etree.Element Modified: lxml/branch/lxml-1.0/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/lxml-1.0/src/lxml/tests/test_etree.py (original) +++ lxml/branch/lxml-1.0/src/lxml/tests/test_etree.py Fri Jul 21 21:57:13 2006 @@ -38,6 +38,16 @@ self.assertRaises(ValueError, Element, '{test}') self.assertRaises(ValueError, setattr, el, 'tag', '{test}') + def test_attribute_set(self): + # ElementTree accepts arbitrary attribute values + # lxml.etree allows only strings + Element = self.etree.Element + + root = Element("root") + root.set("attr", "TEST") + self.assertEquals("TEST", root.get("attr")) + self.assertRaises(TypeError, root.set, "newattr", 5) + def test_parse_error(self): parse = self.etree.parse # from StringIO @@ -506,6 +516,56 @@ self.assertEquals( 0, e.index(e[0], -12, -1)) + def test_replace(self): + etree = self.etree + e = etree.Element('foo') + for i in range(10): + el = etree.SubElement(e, 'a%s' % i) + el.text = "text%d" % i + el.tail = "tail%d" % i + + child0 = e[0] + child1 = e[1] + child2 = e[2] + + e.replace(e[0], e[1]) + self.assertEquals( + 9, len(e)) + self.assertEquals( + child1, e[0]) + self.assertEquals( + child1.text, "text1") + 
self.assertEquals( + child1.tail, "tail1") + self.assertEquals( + child0.tail, "tail0") + self.assertEquals( + child2, e[1]) + + e.replace(e[-1], e[0]) + self.assertEquals( + child1, e[-1]) + self.assertEquals( + child1.text, "text1") + self.assertEquals( + child1.tail, "tail1") + self.assertEquals( + child2, e[0]) + + def test_replace_new(self): + etree = self.etree + e = etree.Element('foo') + for i in range(10): + etree.SubElement(e, 'a%s' % i) + + new_element = etree.Element("test") + child1 = e[1] + e.replace(e[0], new_element) + self.assertEquals( + new_element, e[0]) + self.assertEquals( + child1, e[1]) + def test_docinfo_public(self): etree = self.etree xml_header = '' From scoder at codespeak.net Sat Jul 22 22:08:05 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 22 Jul 2006 22:08:05 +0200 (CEST) Subject: [Lxml-checkins] r30365 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20060722200805.7A745100BF@code0.codespeak.net> Author: scoder Date: Sat Jul 22 22:08:02 2006 New Revision: 30365 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/iterparse.pxi lxml/trunk/src/lxml/tests/test_elementtree.py Log: fix: iterparse.__next__ did not INCREF borrowed Python references returned by PyList_GET_ITEM Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sat Jul 22 22:08:02 2006 @@ -13,6 +13,8 @@ Bugs fixed ---------- +* ``iterparse()`` could crash on long XML files + * Setting an attribute to a non-string value did not raise an exception * Element.remove() deleted the tail text from the removed Element Modified: lxml/trunk/src/lxml/iterparse.pxi ============================================================================== --- lxml/trunk/src/lxml/iterparse.pxi (original) +++ lxml/trunk/src/lxml/iterparse.pxi Sat Jul 22 22:08:02 2006 @@ -249,6 +249,7 @@ context = <_IterparseResolverContext>self._context if 
python.PyList_GET_SIZE(context._events) > context._event_index: item = python.PyList_GET_ITEM(context._events, context._event_index) + python.Py_INCREF(item) # 'borrowed reference' from PyList_GET_ITEM context._event_index = context._event_index + 1 return item @@ -280,7 +281,9 @@ raise StopIteration context._event_index = 1 - return python.PyList_GET_ITEM(context._events, 0) + element = python.PyList_GET_ITEM(context._events, 0) + python.Py_INCREF(element) # 'borrowed reference' from PyList_GET_ITEM + return element cdef class iterwalk: Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Sat Jul 22 22:08:02 2006 @@ -1845,6 +1845,17 @@ self.assertEquals(0, len(root)) + def test_iterparse_large(self): + iterparse = self.etree.iterparse + CHILD_COUNT = 12345 + f = StringIO('' + ('test'*CHILD_COUNT) + '') + + i = 0 + for key in iterparse(f): + event, element = key + i += 1 + self.assertEquals(i, CHILD_COUNT + 1) + def test_iterparse_attrib_ns(self): iterparse = self.etree.iterparse f = StringIO('') From scoder at codespeak.net Sun Jul 23 08:00:30 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 23 Jul 2006 08:00:30 +0200 (CEST) Subject: [Lxml-checkins] r30366 - lxml/branch/capi/src/lxml Message-ID: <20060723060030.EDFEC100CB@code0.codespeak.net> Author: scoder Date: Sun Jul 23 08:00:28 2006 New Revision: 30366 Modified: lxml/branch/capi/src/lxml/tree.pxd Log: return value fixes for Python API functions Modified: lxml/branch/capi/src/lxml/tree.pxd ============================================================================== --- lxml/branch/capi/src/lxml/tree.pxd (original) +++ lxml/branch/capi/src/lxml/tree.pxd Sun Jul 23 08:00:28 2006 @@ -175,9 +175,10 @@ cdef char* xmlGetNoNsProp(xmlNode* node, char* name) cdef char* xmlGetNsProp(xmlNode* node, char* 
name, char* nameSpace) cdef void xmlSetNs(xmlNode* node, xmlNs* ns) - cdef void xmlSetProp(xmlNode* node, char* name, char* value) - cdef void xmlSetNsProp(xmlNode* node, xmlNs* ns, char* name, char* value) - cdef void xmlRemoveProp(xmlAttr* cur) + cdef xmlAttr* xmlSetProp(xmlNode* node, char* name, char* value) + cdef xmlAttr* xmlSetNsProp(xmlNode* node, xmlNs* ns, + char* name, char* value) + cdef int xmlRemoveProp(xmlAttr* cur) cdef char* xmlGetNodePath(xmlNode* node) cdef void xmlDocDumpMemory(xmlDoc* cur, char** mem, int* size) cdef void xmlDocDumpMemoryEnc(xmlDoc* cur, char** mem, int* size, From scoder at codespeak.net Sun Jul 23 08:03:07 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 23 Jul 2006 08:03:07 +0200 (CEST) Subject: [Lxml-checkins] r30367 - lxml/branch/capi/src/lxml Message-ID: <20060723060307.BE407100CB@code0.codespeak.net> Author: scoder Date: Sun Jul 23 08:03:04 2006 New Revision: 30367 Modified: lxml/branch/capi/src/lxml/etreepublic.pxd lxml/branch/capi/src/lxml/public-api.pxi Log: new public API function findOrBuildNodeNs Modified: lxml/branch/capi/src/lxml/etreepublic.pxd ============================================================================== --- lxml/branch/capi/src/lxml/etreepublic.pxd (original) +++ lxml/branch/capi/src/lxml/etreepublic.pxd Sun Jul 23 08:03:04 2006 @@ -154,6 +154,10 @@ # set the tail text value of an element cdef object setTailText(tree.xmlNode* c_node, text) + # recursively lookup a namespace in element or ancestors, or create it + cdef tree.xmlNs* findOrBuildNodeNs(_Document doc, tree.xmlNode* c_node, + char* href) + # find the Document of an Element, ElementTree or Document (itself!) 
cdef _Document documentOrRaise(object input) Modified: lxml/branch/capi/src/lxml/public-api.pxi ============================================================================== --- lxml/branch/capi/src/lxml/public-api.pxi (original) +++ lxml/branch/capi/src/lxml/public-api.pxi Sun Jul 23 08:03:04 2006 @@ -109,3 +109,9 @@ cdef public void initTagMatch(_ElementTagMatcher matcher, tag): matcher._initTagMatch(tag) + +cdef public tree.xmlNs* findOrBuildNodeNs(_Document doc, xmlNode* c_node, + char* href) except NULL: + if doc is None: + raise TypeError + return doc._findOrBuildNodeNs(c_node, href) From scoder at codespeak.net Sun Jul 23 08:03:25 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 23 Jul 2006 08:03:25 +0200 (CEST) Subject: [Lxml-checkins] r30368 - in lxml/branch/capi/src/lxml: . tests Message-ID: <20060723060325.BFD91100CB@code0.codespeak.net> Author: scoder Date: Sun Jul 23 08:03:23 2006 New Revision: 30368 Modified: lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: objectify.typedef() to create pytype attributes for the elements of an XML tree, support for registering new types, _BoolElement Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Sun Jul 23 08:03:23 2006 @@ -1,6 +1,6 @@ -from etreepublic cimport _Element, ElementBase, _ElementIterator +from etreepublic cimport _Document, _Element, ElementBase, _ElementIterator from etreepublic cimport elementFactory, import_etree, textOf -from python cimport isinstance, getattr, _cstr, Py_ssize_t +from python cimport isinstance, issubclass, callable, getattr, _cstr, Py_ssize_t cimport etreepublic as cetree cimport python cimport tree @@ -30,6 +30,11 @@ cdef object _len _len = __builtin__.len +cdef object True +True = __builtin__.True +cdef object False +False = 
__builtin__.False + cdef object list list = __builtin__.list @@ -42,6 +47,12 @@ cdef char* _PYTYPE_NAMESPACE _PYTYPE_NAMESPACE = _cstr(PYTYPE_NAMESPACE) +PYTYPE_ATTRIBUTE_NAME = "pytype" +cdef char* _PYTYPE_ATTRIBUTE_NAME +_PYTYPE_ATTRIBUTE_NAME = _cstr(PYTYPE_ATTRIBUTE_NAME) + +PYTYPE_ATTRIBUTE = cetree.namespacedNameFromNsName( + _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) cdef class ObjectifiedElement(ElementBase): """Element class with an Amara-like API. @@ -371,10 +382,6 @@ def _init(self): self._type = int -cdef class _LongElement(_NumberElement): - def _init(self): - self._type = long - cdef class _FloatElement(_NumberElement): def _init(self): self._type = float @@ -417,11 +424,30 @@ cdef class _NoneElement(ObjectifiedElement): pass +cdef class _BoolElement(ObjectifiedElement): + """Boolean type base on string values: 'true' or 'false'. + """ + def __nonzero__(self): + text = textOf(self._c_node) + if text is None: + return False + text = text.lower() + if text == 'false': + return False + elif text == 'true': + return True + else: + raise ValueError, "Invalid boolean value: '%s'" % text + +def __checkBool(s): + if s != 'true' and s != 'false': + raise ValueError + cdef object _strValueOf(obj): if python._isString(obj): return obj - if isinstance(obj, _StringElement): - return textOf((<_StringElement>obj)._c_node) + if isinstance(obj, _Element): + return textOf((<_Element>obj)._c_node) if obj is None: return '' return str(obj) @@ -432,33 +458,81 @@ textOf((<_NumberElement>obj)._c_node)) return obj +cdef class PyType: + """User defined type. + + Named type that contains a type check function and a type class that + inherits from ObjectifiedElement. The type check must take a string as + argument and raise a ValueError if it cannot handle the string value. + + Example: + PyType('int', int, MyIntClass).register() + + Note that the order in which types are registered matters. The first + matching type will be used. 
+ """ + cdef readonly object name + cdef object _type_check + cdef object _type + def __init__(self, name, type_check, type_class): + if not python._isString(name): + raise TypeError, "Type name must be a string" + if type_check is not None and not callable(type_check): + raise TypeError, "Type check function must be callable" + if not issubclass(type_class, ObjectifiedElement): + raise TypeError, "Type class must inherit from ObjectifiedElement" + self.name = name + self._type = type_class + self._type_check = type_check + + def register(self): + _TYPE_DICT[self.name] = self + if self._type_check is None: + return + for item in _TYPE_CHECKS: + if item[0] is self._type_check: + _TYPE_CHECKS.remove(item) + break + _TYPE_CHECKS.append( (self._type_check, self) ) + + def unregister(self): + if _TYPE_DICT.get(self.name) is self: + del _TYPE_DICT[self.name] + if self._type_check is None: + return + try: + _TYPE_CHECKS.remove( (self._type_check, self) ) + except ValueError: + pass + cdef object _TYPE_DICT -_TYPE_DICT = { - 'int' : _IntElement, - 'long' : _LongElement, - 'float' : _FloatElement, - 'str' : _StringElement, - 'unicode' : _StringElement - } +_TYPE_DICT = {} + +cdef object _TYPE_CHECKS +_TYPE_CHECKS = [] -cdef object _PY_NUMBER_TYPES -_PY_NUMBER_TYPES = (int, float) +PyType('int', int, _IntElement ).register() +PyType('long', None, _IntElement ).register() +PyType('float', float, _FloatElement ).register() +PyType('bool', __checkBool, _BoolElement ).register() +PyType('str', None, _StringElement).register() cdef object _guessElementClass(tree.xmlNode* c_node): value = textOf(c_node) if value is None: - # FIXME: how do we get rid of the class if it was not the right one? + # FIXME: how do we get rid of the class if the value changes? 
return _NoneElement - if python.PyUnicode_Check(value): - # values containing unicode characters cannot be numbers - return _StringElement - for pytype in _PY_NUMBER_TYPES: +## if python.PyUnicode_Check(value): +## # values containing unicode characters cannot be numbers +## return _StringElement + _ValueError = ValueError + for type_check, pytype in _TYPE_CHECKS: try: - pytype(value) - except: + type_check(value) + return (pytype)._type + except _ValueError: pass - else: - return _TYPE_DICT[pytype.__name__] + return _StringElement ################################################################################ @@ -477,7 +551,7 @@ # otherwise determine class based on text content type value = cetree.attributeValueFromNsName( - c_node, _PYTYPE_NAMESPACE, "pytype") + c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) if value is None: el_class = _guessElementClass(c_node) if el_class is not None: @@ -485,12 +559,80 @@ else: dict_result = python.PyDict_GetItem(_TYPE_DICT, value) if dict_result is not NULL: - return dict_result + return (dict_result)._type # FIXME: is it right to raise an Exception based on data? raise ValueError, "Invalid type attribute in element '%s'" % \ cetree.namespacedNameFromNsName(tree._getNs(c_node), c_node.name) +def typedef(element_or_tree, ignore_old=True): + """Recursively creates pytype attributes on the elements of an XML tree. + + If the second argument is True (the default), current attributes will be + ignored and replaced. Otherwise, they will be checked and only replaced + if they no longer fit the current text value. 
+ """ + cdef _Element element + cdef _Document doc + cdef int ignore + cdef tree.xmlNode* c_node + cdef tree.xmlAttr* c_attr + cdef tree.xmlNs* c_ns + element = cetree.rootNodeOrRaise(element_or_tree) + doc = element._doc + ignore = bool(ignore_old) + + _ValueError = ValueError + StrType = _TYPE_DICT.get('str') + c_node = element._c_node + tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) + pytype = None + value = None + if not ignore: + # check that old value is valid + old_value = cetree.attributeValueFromNsName( + c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) + if old_value is not None: + pytype = _TYPE_DICT.get(old_value) + if pytype is not None: + value = textOf(c_node) + try: + if not (pytype)._type_check(value): + pytype = None + except _ValueError: + pytype = None + if pytype is None: + # try to guess type + if not cetree.findChildForwards(c_node, 0): + # element has no children => data class + if value is None: + value = textOf(c_node) + if value is not None: + for type_check, tested_pytype in _TYPE_CHECKS: + try: + if type_check(value) is not False: + pytype = tested_pytype + break + except _ValueError: + pass + else: + pytype = StrType + if pytype is None: + # delete attribute if it exists + c_attr = tree.xmlHasNsProp(c_node, _PYTYPE_NAMESPACE, + _PYTYPE_ATTRIBUTE_NAME) + if c_attr is not NULL: + tree.xmlRemoveProp(c_attr) + else: + # update or create attribute + c_ns = cetree.findOrBuildNodeNs(doc, c_node, _PYTYPE_NAMESPACE) + tree.xmlSetNsProp(c_node, c_ns, _PYTYPE_ATTRIBUTE_NAME, + _cstr(pytype.name)) + tree.END_FOR_EACH_ELEMENT_FROM(c_node) + +################################################################################ +# Module setup + def register(): etree.setDefaultElementClass(ObjectifiedElement) cetree.setElementClassLookupFunction(_lookupElementClass, None) Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- 
lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Sun Jul 23 08:03:23 2006 @@ -117,6 +117,18 @@ SubElement(root, "{objectified}none").text = "5.5" self.assert_(isinstance(root.none, objectify._FloatElement)) + def test_typedef(self): + XML = self.etree.XML + root = XML(u'5test1.1\uF8D2true') + objectify.typedef(root) + + child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE) + for c in root.iterchildren() ] + self.assertEquals("int", child_types[0]) + self.assertEquals("str", child_types[1]) + self.assertEquals("float", child_types[2]) + self.assertEquals("str", child_types[3]) + self.assertEquals("bool", child_types[4]) def test_suite(): suite = unittest.TestSuite() From scoder at codespeak.net Mon Jul 24 09:30:13 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 24 Jul 2006 09:30:13 +0200 (CEST) Subject: [Lxml-checkins] r30422 - in lxml/branch/capi: doc src/lxml src/lxml/tests Message-ID: <20060724073013.533F7100A9@code0.codespeak.net> Author: scoder Date: Mon Jul 24 09:30:11 2006 New Revision: 30422 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: custom type classes are public (removed leading '_' from class name), __repr__ for boolean class, documentation on registering PyType's Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Mon Jul 24 09:30:11 2006 @@ -152,9 +152,9 @@ Namespace handling ------------------ -Namespaces are handled mostly behind the scenes. If you access an attribute -of an Element without specifying a namespace, the lookup will use the -namespace of the parent:: +Namespaces are handled mostly behind the scenes. 
If you access a child of an +Element without specifying a namespace, the lookup will use the namespace of +the parent:: >>> root = etree.Element("{ns}root") >>> b = etree.SubElement(root, "{ns}b") @@ -184,18 +184,26 @@ The objectify module knows about Python data types and tries its best to let element content behave like them. For example, this works:: - >>> root = etree.XML("511") + >>> root = etree.XML("511true") >>> root.a + root.b 16 >>> root.a += root.b >>> print root.a 16 + >>> root.a = 2 >>> print root.a + 2 4 >>> print 1 + root.a 3 + >>> root.c + True + >>> root.c = False + >>> if not root.c: + ... print "false!" + false! + Objectify determines data types by trial and error, unless it finds an attribute ``pytype`` in the namespace given by the URI in ``lxml.elementlib.objectify.PYTYPE_NAMESPACE``, which must contain any of the @@ -217,6 +225,39 @@ 1234 - 12345 +Defining additional data classes +-------------------------------- + +The objectify module support the standard `namespace classes API`_ of +lxml.etree. Note, however, that it is advisable to let custom element classes +inherit from ``ObjectifiedElement`` to inherit its API. + +.. _`namespace classes API`: namespace_extensions.html + +Data classes can either inherit from ``ObjectifiedElement`` directly or from +one of the specialised classes like ``NumberElement`` or ``BoolElement``. +Their registration uses the ``PyType`` class. + + >>> class MyChristmasDate(objectify.ObjectifiedElement): + ... def callSanta(self): + ... print "Ho ho ho!" + + >>> def checkChristmasDate(date_string): + ... if not date_string.startswith('24.12.'): + ... raise ValueError + + >>> pytype = objectify.PyType('date', checkChristmasDate, MyChristmasDate) + >>> pytype.register() + + >>> root = etree.XML("24.12.200012.24.2000") + >>> root.a.callSanta() + Ho ho ho! + >>> root.b.callSanta() + Traceback (most recent call last): + ... + AttributeError: no such child: callSanta + + What is different from ElementTree? 
----------------------------------- Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Mon Jul 24 09:30:11 2006 @@ -71,7 +71,7 @@ return iter([self]) return etree.ElementChildIterator(parent, tag=self.tag) - def __str__(self): + def __repr__(self): return textOf(self._c_node) def __len__(self): @@ -140,7 +140,10 @@ except AttributeError: element = SubElement(self, tag) if not python._isString(value): - value = str(value) + if isinstance(value, bool): + value = str(value).lower() + else: + value = str(value) cetree.setNodeText(element._c_node, value) def __delattr__(self, tag): @@ -292,7 +295,7 @@ ################################################################################ # Data type support in subclasses -cdef class _NumberElement(ObjectifiedElement): +cdef class NumberElement(ObjectifiedElement): cdef object _type cdef _checkType(self, value): if not isinstance(value, self._type): @@ -378,15 +381,15 @@ ## cetree.setNodeText(self._c_node, _str(result)) ## return self -cdef class _IntElement(_NumberElement): +cdef class IntElement(NumberElement): def _init(self): self._type = int -cdef class _FloatElement(_NumberElement): +cdef class FloatElement(NumberElement): def _init(self): self._type = float -cdef class _StringElement(ObjectifiedElement): +cdef class StringElement(ObjectifiedElement): def __len__(self): return _len(_strValueOf(self)) @@ -400,8 +403,8 @@ return text + other def __mul__(self, other): - if isinstance(self, _StringElement): - return textOf((<_StringElement>self)._c_node) * _numericValueOf(other) + if isinstance(self, StringElement): + return textOf((self)._c_node) * _numericValueOf(other) else: raise TypeError, "invalid types for * operator" @@ -421,23 +424,35 @@ def __contains__(self, text): return text in textOf(self._c_node) -cdef class 
_NoneElement(ObjectifiedElement): +cdef class NoneElement(ObjectifiedElement): pass -cdef class _BoolElement(ObjectifiedElement): +cdef class BoolElement(ObjectifiedElement): """Boolean type base on string values: 'true' or 'false'. """ - def __nonzero__(self): + cdef int _boolval(self) except -1: text = textOf(self._c_node) if text is None: - return False + return 0 text = text.lower() if text == 'false': - return False + return 0 elif text == 'true': - return True + return 1 else: raise ValueError, "Invalid boolean value: '%s'" % text + + def __nonzero__(self): + if self._boolval(): + return True + else: + return False + + def __repr__(self): + if self._boolval(): + return "True" + else: + return "False" def __checkBool(s): if s != 'true' and s != 'false': @@ -453,9 +468,9 @@ return str(obj) cdef object _numericValueOf(obj): - if isinstance(obj, _NumberElement): - return (<_NumberElement>obj)._type( - textOf((<_NumberElement>obj)._c_node)) + if isinstance(obj, NumberElement): + return (obj)._type( + textOf((obj)._c_node)) return obj cdef class PyType: @@ -478,7 +493,7 @@ if not python._isString(name): raise TypeError, "Type name must be a string" if type_check is not None and not callable(type_check): - raise TypeError, "Type check function must be callable" + raise TypeError, "Type check function must be callable (or None)" if not issubclass(type_class, ObjectifiedElement): raise TypeError, "Type class must inherit from ObjectifiedElement" self.name = name @@ -511,17 +526,17 @@ cdef object _TYPE_CHECKS _TYPE_CHECKS = [] -PyType('int', int, _IntElement ).register() -PyType('long', None, _IntElement ).register() -PyType('float', float, _FloatElement ).register() -PyType('bool', __checkBool, _BoolElement ).register() -PyType('str', None, _StringElement).register() +PyType('int', int, IntElement ).register() +PyType('long', None, IntElement ).register() +PyType('float', float, FloatElement ).register() +PyType('bool', __checkBool, BoolElement ).register() 
+PyType('str', None, StringElement).register() cdef object _guessElementClass(tree.xmlNode* c_node): value = textOf(c_node) if value is None: # FIXME: how do we get rid of the class if the value changes? - return _NoneElement + return NoneElement ## if python.PyUnicode_Check(value): ## # values containing unicode characters cannot be numbers ## return _StringElement @@ -533,7 +548,7 @@ except _ValueError: pass - return _StringElement + return StringElement ################################################################################ # Element class lookup Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Mon Jul 24 09:30:11 2006 @@ -94,28 +94,35 @@ SubElement = self.etree.SubElement root = Element("{objectified}root") SubElement(root, "{objectified}none") - self.assert_(isinstance(root.none, objectify._NoneElement)) + self.assert_(isinstance(root.none, objectify.NoneElement)) + + def test_type_bool(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + root = Element("{objectified}root") + SubElement(root, "{objectified}none").text = 'true' + self.assert_(isinstance(root.none, objectify.BoolElement)) def test_type_str(self): Element = self.etree.Element SubElement = self.etree.SubElement root = Element("{objectified}root") SubElement(root, "{objectified}none").text = "test" - self.assert_(isinstance(root.none, objectify._StringElement)) + self.assert_(isinstance(root.none, objectify.StringElement)) def test_type_int(self): Element = self.etree.Element SubElement = self.etree.SubElement root = Element("{objectified}root") SubElement(root, "{objectified}none").text = "5" - self.assert_(isinstance(root.none, objectify._IntElement)) + self.assert_(isinstance(root.none, objectify.IntElement)) def test_type_float(self): Element = 
self.etree.Element SubElement = self.etree.SubElement root = Element("{objectified}root") SubElement(root, "{objectified}none").text = "5.5" - self.assert_(isinstance(root.none, objectify._FloatElement)) + self.assert_(isinstance(root.none, objectify.FloatElement)) def test_typedef(self): XML = self.etree.XML From scoder at codespeak.net Mon Jul 24 11:41:01 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 24 Jul 2006 11:41:01 +0200 (CEST) Subject: [Lxml-checkins] r30427 - lxml/branch/capi/doc Message-ID: <20060724094101.5DB48100A4@code0.codespeak.net> Author: scoder Date: Mon Jul 24 11:41:00 2006 New Revision: 30427 Modified: lxml/branch/capi/doc/objectify.txt Log: doc updates and clarifications Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Mon Jul 24 11:41:00 2006 @@ -238,7 +238,7 @@ one of the specialised classes like ``NumberElement`` or ``BoolElement``. Their registration uses the ``PyType`` class. - >>> class MyChristmasDate(objectify.ObjectifiedElement): + >>> class ChristmasDate(objectify.ObjectifiedElement): ... def callSanta(self): ... print "Ho ho ho!" @@ -246,7 +246,7 @@ ... if not date_string.startswith('24.12.'): ... raise ValueError - >>> pytype = objectify.PyType('date', checkChristmasDate, MyChristmasDate) + >>> pytype = objectify.PyType('date', checkChristmasDate, ChristmasDate) >>> pytype.register() >>> root = etree.XML("24.12.200012.24.2000") @@ -257,6 +257,21 @@ ... AttributeError: no such child: callSanta +To unregister a type, call its ``unregister()`` method:: + + >>> root.a.callSanta() + Ho ho ho! + >>> pytype.unregister() + >>> root.a.callSanta() + Traceback (most recent call last): + ... + AttributeError: no such child: callSanta + +Please read the section on `Resetting the API`_ below to learn about possible +problems. + +.. 
_`Resetting the API`: #resetting-the-api + What is different from ElementTree? ----------------------------------- @@ -308,11 +323,21 @@ ``unregister()`` function. Be aware, though, that this does not immediately apply to elements to which there is a Python reference. Their Python class will only be changed after all references are gone and the Python object is -garbage collected. When you access an element for which there is not -currently a Python representation, it will be created with the currently -registered element class:: +garbage collected. The same applies to registered data classes for elements. + +When you access an element for which there is not currently a Python +representation, it will be created with the currently registered element +class:: - >>> objectify.unregister() >>> el = etree.Element("test") >>> print isinstance(el, objectify.ObjectifiedElement) + True + + >>> objectify.unregister() + + >>> print isinstance(el, objectify.ObjectifiedElement) + True + >>> new_el = etree.Element("test") + >>> print isinstance(new_el, objectify.ObjectifiedElement) False + From scoder at codespeak.net Mon Jul 24 12:03:07 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 24 Jul 2006 12:03:07 +0200 (CEST) Subject: [Lxml-checkins] r30428 - in lxml/branch/capi: . 
doc src/lxml src/lxml/elementlib src/lxml/elements src/lxml/tests Message-ID: <20060724100307.1FAED100A4@code0.codespeak.net> Author: scoder Date: Mon Jul 24 12:03:04 2006 New Revision: 30428 Added: lxml/branch/capi/src/lxml/elements/ - copied from r30235, lxml/branch/capi/src/lxml/elementlib/ Removed: lxml/branch/capi/src/lxml/elementlib/ Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/setup.py lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_classlookup.py lxml/branch/capi/src/lxml/tests/test_objectify.py Log: moved lxml.elementlib.objectify to lxml.objectify (it's a different API, after all), renamed lxml.elementlib to lxml.elements (which is what it is supposed to contain) Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Mon Jul 24 12:03:04 2006 @@ -1,6 +1,6 @@ -========================= -lxml.elementlib.objectify -========================= +============== +lxml.objectify +============== lxml supports an alternative element API similar to the Amara_ bindery through a custom Element implementation. 
This API is very different from the @@ -13,7 +13,7 @@ by simply importing the module and calling the ``register`` function:: >>> from lxml import etree - >>> from lxml.elementlib import objectify + >>> from lxml import objectify >>> objectify.register() >>> el = etree.Element("test") @@ -206,8 +206,8 @@ Objectify determines data types by trial and error, unless it finds an attribute ``pytype`` in the namespace given by the URI in -``lxml.elementlib.objectify.PYTYPE_NAMESPACE``, which must contain any of the -following string values: int, float, str, unicode:: +``lxml.objectify.PYTYPE_NAMESPACE``, which must contain any of the following +string values: int, float, str, unicode:: >>> pytype_attr = "{%s}%s" % (objectify.PYTYPE_NAMESPACE, "pytype") Modified: lxml/branch/capi/setup.py ============================================================================== --- lxml/branch/capi/setup.py (original) +++ lxml/branch/capi/setup.py Mon Jul 24 12:03:04 2006 @@ -2,8 +2,8 @@ EXT_MODULES = [ ("etree", "lxml.etree"), - ("objectify", "lxml.elementlib.objectify"), - ("classlookup", "lxml.elementlib.classlookup") + ("objectify", "lxml.objectify"), + ("classlookup", "lxml.elements.classlookup") ] setup_args = {} @@ -202,7 +202,7 @@ ], package_dir = {'': 'src'}, - packages = ['lxml', 'lxml.elementlib'], + packages = ['lxml', 'lxml.elements'], ext_modules = ext_modules, **setup_args ) Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Mon Jul 24 12:03:04 2006 @@ -6,7 +6,7 @@ cimport tree cdef object etree -from lxml import etree +import etree # initialize C-API of lxml.etree import_etree(etree) Modified: lxml/branch/capi/src/lxml/tests/test_classlookup.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_classlookup.py (original) +++ 
lxml/branch/capi/src/lxml/tests/test_classlookup.py Mon Jul 24 12:03:04 2006 @@ -13,7 +13,7 @@ from common_imports import etree, StringIO, HelperTestCase, fileInTestDir from common_imports import SillyFileLike, canonicalize -from lxml.elementlib import classlookup +from lxml.elements import classlookup xml_str = '''\ Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Mon Jul 24 12:03:04 2006 @@ -13,7 +13,7 @@ from common_imports import etree, StringIO, HelperTestCase, fileInTestDir from common_imports import SillyFileLike, canonicalize -from lxml.elementlib import objectify +from lxml import objectify xml_str = '''\ From scoder at codespeak.net Mon Jul 24 12:11:07 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 24 Jul 2006 12:11:07 +0200 (CEST) Subject: [Lxml-checkins] r30429 - in lxml/branch/capi: . 
doc src/lxml src/lxml/tests Message-ID: <20060724101107.4AC77100CE@code0.codespeak.net> Author: scoder Date: Mon Jul 24 12:10:49 2006 New Revision: 30429 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/setup.py lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: moved lxml.objectify back to lxml.elements.objectify - I guess other extension modules are likely to implement similarly extensive APIs Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Mon Jul 24 12:10:49 2006 @@ -13,7 +13,7 @@ by simply importing the module and calling the ``register`` function:: >>> from lxml import etree - >>> from lxml import objectify + >>> from lxml.elements import objectify >>> objectify.register() >>> el = etree.Element("test") Modified: lxml/branch/capi/setup.py ============================================================================== --- lxml/branch/capi/setup.py (original) +++ lxml/branch/capi/setup.py Mon Jul 24 12:10:49 2006 @@ -2,7 +2,7 @@ EXT_MODULES = [ ("etree", "lxml.etree"), - ("objectify", "lxml.objectify"), + ("objectify", "lxml.elements.objectify"), ("classlookup", "lxml.elements.classlookup") ] Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Mon Jul 24 12:10:49 2006 @@ -6,7 +6,7 @@ cimport tree cdef object etree -import etree +from lxml import etree # initialize C-API of lxml.etree import_etree(etree) Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Mon Jul 24 
12:10:49 2006 @@ -13,7 +13,7 @@ from common_imports import etree, StringIO, HelperTestCase, fileInTestDir from common_imports import SillyFileLike, canonicalize -from lxml import objectify +from lxml.elements import objectify xml_str = '''\ From scoder at codespeak.net Mon Jul 24 13:33:41 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 24 Jul 2006 13:33:41 +0200 (CEST) Subject: [Lxml-checkins] r30435 - in lxml/branch/capi: doc src/lxml Message-ID: <20060724113341.781A3100CE@code0.codespeak.net> Author: scoder Date: Mon Jul 24 13:33:29 2006 New Revision: 30435 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: support recursive repr() on elements Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Mon Jul 24 13:33:29 2006 @@ -197,7 +197,7 @@ >>> print 1 + root.a 3 - >>> root.c + >>> print root.c True >>> root.c = False >>> if not root.c: @@ -246,8 +246,8 @@ ... if not date_string.startswith('24.12.'): ... raise ValueError - >>> pytype = objectify.PyType('date', checkChristmasDate, ChristmasDate) - >>> pytype.register() + >>> xmas_type = objectify.PyType('date', checkChristmasDate, ChristmasDate) + >>> xmas_type.register() >>> root = etree.XML("24.12.200012.24.2000") >>> root.a.callSanta() @@ -261,7 +261,7 @@ >>> root.a.callSanta() Ho ho ho! - >>> pytype.unregister() + >>> xmas_type.unregister() >>> root.a.callSanta() Traceback (most recent call last): ... @@ -273,6 +273,42 @@ .. _`Resetting the API`: #resetting-the-api +Recursive string representation of elements +------------------------------------------- + +Normally, elements use the standard string representation for repr() that is +provided by lxml.etree. 
You can enable + +Printing an objectify element outputs a pretty-print representation:: + + >>> objectify.enableRecursiveRepr() + + >>> root = etree.XML(""" + ... + ... 1 + ... 1.2 + ... 1 + ... true + ... what? + ... + ... + ... """) + >>> root + root = None [ObjectifiedElement] + a = 1 [IntElement] + * attr1 = 'foo' + * attr2 = 'bar' + a = 1.2 [FloatElement] + b = 1 [IntElement] + b = True [BoolElement] + c = 'what?' [StringElement] + d = None [NoneElement] + +This behaviour can be switched off in the same way:: + + >>> objectify.enableRecursiveRepr(False) + + What is different from ElementTree? ----------------------------------- @@ -289,33 +325,6 @@ they now support any XPath expression. -TODO: ------ - -Printing an objectify element outputs a pretty-print representation:: - - >>> root = etree.fromstring(""" - ... - ... 1 - ... 1.2 - ... 1 - ... 3.2 - ... what? - ... - ... """) - >>> print root - root = None [ObjectifiedElement] - { - a = 1 [_IntElement] - |---attr1 = foo - |---attr2 = bar - a = 1.2 [_FloatElement] - b = 1 [_IntElement] - b = 3.2 [_FloatElement] - c = 'what?' [_StringElement] - } - - Resetting the API ----------------- Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Mon Jul 24 13:33:29 2006 @@ -72,7 +72,10 @@ return etree.ElementChildIterator(parent, tag=self.tag) def __repr__(self): - return textOf(self._c_node) + if __RECURSIVE_REPR: + return _dump(self, 0) + else: + return _Element.__repr__(self) def __len__(self): """Count self and siblings with the same tag. 
@@ -316,6 +319,9 @@ def __float__(self): return _float(textOf(self._c_node)) + def __str__(self): + return textOf(self._c_node) + # def __oct__(self): # def __hex__(self): @@ -393,6 +399,9 @@ def __len__(self): return _len(_strValueOf(self)) + def __str__(self): + return textOf(self._c_node) + def __add__(self, other): text = _strValueOf(self) other = _strValueOf(other) @@ -425,7 +434,11 @@ return text in textOf(self._c_node) cdef class NoneElement(ObjectifiedElement): - pass + def __str__(self): + return "None" + + def value(self): + return None cdef class BoolElement(ObjectifiedElement): """Boolean type base on string values: 'true' or 'false'. @@ -448,12 +461,15 @@ else: return False - def __repr__(self): + def __str__(self): if self._boolval(): return "True" else: return "False" + def value(self): + return self.__nonzero__() + def __checkBool(s): if s != 'true' and s != 'false': raise ValueError @@ -537,9 +553,6 @@ if value is None: # FIXME: how do we get rid of the class if the value changes? return NoneElement -## if python.PyUnicode_Check(value): -## # values containing unicode characters cannot be numbers -## return _StringElement _ValueError = ValueError for type_check, pytype in _TYPE_CHECKS: try: @@ -551,6 +564,44 @@ return StringElement ################################################################################ +# Recursive element dumping + +cdef int __RECURSIVE_REPR +__RECURSIVE_REPR = 0 # default: off + +def enableRecursiveRepr(on=True): + """Enable a recursively generated tree representation for repr(element), + based on objectify.dump(element). 
+ """ + global __RECURSIVE_REPR + __RECURSIVE_REPR = bool(on) + +def dump(_Element element not None): + return _dump(element, 0) + +cdef object _dump(_Element element, int indent): + indentstr = " " * indent + if hasattr(element, "value"): + value = element.value() + else: + value = textOf(element._c_node) + if value and not value.strip(): + value = None + result = "%s%s = %r [%s]\n" % (indentstr, element.tag, + value, type(element).__name__) + for name, value in element.items(): + result = result + "%s * %s = %r\n" % (indentstr, name, value) + + indent = indent + 1 + for child in element.iterchildren(): + result = result + _dump(child, indent) + if indent == 1: + return result[:-1] # strip last '\n' + else: + return result + + +################################################################################ # Element class lookup cdef object _lookupElementClass(state, tree.xmlNode* c_node): From scoder at codespeak.net Mon Jul 24 13:37:23 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 24 Jul 2006 13:37:23 +0200 (CEST) Subject: [Lxml-checkins] r30436 - lxml/branch/capi/doc Message-ID: <20060724113723.5F926100CE@code0.codespeak.net> Author: scoder Date: Mon Jul 24 13:37:12 2006 New Revision: 30436 Modified: lxml/branch/capi/doc/objectify.txt Log: mention dump() function in docs Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Mon Jul 24 13:37:12 2006 @@ -304,10 +304,22 @@ c = 'what?' [StringElement] d = None [NoneElement] -This behaviour can be switched off in the same way:: +This behaviour can be switched off in the same way. 
Note that this output +format is always available through the ``dump()`` module function:: >>> objectify.enableRecursiveRepr(False) + >>> print objectify.dump(root) + root = None [ObjectifiedElement] + a = 1 [IntElement] + * attr1 = 'foo' + * attr2 = 'bar' + a = 1.2 [FloatElement] + b = 1 [IntElement] + b = True [BoolElement] + c = 'what?' [StringElement] + d = None [NoneElement] + What is different from ElementTree? ----------------------------------- From scoder at codespeak.net Mon Jul 24 14:03:06 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 24 Jul 2006 14:03:06 +0200 (CEST) Subject: [Lxml-checkins] r30437 - in lxml/branch/capi: doc src/lxml Message-ID: <20060724120306.EEFB8100DA@code0.codespeak.net> Author: scoder Date: Mon Jul 24 14:03:05 2006 New Revision: 30437 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: recursive repr() -> str() Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Mon Jul 24 14:03:05 2006 @@ -276,12 +276,11 @@ Recursive string representation of elements ------------------------------------------- -Normally, elements use the standard string representation for repr() that is -provided by lxml.etree. You can enable +Normally, elements use the standard string representation for str() that is +provided by lxml.etree. You can enable a pretty-print representation for +objectify elements like this:: -Printing an objectify element outputs a pretty-print representation:: - - >>> objectify.enableRecursiveRepr() + >>> objectify.enableRecursiveStr() >>> root = etree.XML(""" ... @@ -293,7 +292,8 @@ ... ... ... """) - >>> root + + >>> print str(root) root = None [ObjectifiedElement] a = 1 [IntElement] * attr1 = 'foo' @@ -307,7 +307,7 @@ This behaviour can be switched off in the same way. 
Note that this output format is always available through the ``dump()`` module function:: - >>> objectify.enableRecursiveRepr(False) + >>> objectify.enableRecursiveStr(False) >>> print objectify.dump(root) root = None [ObjectifiedElement] Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Mon Jul 24 14:03:05 2006 @@ -71,11 +71,11 @@ return iter([self]) return etree.ElementChildIterator(parent, tag=self.tag) - def __repr__(self): - if __RECURSIVE_REPR: + def __str__(self): + if __RECURSIVE_STR: return _dump(self, 0) else: - return _Element.__repr__(self) + return textOf(self._c_node) def __len__(self): """Count self and siblings with the same tag. @@ -566,15 +566,15 @@ ################################################################################ # Recursive element dumping -cdef int __RECURSIVE_REPR -__RECURSIVE_REPR = 0 # default: off +cdef int __RECURSIVE_STR +__RECURSIVE_STR = 0 # default: off -def enableRecursiveRepr(on=True): - """Enable a recursively generated tree representation for repr(element), +def enableRecursiveStr(on=True): + """Enable a recursively generated tree representation for str(element), based on objectify.dump(element). """ - global __RECURSIVE_REPR - __RECURSIVE_REPR = bool(on) + global __RECURSIVE_STR + __RECURSIVE_STR = bool(on) def dump(_Element element not None): return _dump(element, 0) From scoder at codespeak.net Mon Jul 24 17:43:32 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 24 Jul 2006 17:43:32 +0200 (CEST) Subject: [Lxml-checkins] r30461 - in lxml/branch/capi: . 
doc src/lxml src/lxml/tests Message-ID: <20060724154332.5867A100CE@code0.codespeak.net> Author: scoder Date: Mon Jul 24 17:43:29 2006 New Revision: 30461 Modified: lxml/branch/capi/CHANGES.txt lxml/branch/capi/doc/compatibility.txt lxml/branch/capi/src/lxml/etree.pyx lxml/branch/capi/src/lxml/etree_defs.h lxml/branch/capi/src/lxml/nsclasses.pxi lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_elementtree.py lxml/branch/capi/src/lxml/tests/test_etree.py lxml/branch/capi/src/lxml/tree.pxd Log: ET-like support for processing instructions Modified: lxml/branch/capi/CHANGES.txt ============================================================================== --- lxml/branch/capi/CHANGES.txt (original) +++ lxml/branch/capi/CHANGES.txt Mon Jul 24 17:43:29 2006 @@ -8,6 +8,8 @@ Features added -------------- +* Support for processing instructions (ET-like, not compatible) + * Element.replace(old, new) method to replace a subelement by another one * Public C-level API for independent extension modules Modified: lxml/branch/capi/doc/compatibility.txt ============================================================================== --- lxml/branch/capi/doc/compatibility.txt (original) +++ lxml/branch/capi/doc/compatibility.txt Mon Jul 24 17:43:29 2006 @@ -91,16 +91,21 @@ instead of a SyntaxError. lxml.etree follows the other parts of the parser API and raises an (XML)SyntaxError. +* ElementTree ignores comments and processing instructions when parsing XML, + while etree will read them in and treat them as Comment or + ProcessingInstruction elements respectively. + * ElementTree has a bug when serializing an empty Comment (no text argument given) to XML, etree serializes this successfully. -* ElementTree ignores comments when parsing XML, while etree will read them in - and treat them as Comment elements. - * ElementTree adds whitespace around comments on serialization, lxml does not. 
This means that a comment text "text" that ElementTree serializes as "" will become "" in lxml. +* ElementTree merges the target of a processing instruction into ``PI.text``, + while lxml.etree puts it into the ``.target`` property and leaves it out of + the ``.text`` property. + * Because etree is built on top of libxml2, which is namespace prefix aware, etree preserves namespaces declarations and prefixes while ElementTree tends to come up with its own prefixes (ns0, ns1, etc). When no namespace prefix Modified: lxml/branch/capi/src/lxml/etree.pyx ============================================================================== --- lxml/branch/capi/src/lxml/etree.pyx (original) +++ lxml/branch/capi/src/lxml/etree.pyx Mon Jul 24 17:43:29 2006 @@ -1156,7 +1156,9 @@ if c_node.type == tree.XML_ELEMENT_NODE: element_class = LOOKUP_ELEMENT_CLASS(ELEMENT_CLASS_LOOKUP_STATE, c_node) elif c_node.type == tree.XML_COMMENT_NODE: - element_class = _Comment + element_class = __DEFAULT_COMMENT_CLASS + elif c_node.type == tree.XML_PI_NODE: + element_class = __DEFAULT_PI_CLASS else: assert 0, "Unknown node type: %s" % c_node.type result = element_class() @@ -1166,16 +1168,12 @@ result._init() return result -cdef class _Comment(_Element): +cdef class __ContentOnlyElement(_Element): def set(self, key, value): pass def append(self, _Element element): pass - - property tag: - def __get__(self): - return None property attrib: def __get__(self): @@ -1216,19 +1214,26 @@ def items(self): return [] - -cdef _Comment _commentFactory(_Document doc, xmlNode* c_node): - cdef _Comment result - result = getProxy(c_node) - if result is not None: - return result - if c_node is NULL: - return None - result = _Comment() - result._doc = doc - result._c_node = c_node - registerProxy(result) - return result + +cdef class _Comment(__ContentOnlyElement): + property tag: + def __get__(self): + return Comment + +cdef class _ProcessingInstruction(__ContentOnlyElement): + property tag: + def __get__(self): + 
return ProcessingInstruction + + property target: + # not in ElementTree + def __get__(self): + return funicode(self._c_node.name) + + def __set__(self, value): + value = _utf8(value) + c_text = _cstr(value) + tree.xmlNodeSetName(self._c_node, c_text) cdef class _Attrib: cdef _NodeBase _element @@ -1525,6 +1530,11 @@ c_node = tree.xmlNewDocComment(c_doc, text) return c_node +cdef xmlNode* _createPI(xmlDoc* c_doc, char* target, char* text): + cdef xmlNode* c_node + c_node = tree.xmlNewDocPI(c_doc, target, text) + return c_node + cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, extra): cdef xmlNs* c_ns # 'extra' is not checked here (expected to be a keyword dict) @@ -1580,7 +1590,24 @@ doc = _documentFactory(c_doc, None) c_node = _createComment(c_doc, _cstr(text)) tree.xmlAddChild(c_doc, c_node) - return _commentFactory(doc, c_node) + return _elementFactory(doc, c_node) + +def ProcessingInstruction(target, text=None): + """ProcessingInstruction element factory. This factory function creates a special element that will + be serialized as an XML processing instruction. 
+ """ + cdef _Document doc + cdef xmlNode* c_node + cdef xmlDoc* c_doc + if text is None: + text = '' + else: + text = _utf8(text) + c_doc = _newDoc() + doc = _documentFactory(c_doc, None) + c_node = _createPI(c_doc, _cstr(target), _cstr(text)) + tree.xmlAddChild(c_doc, c_node) + return _elementFactory(doc, c_node) def SubElement(_Element _parent not None, _tag, attrib=None, nsmap=None, **_extra): Modified: lxml/branch/capi/src/lxml/etree_defs.h ============================================================================== --- lxml/branch/capi/src/lxml/etree_defs.h (original) +++ lxml/branch/capi/src/lxml/etree_defs.h Mon Jul 24 17:43:29 2006 @@ -38,8 +38,9 @@ #define _isString(obj) PyObject_TypeCheck(obj, &PyBaseString_Type) #define _isElement(c_node) \ - ((c_node)->type == XML_ELEMENT_NODE || \ - (c_node)->type == XML_COMMENT_NODE) + (((c_node)->type == XML_ELEMENT_NODE) || \ + ((c_node)->type == XML_COMMENT_NODE) || \ + ((c_node)->type == XML_PI_NODE)) #define _getNs(c_node) \ (((c_node)->ns == 0) ? 
0 : ((c_node)->ns->href)) Modified: lxml/branch/capi/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/capi/src/lxml/nsclasses.pxi (original) +++ lxml/branch/capi/src/lxml/nsclasses.pxi Mon Jul 24 17:43:29 2006 @@ -35,6 +35,12 @@ cdef object __DEFAULT_ELEMENT_CLASS __DEFAULT_ELEMENT_CLASS = _Element +cdef object __DEFAULT_COMMENT_CLASS +__DEFAULT_COMMENT_CLASS = _Comment + +cdef object __DEFAULT_PI_CLASS +__DEFAULT_PI_CLASS = _ProcessingInstruction + cdef object __NAMESPACE_REGISTRIES __NAMESPACE_REGISTRIES = {} Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Mon Jul 24 17:43:29 2006 @@ -577,6 +577,8 @@ __RECURSIVE_STR = bool(on) def dump(_Element element not None): + """Return a recursively generated string representation of an element. + """ return _dump(element, 0) cdef object _dump(_Element element, int indent): Modified: lxml/branch/capi/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/capi/src/lxml/tests/test_elementtree.py Mon Jul 24 17:43:29 2006 @@ -412,6 +412,7 @@ ElementTree = self.etree.ElementTree XML = self.etree.XML Comment = self.etree.Comment + ProcessingInstruction = self.etree.ProcessingInstruction el = Element('hoi') self.assert_(iselement(el)) @@ -425,6 +426,9 @@ c = Comment('test') self.assert_(iselement(c)) + + p = ProcessingInstruction("test", "some text") + self.assert_(iselement(p)) def test_iteration(self): XML = self.etree.XML @@ -723,7 +727,7 @@ None, a.tail) self.assertXML('', a) - + def test_comment(self): Element = self.etree.Element SubElement = self.etree.SubElement @@ -731,6 +735,7 @@ a = Element('a') a.append(Comment('foo')) + 
self.assertEquals(a[0].tag, Comment) self.assertEquals(a[0].text, 'foo') def test_comment_text(self): @@ -766,6 +771,28 @@ for i in c: pass + def test_pi(self): + # lxml.etree separates target and text + Element = self.etree.Element + SubElement = self.etree.SubElement + ProcessingInstruction = self.etree.ProcessingInstruction + + a = Element('a') + a.append(ProcessingInstruction('foo', 'some more text')) + self.assertEquals(a[0].tag, ProcessingInstruction) + + def test_pi_nonsense(self): + ProcessingInstruction = self.etree.ProcessingInstruction + pi = ProcessingInstruction('foo') + self.assertEquals({}, pi.attrib) + self.assertEquals([], pi.keys()) + self.assertEquals([], pi.items()) + self.assertEquals(None, pi.get('hoi')) + self.assertEquals(0, len(pi)) + # should not iterate + for i in pi: + pass + def test_setitem(self): Element = self.etree.Element SubElement = self.etree.SubElement Modified: lxml/branch/capi/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_etree.py (original) +++ lxml/branch/capi/src/lxml/tests/test_etree.py Mon Jul 24 17:43:29 2006 @@ -41,6 +41,23 @@ self.assertRaises(ValueError, Element, '{test}') self.assertRaises(ValueError, setattr, el, 'tag', '{test}') + def test_pi(self): + # lxml.etree separates target and text + Element = self.etree.Element + SubElement = self.etree.SubElement + ProcessingInstruction = self.etree.ProcessingInstruction + + a = Element('a') + a.append(ProcessingInstruction('foo', 'some more text')) + self.assertEquals(a[0].target, 'foo') + self.assertEquals(a[0].text, 'some more text') + + def test_pi_parse(self): + XML = self.etree.XML + root = XML("") + self.assertEquals(root[0].target, "mypi") + self.assertEquals(root[0].text, "my test ") + def test_parse_error(self): parse = self.etree.parse # from StringIO Modified: lxml/branch/capi/src/lxml/tree.pxd 
============================================================================== --- lxml/branch/capi/src/lxml/tree.pxd (original) +++ lxml/branch/capi/src/lxml/tree.pxd Mon Jul 24 17:43:29 2006 @@ -161,6 +161,7 @@ cdef xmlNode* xmlNewNode(xmlNs* ns, char* name) cdef xmlNode* xmlNewDocText(xmlDoc* doc, char* content) cdef xmlNode* xmlNewDocComment(xmlDoc* doc, char* content) + cdef xmlNode* xmlNewDocPI(xmlDoc* doc, char* name, char* content) cdef xmlNs* xmlNewNs(xmlNode* node, char* href, char* prefix) cdef xmlNode* xmlAddChild(xmlNode* parent, xmlNode* cur) cdef xmlNode* xmlReplaceNode(xmlNode* old, xmlNode* cur) From scoder at codespeak.net Mon Jul 24 18:02:46 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 24 Jul 2006 18:02:46 +0200 (CEST) Subject: [Lxml-checkins] r30464 - lxml/branch/capi/src/lxml Message-ID: <20060724160246.D6DF0100D8@code0.codespeak.net> Author: scoder Date: Mon Jul 24 18:02:45 2006 New Revision: 30464 Modified: lxml/branch/capi/src/lxml/etree.pyx Log: fix: forgot to UTF-8-ify the PI target Modified: lxml/branch/capi/src/lxml/etree.pyx ============================================================================== --- lxml/branch/capi/src/lxml/etree.pyx (original) +++ lxml/branch/capi/src/lxml/etree.pyx Mon Jul 24 18:02:45 2006 @@ -1599,6 +1599,7 @@ cdef _Document doc cdef xmlNode* c_node cdef xmlDoc* c_doc + target = _utf8(target) if text is None: text = '' else: From scoder at codespeak.net Mon Jul 24 18:03:33 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 24 Jul 2006 18:03:33 +0200 (CEST) Subject: [Lxml-checkins] r30465 - lxml/branch/capi/src/lxml/tests Message-ID: <20060724160333.7C18C100D8@code0.codespeak.net> Author: scoder Date: Mon Jul 24 18:03:32 2006 New Revision: 30465 Modified: lxml/branch/capi/src/lxml/tests/test_elementtree.py Log: test case for XML output of PIs Modified: lxml/branch/capi/src/lxml/tests/test_elementtree.py 
============================================================================== --- lxml/branch/capi/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/capi/src/lxml/tests/test_elementtree.py Mon Jul 24 18:03:32 2006 @@ -780,6 +780,8 @@ a = Element('a') a.append(ProcessingInstruction('foo', 'some more text')) self.assertEquals(a[0].tag, ProcessingInstruction) + self.assertXML("", + a) def test_pi_nonsense(self): ProcessingInstruction = self.etree.ProcessingInstruction From scoder at codespeak.net Mon Jul 24 18:13:25 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 24 Jul 2006 18:13:25 +0200 (CEST) Subject: [Lxml-checkins] r30467 - lxml/branch/capi/src/lxml Message-ID: <20060724161325.10F65100D2@code0.codespeak.net> Author: scoder Date: Mon Jul 24 18:13:24 2006 New Revision: 30467 Modified: lxml/branch/capi/src/lxml/etreepublic.pxd Log: etreepublic.pxd: comment on future extensions of element lookup functions to comments/PIs Modified: lxml/branch/capi/src/lxml/etreepublic.pxd ============================================================================== --- lxml/branch/capi/src/lxml/etreepublic.pxd (original) +++ lxml/branch/capi/src/lxml/etreepublic.pxd Mon Jul 24 18:13:24 2006 @@ -17,6 +17,10 @@ cdef void END_FOR_EACH_ELEMENT_FROM(tree.xmlNode* start_node) cdef extern from "etree.h": + + # first function to call! + cdef int import_etree(etree_module) except -1 + ########################################################################## # public ElementTree API classes @@ -40,9 +44,6 @@ ########################################################################## # creating Element objects - # First function to call! 
- cdef int import_etree(etree_module) except -1 - # create an Element for a C-node in the Document cdef _Element elementFactory(_Document doc, tree.xmlNode* c_node) @@ -57,6 +58,8 @@ # set the internal lookup function for Element classes # use setElementClassLookupFunction(NULL, None) to reset it + # ** this may also be used for comments and PIs in the future, + # ** so remember to check node type! cdef void setElementClassLookupFunction( object (*function)(object, tree.xmlNode*), object state) From scoder at codespeak.net Mon Jul 24 20:48:49 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 24 Jul 2006 20:48:49 +0200 (CEST) Subject: [Lxml-checkins] r30476 - lxml/branch/capi/src/lxml Message-ID: <20060724184849.BC8D3100D8@code0.codespeak.net> Author: scoder Date: Mon Jul 24 20:48:47 2006 New Revision: 30476 Modified: lxml/branch/capi/src/lxml/classlookup.pyx lxml/branch/capi/src/lxml/etree.pyx lxml/branch/capi/src/lxml/etreepublic.pxd lxml/branch/capi/src/lxml/nsclasses.pxi Log: unify _Element/_Comment/_PI class lookup Modified: lxml/branch/capi/src/lxml/classlookup.pyx ============================================================================== --- lxml/branch/capi/src/lxml/classlookup.pyx (original) +++ lxml/branch/capi/src/lxml/classlookup.pyx Mon Jul 24 20:48:47 2006 @@ -59,6 +59,8 @@ cdef object _attribute_lookup(object state, tree.xmlNode* c_node): cdef AttributeBasedElementClassLookup lookup cdef python.PyObject* dict_result + if c_node.type != tree.XML_ELEMENT_NODE: + return cetree.lookupDefaultElementClass(None, c_node) lookup = state value = cetree.attributeValueFromNsName( c_node, lookup._c_ns, lookup._c_name) Modified: lxml/branch/capi/src/lxml/etree.pyx ============================================================================== --- lxml/branch/capi/src/lxml/etree.pyx (original) +++ lxml/branch/capi/src/lxml/etree.pyx Mon Jul 24 20:48:47 2006 @@ -1153,14 +1153,7 @@ return result if c_node is NULL: return None - if c_node.type 
== tree.XML_ELEMENT_NODE: - element_class = LOOKUP_ELEMENT_CLASS(ELEMENT_CLASS_LOOKUP_STATE, c_node) - elif c_node.type == tree.XML_COMMENT_NODE: - element_class = __DEFAULT_COMMENT_CLASS - elif c_node.type == tree.XML_PI_NODE: - element_class = __DEFAULT_PI_CLASS - else: - assert 0, "Unknown node type: %s" % c_node.type + element_class = LOOKUP_ELEMENT_CLASS(ELEMENT_CLASS_LOOKUP_STATE, c_node) result = element_class() result._doc = doc result._c_node = c_node Modified: lxml/branch/capi/src/lxml/etreepublic.pxd ============================================================================== --- lxml/branch/capi/src/lxml/etreepublic.pxd (original) +++ lxml/branch/capi/src/lxml/etreepublic.pxd Mon Jul 24 20:48:47 2006 @@ -56,10 +56,9 @@ # deep copy a node to include in in the Document cdef _Element deepcopyNodeToDocument(_Document doc, tree.xmlNode* c_root) - # set the internal lookup function for Element classes + # set the internal lookup function for Element/Comment/PI classes # use setElementClassLookupFunction(NULL, None) to reset it - # ** this may also be used for comments and PIs in the future, - # ** so remember to check node type! + # note that the lookup function *must always* return an _Element subclass! cdef void setElementClassLookupFunction( object (*function)(object, tree.xmlNode*), object state) Modified: lxml/branch/capi/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/capi/src/lxml/nsclasses.pxi (original) +++ lxml/branch/capi/src/lxml/nsclasses.pxi Mon Jul 24 20:48:47 2006 @@ -29,7 +29,14 @@ cdef object _lookupDefaultElementClass(_, xmlNode* c_node): "Trivial class lookup function that always returns the default class." 
- return __DEFAULT_ELEMENT_CLASS + if c_node.type == tree.XML_ELEMENT_NODE: + return __DEFAULT_ELEMENT_CLASS + elif c_node.type == tree.XML_COMMENT_NODE: + return __DEFAULT_COMMENT_CLASS + elif c_node.type == tree.XML_PI_NODE: + return __DEFAULT_PI_CLASS + else: + assert 0, "Unknown node type: %s" % c_node.type cdef object __DEFAULT_ELEMENT_CLASS @@ -205,10 +212,12 @@ else: return dict_result -cdef object _find_nselement_class(_, xmlNode* c_node): +cdef object _find_nselement_class(state, xmlNode* c_node): cdef python.PyObject* dict_result cdef _NamespaceRegistry registry cdef char* c_namespace_utf + if c_node.type != tree.XML_ELEMENT_NODE: + return _lookupDefaultElementClass(state, c_node) c_namespace_utf = _getNs(c_node) if c_namespace_utf is not NULL: dict_result = python.PyDict_GetItemString( From scoder at codespeak.net Mon Jul 24 20:50:32 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 24 Jul 2006 20:50:32 +0200 (CEST) Subject: [Lxml-checkins] r30477 - lxml/branch/capi Message-ID: <20060724185032.4E3FF100A9@code0.codespeak.net> Author: scoder Date: Mon Jul 24 20:50:31 2006 New Revision: 30477 Modified: lxml/branch/capi/bench.py Log: bench.py option '-fel' that registers the faster default element class lookup in lxml.etree Modified: lxml/branch/capi/bench.py ============================================================================== --- lxml/branch/capi/bench.py (original) +++ lxml/branch/capi/bench.py Mon Jul 24 20:50:31 2006 @@ -650,6 +650,16 @@ from lxml import etree _etrees.append(etree) + try: + sys.argv.remove('-fel') + except ValueError: + pass + else: + # use fast element creation in lxml.etree + from lxml.elements import classlookup + classlookup.setElementClassLookup( + classlookup.ElementDefaultClassLookup()) + if len(sys.argv) > 1: if '-a' in sys.argv or '-c' in sys.argv: # 'all' or 'C-implementations' ? 
From scoder at codespeak.net Tue Jul 25 11:03:58 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 25 Jul 2006 11:03:58 +0200 (CEST) Subject: [Lxml-checkins] r30500 - in lxml/branch/capi/src/lxml: . tests Message-ID: <20060725090358.E6103100A0@code0.codespeak.net> Author: scoder Date: Tue Jul 25 11:03:56 2006 New Revision: 30500 Modified: lxml/branch/capi/src/lxml/classlookup.pyx lxml/branch/capi/src/lxml/etree.pyx lxml/branch/capi/src/lxml/etreepublic.pxd lxml/branch/capi/src/lxml/nsclasses.pxi lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/parser.pxi lxml/branch/capi/src/lxml/public-api.pxi lxml/branch/capi/src/lxml/tests/test_classlookup.py Log: new custom class lookups: parser based and subclassable, support for fallback lookups (chained lookup) Modified: lxml/branch/capi/src/lxml/classlookup.pyx ============================================================================== --- lxml/branch/capi/src/lxml/classlookup.pyx (original) +++ lxml/branch/capi/src/lxml/classlookup.pyx Tue Jul 25 11:03:56 2006 @@ -1,6 +1,12 @@ # Configurable Element class lookup +__doc__ = """Configurable Element class lookup. + + +""" + from python cimport isinstance, getattr, _cstr, Py_ssize_t +from etreepublic cimport _Document, _ElementClassLookup cimport etreepublic as cetree cimport python cimport tree @@ -10,21 +16,34 @@ # initialize C-API of lxml.etree cetree.import_etree(etree) -cdef class ElementClassLookup: - """Superclass of Element class lookups. +cdef class ElementClassLookup(_ElementClassLookup): + """Superclass of Element class lookups with additional fallback. 
""" - cdef object (*_lookup_function)(object, tree.xmlNode*) - def __init__(self): + cdef readonly _ElementClassLookup fallback + cdef object (*_fallback_function)(object, _Document, tree.xmlNode*) + def __init__(self, _ElementClassLookup fallback=None): self._lookup_function = NULL # use default lookup + if fallback is None: + fallback = ElementDefaultClassLookup() + self.setFallback(fallback) + + def setFallback(self, _ElementClassLookup lookup not None): + """Sets the fallback scheme for this lookup method. + """ + self.fallback = lookup + self._fallback_function = lookup._lookup_function + + cdef object _callFallback(self, doc, tree.xmlNode* c_node): + return self._fallback_function(self.fallback, doc, c_node) -cdef class ElementNamespaceClassLookup(ElementClassLookup): +cdef class ElementNamespaceClassLookup(_ElementClassLookup): """Looks up Element class in the Namespace registry. """ # uses default lookup -cdef class ElementDefaultClassLookup(ElementClassLookup): +cdef class ElementDefaultClassLookup(_ElementClassLookup): """Always returns the default Element class. """ def __init__(self): @@ -38,6 +57,7 @@ Arguments: * attribute name ('{ns}name' style string) * class mapping (Python dict mapping attribute values to Element classes) + * fallback (optional fallback lookup mechanism) A None key in the class mapping will be checked if the attribute is missing. 
""" @@ -45,7 +65,8 @@ cdef object _pytag cdef char* _c_ns cdef char* _c_name - def __init__(self, attribute_name, class_mapping): + def __init__(self, attribute_name, class_mapping, + _ElementClassLookup fallback=None): self._pytag = cetree.getNsTag(attribute_name) ns, name = self._pytag if ns is None: @@ -54,25 +75,88 @@ self._c_ns = _cstr(ns) self._c_name = _cstr(name) self._class_mapping = dict(class_mapping) + + ElementClassLookup.__init__(self, fallback) self._lookup_function = _attribute_lookup -cdef object _attribute_lookup(object state, tree.xmlNode* c_node): +cdef object _attribute_lookup(state, _Document doc, tree.xmlNode* c_node): cdef AttributeBasedElementClassLookup lookup cdef python.PyObject* dict_result - if c_node.type != tree.XML_ELEMENT_NODE: - return cetree.lookupDefaultElementClass(None, c_node) + lookup = state - value = cetree.attributeValueFromNsName( - c_node, lookup._c_ns, lookup._c_name) - dict_result = python.PyDict_GetItem(lookup._class_mapping, value) - if dict_result is NULL: - return cetree.lookupDefaultElementClass(None, c_node) - else: - return dict_result + if c_node.type == tree.XML_ELEMENT_NODE: + value = cetree.attributeValueFromNsName( + c_node, lookup._c_ns, lookup._c_name) + dict_result = python.PyDict_GetItem(lookup._class_mapping, value) + if dict_result is not NULL: + return dict_result + return lookup._callFallback(doc, c_node) + + +cdef class ParserBasedElementClassLookup(ElementClassLookup): + """Element class lookup based on the XML parser. 
+ """ + def __init__(self, _ElementClassLookup fallback=None): + ElementClassLookup.__init__(self, fallback) + self._lookup_function = _parser_lookup + +cdef object _parser_lookup(state, _Document doc, tree.xmlNode* c_node): + cdef ElementClassLookup lookup + cdef _ElementClassLookup parser_lookup + + lookup = state + if c_node.type == tree.XML_ELEMENT_NODE: + parser_lookup = cetree.getParserElementLookupFromDocument(doc) + if parser_lookup is not None: + return parser_lookup._lookup_function(parser_lookup, doc, c_node) + return lookup._callFallback(doc, c_node) + + +cdef class CustomElementClassLookup(ElementClassLookup): + """Element class lookup based on a subclass method. + + You can inherit from this class and override the method + lookup(type, doc, namespace, name) -def setElementClassLookup(ElementClassLookup lookup = None): - if lookup is None or lookup._lookup_function is NULL: - cetree.setElementClassLookupFunction(NULL, None) + to lookup the element class for a node. Arguments of the method: + * type: one of 'element', 'comment', 'PI' + * doc: document that the node is in + * namespace: namespace URI of the node (or None for comments/PIs) + * name: name of the element, None for comments, target for PIs + + If you return None from this method, the fallback will be called. 
+ """ + def __init__(self, _ElementClassLookup fallback=None): + ElementClassLookup.__init__(self, fallback) + self._lookup_function = _custom_lookup + + def lookup(self, type, doc, namespace, name): + return None + +cdef object _custom_lookup(state, _Document doc, tree.xmlNode* c_node): + cdef CustomElementClassLookup lookup + cdef char* c_str + + lookup = state + + if c_node.type == tree.XML_COMMENT_NODE: + element_type = "comment" + elif c_node.type == tree.XML_PI_NODE: + element_type = "PI" + else: + element_type = "element" + if c_node.name is NULL: + name = None else: - cetree.setElementClassLookupFunction(lookup._lookup_function, lookup) + name = c_node.name + c_str = tree._getNs(c_node) + if c_str is NULL: + ns = None + else: + ns = c_str + + cls = lookup.lookup(element_type, doc, ns, name) + if cls is not None: + return cls + return lookup._callFallback(doc, c_node) Modified: lxml/branch/capi/src/lxml/etree.pyx ============================================================================== --- lxml/branch/capi/src/lxml/etree.pyx (original) +++ lxml/branch/capi/src/lxml/etree.pyx Tue Jul 25 11:03:56 2006 @@ -169,6 +169,7 @@ # forward declaration of _BaseParser, see parser.pxi cdef class _BaseParser + cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: """Internal base class to reference a libxml document. 
@@ -1153,7 +1154,8 @@ return result if c_node is NULL: return None - element_class = LOOKUP_ELEMENT_CLASS(ELEMENT_CLASS_LOOKUP_STATE, c_node) + element_class = LOOKUP_ELEMENT_CLASS(ELEMENT_CLASS_LOOKUP_STATE, + doc, c_node) result = element_class() result._doc = doc result._c_node = c_node @@ -1747,7 +1749,49 @@ return ElementTree(doc.getroot()) -# include submodules +################################################################################ +# Element class lookup + +ctypedef object (*_element_class_lookup_function)(object, _Document, xmlNode*) + +# class to store element class lookup functions +cdef public class _ElementClassLookup [ type LxmlElementClassLookupType, + object LxmlElementClassLookup ]: + """Superclass of Element class lookups. + """ + cdef _element_class_lookup_function _lookup_function + def __init__(self): + self._lookup_function = NULL # use default lookup + +# default: Namespace classes +cdef _element_class_lookup_function DEFAULT_ELEMENT_CLASS_LOOKUP +DEFAULT_ELEMENT_CLASS_LOOKUP = _find_nselement_class + +cdef _element_class_lookup_function LOOKUP_ELEMENT_CLASS +LOOKUP_ELEMENT_CLASS = DEFAULT_ELEMENT_CLASS_LOOKUP + +cdef object ELEMENT_CLASS_LOOKUP_STATE + +cdef void _setElementClassLookupFunction( + _element_class_lookup_function function, object state): + global LOOKUP_ELEMENT_CLASS, ELEMENT_CLASS_LOOKUP_STATE + if function is NULL: + LOOKUP_ELEMENT_CLASS = DEFAULT_ELEMENT_CLASS_LOOKUP + ELEMENT_CLASS_LOOKUP_STATE = None + else: + LOOKUP_ELEMENT_CLASS = function + ELEMENT_CLASS_LOOKUP_STATE = state + +def setElementClassLookup(_ElementClassLookup lookup = None): + if lookup is None or lookup._lookup_function is NULL: + _setElementClassLookupFunction(NULL, None) + else: + _setElementClassLookupFunction(lookup._lookup_function, lookup) + + +################################################################################ +# Include submodules + include "proxy.pxi" # Proxy handling (element backpointers/memory/etc.) 
include "apihelpers.pxi" # Private helper functions include "xmlerror.pxi" # Error and log handling @@ -1798,31 +1842,6 @@ include "xmlschema.pxi" # XMLSchema ################################################################################ -# Element class lookup - -ctypedef object (*_element_class_lookup_function)(object, xmlNode*) - -# default: Namespace classes -cdef _element_class_lookup_function DEFAULT_ELEMENT_CLASS_LOOKUP -DEFAULT_ELEMENT_CLASS_LOOKUP = _find_nselement_class - -cdef _element_class_lookup_function LOOKUP_ELEMENT_CLASS -LOOKUP_ELEMENT_CLASS = DEFAULT_ELEMENT_CLASS_LOOKUP - -cdef object ELEMENT_CLASS_LOOKUP_STATE - -cdef void _setElementClassLookupFunction( - _element_class_lookup_function function, object state): - global LOOKUP_ELEMENT_CLASS, ELEMENT_CLASS_LOOKUP_STATE - if function is NULL: - LOOKUP_ELEMENT_CLASS = DEFAULT_ELEMENT_CLASS_LOOKUP - ELEMENT_CLASS_LOOKUP_STATE = None - else: - LOOKUP_ELEMENT_CLASS = function - ELEMENT_CLASS_LOOKUP_STATE = state - - -################################################################################ # Public C API include "public-api.pxi" Modified: lxml/branch/capi/src/lxml/etreepublic.pxd ============================================================================== --- lxml/branch/capi/src/lxml/etreepublic.pxd (original) +++ lxml/branch/capi/src/lxml/etreepublic.pxd Tue Jul 25 11:03:56 2006 @@ -41,6 +41,9 @@ cdef _Document _doc cdef _Element _element + cdef class lxml.etree._ElementClassLookup [ object LxmlElementClassLookup ]: + cdef object (*_lookup_function)(object, _Document, tree.xmlNode*) + ########################################################################## # creating Element objects @@ -60,13 +63,18 @@ # use setElementClassLookupFunction(NULL, None) to reset it # note that the lookup function *must always* return an _Element subclass! 
cdef void setElementClassLookupFunction( - object (*function)(object, tree.xmlNode*), object state) + object (*function)(object, _Document, tree.xmlNode*), object state) # lookup function that always returns the default Element class - cdef object lookupDefaultElementClass(object _, tree.xmlNode* c_node) + cdef object lookupDefaultElementClass(_1, _Document _2, + tree.xmlNode* c_node) # lookup function for namespace/tag specific Element classes - cdef object lookupNamespaceElementClass(object _, tree.xmlNode* c_node) + cdef object lookupNamespaceElementClass(_1, _Document _2, + tree.xmlNode* c_node) + + # return the element class lookup registered for the parser of this document + cdef object getParserElementLookupFromDocument(_Document doc) ########################################################################## # XML attribute access Modified: lxml/branch/capi/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/capi/src/lxml/nsclasses.pxi (original) +++ lxml/branch/capi/src/lxml/nsclasses.pxi Tue Jul 25 11:03:56 2006 @@ -27,7 +27,7 @@ else: __DEFAULT_ELEMENT_CLASS = cls -cdef object _lookupDefaultElementClass(_, xmlNode* c_node): +cdef object _lookupDefaultElementClass(_state, _doc, xmlNode* c_node): "Trivial class lookup function that always returns the default class." 
if c_node.type == tree.XML_ELEMENT_NODE: return __DEFAULT_ELEMENT_CLASS @@ -212,12 +212,12 @@ else: return dict_result -cdef object _find_nselement_class(state, xmlNode* c_node): +cdef object _find_nselement_class(state, _Document doc, xmlNode* c_node): cdef python.PyObject* dict_result cdef _NamespaceRegistry registry cdef char* c_namespace_utf if c_node.type != tree.XML_ELEMENT_NODE: - return _lookupDefaultElementClass(state, c_node) + return _lookupDefaultElementClass(state, doc, c_node) c_namespace_utf = _getNs(c_node) if c_namespace_utf is not NULL: dict_result = python.PyDict_GetItemString( Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Jul 25 11:03:56 2006 @@ -606,10 +606,10 @@ ################################################################################ # Element class lookup -cdef object _lookupElementClass(state, tree.xmlNode* c_node): +cdef object _lookupElementClass(state, doc, tree.xmlNode* c_node): cdef python.PyObject* dict_result # default to namespace specific classes - nsclass = cetree.lookupNamespaceElementClass(state, c_node) + nsclass = cetree.lookupNamespaceElementClass(state, doc, c_node) if nsclass is not ObjectifiedElement: return nsclass Modified: lxml/branch/capi/src/lxml/parser.pxi ============================================================================== --- lxml/branch/capi/src/lxml/parser.pxi (original) +++ lxml/branch/capi/src/lxml/parser.pxi Tue Jul 25 11:03:56 2006 @@ -334,6 +334,7 @@ cdef _ResolverContext _context cdef LxmlParserType _parser_type cdef xmlParserCtxt* _parser_ctxt + cdef _ElementClassLookup _class_lookup cdef object _lockParser cdef object _unlockParser @@ -380,6 +381,9 @@ def __dummy(self): pass + def setElementClassLookup(self, _ElementClassLookup lookup not None): + self._class_lookup = lookup + cdef _BaseParser _copy(self): 
"Create a new parser with the same configuration." cdef _BaseParser parser Modified: lxml/branch/capi/src/lxml/public-api.pxi ============================================================================== --- lxml/branch/capi/src/lxml/public-api.pxi (original) +++ lxml/branch/capi/src/lxml/public-api.pxi Tue Jul 25 11:03:56 2006 @@ -25,11 +25,14 @@ _element_class_lookup_function function, state): _setElementClassLookupFunction(function, state) -cdef public object lookupDefaultElementClass(state, xmlNode* c_node): - return _lookupDefaultElementClass(state, c_node) +cdef public object lookupDefaultElementClass(state, doc, xmlNode* c_node): + return _lookupDefaultElementClass(state, doc, c_node) -cdef public object lookupNamespaceElementClass(state, xmlNode* c_node): - return _find_nselement_class(state, c_node) +cdef public object lookupNamespaceElementClass(state, doc, xmlNode* c_node): + return _find_nselement_class(state, doc, c_node) + +cdef public object getParserElementLookupFromDocument(_Document doc): + return doc._parser._class_lookup cdef public int tagMatches(xmlNode* c_node, char* c_href, char* c_name): if c_node is NULL: Modified: lxml/branch/capi/src/lxml/tests/test_classlookup.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_classlookup.py (original) +++ lxml/branch/capi/src/lxml/tests/test_classlookup.py Tue Jul 25 11:03:56 2006 @@ -30,7 +30,7 @@ etree = etree def tearDown(self): - classlookup.setElementClassLookup() + etree.setElementClassLookup() ns = etree.Namespace("myNS") ns.clear() @@ -42,7 +42,7 @@ ns[None] = TestElement lookup = classlookup.ElementNamespaceClassLookup() - classlookup.setElementClassLookup(lookup) + etree.setElementClassLookup(lookup) root = etree.XML(xml_str) self.assertEquals(root.FIND_ME, @@ -59,7 +59,7 @@ ns[None] = TestElement lookup = classlookup.ElementDefaultClassLookup() - classlookup.setElementClassLookup(lookup) + 
etree.setElementClassLookup(lookup) root = etree.XML(xml_str) self.assertFalse(hasattr(root, 'FIND_ME')) @@ -73,7 +73,7 @@ lookup = classlookup.AttributeBasedElementClassLookup( "a1", class_dict) - classlookup.setElementClassLookup(lookup) + etree.setElementClassLookup(lookup) root = etree.XML(xml_str) self.assertFalse(hasattr(root, 'FIND_ME')) @@ -81,6 +81,47 @@ TestElement.FIND_ME) self.assertFalse(hasattr(root[0][0], 'FIND_ME')) + def test_custom_lookup(self): + class TestElement(etree.ElementBase): + FIND_ME = "custom" + + class MyLookup(classlookup.CustomElementClassLookup): + def lookup(self, t, d, ns, name): + if name == 'c1': + return TestElement + + etree.setElementClassLookup( MyLookup() ) + + root = etree.XML(xml_str) + self.assertFalse(hasattr(root, 'FIND_ME')) + self.assertEquals(root[0].FIND_ME, + TestElement.FIND_ME) + self.assertFalse(hasattr(root[0][1], 'FIND_ME')) + + def test_parser_based_lookup(self): + class TestElement(etree.ElementBase): + FIND_ME = "parser_based" + + lookup = classlookup.ParserBasedElementClassLookup() + etree.setElementClassLookup(lookup) + + class MyLookup(classlookup.CustomElementClassLookup): + def lookup(self, t, d, ns, name): + return TestElement + + parser = etree.XMLParser() + parser.setElementClassLookup( MyLookup() ) + + root = etree.parse(StringIO(xml_str), parser).getroot() + self.assertEquals(root.FIND_ME, + TestElement.FIND_ME) + self.assertEquals(root[0].FIND_ME, + TestElement.FIND_ME) + + root = etree.parse(StringIO(xml_str)).getroot() + self.assertFalse(hasattr(root, 'FIND_ME')) + self.assertFalse(hasattr(root[0], 'FIND_ME')) + def test_suite(): suite = unittest.TestSuite() From scoder at codespeak.net Tue Jul 25 12:05:14 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 25 Jul 2006 12:05:14 +0200 (CEST) Subject: [Lxml-checkins] r30509 - lxml/branch/capi/doc Message-ID: <20060725100514.187C5100E2@code0.codespeak.net> Author: scoder Date: Tue Jul 25 12:05:11 2006 New Revision: 30509 
Added: lxml/branch/capi/doc/elements.txt Modified: lxml/branch/capi/doc/api.txt lxml/branch/capi/doc/capi.txt lxml/branch/capi/doc/main.txt lxml/branch/capi/doc/mkhtml.py Log: doc updates for lxml.elements and lxml.elements.classlookup Modified: lxml/branch/capi/doc/api.txt ============================================================================== --- lxml/branch/capi/doc/api.txt (original) +++ lxml/branch/capi/doc/api.txt Tue Jul 25 12:05:11 2006 @@ -50,12 +50,17 @@ While lxml.etree itself uses the ElementTree API, it is possible to replace the Element implementation by `custom element subclasses`_. This has been -used to implement well-known XML APIs on top of lxml. The ``lxml.elementlib`` -package contains these APIs. Currently, there is a data-binding -implementation called `objectify`_, which is similar to the `Amara bindery`_. +used to implement well-known XML APIs on top of lxml. The ``lxml.elements`` +package contains examples. Currently, there is a data-binding implementation +called `objectify`_, which is similar to the `Amara bindery`_ tool. + +Additionally, the `lxml.elements.classlookup`_ module provides a number of +different schemes to customize the mapping between libxml2 nodes and the +Element classes used by lxml.etree. .. _`custom element subclasses`: namespace_extensions.html .. _`objectify`: objectify.html +.. _`lxml.elements.classlookup`: elements.html#lxml.elements.classlookup .. _`Amara bindery`: http://uche.ogbuji.net/tech/4suite/amara/ Modified: lxml/branch/capi/doc/capi.txt ============================================================================== --- lxml/branch/capi/doc/capi.txt (original) +++ lxml/branch/capi/doc/capi.txt Tue Jul 25 12:05:11 2006 @@ -37,7 +37,8 @@ Public lxml classes are easily subclassed. 
For example, to implement and set a new default element class, you can write code like the following:: - cdef class NewElementClass(cetree.ElementBase): + from etreepublic cimport ElementBase + cdef class NewElementClass(ElementBase): def setValue(self, myval): self.set("my_attribute", myval) Added: lxml/branch/capi/doc/elements.txt ============================================================================== --- (empty file) +++ lxml/branch/capi/doc/elements.txt Tue Jul 25 12:05:11 2006 @@ -0,0 +1,119 @@ +============= +lxml.elements +============= + +The lxml.elements package is a collection of Element related modules. It +provides enhanced XML APIs based on element classes and different lookup +schemes for element class implementations. + + +lxml.elements.objectify +----------------------- + +`objectify`_ is an alternative XML API implementation similar in spirit to the +Amara or gnosis.objectify tools. + +.. _`objectify`: objectify.html + + +lxml.elements.classlookup +------------------------- + +The classlookup module contains a set of generic Element class lookup +mechanisms. By default, lxml.etree supports `namespace based class lookup`_. +This module provides access to this method and to the following additional +lookup schemes. + +.. _`namespace based class lookup`: namespace_extensions.html + + +Namespace class lookup +...................... + +This is the default lookup mechanism: `namespace based class lookup`_. This +module provides it mainly as a fallback mechanism for other lookups. You can +select the default mechanism by calling:: + + >>> etree.setElementClassLookup() + +or, more explicitly, by doing this:: + + >>> from lxml.elements.classlookup import ElementNamespaceClassLookup + >>> lookup = ElementNamespaceClassLookup() + >>> etree.setElementClassLookup(lookup) + + +Default class lookup +.................... + +This is a faster replacement for the default lookup mechanism. 
It skips the +namespace lookup and always returns the default element class. + +Usage:: + + >>> from lxml.elements.classlookup import ElementDefaultClassLookup + >>> lookup = ElementDefaultClassLookup() + >>> etree.setElementClassLookup(lookup) + + +Attribute based lookup +...................... + +This uses a mapping from attribute values to classes. An attribute name is +set at initialisation time and is then used to find the corresponding value. +It is selected as follows:: + + >>> from lxml.elements.classlookup import AttributeBasedElementClassLookup + >>> lookup = AttributeBasedElementClassLookup('id', id_class_mapping) + >>> etree.setElementClassLookup(lookup) + +Note that this class supports a fallback mechanism that is used in the case +where the attribute value is not found in the mapping. Normally, the default +class lookup is used here. If you want to use the namespace lookup, for +example, you can use this code:: + + >>> fallback = ElementNamespaceClassLookup() + >>> lookup = AttributeBasedElementClassLookup( + ... 'id', id_class_mapping, fallback) + >>> etree.setElementClassLookup(lookup) + + +Parser based lookup +................... + +lxml.etree supports a per-parser setup of element lookup schemes. You can +enable it as follows:: + + >>> from lxml.elements.classlookup import ParserBasedElementClassLookup + >>> lookup = ParserBasedElementClassLookup() + >>> etree.setElementClassLookup(lookup) + +Now you can set a separate lookup strategy for each parser you create:: + + >>> parser = etree.XMLParser() + >>> parser.setElementClassLookup( ElementDefaultClassLookup() ) + +Whenever you create a document with this parser, its lookup scheme will be +inherited by the document and all subsequent element instantiations will use +it. Note that the parser lookup supports a fallback just like the previous +one. + + +Custom element class lookup +........................... + +This is the most customisable way of finding element classes.
It allows you +to implement a custom lookup scheme in a subclass:: + + >>> from lxml.elements.classlookup import CustomElementClassLookup + >>> class MyLookup(CustomElementClassLookup): + ... def lookup(self, node_type, document, namespace, name): + ... return MyElementClass # defined elsewhere + + >>> etree.setElementClassLookup( MyLookup() ) + +The ``lookup()`` method is only required to return either None (which triggers +its fallback mechanism) or a subclass of ``lxml.etree.ElementBase``. It can +otherwise take any decision it wants based on the node type (one of "element", +"comment", "PI"), the XML document of the element, or its namespace or tag +name. Modified: lxml/branch/capi/doc/main.txt ============================================================================== --- lxml/branch/capi/doc/main.txt (original) +++ lxml/branch/capi/doc/main.txt Tue Jul 25 12:05:11 2006 @@ -97,26 +97,28 @@ and the `benchmark results`_ comparing lxml to the original ElementTree_ and cElementTree_ implementations. -lxml also `extends this API`_ to expose libxml2 and libxslt specific -functionality, such as XPath_, `Relax NG`_, `XML Schema`_, `XSLT`_, and -`c14n`_. Python code can be called from XPath expressions and XSLT stylesheets -through the use of `extension functions`_. lxml also offers a `SAX compliant -API`_, that works with the SAX support in the standard library. +Right after the ElementTree_ documentation, the most important place to look +is the `lxml.etree API documentation`_. It describes how lxml extends the +ElementTree API to expose libxml2 and libxslt specific functionality, such as +XPath_, `Relax NG`_, `XML Schema`_, `XSLT`_, and `c14n`_. Python code can be +called from XPath expressions and XSLT stylesheets through the use of +`extension functions`_. lxml also offers a `SAX compliant API`_, that works +with the SAX support in the standard library. In addition to the ElementTree API, lxml also features a sophisticated API for `custom element classes`_. 
This is a simple way to write arbitrary XML driven APIs on top of lxml. Some common XML APIs are implemented in the -``lxml.elementlib`` module, as described in the lxml.etree API documentation. -As of version 1.1, lxml.etree features a new `C-level API`_ that can be used -to efficiently extend lxml.etree in external C modules, including custom -element class support. +`lxml.elements`_ module. As of version 1.1, lxml.etree features a new +`C-level API`_ that can be used to efficiently extend lxml.etree in external C +modules, including custom element class support. .. _ElementTree: http://effbot.org/zone/element-index.htm .. _cElementTree: http://effbot.org/zone/celementtree.htm .. _`benchmark results`: performance.html .. _`ElementTree compatibility overview`: compatibility.html -.. _`extends this API`: api.html +.. _`lxml.etree API documentation`: api.html +.. _`lxml.elements`: elements.html .. _`extension functions`: extensions.html .. _`custom element classes`: namespace_extensions.html .. 
_`SAX compliant API`: sax.html Modified: lxml/branch/capi/doc/mkhtml.py ============================================================================== --- lxml/branch/capi/doc/mkhtml.py (original) +++ lxml/branch/capi/doc/mkhtml.py Tue Jul 25 12:05:11 2006 @@ -14,7 +14,7 @@ for name in ['main.txt', 'intro.txt', 'api.txt', 'compatibility.txt', 'extensions.txt', 'namespace_extensions.txt', 'sax.txt', 'build.txt', 'FAQ.txt', 'performance.txt', 'resolvers.txt', - 'capi.txt', 'objectify.txt']: + 'capi.txt', 'objectify.txt', 'elements.txt']: path = os.path.join(doc_dir, name) outname = os.path.splitext(name)[0] + '.html' outpath = os.path.join(dirname, outname) From scoder at codespeak.net Tue Jul 25 13:45:54 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 25 Jul 2006 13:45:54 +0200 (CEST) Subject: [Lxml-checkins] r30519 - lxml/branch/capi Message-ID: <20060725114554.8E667100BC@code0.codespeak.net> Author: scoder Date: Tue Jul 25 13:45:52 2006 New Revision: 30519 Modified: lxml/branch/capi/setup.py Log: fix: setup.py --static did not set up libraries Modified: lxml/branch/capi/setup.py ============================================================================== --- lxml/branch/capi/setup.py (original) +++ lxml/branch/capi/setup.py Tue Jul 25 13:45:52 2006 @@ -109,6 +109,7 @@ # use the static setup as configured in setupStaticBuild sys.argv.remove('--static') cflags, xslt_libs = setupStaticBuild() + ext_args['extra_link_args'] = xslt_libs else: cflags = flags('xslt-config --cflags') xslt_libs = flags('xslt-config --libs') From scoder at codespeak.net Tue Jul 25 14:59:45 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 25 Jul 2006 14:59:45 +0200 (CEST) Subject: [Lxml-checkins] r30523 - in lxml/branch/capi/src/lxml: . 
tests Message-ID: <20060725125945.B55D5100A0@code0.codespeak.net> Author: scoder Date: Tue Jul 25 14:59:43 2006 New Revision: 30523 Modified: lxml/branch/capi/src/lxml/parser.pxi lxml/branch/capi/src/lxml/tests/test_htmlparser.py Log: renamed [sg]et_default_parser to [sg]etDefaultParser Modified: lxml/branch/capi/src/lxml/parser.pxi ============================================================================== --- lxml/branch/capi/src/lxml/parser.pxi (original) +++ lxml/branch/capi/src/lxml/parser.pxi Tue Jul 25 14:59:43 2006 @@ -675,7 +675,7 @@ __GLOBAL_PARSER_CONTEXT.setDefaultParser(__DEFAULT_XML_PARSER) -def set_default_parser(_BaseParser parser=None): +def setDefaultParser(_BaseParser parser=None): """Set a default parser for the current thread. This parser is used globally whenever no parser is supplied to the various parse functions of the lxml API. If this function is called without a parser (or if it is @@ -689,9 +689,17 @@ parser = __DEFAULT_XML_PARSER __GLOBAL_PARSER_CONTEXT.setDefaultParser(parser) -def get_default_parser(): +def getDefaultParser(): return __GLOBAL_PARSER_CONTEXT.getDefaultParser() +def set_default_parser(parser): + "Deprecated, please use setDefaultParser instead." + setDefaultParser(parser) + +def get_default_parser(): + "Deprecated, please use getDefaultParser instead." + return getDefaultParser() + ############################################################ ## HTML parser ############################################################ Modified: lxml/branch/capi/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_htmlparser.py (original) +++ lxml/branch/capi/src/lxml/tests/test_htmlparser.py Tue Jul 25 14:59:43 2006 @@ -20,7 +20,7 @@ uhtml_str = u"test ??\uF8D2

page ??\uF8D2 title

" def tearDown(self): - self.etree.set_default_parser() + self.etree.setDefaultParser() def test_module_HTML(self): element = self.etree.HTML(self.html_str) @@ -92,13 +92,13 @@ self.assertRaises(self.etree.XMLSyntaxError, self.etree.parse, StringIO(self.broken_html_str)) - self.etree.set_default_parser( self.etree.HTMLParser() ) + self.etree.setDefaultParser( self.etree.HTMLParser() ) tree = self.etree.parse(StringIO(self.broken_html_str)) self.assertEqual(self.etree.tostring(tree.getroot()), self.html_str) - self.etree.set_default_parser() + self.etree.setDefaultParser() self.assertRaises(self.etree.XMLSyntaxError, self.etree.parse, StringIO(self.broken_html_str)) From scoder at codespeak.net Tue Jul 25 15:50:04 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 25 Jul 2006 15:50:04 +0200 (CEST) Subject: [Lxml-checkins] r30526 - in lxml/branch/capi: doc src/lxml src/lxml/tests Message-ID: <20060725135004.1CD5910094@code0.codespeak.net> Author: scoder Date: Tue Jul 25 15:50:01 2006 New Revision: 30526 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: partial rewrite of type support to add XML Schema types (Holger) Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Tue Jul 25 15:50:01 2006 @@ -27,6 +27,17 @@ .. _`namespace specific classes`: namespace_extensions.html +Since this API is meant for data-centered XML (as opposed to document XML with +mixed content), it might be worth-wile in this context to change the default +parser:: + + >>> etree.setDefaultParser( etree.XMLParser(remove_blank_text=True) ) + +Now the parser will remove whitespace-only text from the parsed document. 
+Note that this alters the document infoset, so if you consider spaces as data +in your specific use case, you should go with the normal parser. + + .. contents:: .. 1 Element access through object attributes @@ -283,13 +294,13 @@ >>> objectify.enableRecursiveStr() >>> root = etree.XML(""" - ... + ... ... 1 ... 1.2 ... 1 ... true ... what? - ... + ... ... ... """) @@ -362,3 +373,6 @@ >>> print isinstance(new_el, objectify.ObjectifiedElement) False +In case you changed the default parser also, here is how to change it back:: + + >>> etree.setDefaultParser() Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Jul 25 15:50:01 2006 @@ -29,6 +29,8 @@ _abs = __builtin__.abs cdef object _len _len = __builtin__.len +cdef object _cmp +_cmp = __builtin__.cmp cdef object True True = __builtin__.True @@ -54,6 +56,11 @@ PYTYPE_ATTRIBUTE = cetree.namespacedNameFromNsName( _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) +cdef object XML_SCHEMA_INSTANCE_NS +XML_SCHEMA_INSTANCE_NS = "http://www.w3.org/2001/XMLSchema-instance" +cdef char* _XML_SCHEMA_INSTANCE_NS +_XML_SCHEMA_INSTANCE_NS = _cstr(XML_SCHEMA_INSTANCE_NS) + cdef class ObjectifiedElement(ElementBase): """Element class with an Amara-like API. 
@@ -307,8 +314,12 @@ cdef _value(self): return self._type(textOf(self._c_node)) - def value(self): - return self._value() + property pyval: + def __get__(self): + return self._value() + def __set__(self, value): + self.text = str(value) + self._checkType(value) def __int__(self): return _int(textOf(self._c_node)) @@ -325,6 +336,18 @@ # def __oct__(self): # def __hex__(self): + def __richcmp__(self, other, int op): + cdef int c + text = _numericValueOf(self) + other = _numericValueOf(other) + c = _cmp(text, other) + if c < 0: + return op <= 1 or op == 3 + elif c > 0: + return op >= 3 + else: + return op == 2 + def __add__(self, other): return _numericValueOf(self) + _numericValueOf(other) @@ -391,14 +414,36 @@ def _init(self): self._type = int +cdef class LongElement(NumberElement): + def _init(self): + self._type = long + cdef class FloatElement(NumberElement): def _init(self): self._type = float cdef class StringElement(ObjectifiedElement): - def __len__(self): + """String data class. + + Note that this class does *not* support the sequence protocol of strings: + iter(), str[0], str[0:1], etc. are *not* supported. Instead, use the + .text attribute to get a 'real' string. 
+ """ + def strlen(self): return _len(_strValueOf(self)) + def __richcmp__(self, other, int op): + cdef int c + text = _strValueOf(self) + other = _strValueOf(other) + c = _cmp(text, other) + if c < 0: + return op <= 1 or op == 3 + elif c > 0: + return op >= 3 + else: + return op == 2 + def __str__(self): return textOf(self._c_node) @@ -427,18 +472,16 @@ other = _strValueOf(other) return _strValueOf(self) % other - def __getitem__(self, index): - return textOf(self._c_node)[index] - - def __contains__(self, text): - return text in textOf(self._c_node) - cdef class NoneElement(ObjectifiedElement): def __str__(self): return "None" - def value(self): - return None + property pyval: + def __get__(self): + return None + def __set__(self, value): + if value is not None: + raise TypeError, "Invalid value for None type: %s" % type(value) cdef class BoolElement(ObjectifiedElement): """Boolean type base on string values: 'true' or 'false'. @@ -467,8 +510,14 @@ else: return "False" - def value(self): - return self.__nonzero__() + property pyval: + def __get__(self): + return self.__nonzero__() + def __set__(self, value): + if bool(value): + self.text = 'true' + else: + self.text = 'false' def __checkBool(s): if s != 'true' and s != 'false': @@ -494,7 +543,8 @@ Named type that contains a type check function and a type class that inherits from ObjectifiedElement. The type check must take a string as - argument and raise a ValueError if it cannot handle the string value. + argument and raise a ValueError if it cannot handle the string value. It + may be None in which case it is not considered for type guessing. 
Example: PyType('int', int, MyIntClass).register() @@ -505,6 +555,7 @@ cdef readonly object name cdef object _type_check cdef object _type + cdef object _schema_types def __init__(self, name, type_check, type_class): if not python._isString(name): raise TypeError, "Type name must be a string" @@ -515,9 +566,12 @@ self.name = name self._type = type_class self._type_check = type_check + self._schema_types = [] def register(self): - _TYPE_DICT[self.name] = self + _PYTYPE_DICT[self.name] = self + for xs_type in self._schema_types: + _SCHEMA_TYPE_DICT[xs_type] = self if self._type_check is None: return for item in _TYPE_CHECKS: @@ -527,8 +581,11 @@ _TYPE_CHECKS.append( (self._type_check, self) ) def unregister(self): - if _TYPE_DICT.get(self.name) is self: - del _TYPE_DICT[self.name] + if _PYTYPE_DICT.get(self.name) is self: + del _PYTYPE_DICT[self.name] + for xs_type, pytype in _SCHEMA_TYPE_DICT.items(): + if pytype is self: + del _SCHEMA_TYPE_DICT[xs_type] if self._type_check is None: return try: @@ -536,29 +593,61 @@ except ValueError: pass -cdef object _TYPE_DICT -_TYPE_DICT = {} + property xmlSchemaTypes: + """The list of XML Schema datatypes this Python type maps to. + + Note that this must be set before registering the type! 
+ """ + def __get__(self): + return self._schema_types + def __set__(self, types): + self._schema_types = list(types) + +cdef object _PYTYPE_DICT +_PYTYPE_DICT = {} + +cdef object _SCHEMA_TYPE_DICT +_SCHEMA_TYPE_DICT = {} cdef object _TYPE_CHECKS _TYPE_CHECKS = [] -PyType('int', int, IntElement ).register() -PyType('long', None, IntElement ).register() -PyType('float', float, FloatElement ).register() -PyType('bool', __checkBool, BoolElement ).register() -PyType('str', None, StringElement).register() +cdef _registerPyTypes(): + pytype = PyType('int', int, IntElement) + pytype.xmlSchemaTypes = ("integer", "positiveInteger", "negativeInteger", + "nonNegativeInteger", "nonPositiveInteger", + "int", "unsignedInt", "short", "unsignedShort") + pytype.register() + + pytype = PyType('long', None, LongElement) + pytype.xmlSchemaTypes = ("long", "unsignedLong") + pytype.register() + + pytype = PyType('float', float, FloatElement) + pytype.xmlSchemaTypes = ("float", "double") + pytype.register() + + pytype = PyType('bool', __checkBool, BoolElement) + pytype.xmlSchemaTypes = ("boolean",) + pytype.register() + + pytype = PyType('str', None, StringElement) + pytype.xmlSchemaTypes = ("string", "normalizedString") + pytype.register() + +_registerPyTypes() cdef object _guessElementClass(tree.xmlNode* c_node): value = textOf(c_node) if value is None: - # FIXME: how do we get rid of the class if the value changes? 
- return NoneElement - _ValueError = ValueError + # default to string class + return StringElement + errors = (ValueError, TypeError) for type_check, pytype in _TYPE_CHECKS: try: type_check(value) return (pytype)._type - except _ValueError: + except errors: pass return StringElement @@ -583,15 +672,18 @@ cdef object _dump(_Element element, int indent): indentstr = " " * indent - if hasattr(element, "value"): - value = element.value() + if hasattr(element, "pyval"): + value = element.pyval else: value = textOf(element._c_node) if value and not value.strip(): value = None result = "%s%s = %r [%s]\n" % (indentstr, element.tag, value, type(element).__name__) + xsi_ns = "{%s}" % XML_SCHEMA_INSTANCE_NS for name, value in element.items(): + if name.startswith(xsi_ns): + continue result = result + "%s * %s = %r\n" % (indentstr, name, value) indent = indent + 1 @@ -606,7 +698,7 @@ ################################################################################ # Element class lookup -cdef object _lookupElementClass(state, doc, tree.xmlNode* c_node): +cdef object _lookupElementClass(state, _Document doc, tree.xmlNode* c_node): cdef python.PyObject* dict_result # default to namespace specific classes nsclass = cetree.lookupNamespaceElementClass(state, doc, c_node) @@ -617,21 +709,38 @@ if cetree.findChildForwards(c_node, 0): return ObjectifiedElement - # otherwise determine class based on text content type + # if element is defined as xsi:nil, return NoneElement class + if "true" == cetree.attributeValueFromNsName( + c_node, _XML_SCHEMA_INSTANCE_NS, "nil"): + return NoneElement + + # check for Python type hint value = cetree.attributeValueFromNsName( c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) - if value is None: - el_class = _guessElementClass(c_node) - if el_class is not None: - return el_class - else: - dict_result = python.PyDict_GetItem(_TYPE_DICT, value) + + if value is not None: + dict_result = python.PyDict_GetItem(_PYTYPE_DICT, value) if dict_result is not 
NULL: return (dict_result)._type + raise ValueError, "Invalid pytype attribute in element '%s'" % \ + cetree.namespacedNameFromNsName(tree._getNs(c_node), c_node.name) + + # check for XML Schema type hint + value = cetree.attributeValueFromNsName( + c_node, _XML_SCHEMA_INSTANCE_NS, "type") + + if value is not None: + dict_result = python.PyDict_GetItem(_SCHEMA_TYPE_DICT, value) + if dict_result is not NULL: + return (dict_result)._type + + # otherwise determine class based on text content type + el_class = _guessElementClass(c_node) + if el_class is not None: + return el_class - # FIXME: is it right to raise an Exception based on data? - raise ValueError, "Invalid type attribute in element '%s'" % \ - cetree.namespacedNameFromNsName(tree._getNs(c_node), c_node.name) + # default to string element class if type attribute is not exploitable + return _StringElement def typedef(element_or_tree, ignore_old=True): """Recursively creates pytype attributes on the elements of an XML tree. @@ -651,7 +760,7 @@ ignore = bool(ignore_old) _ValueError = ValueError - StrType = _TYPE_DICT.get('str') + StrType = _PYTYPE_DICT.get('str') c_node = element._c_node tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) pytype = None @@ -661,7 +770,7 @@ old_value = cetree.attributeValueFromNsName( c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) if old_value is not None: - pytype = _TYPE_DICT.get(old_value) + pytype = _PYTYPE_DICT.get(old_value) if pytype is not None: value = textOf(c_node) try: Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Tue Jul 25 15:50:01 2006 @@ -92,9 +92,14 @@ def test_type_none(self): Element = self.etree.Element SubElement = self.etree.SubElement + + nil_attr = "{http://www.w3.org/2001/XMLSchema-instance}nil" root = Element("{objectified}root") 
SubElement(root, "{objectified}none") - self.assert_(isinstance(root.none, objectify.NoneElement)) + SubElement(root, "{objectified}none", {nil_attr : "true"}) + self.assertFalse(isinstance(root.none, objectify.NoneElement)) + self.assertFalse(isinstance(root.none[0], objectify.NoneElement)) + self.assert_(isinstance(root.none[1], objectify.NoneElement)) def test_type_bool(self): Element = self.etree.Element @@ -124,6 +129,32 @@ SubElement(root, "{objectified}none").text = "5.5" self.assert_(isinstance(root.none, objectify.FloatElement)) + def test_schema_types(self): + XML = self.etree.XML + root = XML('''\ + + 5 + 5 + 5 + + ''') + + self.assert_(isinstance(root.a[0], objectify.IntElement)) + self.assertEquals(root.a[0], 5) + + self.assert_(isinstance(root.a[1], objectify.StringElement)) + self.assertEquals(root.a[1], "5") + + self.assert_(isinstance(root.a[2], objectify.FloatElement)) + self.assertEquals(root.a[2], 5.0) + + def test_type_str_sequence(self): + XML = self.etree.XML + root = XML(u'whytry') + strs = [ str(s) for s in root.b ] + self.assertEquals(["why", "try"], + strs) + def test_typedef(self): XML = self.etree.XML root = XML(u'5test1.1\uF8D2true') From scoder at codespeak.net Tue Jul 25 16:34:05 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 25 Jul 2006 16:34:05 +0200 (CEST) Subject: [Lxml-checkins] r30532 - lxml/branch/capi/doc Message-ID: <20060725143405.BE08A100BD@code0.codespeak.net> Author: scoder Date: Tue Jul 25 16:34:03 2006 New Revision: 30532 Modified: lxml/branch/capi/doc/objectify.txt Log: doctest for XML Schema data types Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Tue Jul 25 16:34:03 2006 @@ -258,6 +258,16 @@ ... 
raise ValueError >>> xmas_type = objectify.PyType('date', checkChristmasDate, ChristmasDate) + +If you want, you can also register this type under an XML Schema type name:: + + >>> xmas_type.xmlSchemaTypes = ("date",) + +XML Schema types will be considered if the element has an ``xsi:type`` +attribute that specifies its data type. The line above binds the XSD type +``date`` to the newly defined Python type. Note that this must be done before +the next step, which is to register the type. Then you can use it:: + >>> xmas_type.register() >>> root = etree.XML("24.12.200012.24.2000") @@ -268,6 +278,19 @@ ... AttributeError: no such child: callSanta +If you provide XML Schema type information, this will override the type check +function defined above:: + + >>> root = etree.XML('''\ + ... + ... 12.24.2000 + ... + ... ''') + >>> print root.a + 12.24.2000 + >>> root.a.callSanta() + Ho ho ho! + To unregister a type, call its ``unregister()`` method:: >>> root.a.callSanta() From scoder at codespeak.net Tue Jul 25 19:44:49 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 25 Jul 2006 19:44:49 +0200 (CEST) Subject: [Lxml-checkins] r30537 - lxml/branch/capi/src/lxml Message-ID: <20060725174449.77D9E100B3@code0.codespeak.net> Author: scoder Date: Tue Jul 25 19:44:48 2006 New Revision: 30537 Modified: lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/python.pxd Log: fixed rich compare implementations Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Tue Jul 25 19:44:48 2006 @@ -337,16 +337,8 @@ # def __hex__(self): def __richcmp__(self, other, int op): - cdef int c - text = _numericValueOf(self) - other = _numericValueOf(other) - c = _cmp(text, other) - if c < 0: - return op <= 1 or op == 3 - elif c > 0: - return op >= 3 - else: - return op == 2 + return 
python.PyObject_RichCompareBool( + _numericValueOf(self), _numericValueOf(other), op) def __add__(self, other): return _numericValueOf(self) + _numericValueOf(other) @@ -433,16 +425,8 @@ return _len(_strValueOf(self)) def __richcmp__(self, other, int op): - cdef int c - text = _strValueOf(self) - other = _strValueOf(other) - c = _cmp(text, other) - if c < 0: - return op <= 1 or op == 3 - elif c > 0: - return op >= 3 - else: - return op == 2 + return python.PyObject_RichCompareBool( + _strValueOf(self), _strValueOf(other), op) def __str__(self): return textOf(self._c_node) @@ -538,6 +522,9 @@ textOf((obj)._c_node)) return obj +################################################################################ +# Python type registry + cdef class PyType: """User defined type. Modified: lxml/branch/capi/src/lxml/python.pxd ============================================================================== --- lxml/branch/capi/src/lxml/python.pxd (original) +++ lxml/branch/capi/src/lxml/python.pxd Tue Jul 25 19:44:48 2006 @@ -57,6 +57,7 @@ cdef int PyType_Check(object instance) cdef int PyObject_SetAttr(object o, object name, object value) + cdef int PyObject_RichCompareBool(object o1, object o2, int op) cdef void* PyMem_Malloc(size_t size) cdef void* PyMem_Realloc(void* p, size_t size) From scoder at codespeak.net Wed Jul 26 07:32:10 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 26 Jul 2006 07:32:10 +0200 (CEST) Subject: [Lxml-checkins] r30548 - in lxml/branch/capi: doc src/lxml src/lxml/tests Message-ID: <20060726053210.CCCD5100A7@code0.codespeak.net> Author: scoder Date: Wed Jul 26 07:32:06 2006 New Revision: 30548 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/python.pxd lxml/branch/capi/src/lxml/tests/test_objectify.py Log: deleting and assigning slices, some cleanup in test cases Modified: lxml/branch/capi/doc/objectify.txt 
============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Wed Jul 26 07:32:06 2006 @@ -130,6 +130,16 @@ >>> print el.tag yet_another_child + >>> root.y = [ etree.Element("y"), etree.Element("y") ] + >>> [ el.tag for el in root.y ] + ['y', 'y'] + +The latter is a short form for operations on the full slice:: + + >>> root.y[:] = [ etree.Element("y") ] + >>> [ el.tag for el in root.y ] + ['y'] + You can also replace children that way:: >>> child1 = etree.SubElement(root, "child") Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Wed Jul 26 07:32:06 2006 @@ -144,6 +144,14 @@ self.append(element) else: self.replace(child, element) + elif python.PySequence_Check(value): + try: + element = _lookupChild(self, tag) + except AttributeError: + for item in value: + _appendValue(self, tag, item) + else: + element.__setslice__(0, python.PY_SSIZE_T_MAX, value) else: try: element = _lookupChild(self, tag) @@ -194,6 +202,9 @@ * If argument is a string, does the same as setattr(). This is used to provide namespaces for element lookup. + + * If argument is a sequence (list, tuple, etc.), assign the contained + items to the siblings. 
""" cdef _Element element cdef _Element new_element @@ -202,6 +213,7 @@ cdef tree.xmlNode* c_node if python._isString(key): self.__setattr__(key, value) + return c_self_node = self._c_node c_parent = c_self_node.parent @@ -214,27 +226,50 @@ if c_node is NULL: raise IndexError, key element = elementFactory(self._doc, c_node) - - if isinstance(value, _Element): - # deep copy the new element - new_element = cetree.deepcopyNodeToDocument( - self._doc, (<_Element>value)._c_node) - new_element.tag = self.tag - self.getparent().replace(element, new_element) - else: - cetree.setNodeText(element._c_node, value) + _replaceElement(element, value) def __getslice__(self, Py_ssize_t start, Py_ssize_t end): return list(islice(self, start, end)) + def __setslice__(self, Py_ssize_t start, Py_ssize_t end, values): + cdef _Element el + parent = self.getparent() + if parent is None: + raise TypeError, "deleting slices of root element not supported" + # replace existing items + new_items = iter(values) + del_items = iter(list(islice(self, start, end))) + try: + for el in del_items: + item = new_items.next() + _replaceElement(el, item) + except StopIteration: + remove = parent.remove + remove(el) + for el in del_items: + remove(el) + return + + # append remaining new items + tag = self.tag + for item in new_items: + _appendValue(parent, tag, item) + def __delslice__(self, Py_ssize_t start, Py_ssize_t end): parent = self.getparent() if parent is None: raise TypeError, "deleting slices of root element not supported" remove = parent.remove - for el in self.__getslice__(start, end): + for el in list(islice(self, start, end)): remove(el) + def __delitem__(self, key): + parent = self.getparent() + if parent is None: + raise TypeError, "deleting items not supported by root element" + sibling = self.__getitem__(key) + parent.remove(sibling) + def findall(self, path): # Reimplementation of Element.findall() to make it work without child # iteration. 
@@ -302,6 +337,33 @@ c_href = _cstr(ns) return cetree.namespacedNameFromNsName(c_href, c_tag) +cdef object _replaceElement(_Element element, value): + if isinstance(value, _Element): + # deep copy the new element + new_element = cetree.deepcopyNodeToDocument( + element._doc, (<_Element>value)._c_node) + new_element.tag = element.tag + element.getparent().replace(element, new_element) + else: + if not python._isString(value): + value = str(value) + cetree.setNodeText(element._c_node, value) + +cdef object _appendValue(_Element parent, tag, value): + cdef _Element new_element + if isinstance(value, _Element): + # deep copy the new element + new_element = cetree.deepcopyNodeToDocument( + parent._doc, (<_Element>value)._c_node) + new_element.tag = tag + parent.append(new_element) + else: + new_element = etree.SubElement(parent, tag) + if not python._isString(value): + value = str(value) + cetree.setNodeText(new_element._c_node, value) + + ################################################################################ # Data type support in subclasses Modified: lxml/branch/capi/src/lxml/python.pxd ============================================================================== --- lxml/branch/capi/src/lxml/python.pxd (original) +++ lxml/branch/capi/src/lxml/python.pxd Wed Jul 26 07:32:06 2006 @@ -6,6 +6,7 @@ ctypedef int size_t ctypedef int Py_ssize_t cdef int INT_MAX + cdef int PY_SSIZE_T_MAX cdef void Py_INCREF(object o) @@ -50,6 +51,7 @@ cdef object PyTuple_GET_ITEM(object o, Py_ssize_t pos) cdef int PyDict_Check(object instance) + cdef int PyList_Check(object instance) cdef int PyTuple_Check(object instance) cdef int PyNumber_Check(object instance) cdef int PyBool_Check(object instance) Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Wed Jul 26 07:32:06 2006 @@ 
-31,41 +31,41 @@ def setUp(self): objectify.register() - ns = etree.Namespace("otherNs") - ns[None] = etree.ElementBase + ns = self.etree.Namespace("otherNs") + ns[None] = self.etree.ElementBase def tearDown(self): - etree.Namespace("otherNs").clear() + self.etree.Namespace("otherNs").clear() objectify.unregister() def test_child(self): - root = etree.XML(xml_str) + root = self.etree.XML(xml_str) self.assertEquals("0", root.c1.c2.text) def test_child_getattr(self): - root = etree.XML(xml_str) + root = self.etree.XML(xml_str) self.assertEquals("0", getattr(root.c1, "{objectified}c2").text) self.assertEquals("2", getattr(root.c1, "{otherNS}c2").text) def test_child_nonexistant(self): - root = etree.XML(xml_str) + root = self.etree.XML(xml_str) self.assertRaises(AttributeError, getattr, root.c1, "NOT_THERE") self.assertRaises(AttributeError, getattr, root.c1, "{unknownNS}c2") def test_child_index(self): - root = etree.XML(xml_str) + root = self.etree.XML(xml_str) self.assertEquals("0", root.c1.c2[0].text) self.assertEquals("1", root.c1.c2[1].text) self.assertRaises(IndexError, operator.itemgetter(2), root.c1.c2) def test_child_len(self): - root = etree.XML(xml_str) + root = self.etree.XML(xml_str) self.assertEquals(1, len(root)) self.assertEquals(1, len(root.c1)) self.assertEquals(2, len(root.c1.c2)) def test_child_iter(self): - root = etree.XML(xml_str) + root = self.etree.XML(xml_str) self.assertEquals([root], list(iter(root))) self.assertEquals([root.c1], @@ -73,21 +73,43 @@ self.assertEquals([root.c1.c2[0], root.c1.c2[1]], list(iter((root.c1.c2)))) + def test_setslice(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + root = Element("root") + c1 = SubElement(root, "c") + c1.text = "c1" + c2 = SubElement(root, "c") + c2.text = "c2" + + self.assertEquals([c1,c2], list(root.c)) + + el = Element("test") + el.text = "test" + root.c = [ el ] + self.assertEquals(["test"], + [ c.text for c in root.c ]) + + root.c[:] = [ c1, c2, c2, c1 ] + 
self.assertEquals(["c1", "c2", "c2", "c1"], + [ c.text for c in root.c ]) + + def test_findall(self): XML = self.etree.XML root = XML('') - self.assertEquals(len(root.findall("c")), 1) - self.assertEquals(len(root.findall(".//c")), 2) - self.assertEquals(len(root.findall(".//b")), 3) + self.assertEquals(1, len(root.findall("c"))) + self.assertEquals(2, len(root.findall(".//c"))) + self.assertEquals(3, len(root.findall(".//b"))) self.assertEquals(root.findall(".//b")[:2], root.getchildren()[:2]) def test_findall_ns(self): XML = self.etree.XML root = XML('') - self.assertEquals(len(root.findall(".//{X}b")), 2) - self.assertEquals(len(root.findall(".//b")), 3) - self.assertEquals(len(root.findall("b")), 2) + self.assertEquals(2, len(root.findall(".//{X}b"))) + self.assertEquals(3, len(root.findall(".//b"))) + self.assertEquals(2, len(root.findall("b"))) def test_type_none(self): Element = self.etree.Element @@ -140,13 +162,13 @@ ''') self.assert_(isinstance(root.a[0], objectify.IntElement)) - self.assertEquals(root.a[0], 5) + self.assertEquals(5, root.a[0]) self.assert_(isinstance(root.a[1], objectify.StringElement)) - self.assertEquals(root.a[1], "5") + self.assertEquals("5", root.a[1]) self.assert_(isinstance(root.a[2], objectify.FloatElement)) - self.assertEquals(root.a[2], 5.0) + self.assertEquals(5.0, root.a[2]) def test_type_str_sequence(self): XML = self.etree.XML @@ -155,6 +177,28 @@ self.assertEquals(["why", "try"], strs) + def test_type_str_cmp(self): + XML = self.etree.XML + root = XML(u'testtaste') + self.assertFalse(root.b[0] < root.b[1]) + self.assertFalse(root.b[0] <= root.b[1]) + self.assertFalse(root.b[0] == root.b[1]) + + self.assert_(root.b[0] != root.b[1]) + self.assert_(root.b[0] >= root.b[1]) + self.assert_(root.b[0] > root.b[1]) + + def test_type_int_cmp(self): + XML = self.etree.XML + root = XML(u'56') + self.assert_(root.b[0] < root.b[1]) + self.assert_(root.b[0] <= root.b[1]) + self.assert_(root.b[0] != root.b[1]) + + 
self.assertFalse(root.b[0] == root.b[1]) + self.assertFalse(root.b[0] >= root.b[1]) + self.assertFalse(root.b[0] > root.b[1]) + def test_typedef(self): XML = self.etree.XML root = XML(u'5test1.1\uF8D2true') From scoder at codespeak.net Wed Jul 26 07:57:04 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 26 Jul 2006 07:57:04 +0200 (CEST) Subject: [Lxml-checkins] r30549 - lxml/branch/capi/src/lxml Message-ID: <20060726055704.31938100A7@code0.codespeak.net> Author: scoder Date: Wed Jul 26 07:56:59 2006 New Revision: 30549 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: .pyval for StringElement Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Wed Jul 26 07:56:59 2006 @@ -483,6 +483,12 @@ iter(), str[0], str[0:1], etc. are *not* supported. Instead, use the .text attribute to get a 'real' string. """ + property pyval: + def __get__(self): + return textOf(self._c_node) + def __set__(self, value): + cetree.setNodeText(self._c_node, value) + def strlen(self): return _len(_strValueOf(self)) From scoder at codespeak.net Wed Jul 26 08:01:54 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 26 Jul 2006 08:01:54 +0200 (CEST) Subject: [Lxml-checkins] r30550 - lxml/branch/capi/doc Message-ID: <20060726060154.48272100A7@code0.codespeak.net> Author: scoder Date: Wed Jul 26 08:01:51 2006 New Revision: 30550 Modified: lxml/branch/capi/doc/objectify.txt Log: objectify.txt: make clear that sequence operations on data classes work on the tree sequence, not the data sequence (strings etc.) 
Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Wed Jul 26 08:01:51 2006 @@ -203,7 +203,8 @@ ----------------- The objectify module knows about Python data types and tries its best to let -element content behave like them. For example, this works:: +element content behave like them. For example, they support the normal math +operators:: >>> root = etree.XML("511true") >>> root.a + root.b @@ -225,6 +226,42 @@ ... print "false!" false! +However, data elements continue to provide the objectify API. This means that +sequence operations such as ``len()``, slicing and indexing (e.g. of strings) +cannot behave as the Python types. Like all other tree elements, they show +the normal slicing behaviour of objectify elements:: + + >>> root = etree.XML("testtoast") + >>> print root.a + ' me' + test me + >>> len(root.a) + 1 + >>> [ a.tag for a in root.a ] + ['a'] + >>> print root.a[0].tag + a + + >>> print root.a + test + >>> [ str(a) for a in root.a[:1] ] + ['test'] + +If you need to run sequence operations on data types, you must ask the API for +the *real* Python value. The string value is always available through the +normal ElementTree ``.text`` attribute.
Additionally, all data classes +provide a ``.pyval`` attribute that returns the value as Python type:: + + >>> root = etree.XML("test5") + >>> root.a.text + 'test' + >>> root.a.pyval + 'test' + + >>> root.b.text + '5' + >>> root.b.pyval + 5 + Objectify determines data types by trial and error, unless it finds an attribute ``pytype`` in the namespace given by the URI in ``lxml.objectify.PYTYPE_NAMESPACE``, which must contain any of the following From scoder at codespeak.net Wed Jul 26 08:45:11 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 26 Jul 2006 08:45:11 +0200 (CEST) Subject: [Lxml-checkins] r30551 - in lxml/branch/capi/src/lxml: . tests Message-ID: <20060726064511.DC72B100A7@code0.codespeak.net> Author: scoder Date: Wed Jul 26 08:45:09 2006 New Revision: 30551 Modified: lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/python.pxd lxml/branch/capi/src/lxml/tests/test_objectify.py Log: richcmp fixes, more test cases, some cleanup Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Wed Jul 26 08:45:09 2006 @@ -15,22 +15,20 @@ cdef object __builtin__ import __builtin__ -cdef object _int -_int = __builtin__.int -cdef object _long -_long = __builtin__.long -cdef object _float -_float = __builtin__.float -cdef object _str -_str = __builtin__.str -cdef object _pow -_pow = __builtin__.pow -cdef object _abs -_abs = __builtin__.abs -cdef object _len -_len = __builtin__.len -cdef object _cmp -_cmp = __builtin__.cmp +cdef object int +int = __builtin__.int +cdef object long +long = __builtin__.long +cdef object float +float = __builtin__.float +cdef object bool +bool = __builtin__.bool +cdef object pow +pow = __builtin__.pow +cdef object abs +abs = __builtin__.abs +cdef object len +len = __builtin__.len cdef object True True = __builtin__.True @@ -384,13 
+382,13 @@ self._checkType(value) def __int__(self): - return _int(textOf(self._c_node)) + return int(textOf(self._c_node)) def __long__(self): - return _long(textOf(self._c_node)) + return long(textOf(self._c_node)) def __float__(self): - return _float(textOf(self._c_node)) + return float(textOf(self._c_node)) def __str__(self): return textOf(self._c_node) @@ -399,8 +397,10 @@ # def __hex__(self): def __richcmp__(self, other, int op): - return python.PyObject_RichCompareBool( - _numericValueOf(self), _numericValueOf(other), op) + if hasattr(other, 'pyval'): + other = other.pyval + return python.PyObject_RichCompare( + _numericValueOf(self), other, op) def __add__(self, other): return _numericValueOf(self) + _numericValueOf(other) @@ -424,7 +424,7 @@ if modulo is None: return _numericValueOf(self) ** _numericValueOf(other) else: - return _pow(_numericValueOf(self), _numericValueOf(other), modulo) + return pow(_numericValueOf(self), _numericValueOf(other), modulo) def __neg__(self): return - _numericValueOf(self) @@ -433,7 +433,7 @@ return + _numericValueOf(self) def __abs__(self): - return _abs( _numericValueOf(self) ) + return abs( _numericValueOf(self) ) def __nonzero__(self): return _numericValueOf(self) @@ -461,7 +461,7 @@ ## other = (<_NumberElement>other)._value() ## result = self._value() + other ## self._checkType(result) -## cetree.setNodeText(self._c_node, _str(result)) +## cetree.setNodeText(self._c_node, str(result)) ## return self cdef class IntElement(NumberElement): @@ -490,11 +490,13 @@ cetree.setNodeText(self._c_node, value) def strlen(self): - return _len(_strValueOf(self)) + return len(_strValueOf(self)) def __richcmp__(self, other, int op): - return python.PyObject_RichCompareBool( - _strValueOf(self), _strValueOf(other), op) + if hasattr(other, 'pyval'): + other = other.pyval + return python.PyObject_RichCompare( + _strValueOf(self), other, op) def __str__(self): return textOf(self._c_node) @@ -556,6 +558,15 @@ else: return False + def 
__richcmp__(self, other, int op): + if hasattr(other, 'pyval'): + other = other.pyval + if hasattr(self, 'pyval'): + self_val = self.pyval + else: + self_val = bool(self) + return python.PyObject_RichCompare(self_val, other, op) + def __str__(self): if self._boolval(): return "True" Modified: lxml/branch/capi/src/lxml/python.pxd ============================================================================== --- lxml/branch/capi/src/lxml/python.pxd (original) +++ lxml/branch/capi/src/lxml/python.pxd Wed Jul 26 08:45:09 2006 @@ -59,6 +59,7 @@ cdef int PyType_Check(object instance) cdef int PyObject_SetAttr(object o, object name, object value) + cdef object PyObject_RichCompare(object o1, object o2, int op) cdef int PyObject_RichCompareBool(object o1, object o2, int op) cdef void* PyMem_Malloc(size_t size) Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Wed Jul 26 08:45:09 2006 @@ -188,6 +188,11 @@ self.assert_(root.b[0] >= root.b[1]) self.assert_(root.b[0] > root.b[1]) + self.assertEquals(root.b[0], "test") + self.assertEquals("test", root.b[0]) + self.assert_(root.b[0] > 5) + self.assert_(5 < root.b[0]) + def test_type_int_cmp(self): XML = self.etree.XML root = XML(u'56') @@ -199,6 +204,27 @@ self.assertFalse(root.b[0] >= root.b[1]) self.assertFalse(root.b[0] > root.b[1]) + self.assertEquals(root.b[0], 5) + self.assertEquals(5, root.b[0]) + self.assert_(root.b[0] < "5") + self.assert_("5" > root.b[0]) + + def test_type_bool_cmp(self): + XML = self.etree.XML + root = XML(u'falsetrue') + self.assert_(root.b[0] < root.b[1]) + self.assert_(root.b[0] <= root.b[1]) + self.assert_(root.b[0] != root.b[1]) + + self.assertFalse(root.b[0] == root.b[1]) + self.assertFalse(root.b[0] >= root.b[1]) + self.assertFalse(root.b[0] > root.b[1]) + + self.assertEquals(root.b[0], 
False) + self.assertEquals(False, root.b[0]) + self.assert_(root.b[0] < 5) + self.assert_(5 > root.b[0]) + def test_typedef(self): XML = self.etree.XML root = XML(u'5test1.1\uF8D2true') From scoder at codespeak.net Wed Jul 26 15:06:49 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 26 Jul 2006 15:06:49 +0200 (CEST) Subject: [Lxml-checkins] r30573 - in lxml/branch/capi/src/lxml: . tests Message-ID: <20060726130649.071C4100BD@code0.codespeak.net> Author: scoder Date: Wed Jul 26 15:06:46 2006 New Revision: 30573 Modified: lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: use xsi:type hints in typedef() Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Wed Jul 26 15:06:46 2006 @@ -701,6 +701,9 @@ pytype.xmlSchemaTypes = ("string", "normalizedString") pytype.register() + pytype = PyType('none', None, NoneElement) + pytype.register() + _registerPyTypes() cdef object _guessElementClass(tree.xmlNode* c_node): @@ -821,6 +824,7 @@ cdef tree.xmlNode* c_node cdef tree.xmlAttr* c_attr cdef tree.xmlNs* c_ns + cdef python.PyObject* dict_result element = cetree.rootNodeOrRaise(element_or_tree) doc = element._doc ignore = bool(ignore_old) @@ -844,6 +848,23 @@ pytype = None except _ValueError: pytype = None + + if pytype is None: + # if element is defined as xsi:nil, return NoneElement class + if cetree.attributeValueFromNsName( + c_node, _XML_SCHEMA_INSTANCE_NS, "nil") == "true": + pytype = _PYTYPE_DICT.get("none") + + if pytype is None: + # check for XML Schema type hint + value = cetree.attributeValueFromNsName( + c_node, _XML_SCHEMA_INSTANCE_NS, "type") + + if value is not None: + dict_result = python.PyDict_GetItem(_SCHEMA_TYPE_DICT, value) + if dict_result is not NULL: + pytype = dict_result + if pytype is None: # try to guess type if 
not cetree.findChildForwards(c_node, 0): @@ -860,6 +881,7 @@ pass else: pytype = StrType + if pytype is None: # delete attribute if it exists c_attr = tree.xmlHasNsProp(c_node, _PYTYPE_NAMESPACE, Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Wed Jul 26 15:06:46 2006 @@ -227,7 +227,18 @@ def test_typedef(self): XML = self.etree.XML - root = XML(u'5test1.1\uF8D2true') + root = XML(u'''\ + + 5 + test + 1.1 + \uF8D2 + true + + + 5 + + ''') objectify.typedef(root) child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE) @@ -237,6 +248,9 @@ self.assertEquals("float", child_types[2]) self.assertEquals("str", child_types[3]) self.assertEquals("bool", child_types[4]) + self.assertEquals("none", child_types[5]) + self.assertEquals(None, child_types[6]) + self.assertEquals("float", child_types[7]) def test_suite(): suite = unittest.TestSuite() From scoder at codespeak.net Wed Jul 26 15:41:12 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 26 Jul 2006 15:41:12 +0200 (CEST) Subject: [Lxml-checkins] r30576 - in lxml/trunk: . 
src/lxml src/lxml/tests Message-ID: <20060726134112.6B3AF10036@code0.codespeak.net> Author: scoder Date: Wed Jul 26 15:41:09 2006 New Revision: 30576 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/tests/test_elementtree.py Log: fix: return empty string for el.text instead of None if it was set to an empty string before Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Jul 26 15:41:09 2006 @@ -13,6 +13,8 @@ Bugs fixed ---------- +* Setting element.text to '' returned None on request, not the empty string + * ``iterparse()`` could crash on long XML files * Setting an attribute to a non-string value did not raise an exception Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Wed Jul 26 15:41:09 2006 @@ -169,12 +169,15 @@ while c_node_cur is not NULL and c_node_cur.type == tree.XML_TEXT_NODE: if c_node_cur.content[0] != c'\0': text = c_node_cur.content - scount = scount + 1 + scount = scount + 1 c_node_cur = c_node_cur.next # handle two most common cases first if text is NULL: - return None + if scount > 0: + return '' + else: + return None if scount == 1: return funicode(text) Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Wed Jul 26 15:41:09 2006 @@ -707,6 +707,16 @@ a.text) self.assertXML('', a) + def test_set_text_empty(self): + Element = self.etree.Element + + a = Element('a') + self.assertEquals(None, a.text) + + a.text = '' + self.assertEquals('', a.text) + self.assertXML('', a) + def test_tail1(self): Element = self.etree.Element 
SubElement = self.etree.SubElement From scoder at codespeak.net Wed Jul 26 15:44:15 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 26 Jul 2006 15:44:15 +0200 (CEST) Subject: [Lxml-checkins] r30577 - in lxml/branch/capi: . src/lxml src/lxml/tests Message-ID: <20060726134415.717F4100BA@code0.codespeak.net> Author: scoder Date: Wed Jul 26 15:44:13 2006 New Revision: 30577 Modified: lxml/branch/capi/CHANGES.txt lxml/branch/capi/src/lxml/apihelpers.pxi lxml/branch/capi/src/lxml/tests/test_elementtree.py Log: merged fix from trunk: return empty string for el.text instead of None if it was set to an empty string before Modified: lxml/branch/capi/CHANGES.txt ============================================================================== --- lxml/branch/capi/CHANGES.txt (original) +++ lxml/branch/capi/CHANGES.txt Wed Jul 26 15:44:13 2006 @@ -17,6 +17,8 @@ Bugs fixed ---------- +* Setting element.text to '' returned None on request, not the empty string + * Element.remove() deleted the tail text from the removed Element * Creating documents no longer copies the parser for later URL resolving. 
For Modified: lxml/branch/capi/src/lxml/apihelpers.pxi ============================================================================== --- lxml/branch/capi/src/lxml/apihelpers.pxi (original) +++ lxml/branch/capi/src/lxml/apihelpers.pxi Wed Jul 26 15:44:13 2006 @@ -182,12 +182,15 @@ while c_node_cur is not NULL and c_node_cur.type == tree.XML_TEXT_NODE: if c_node_cur.content[0] != c'\0': text = c_node_cur.content - scount = scount + 1 + scount = scount + 1 c_node_cur = c_node_cur.next # handle two most common cases first if text is NULL: - return None + if scount > 0: + return '' + else: + return None if scount == 1: return funicode(text) Modified: lxml/branch/capi/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/capi/src/lxml/tests/test_elementtree.py Wed Jul 26 15:44:13 2006 @@ -681,6 +681,16 @@ a.text) self.assertXML('', a) + def test_set_text_empty(self): + Element = self.etree.Element + + a = Element('a') + self.assertEquals(None, a.text) + + a.text = '' + self.assertEquals('', a.text) + self.assertXML('', a) + def test_tail1(self): Element = self.etree.Element SubElement = self.etree.SubElement From scoder at codespeak.net Wed Jul 26 16:19:02 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 26 Jul 2006 16:19:02 +0200 (CEST) Subject: [Lxml-checkins] r30580 - lxml/branch/capi/src/lxml Message-ID: <20060726141902.CCC5E100CE@code0.codespeak.net> Author: scoder Date: Wed Jul 26 16:19:00 2006 New Revision: 30580 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: better handling of empty/None strings in element content Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Wed Jul 26 16:19:00 2006 @@ -490,7 +490,11 @@ 
cetree.setNodeText(self._c_node, value) def strlen(self): - return len(_strValueOf(self)) + text = textOf(self._c_node) + if text is None: + return 0 + else: + return len(text) def __richcmp__(self, other, int op): if hasattr(other, 'pyval'): @@ -499,7 +503,7 @@ _strValueOf(self), other, op) def __str__(self): - return textOf(self._c_node) + return textOf(self._c_node) or '' def __add__(self, other): text = _strValueOf(self) @@ -711,6 +715,8 @@ if value is None: # default to string class return StringElement + if value == '': + return StringElement errors = (ValueError, TypeError) for type_check, pytype in _TYPE_CHECKS: try: From scoder at codespeak.net Wed Jul 26 16:19:27 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 26 Jul 2006 16:19:27 +0200 (CEST) Subject: [Lxml-checkins] r30581 - lxml/branch/capi Message-ID: <20060726141927.5F6A8100CE@code0.codespeak.net> Author: scoder Date: Wed Jul 26 16:19:26 2006 New Revision: 30581 Modified: lxml/branch/capi/CHANGES.txt Log: updated CHANGES.txt for objectify and classlookup modules Modified: lxml/branch/capi/CHANGES.txt ============================================================================== --- lxml/branch/capi/CHANGES.txt (original) +++ lxml/branch/capi/CHANGES.txt Wed Jul 26 16:19:26 2006 @@ -8,6 +8,14 @@ Features added -------------- +* XML-Object API on top of lxml (lxml.elements.objectify) + +* Customizable Element class lookup: + + * Support for externally provided lookup functions + + * lxml.elements.classlookup module implements different lookup mechanisms + * Support for processing instructions (ET-like, not compatible) * Element.replace(old, new) method to replace a subelement by another one From scoder at codespeak.net Wed Jul 26 16:49:44 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 26 Jul 2006 16:49:44 +0200 (CEST) Subject: [Lxml-checkins] r30583 - in lxml/branch/capi/src/lxml: . 
tests Message-ID: <20060726144944.94779100D6@code0.codespeak.net> Author: scoder Date: Wed Jul 26 16:49:43 2006 New Revision: 30583 Modified: lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: removed setters from .text and .pyval properties, fixed element attribute assignment of string values Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Wed Jul 26 16:49:43 2006 @@ -80,6 +80,10 @@ if __RECURSIVE_STR: return _dump(self, 0) else: + return textOf(self._c_node) or '' + + property text: + def __get__(self): return textOf(self._c_node) def __len__(self): @@ -142,7 +146,7 @@ self.append(element) else: self.replace(child, element) - elif python.PySequence_Check(value): + elif python.PyList_Check(value) or python.PyTuple_Check(value): try: element = _lookupChild(self, tag) except AttributeError: @@ -377,9 +381,6 @@ property pyval: def __get__(self): return self._value() - def __set__(self, value): - self.text = str(value) - self._checkType(value) def __int__(self): return int(textOf(self._c_node)) @@ -486,8 +487,6 @@ property pyval: def __get__(self): return textOf(self._c_node) - def __set__(self, value): - cetree.setNodeText(self._c_node, value) def strlen(self): text = textOf(self._c_node) @@ -537,9 +536,6 @@ property pyval: def __get__(self): return None - def __set__(self, value): - if value is not None: - raise TypeError, "Invalid value for None type: %s" % type(value) cdef class BoolElement(ObjectifiedElement): """Boolean type base on string values: 'true' or 'false'. 
@@ -580,11 +576,6 @@ property pyval: def __get__(self): return self.__nonzero__() - def __set__(self, value): - if bool(value): - self.text = 'true' - else: - self.text = 'false' def __checkBool(s): if s != 'true' and s != 'false': Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Wed Jul 26 16:49:43 2006 @@ -94,6 +94,14 @@ self.assertEquals(["c1", "c2", "c2", "c1"], [ c.text for c in root.c ]) + def test_setslice_string(self): + # make sure strings are not handled as sequences + Element = self.etree.Element + SubElement = self.etree.SubElement + root = Element("root") + root.c = "TEST" + self.assertEquals(["TEST"], + [ c.text for c in root.c ]) def test_findall(self): XML = self.etree.XML From scoder at codespeak.net Wed Jul 26 16:59:17 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 26 Jul 2006 16:59:17 +0200 (CEST) Subject: [Lxml-checkins] r30584 - lxml/branch/capi/src/lxml/tests Message-ID: <20060726145917.514D3100D6@code0.codespeak.net> Author: scoder Date: Wed Jul 26 16:59:16 2006 New Revision: 30584 Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py Log: test case for str(empty_element) Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Wed Jul 26 16:59:16 2006 @@ -38,6 +38,10 @@ self.etree.Namespace("otherNs").clear() objectify.unregister() + def test_str(self): + root = self.etree.Element("test") + self.assertEquals('', str(root)) + def test_child(self): root = self.etree.XML(xml_str) self.assertEquals("0", root.c1.c2.text) From scoder at codespeak.net Wed Jul 26 18:19:46 2006 From: scoder at codespeak.net (scoder at 
codespeak.net) Date: Wed, 26 Jul 2006 18:19:46 +0200 (CEST) Subject: [Lxml-checkins] r30590 - in lxml/branch/capi: doc src/lxml Message-ID: <20060726161946.0034B100E2@code0.codespeak.net> Author: scoder Date: Wed Jul 26 18:19:45 2006 New Revision: 30590 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: tree elements are immutable, setting their value replaces the element Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Wed Jul 26 18:19:45 2006 @@ -226,6 +226,37 @@ ... print "false!" false! +You can freely switch between different types for the same child:: + + >>> root = etree.fromstring("""5""") + >>> print objectify.dump(root) + root = None [ObjectifiedElement] + a = 5 [IntElement] + + >>> root.a = 'nice string!' + >>> print objectify.dump(root) + root = None [ObjectifiedElement] + a = 'nice string!' [StringElement] + + >>> root.a = True + >>> print objectify.dump(root) + root = None [ObjectifiedElement] + a = True [BoolElement] + + >>> root.a = [1, 2, 3] + >>> print objectify.dump(root) + root = None [ObjectifiedElement] + a = 1 [IntElement] + a = 2 [IntElement] + a = 3 [IntElement] + + >>> root.a = (1, 2, 3) + >>> print objectify.dump(root) + root = None [ObjectifiedElement] + a = 1 [IntElement] + a = 2 [IntElement] + a = 3 [IntElement] + However, data elements continue to provide the objectify API. This means that sequence operations such as ``len()``, slicing and indexing (e.g. of strings) cannot behave as the Python types. 
Like all other tree elements, they show Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Wed Jul 26 18:19:45 2006 @@ -135,18 +135,7 @@ ElementBase.tag.__set__(self, value) return - if isinstance(value, _Element): - # deep copy the new element - element = cetree.deepcopyNodeToDocument( - self._doc, (<_Element>value)._c_node) - element.tag = _buildChildTag(self, tag) - try: - child = _lookupChild(self, tag) - except AttributeError: - self.append(element) - else: - self.replace(child, element) - elif python.PyList_Check(value) or python.PyTuple_Check(value): + if python.PyList_Check(value) or python.PyTuple_Check(value): try: element = _lookupChild(self, tag) except AttributeError: @@ -155,16 +144,26 @@ else: element.__setslice__(0, python.PY_SSIZE_T_MAX, value) else: + if isinstance(value, _Element): + # deep copy the new element + element = cetree.deepcopyNodeToDocument( + self._doc, (<_Element>value)._c_node) + element.tag = _buildChildTag(self, tag) + else: + element = self.makeelement(tag) + if not python._isString(value): + if isinstance(value, bool): + value = str(value).lower() + else: + value = str(value) + cetree.setNodeText(element._c_node, value) + try: - element = _lookupChild(self, tag) + child = _lookupChild(self, tag) except AttributeError: - element = SubElement(self, tag) - if not python._isString(value): - if isinstance(value, bool): - value = str(value).lower() - else: - value = str(value) - cetree.setNodeText(element._c_node, value) + self.append(element) + else: + self.replace(child, element) def __delattr__(self, tag): child = _lookupChild(self, tag) @@ -340,16 +339,18 @@ return cetree.namespacedNameFromNsName(c_href, c_tag) cdef object _replaceElement(_Element element, value): + cdef _Element new_element if isinstance(value, _Element): # deep copy the new element 
new_element = cetree.deepcopyNodeToDocument( element._doc, (<_Element>value)._c_node) new_element.tag = element.tag - element.getparent().replace(element, new_element) else: if not python._isString(value): value = str(value) - cetree.setNodeText(element._c_node, value) + new_element = element.makeelement(element.tag) + cetree.setNodeText(new_element._c_node, value) + element.getparent().replace(element, new_element) cdef object _appendValue(_Element parent, tag, value): cdef _Element new_element From scoder at codespeak.net Wed Jul 26 18:26:03 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 26 Jul 2006 18:26:03 +0200 (CEST) Subject: [Lxml-checkins] r30592 - lxml/branch/capi/doc Message-ID: <20060726162603.36547100E2@code0.codespeak.net> Author: scoder Date: Wed Jul 26 18:26:01 2006 New Revision: 30592 Modified: lxml/branch/capi/doc/objectify.txt Log: doc update: describe objectify.dump() function earlier Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Wed Jul 26 18:26:01 2006 @@ -226,6 +226,32 @@ ... print "false!" false! +To see the data types that are currently used, you can call the module level +``dump()`` function that returns a recursive string representation for +elements:: + + >>> root = etree.XML(""" + ... + ... 1 + ... 1.2 + ... 1 + ... true + ... what? + ... + ... + ... """) + + >>> print objectify.dump(root) + root = None [ObjectifiedElement] + a = 1 [IntElement] + * attr1 = 'foo' + * attr2 = 'bar' + a = 1.2 [FloatElement] + b = 1 [IntElement] + b = True [BoolElement] + c = 'what?' [StringElement] + d = None [NoneElement] + You can freely switch between different types for the same child:: >>> root = etree.fromstring("""5""") @@ -416,22 +442,10 @@ c = 'what?' [StringElement] d = None [NoneElement] -This behaviour can be switched off in the same way. 
Note that this output -format is always available through the ``dump()`` module function:: +This behaviour can be switched off in the same way:: >>> objectify.enableRecursiveStr(False) - >>> print objectify.dump(root) - root = None [ObjectifiedElement] - a = 1 [IntElement] - * attr1 = 'foo' - * attr2 = 'bar' - a = 1.2 [FloatElement] - b = 1 [IntElement] - b = True [BoolElement] - c = 'what?' [StringElement] - d = None [NoneElement] - What is different from ElementTree? ----------------------------------- From scoder at codespeak.net Wed Jul 26 19:16:48 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 26 Jul 2006 19:16:48 +0200 (CEST) Subject: [Lxml-checkins] r30600 - in lxml/branch/capi: doc src/lxml Message-ID: <20060726171648.6F8AE100BB@code0.codespeak.net> Author: scoder Date: Wed Jul 26 19:16:47 2006 New Revision: 30600 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: ObjectifyElementClassLookup: support for using objectify on a per-parser basis etc. through the classlookup framework Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Wed Jul 26 19:16:47 2006 @@ -4,7 +4,7 @@ lxml supports an alternative element API similar to the Amara_ bindery through a custom Element implementation. This API is very different from the -ElementTree API. If it is used, it can only be used *exclusively*, to avoid +ElementTree API. If it is used, it should be used exclusively, to avoid common pitfalls when mixing element implementations. .. _Amara: http://uche.ogbuji.net/tech/4suite/amara/ @@ -27,24 +27,38 @@ .. _`namespace specific classes`: namespace_extensions.html +It is possible to use objectify's element classes on a more fine-grained +basis. 
Instead of activating it globally, it can be integrated with the class +lookup framework from `lxml.elements.classlookup`_. This is accomplished by +the class ``ObjectifyElementClassLookup``. By setting it as the local class +lookup scheme of a parser, for example, you can restrict the objectify API to +documents that were parsed by this specific parser. As said above, you really +have to take care in this case to prevent mixing the Element implementations +between documents. If you do, however, this provides a very convenient way of +using different XML APIs at the same time, e.g. in differen Python modules. + +.. _`lxml.elements.classlookup`: elements.html + Since this API is meant for data-centered XML (as opposed to document XML with mixed content), it might be worth-wile in this context to change the default parser:: >>> etree.setDefaultParser( etree.XMLParser(remove_blank_text=True) ) -Now the parser will remove whitespace-only text from the parsed document. -Note that this alters the document infoset, so if you consider spaces as data -in your specific use case, you should go with the normal parser. - +Now the parser will remove whitespace-only text from the parsed document, +unless it is found enclosed by an XML element. Note that this alters the +document infoset, so if you consider the removed spaces as data in your +specific use case, you should go with the normal parser. .. contents:: .. 1 Element access through object attributes 2 Namespace handling 3 Python data types - 4 What is different from ElementTree? - 5 Resetting the API + 4 Defining additional data classes + 5 Recursive string representation of elements + 6 What is different from ElementTree? 
+ 7 Resetting the API Element access through object attributes Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Wed Jul 26 19:16:47 2006 @@ -1,4 +1,5 @@ -from etreepublic cimport _Document, _Element, ElementBase, _ElementIterator +from etreepublic cimport _Document, _Element, ElementBase +from etreepublic cimport _ElementIterator, _ElementClassLookup from etreepublic cimport elementFactory, import_etree, textOf from python cimport isinstance, issubclass, callable, getattr, _cstr, Py_ssize_t cimport etreepublic as cetree @@ -765,12 +766,28 @@ ################################################################################ # Element class lookup +cdef class ObjectifyElementClassLookup(_ElementClassLookup): + """Element class lookup method that uses the objectify classes. + + The constructor accepts a keyword argument 'default_to_nsclasses'. You can + set it to False to divert from the default behaviour of looking up + namespace registered classes before trying to determine the right + objectify type class. 
+ """ + cdef int _default_to_nsclasses + def __init__(self, default_to_nsclasses=True): + self._lookup_function = _lookupElementClass + self._default_to_nsclasses = bool(default_to_nsclasses) + cdef object _lookupElementClass(state, _Document doc, tree.xmlNode* c_node): cdef python.PyObject* dict_result - # default to namespace specific classes - nsclass = cetree.lookupNamespaceElementClass(state, doc, c_node) - if nsclass is not ObjectifiedElement: - return nsclass + if state is None or \ + not isinstance(state, ObjectifyElementClassLookup) or \ + (state._default_to_nsclasses): + # default to namespace specific classes + nsclass = cetree.lookupNamespaceElementClass(state, doc, c_node) + if nsclass is not ObjectifiedElement: + return nsclass # if element has children => no data class if cetree.findChildForwards(c_node, 0): From scoder at codespeak.net Thu Jul 27 07:17:06 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 27 Jul 2006 07:17:06 +0200 (CEST) Subject: [Lxml-checkins] r30607 - lxml/branch/capi/doc Message-ID: <20060727051706.EEDB5100BA@code0.codespeak.net> Author: scoder Date: Thu Jul 27 07:17:02 2006 New Revision: 30607 Modified: lxml/branch/capi/doc/elements.txt lxml/branch/capi/doc/objectify.txt Log: doc fixes Modified: lxml/branch/capi/doc/elements.txt ============================================================================== --- lxml/branch/capi/doc/elements.txt (original) +++ lxml/branch/capi/doc/elements.txt Thu Jul 27 07:17:02 2006 @@ -60,7 +60,7 @@ ...................... This uses a mapping from attribute values to classes. An attribute name is -set at initialisation time and is the used to find the corresponding value. +set at initialisation time and is then used to find the corresponding value. 
It is selected as follows:: >>> from lxml.elements.classlookup import AttributeBasedElementClassLookup @@ -68,9 +68,9 @@ >>> etree.setElementClassLookup(lookup) Note that this class supports a fallback mechanism that is used in the case -where the attribute value is not found in the mapping. Normally, the default -class lookup is used here. If you want to use the namespace lookup, for -example, you can use this code:: +where the attribute is not found or its value is not in the mapping. +Normally, the default class lookup is used here. If you want to use the +namespace lookup, for example, you can use this code:: >>> fallback = ElementNamespaceClassLookup() >>> lookup = AttributeBasedElementClassLookup( Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Jul 27 07:17:02 2006 @@ -10,7 +10,7 @@ .. _Amara: http://uche.ogbuji.net/tech/4suite/amara/ You can replace the original implementation by the ``objectify`` element class -by simply importing the module and calling the ``register`` function:: +by simply importing the module and calling the ``register()`` function:: >>> from lxml import etree >>> from lxml.elements import objectify @@ -27,7 +27,7 @@ .. _`namespace specific classes`: namespace_extensions.html -It is possible to use objectify's element classes on a more fine-grained +It is also possible to use objectify's element classes on a more fine-grained basis. Instead of activating it globally, it can be integrated with the class lookup framework from `lxml.elements.classlookup`_. This is accomplished by the class ``ObjectifyElementClassLookup``. 
By setting it as the local class From scoder at codespeak.net Thu Jul 27 07:22:24 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 27 Jul 2006 07:22:24 +0200 (CEST) Subject: [Lxml-checkins] r30608 - lxml/branch/capi/doc Message-ID: <20060727052224.E01BF100BA@code0.codespeak.net> Author: scoder Date: Thu Jul 27 07:22:21 2006 New Revision: 30608 Modified: lxml/branch/capi/doc/objectify.txt Log: doc typos Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Jul 27 07:22:21 2006 @@ -35,12 +35,12 @@ documents that were parsed by this specific parser. As said above, you really have to take care in this case to prevent mixing the Element implementations between documents. If you do, however, this provides a very convenient way of -using different XML APIs at the same time, e.g. in differen Python modules. +using different XML APIs at the same time, e.g. in different Python modules. .. _`lxml.elements.classlookup`: elements.html Since this API is meant for data-centered XML (as opposed to document XML with -mixed content), it might be worth-wile in this context to change the default +mixed content), it might be worthwhile in this context to change the default parser:: >>> etree.setDefaultParser( etree.XMLParser(remove_blank_text=True) ) From scoder at codespeak.net Thu Jul 27 12:04:41 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 27 Jul 2006 12:04:41 +0200 (CEST) Subject: [Lxml-checkins] r30625 - in lxml/branch/capi: . 
src/lxml/tests Message-ID: <20060727100441.2E6F6100E6@code0.codespeak.net> Author: scoder Date: Thu Jul 27 12:04:39 2006 New Revision: 30625 Modified: lxml/branch/capi/CHANGES.txt lxml/branch/capi/src/lxml/tests/test_elementtree.py lxml/branch/capi/src/lxml/tests/test_etree.py Log: merged in attribute value setting fix from trunk (30333) Modified: lxml/branch/capi/CHANGES.txt ============================================================================== --- lxml/branch/capi/CHANGES.txt (original) +++ lxml/branch/capi/CHANGES.txt Thu Jul 27 12:04:39 2006 @@ -27,6 +27,8 @@ * Setting element.text to '' returned None on request, not the empty string +* Setting an attribute to a non-string value did not raise an exception + * Element.remove() deleted the tail text from the removed Element * Creating documents no longer copies the parser for later URL resolving. For Modified: lxml/branch/capi/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/capi/src/lxml/tests/test_elementtree.py Thu Jul 27 12:04:39 2006 @@ -368,6 +368,13 @@ True, '{http://ns.codespeak.net/test}baz' in root.attrib) + def test_attribute_set(self): + Element = self.etree.Element + + root = Element("root") + root.set("attr", "TEST") + self.assertEquals("TEST", root.get("attr")) + def test_XML(self): XML = self.etree.XML Modified: lxml/branch/capi/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_etree.py (original) +++ lxml/branch/capi/src/lxml/tests/test_etree.py Thu Jul 27 12:04:39 2006 @@ -58,6 +58,16 @@ self.assertEquals(root[0].target, "mypi") self.assertEquals(root[0].text, "my test ") + def test_attribute_set(self): + # ElementTree accepts arbitrary attribute values + # lxml.etree allows only strings + Element = self.etree.Element + + root = Element("root") 
+ root.set("attr", "TEST") + self.assertEquals("TEST", root.get("attr")) + self.assertRaises(TypeError, root.set, "newattr", 5) + def test_parse_error(self): parse = self.etree.parse # from StringIO From scoder at codespeak.net Thu Jul 27 13:10:49 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 27 Jul 2006 13:10:49 +0200 (CEST) Subject: [Lxml-checkins] r30630 - lxml/branch/capi/src/lxml Message-ID: <20060727111049.E0300100E2@code0.codespeak.net> Author: scoder Date: Thu Jul 27 13:10:48 2006 New Revision: 30630 Modified: lxml/branch/capi/src/lxml/apihelpers.pxi lxml/branch/capi/src/lxml/etree.pyx lxml/branch/capi/src/lxml/etreepublic.pxd lxml/branch/capi/src/lxml/public-api.pxi Log: public API for setting and deleting attributes, fixed exception case for setNodeText() and setTailText() Modified: lxml/branch/capi/src/lxml/apihelpers.pxi ============================================================================== --- lxml/branch/capi/src/lxml/apihelpers.pxi (original) +++ lxml/branch/capi/src/lxml/apihelpers.pxi Thu Jul 27 13:10:48 2006 @@ -121,7 +121,7 @@ tree.xmlFree(c_result) return result -cdef void _setAttributeValue(_NodeBase element, key, value): +cdef int _setAttributeValue(_NodeBase element, key, value) except -1: cdef xmlNs* c_ns cdef char* c_value cdef char* c_tag @@ -134,6 +134,31 @@ else: c_ns = element._doc._findOrBuildNodeNs(element._c_node, _cstr(ns)) tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value) + return 0 + +cdef int _delAttribute(_NodeBase element, key) except -1: + cdef xmlAttr* c_attr + cdef char* c_href + ns, tag = _getNsTag(key) + if ns is None: + c_href = NULL + else: + c_href = _cstr(ns) + if _delAttributeFromNsName(element._c_node, c_href, _cstr(tag)): + raise KeyError, key + return 0 + +cdef int _delAttributeFromNsName(xmlNode* c_node, char* c_href, char* c_name): + cdef xmlAttr* c_attr + if c_href is NULL: + c_attr = tree.xmlHasProp(c_node, c_name) + else: + c_attr = tree.xmlHasNsProp(c_node, c_name, 
c_href) + if c_attr is NULL: + # XXX free namespace that is not in use..? + return -1 + tree.xmlRemoveProp(c_attr) + return 0 cdef object __RE_XML_ENCODING __RE_XML_ENCODING = re.compile( @@ -214,12 +239,12 @@ tree.xmlFreeNode(c_node) c_node = c_next -cdef _setNodeText(xmlNode* c_node, value): +cdef int _setNodeText(xmlNode* c_node, value) except -1: cdef xmlNode* c_text_node # remove all text nodes at the start first _removeText(c_node.children) if value is None: - return + return 0 # now add new text node with value at start text = _utf8(value) c_text_node = tree.xmlNewDocText(c_node.doc, _cstr(text)) @@ -227,17 +252,19 @@ tree.xmlAddChild(c_node, c_text_node) else: tree.xmlAddPrevSibling(c_node.children, c_text_node) + return 0 -cdef _setTailText(xmlNode* c_node, value): +cdef int _setTailText(xmlNode* c_node, value) except -1: cdef xmlNode* c_text_node # remove all text nodes at the start first _removeText(c_node.next) if value is None: - return + return 0 text = _utf8(value) c_text_node = tree.xmlNewDocText(c_node.doc, _cstr(text)) # XXX what if we're the top element? tree.xmlAddNextSibling(c_node, c_text_node) + return 0 cdef xmlNode* _findChild(xmlNode* c_node, Py_ssize_t index): if index < 0: Modified: lxml/branch/capi/src/lxml/etree.pyx ============================================================================== --- lxml/branch/capi/src/lxml/etree.pyx (original) +++ lxml/branch/capi/src/lxml/etree.pyx Thu Jul 27 13:10:48 2006 @@ -1240,21 +1240,8 @@ _setAttributeValue(self._element, key, value) def __delitem__(self, key): - cdef xmlNode* c_node - cdef xmlAttr* c_attr - cdef char* c_tag - ns, tag = _getNsTag(key) - c_tag = _cstr(tag) - c_node = self._element._c_node - if ns is None: - c_attr = tree.xmlHasProp(c_node, c_tag) - else: - c_attr = tree.xmlHasNsProp(c_node, c_tag, _cstr(ns)) - if c_attr is NULL: - # XXX free namespace that is not in use..? 
- raise KeyError, key - tree.xmlRemoveProp(c_attr) - + _delAttribute(self._element, key) + # ACCESSORS def __repr__(self): result = {} Modified: lxml/branch/capi/src/lxml/etreepublic.pxd ============================================================================== --- lxml/branch/capi/src/lxml/etreepublic.pxd (original) +++ lxml/branch/capi/src/lxml/etreepublic.pxd Thu Jul 27 13:10:48 2006 @@ -90,6 +90,19 @@ # return the value of attribute "{ns}name", or the default value cdef object getAttributeValue(_NodeBase element, key, default) + # set an attribute value on an element + # on failure, sets an exception and returns -1 + cdef int setAttributeValue(_NodeBase element, key, value) except -1 + + # delete an attribute + # on failure, sets an exception and returns -1 + cdef int delAttribute(_NodeBase element, key) except -1 + + # delete an attribute based on name and namespace URI + # returns -1 if the attribute was not found (no exception) + cdef int delAttributeFromNsName(tree.xmlNode* c_element, + char* c_href, char* c_name) + ########################################################################## # XML node helper functions @@ -159,10 +172,10 @@ cdef object tailOf(tree.xmlNode* c_node) # set the text value of an element - cdef object setNodeText(tree.xmlNode* c_node, text) + cdef int setNodeText(tree.xmlNode* c_node, text) except -1 # set the tail text value of an element - cdef object setTailText(tree.xmlNode* c_node, text) + cdef int setTailText(tree.xmlNode* c_node, text) except -1 # recursively lookup a namespace in element or ancestors, or create it cdef tree.xmlNs* findOrBuildNodeNs(_Document doc, tree.xmlNode* c_node, Modified: lxml/branch/capi/src/lxml/public-api.pxi ============================================================================== --- lxml/branch/capi/src/lxml/public-api.pxi (original) +++ lxml/branch/capi/src/lxml/public-api.pxi Thu Jul 27 13:10:48 2006 @@ -55,15 +55,15 @@ return None return _collectText(c_node.next) -cdef public object 
setNodeText(xmlNode* c_node, text): +cdef public int setNodeText(xmlNode* c_node, text) except -1: if c_node is NULL: raise ValueError - _setNodeText(c_node, text) + return _setNodeText(c_node, text) -cdef public object setTailText(xmlNode* c_node, text): +cdef public int setTailText(xmlNode* c_node, text) except -1: if c_node is NULL: raise ValueError - _setTailText(c_node, text) + return _setTailText(c_node, text) cdef public object attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node): return _attributeValue(c_element, c_attrib_node) @@ -75,6 +75,16 @@ cdef public object getAttributeValue(_NodeBase element, key, default): return _getAttributeValue(element, key, default) +cdef public int setAttributeValue(_NodeBase element, key, value) except -1: + return _setAttributeValue(element, key, value) + +cdef public int delAttribute(_NodeBase element, key) except -1: + return _delAttribute(element, key) + +cdef public int delAttributeFromNsName(tree.xmlNode* c_element, + char* c_href, char* c_name): + return _delAttributeFromNsName(c_element, c_href, c_name) + cdef public xmlNode* findChild(xmlNode* c_node, Py_ssize_t index): return _findChild(c_node, index) From scoder at codespeak.net Thu Jul 27 13:12:07 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 27 Jul 2006 13:12:07 +0200 (CEST) Subject: [Lxml-checkins] r30631 - lxml/branch/capi/src/lxml Message-ID: <20060727111207.3C722100E2@code0.codespeak.net> Author: scoder Date: Thu Jul 27 13:12:06 2006 New Revision: 30631 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: set/delete xsi:nil on None valued elements, factored out function for element value setting (and converting) Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Jul 27 13:12:06 2006 @@ -60,6 +60,9 @@ cdef char* _XML_SCHEMA_INSTANCE_NS 
_XML_SCHEMA_INSTANCE_NS = _cstr(XML_SCHEMA_INSTANCE_NS) +cdef object XML_SCHEMA_INSTANCE_NIL_ATTR +XML_SCHEMA_INSTANCE_NIL_ATTR = "{%s}nil" % XML_SCHEMA_INSTANCE_NS + cdef class ObjectifiedElement(ElementBase): """Element class with an Amara-like API. @@ -152,12 +155,7 @@ element.tag = _buildChildTag(self, tag) else: element = self.makeelement(tag) - if not python._isString(value): - if isinstance(value, bool): - value = str(value).lower() - else: - value = str(value) - cetree.setNodeText(element._c_node, value) + _setElementValue(element, value) try: child = _lookupChild(self, tag) @@ -347,10 +345,8 @@ element._doc, (<_Element>value)._c_node) new_element.tag = element.tag else: - if not python._isString(value): - value = str(value) new_element = element.makeelement(element.tag) - cetree.setNodeText(new_element._c_node, value) + _setElementValue(new_element, value) element.getparent().replace(element, new_element) cdef object _appendValue(_Element parent, tag, value): @@ -363,10 +359,21 @@ parent.append(new_element) else: new_element = etree.SubElement(parent, tag) - if not python._isString(value): - value = str(value) - cetree.setNodeText(new_element._c_node, value) + _setElementValue(new_element, value) +cdef _setElementValue(_Element element, value): + if value is None: + cetree.setAttributeValue( + element, XML_SCHEMA_INSTANCE_NIL_ATTR, "true") + else: + cetree.delAttributeFromNsName( + element._c_node, _XML_SCHEMA_INSTANCE_NS, "nil") + if not python._isString(value): + if isinstance(value, bool): + value = str(value).lower() + else: + value = str(value) + cetree.setNodeText(element._c_node, value) ################################################################################ # Data type support in subclasses From scoder at codespeak.net Thu Jul 27 13:26:23 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 27 Jul 2006 13:26:23 +0200 (CEST) Subject: [Lxml-checkins] r30632 - lxml/branch/capi/src/lxml/tests Message-ID: 
<20060727112623.810DC100E4@code0.codespeak.net> Author: scoder Date: Thu Jul 27 13:26:21 2006 New Revision: 30632 Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py Log: test cleanups Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Thu Jul 27 13:26:21 2006 @@ -90,15 +90,15 @@ el = Element("test") el.text = "test" - root.c = [ el ] - self.assertEquals(["test"], + root.c = [ el, el ] + self.assertEquals(["test", "test"], [ c.text for c in root.c ]) root.c[:] = [ c1, c2, c2, c1 ] self.assertEquals(["c1", "c2", "c2", "c1"], [ c.text for c in root.c ]) - def test_setslice_string(self): + def test_set_string(self): # make sure strings are not handled as sequences Element = self.etree.Element SubElement = self.etree.SubElement From scoder at codespeak.net Thu Jul 27 13:27:15 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 27 Jul 2006 13:27:15 +0200 (CEST) Subject: [Lxml-checkins] r30633 - lxml/branch/capi/src/lxml Message-ID: <20060727112715.D965D100E4@code0.codespeak.net> Author: scoder Date: Thu Jul 27 13:27:14 2006 New Revision: 30633 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: doc fix Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Jul 27 13:27:14 2006 @@ -490,8 +490,8 @@ """String data class. Note that this class does *not* support the sequence protocol of strings: - iter(), str[0], str[0:1], etc. are *not* supported. Instead, use the - .text attribute to get a 'real' string. + len(), iter(), str_attr[0], str_attr[0:1], etc. are *not* supported. + Instead, use the .text attribute to get a 'real' string. 
""" property pyval: def __get__(self): From scoder at codespeak.net Thu Jul 27 15:34:07 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 27 Jul 2006 15:34:07 +0200 (CEST) Subject: [Lxml-checkins] r30636 - lxml/branch/capi/src/lxml Message-ID: <20060727133407.D5A7410068@code0.codespeak.net> Author: scoder Date: Thu Jul 27 15:34:06 2006 New Revision: 30636 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: some cleanup, fall back to .pyval in _numericValueOf() to cover a larger number of cases Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Jul 27 15:34:06 2006 @@ -380,9 +380,6 @@ cdef class NumberElement(ObjectifiedElement): cdef object _type - cdef _checkType(self, value): - if not isinstance(value, self._type): - self._type = type(value) cdef _value(self): return self._type(textOf(self._c_node)) @@ -466,14 +463,6 @@ def __xor__(self, other): return _numericValueOf(self) ^ _numericValueOf(other) -## def __iadd__(self, other): -## if isinstance(other, _NumberElement): -## other = (<_NumberElement>other)._value() -## result = self._value() + other -## self._checkType(result) -## cetree.setNodeText(self._c_node, str(result)) -## return self - cdef class IntElement(NumberElement): def _init(self): self._type = int @@ -603,6 +592,9 @@ if isinstance(obj, NumberElement): return (obj)._type( textOf((obj)._c_node)) + elif hasattr(obj, 'pyval'): + # not always numeric, but Python will raise the right exception + return obj.pyval return obj ################################################################################ From scoder at codespeak.net Thu Jul 27 16:22:15 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 27 Jul 2006 16:22:15 +0200 (CEST) Subject: [Lxml-checkins] r30640 - lxml/branch/capi/src/lxml Message-ID: 
<20060727142215.05B3B1008F@code0.codespeak.net> Author: scoder Date: Thu Jul 27 16:22:14 2006 New Revision: 30640 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: fixed string conversion of numeric types: requires Python type conversion to make it look 'normal' Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Jul 27 16:22:14 2006 @@ -24,6 +24,8 @@ float = __builtin__.float cdef object bool bool = __builtin__.bool +cdef object str +str = __builtin__.str cdef object pow pow = __builtin__.pow cdef object abs @@ -398,7 +400,7 @@ return float(textOf(self._c_node)) def __str__(self): - return textOf(self._c_node) + return str(self._type(textOf(self._c_node))) # def __oct__(self): # def __hex__(self): From scoder at codespeak.net Thu Jul 27 17:20:11 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 27 Jul 2006 17:20:11 +0200 (CEST) Subject: [Lxml-checkins] r30644 - in lxml/branch/capi: doc src/lxml src/lxml/tests Message-ID: <20060727152011.BDC2D100C0@code0.codespeak.net> Author: scoder Date: Thu Jul 27 17:20:08 2006 New Revision: 30644 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: fixed test cases and doctests after making .text and .pyval read-only, namespace fix for assignment to new child Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Jul 27 17:20:08 2006 @@ -220,7 +220,7 @@ element content behave like them. For example, they support the normal math operators:: - >>> root = etree.XML("511true") + >>> root = etree.XML("511truehoi") >>> root.a + root.b 16 >>> root.a += root.b @@ -240,6 +240,13 @@ ... 
print "false!" false! + >>> print root.d + " test !" + hoi test ! + >>> root.d = "%s - %s" + >>> print root.d % (1234, 12345) + 1234 - 12345 + + To see the data types that are currently used, you can call the module level ``dump()`` function that returns a recursive string representation for elements:: @@ -303,9 +310,9 @@ the normal slicing behaviour of objectify elements:: >>> root = etree.XML("testtoast") - >>> print root.a + ' me' + >>> print root.a + ' me' # behaves like a string, right? test me - >>> len(root.a) + >>> len(root.a) # but there's only one 'a' element! 1 >>> [ a.tag for a in root.a ] ['a'] @@ -320,7 +327,7 @@ If you need to run sequence operations on data types, you must ask the API for the *real* Python value. The string value is always available throught the normal ElementTree ``.text`` attribute. Additionally, all data classes -provide a ``.pyval`` attribute that returns the value as Python type:: +provide a ``.pyval`` attribute that returns the value as plain Python type:: >>> root = etree.XML("test5") >>> root.a.text @@ -333,25 +340,56 @@ >>> root.b.pyval 5 +Note, however, that both attributes are read-only in objectify. If you want +to change values, just assign them directly to the attribute:: + + >>> root.a.text = "25" + Traceback (most recent call last): + ... + TypeError: attribute 'text' of 'StringElement' objects is not writable + + >>> root.a.pyval = 25 + Traceback (most recent call last): + ... + TypeError: attribute 'pyval' of 'StringElement' objects is not writable + + >>> root.a = 25 + >>> print root.a + 25 + Objectify determines data types by trial and error, unless it finds an attribute ``pytype`` in the namespace given by the URI in ``lxml.objectify.PYTYPE_NAMESPACE``, which must contain any of the following string values: int, float, str, unicode:: - >>> pytype_attr = "{%s}%s" % (objectify.PYTYPE_NAMESPACE, "pytype") + >>> root = etree.XML("""\ + ... + ... 5 + ... 5 + ... + ... 
""" % objectify.PYTYPE_NAMESPACE) - >>> el = etree.Element("test", {pytype_attr : "int"}) - >>> el.text = "5" - >>> el + 10 + >>> print root.a + 10 + 510 + >>> print root.b + 10 15 - >>> el = etree.Element("test", {pytype_attr : "str"}) - >>> el.text = "5" - >>> print el + "10" - 510 - >>> el.text = "%s - %s" - >>> print el % (1234, 12345) - 1234 - 12345 +A second way of specifying data type information uses XML Schema types as +element annotations. Objectify knows those that can be mapped to normal +Python types:: + + >>> root = etree.XML('''\ + ... + ... 5 + ... 5 + ... 5 + ... + ... ''') + >>> print objectify.dump(root) + root = None [ObjectifiedElement] + d = 5.0 [FloatElement] + l = 5L [LongElement] + s = '5' [StringElement] Defining additional data classes Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Jul 27 17:20:08 2006 @@ -131,9 +131,10 @@ """ cdef _Element element # properties are looked up /after/ __setattr__, so we must emulate them - if tag == 'text': - cetree.setNodeText(self._c_node, value) - return + if tag == 'text' or tag == 'pyval': + # read-only ! 
+ raise TypeError, "attribute '%s' of '%s' objects is not writable"% \ + (tag, type(self).__name__) elif tag == 'tail': cetree.setTailText(self._c_node, value) return @@ -156,7 +157,7 @@ self._doc, (<_Element>value)._c_node) element.tag = _buildChildTag(self, tag) else: - element = self.makeelement(tag) + element = self.makeelement( _buildChildTag(self, tag) ) _setElementValue(element, value) try: Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Thu Jul 27 17:20:08 2006 @@ -77,22 +77,34 @@ self.assertEquals([root.c1.c2[0], root.c1.c2[1]], list(iter((root.c1.c2)))) + def test_child_set_ro(self): + root = self.etree.XML(xml_str) + self.assertRaises(TypeError, setattr, root.c1.c2, 'text', "test") + self.assertRaises(TypeError, setattr, root.c1.c2, 'pyval', "test") + def test_setslice(self): Element = self.etree.Element SubElement = self.etree.SubElement root = Element("root") - c1 = SubElement(root, "c") - c1.text = "c1" - c2 = SubElement(root, "c") - c2.text = "c2" + root.c = ["c1", "c2"] + + c1 = root.c[0] + c2 = root.c[1] self.assertEquals([c1,c2], list(root.c)) + self.assertEquals(["c1", "c2"], + [ c.text for c in root.c ]) - el = Element("test") - el.text = "test" - root.c = [ el, el ] + root2 = Element("root2") + root2.el = [ "test", "test" ] + self.assertEquals(["test", "test"], + [ el.text for el in root2.el ]) + + root.c = [ root2.el, root2.el ] self.assertEquals(["test", "test"], [ c.text for c in root.c ]) + self.assertEquals(["test", "test"], + [ el.text for el in root2.el ]) root.c[:] = [ c1, c2, c2, c1 ] self.assertEquals(["c1", "c2", "c2", "c1"], @@ -139,28 +151,28 @@ Element = self.etree.Element SubElement = self.etree.SubElement root = Element("{objectified}root") - SubElement(root, "{objectified}none").text = 'true' + root.none = 'true' 
self.assert_(isinstance(root.none, objectify.BoolElement)) def test_type_str(self): Element = self.etree.Element SubElement = self.etree.SubElement root = Element("{objectified}root") - SubElement(root, "{objectified}none").text = "test" + root.none = "test" self.assert_(isinstance(root.none, objectify.StringElement)) def test_type_int(self): Element = self.etree.Element SubElement = self.etree.SubElement root = Element("{objectified}root") - SubElement(root, "{objectified}none").text = "5" + root.none = 5 self.assert_(isinstance(root.none, objectify.IntElement)) def test_type_float(self): Element = self.etree.Element SubElement = self.etree.SubElement root = Element("{objectified}root") - SubElement(root, "{objectified}none").text = "5.5" + root.none = 5.5 self.assert_(isinstance(root.none, objectify.FloatElement)) def test_schema_types(self): From scoder at codespeak.net Thu Jul 27 17:54:18 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 27 Jul 2006 17:54:18 +0200 (CEST) Subject: [Lxml-checkins] r30649 - in lxml/branch/capi: doc src/lxml src/lxml/tests Message-ID: <20060727155418.8A0EE100F5@code0.codespeak.net> Author: scoder Date: Thu Jul 27 17:54:15 2006 New Revision: 30649 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: doc updates, NumberElement._setValueParser() method for subclasses, renamed typedef() to annotate(), some cleanup Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Thu Jul 27 17:54:15 2006 @@ -402,8 +402,12 @@ .. _`namespace classes API`: namespace_extensions.html Data classes can either inherit from ``ObjectifiedElement`` directly or from -one of the specialised classes like ``NumberElement`` or ``BoolElement``. -Their registration uses the ``PyType`` class. 
+one of the specialised classes like ``NumberElement`` or ``BoolElement``. The +numeric types require an initial call to ``self._setValueParser(function)`` to +set the type conversion function (string -> Python type). This call should be +placed into the element ``_init()`` method. + +The registration of data classes uses the ``PyType`` class:: >>> class ChristmasDate(objectify.ObjectifiedElement): ... def callSanta(self): Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Thu Jul 27 17:54:15 2006 @@ -383,6 +383,9 @@ cdef class NumberElement(ObjectifiedElement): cdef object _type + def _setValueParser(self, function): + "Set the function that parses the Python value from a string." + self._type = function cdef _value(self): return self._type(textOf(self._c_node)) @@ -801,9 +804,7 @@ return NoneElement # check for Python type hint - value = cetree.attributeValueFromNsName( - c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) - + value = _getPytypeAttribute(c_node) if value is not None: dict_result = python.PyDict_GetItem(_PYTYPE_DICT, value) if dict_result is not NULL: @@ -828,8 +829,9 @@ # default to string element class if type attribute is not exploitable return _StringElement -def typedef(element_or_tree, ignore_old=True): - """Recursively creates pytype attributes on the elements of an XML tree. +def annotate(element_or_tree, ignore_old=True): + """Recursively annotates the elements of an XML tree with 'pytype' + attributes. If the second argument is True (the default), current attributes will be ignored and replaced.
Otherwise, they will be checked and only replaced @@ -839,7 +841,6 @@ cdef _Document doc cdef int ignore cdef tree.xmlNode* c_node - cdef tree.xmlAttr* c_attr cdef tree.xmlNs* c_ns cdef python.PyObject* dict_result element = cetree.rootNodeOrRaise(element_or_tree) @@ -854,8 +855,7 @@ value = None if not ignore: # check that old value is valid - old_value = cetree.attributeValueFromNsName( - c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) + old_value = _getPytypeAttribute(c_node) if old_value is not None: pytype = _PYTYPE_DICT.get(old_value) if pytype is not None: @@ -901,10 +901,8 @@ if pytype is None: # delete attribute if it exists - c_attr = tree.xmlHasNsProp(c_node, _PYTYPE_NAMESPACE, - _PYTYPE_ATTRIBUTE_NAME) - if c_attr is not NULL: - tree.xmlRemoveProp(c_attr) + cetree.delAttributeFromNsName( + c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) else: # update or create attribute c_ns = cetree.findOrBuildNodeNs(doc, c_node, _PYTYPE_NAMESPACE) @@ -912,6 +910,11 @@ _cstr(pytype.name)) tree.END_FOR_EACH_ELEMENT_FROM(c_node) +cdef _getPytypeAttribute(tree.xmlNode* c_element): + return cetree.attributeValueFromNsName( + c_element, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) + + ################################################################################ # Module setup Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Thu Jul 27 17:54:15 2006 @@ -249,7 +249,7 @@ self.assert_(root.b[0] < 5) self.assert_(5 > root.b[0]) - def test_typedef(self): + def test_type_annotation(self): XML = self.etree.XML root = XML(u'''\ @@ -263,7 +263,7 @@ 5 ''') - objectify.typedef(root) + objectify.annotate(root) child_types = [ c.get(objectify.PYTYPE_ATTRIBUTE) for c in root.iterchildren() ] From scoder at codespeak.net Sat Jul 29 08:46:15 2006 From: scoder at codespeak.net 
(scoder at codespeak.net) Date: Sat, 29 Jul 2006 08:46:15 +0200 (CEST) Subject: [Lxml-checkins] r30715 - in lxml/branch/capi: doc src/lxml src/lxml/tests Message-ID: <20060729064615.0132010068@code0.codespeak.net> Author: scoder Date: Sat Jul 29 08:46:07 2006 New Revision: 30715 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: support changing pytype attribute name/namespace through objectify.setPytypeAttribute(tag), some cleanup, only leave objectify.PYTYPE_ATTRIBUTE as public module attribute Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Sat Jul 29 08:46:07 2006 @@ -358,21 +358,27 @@ 25 Objectify determines data types by trial and error, unless it finds an -attribute ``pytype`` in the namespace given by the URI in -``lxml.objectify.PYTYPE_NAMESPACE``, which must contain any of the following -string values: int, float, str, unicode:: +attribute named ``lxml.objectify.PYTYPE_ATTRIBUTE``, which must contain any of +the following string values: int, long, float, str, unicode, none:: + + >>> print objectify.PYTYPE_ATTRIBUTE + {http://codespeak.net/lxml/objectify/pytype}pytype + >>> ns, name = objectify.PYTYPE_ATTRIBUTE[1:].split('}') >>> root = etree.XML("""\ ... ... 5 ... 5 + ... ... - ... """ % objectify.PYTYPE_NAMESPACE) + ... """ % ns) >>> print root.a + 10 510 >>> print root.b + 10 15 + >>> print root.c + None A second way of specifying data type information uses XML Schema types as element annotations. 
Objectify knows those that can be mapped to normal Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Sat Jul 29 08:46:07 2006 @@ -45,18 +45,39 @@ from itertools import islice -# namespace for "pytype" hint attribute -PYTYPE_NAMESPACE = "http://codespeak.net/lxml/objectify/pytype" +# namespace/name for "pytype" hint attribute +cdef object PYTYPE_NAMESPACE cdef char* _PYTYPE_NAMESPACE -_PYTYPE_NAMESPACE = _cstr(PYTYPE_NAMESPACE) -PYTYPE_ATTRIBUTE_NAME = "pytype" +cdef object PYTYPE_ATTRIBUTE_NAME cdef char* _PYTYPE_ATTRIBUTE_NAME -_PYTYPE_ATTRIBUTE_NAME = _cstr(PYTYPE_ATTRIBUTE_NAME) -PYTYPE_ATTRIBUTE = cetree.namespacedNameFromNsName( - _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) +PYTYPE_ATTRIBUTE = None +def setPytypeAttribute(attribute_tag=None): + """Changes name and namespace of the XML attribute that holds Python type + information. + + Reset by calling without argument. 
+ + Default: {http://codespeak.net/lxml/objectify/pytype}pytype + """ + global PYTYPE_ATTRIBUTE, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME + global PYTYPE_NAMESPACE, PYTYPE_ATTRIBUTE_NAME + if attribute_tag is None: + PYTYPE_NAMESPACE = "http://codespeak.net/lxml/objectify/pytype" + PYTYPE_ATTRIBUTE_NAME = "pytype" + else: + PYTYPE_NAMESPACE, PYTYPE_ATTRIBUTE_NAME = cetree.getNsTag(attribute_tag) + _PYTYPE_NAMESPACE = _cstr(PYTYPE_NAMESPACE) + _PYTYPE_ATTRIBUTE_NAME = _cstr(PYTYPE_ATTRIBUTE_NAME) + PYTYPE_ATTRIBUTE = cetree.namespacedNameFromNsName( + _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) + +setPytypeAttribute() + + +# namespace for XML Schema instance cdef object XML_SCHEMA_INSTANCE_NS XML_SCHEMA_INSTANCE_NS = "http://www.w3.org/2001/XMLSchema-instance" cdef char* _XML_SCHEMA_INSTANCE_NS @@ -65,6 +86,8 @@ cdef object XML_SCHEMA_INSTANCE_NIL_ATTR XML_SCHEMA_INSTANCE_NIL_ATTR = "{%s}nil" % XML_SCHEMA_INSTANCE_NS + +# element class for the main API cdef class ObjectifiedElement(ElementBase): """Element class with an Amara-like API. 
@@ -804,7 +827,8 @@ return NoneElement # check for Python type hint - value = _getPytypeAttribute(c_node) + value = cetree.attributeValueFromNsName( + c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) if value is not None: dict_result = python.PyDict_GetItem(_PYTYPE_DICT, value) if dict_result is not NULL: @@ -855,7 +879,8 @@ value = None if not ignore: # check that old value is valid - old_value = _getPytypeAttribute(c_node) + old_value = cetree.attributeValueFromNsName( + c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) if old_value is not None: pytype = _PYTYPE_DICT.get(old_value) if pytype is not None: @@ -910,11 +935,6 @@ _cstr(pytype.name)) tree.END_FOR_EACH_ELEMENT_FROM(c_node) -cdef _getPytypeAttribute(tree.xmlNode* c_element): - return cetree.attributeValueFromNsName( - c_element, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) - - ################################################################################ # Module setup Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Sat Jul 29 08:46:07 2006 @@ -36,6 +36,7 @@ def tearDown(self): self.etree.Namespace("otherNs").clear() + objectify.setPytypeAttribute() objectify.unregister() def test_str(self): @@ -276,6 +277,49 @@ self.assertEquals(None, child_types[6]) self.assertEquals("float", child_types[7]) + def test_change_pytype_attribute(self): + XML = self.etree.XML + + xml = u'''\ + + 5 + test + 1.1 + \uF8D2 + true + + + 5 + + ''' + + pytype_ns, pytype_name = objectify.PYTYPE_ATTRIBUTE[1:].split('}') + objectify.setPytypeAttribute("{TEST}test") + + root = XML(xml) + objectify.annotate(root) + + attribs = root.xpath("//@py:%s" % pytype_name, {"py" : pytype_ns}) + self.assertEquals(0, len(attribs)) + attribs = root.xpath("//@py:test", {"py" : "TEST"}) + self.assertEquals(7, len(attribs)) + + 
objectify.setPytypeAttribute() + pytype_ns, pytype_name = objectify.PYTYPE_ATTRIBUTE[1:].split('}') + + self.assertNotEqual("test", pytype_ns.lower()) + self.assertNotEqual("test", pytype_name.lower()) + + root = XML(xml) + attribs = root.xpath("//@py:%s" % pytype_name, {"py" : pytype_ns}) + self.assertEquals(0, len(attribs)) + + objectify.annotate(root) + attribs = root.xpath("//@py:%s" % pytype_name, {"py" : pytype_ns}) + self.assertEquals(7, len(attribs)) + + + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ObjectifyTestCase)]) From scoder at codespeak.net Sat Jul 29 09:02:18 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 29 Jul 2006 09:02:18 +0200 (CEST) Subject: [Lxml-checkins] r30716 - in lxml/branch/capi: doc src/lxml Message-ID: <20060729070218.2072910072@code0.codespeak.net> Author: scoder Date: Sat Jul 29 09:02:16 2006 New Revision: 30716 Modified: lxml/branch/capi/doc/objectify.txt lxml/branch/capi/src/lxml/objectify.pyx Log: doc updates about annotate() Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Sat Jul 29 09:02:16 2006 @@ -380,6 +380,26 @@ >>> print root.c None +Note that you can change the name and namespace used for this attribute +through the ``setPytypeAttribute(tag)`` module function, in case your +application ever needs to.
There is also a utility function ``annotate()`` +that recursively generates this attribute for the elements of a tree:: + + >>> root = etree.XML("test5") + >>> print objectify.dump(root) + root = None [ObjectifiedElement] + a = 'test' [StringElement] + b = 5 [IntElement] + + >>> objectify.annotate(root) + + >>> print objectify.dump(root) + root = None [ObjectifiedElement] + a = 'test' [StringElement] + * {http://codespeak.net/lxml/objectify/pytype}pytype = 'str' + b = 5 [IntElement] + * {http://codespeak.net/lxml/objectify/pytype}pytype = 'int' + A second way of specifying data type information uses XML Schema types as element annotations. Objectify knows those that can be mapped to normal Python types:: Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Sat Jul 29 09:02:16 2006 @@ -857,9 +857,9 @@ """Recursively annotates the elements of an XML tree with 'pytype' attributes. - If the second argument is True (the default), current attributes will be - ignored and replaced. Otherwise, they will be checked and only replaced - if they no longer fit the current text value. + If the 'ignore_old' keyword argument is True (the default), current + attributes will be ignored and replaced. Otherwise, they will be checked + and only replaced if they no longer fit the current text value. 
""" cdef _Element element cdef _Document doc From scoder at codespeak.net Sat Jul 29 11:44:03 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 29 Jul 2006 11:44:03 +0200 (CEST) Subject: [Lxml-checkins] r30719 - lxml/branch/capi/doc Message-ID: <20060729094403.887501007A@code0.codespeak.net> Author: scoder Date: Sat Jul 29 11:44:01 2006 New Revision: 30719 Modified: lxml/branch/capi/doc/objectify.txt Log: small doc change Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Sat Jul 29 11:44:01 2006 @@ -441,7 +441,7 @@ >>> def checkChristmasDate(date_string): ... if not date_string.startswith('24.12.'): - ... raise ValueError + ... raise ValueError # or TypeError >>> xmas_type = objectify.PyType('date', checkChristmasDate, ChristmasDate) From scoder at codespeak.net Sat Jul 29 11:49:18 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 29 Jul 2006 11:49:18 +0200 (CEST) Subject: [Lxml-checkins] r30720 - lxml/branch/capi/src/lxml Message-ID: <20060729094918.B00BD1007A@code0.codespeak.net> Author: scoder Date: Sat Jul 29 11:49:16 2006 New Revision: 30720 Modified: lxml/branch/capi/src/lxml/classlookup.pyx lxml/branch/capi/src/lxml/etree.pyx lxml/branch/capi/src/lxml/etreepublic.pxd lxml/branch/capi/src/lxml/parser.pxi lxml/branch/capi/src/lxml/public-api.pxi Log: merged classlookup.ElementClassLookup back into etree.pyx as FallbackElementClassLookup, public API call callLookupFallback() for it Modified: lxml/branch/capi/src/lxml/classlookup.pyx ============================================================================== --- lxml/branch/capi/src/lxml/classlookup.pyx (original) +++ lxml/branch/capi/src/lxml/classlookup.pyx Sat Jul 29 11:49:16 2006 @@ -6,7 +6,8 @@ """ from python cimport isinstance, getattr, _cstr, Py_ssize_t -from etreepublic cimport _Document, 
_ElementClassLookup +from etreepublic cimport _Document +from etreepublic cimport ElementClassLookup, FallbackElementClassLookup cimport etreepublic as cetree cimport python cimport tree @@ -16,41 +17,21 @@ # initialize C-API of lxml.etree cetree.import_etree(etree) -cdef class ElementClassLookup(_ElementClassLookup): - """Superclass of Element class lookups with additional fallback. - """ - cdef readonly _ElementClassLookup fallback - cdef object (*_fallback_function)(object, _Document, tree.xmlNode*) - def __init__(self, _ElementClassLookup fallback=None): - self._lookup_function = NULL # use default lookup - if fallback is None: - fallback = ElementDefaultClassLookup() - self.setFallback(fallback) - - def setFallback(self, _ElementClassLookup lookup not None): - """Sets the fallback scheme for this lookup method. - """ - self.fallback = lookup - self._fallback_function = lookup._lookup_function - - cdef object _callFallback(self, doc, tree.xmlNode* c_node): - return self._fallback_function(self.fallback, doc, c_node) - -cdef class ElementNamespaceClassLookup(_ElementClassLookup): +cdef class ElementNamespaceClassLookup(ElementClassLookup): """Looks up Element class in the Namespace registry. """ # uses default lookup -cdef class ElementDefaultClassLookup(_ElementClassLookup): +cdef class ElementDefaultClassLookup(ElementClassLookup): """Always returns the default Element class. """ def __init__(self): self._lookup_function = cetree.lookupDefaultElementClass -cdef class AttributeBasedElementClassLookup(ElementClassLookup): +cdef class AttributeBasedElementClassLookup(FallbackElementClassLookup): """Checks an attribute of an Element and looks up the value in a class dictionary. 
@@ -66,7 +47,7 @@ cdef char* _c_ns cdef char* _c_name def __init__(self, attribute_name, class_mapping, - _ElementClassLookup fallback=None): + ElementClassLookup fallback=None): self._pytag = cetree.getNsTag(attribute_name) ns, name = self._pytag if ns is None: @@ -76,7 +57,7 @@ self._c_name = _cstr(name) self._class_mapping = dict(class_mapping) - ElementClassLookup.__init__(self, fallback) + FallbackElementClassLookup.__init__(self, fallback) self._lookup_function = _attribute_lookup cdef object _attribute_lookup(state, _Document doc, tree.xmlNode* c_node): @@ -90,29 +71,29 @@ dict_result = python.PyDict_GetItem(lookup._class_mapping, value) if dict_result is not NULL: return dict_result - return lookup._callFallback(doc, c_node) + return cetree.callLookupFallback(lookup, doc, c_node) -cdef class ParserBasedElementClassLookup(ElementClassLookup): +cdef class ParserBasedElementClassLookup(FallbackElementClassLookup): """Element class lookup based on the XML parser. """ - def __init__(self, _ElementClassLookup fallback=None): - ElementClassLookup.__init__(self, fallback) + def __init__(self, ElementClassLookup fallback=None): + FallbackElementClassLookup.__init__(self, fallback) self._lookup_function = _parser_lookup cdef object _parser_lookup(state, _Document doc, tree.xmlNode* c_node): cdef ElementClassLookup lookup - cdef _ElementClassLookup parser_lookup + cdef ElementClassLookup parser_lookup lookup = state if c_node.type == tree.XML_ELEMENT_NODE: parser_lookup = cetree.getParserElementLookupFromDocument(doc) if parser_lookup is not None: return parser_lookup._lookup_function(parser_lookup, doc, c_node) - return lookup._callFallback(doc, c_node) + return cetree.callLookupFallback(lookup, doc, c_node) -cdef class CustomElementClassLookup(ElementClassLookup): +cdef class CustomElementClassLookup(FallbackElementClassLookup): """Element class lookup based on a subclass method. 
You can inherit from this class and override the method @@ -127,8 +108,8 @@ If you return None from this method, the fallback will be called. """ - def __init__(self, _ElementClassLookup fallback=None): - ElementClassLookup.__init__(self, fallback) + def __init__(self, ElementClassLookup fallback=None): + FallbackElementClassLookup.__init__(self, fallback) self._lookup_function = _custom_lookup def lookup(self, type, doc, namespace, name): @@ -159,4 +140,4 @@ cls = lookup.lookup(element_type, doc, ns, name) if cls is not None: return cls - return lookup._callFallback(doc, c_node) + return cetree.callLookupFallback(lookup, doc, c_node) Modified: lxml/branch/capi/src/lxml/etree.pyx ============================================================================== --- lxml/branch/capi/src/lxml/etree.pyx (original) +++ lxml/branch/capi/src/lxml/etree.pyx Sat Jul 29 11:49:16 2006 @@ -1742,7 +1742,7 @@ ctypedef object (*_element_class_lookup_function)(object, _Document, xmlNode*) # class to store element class lookup functions -cdef public class _ElementClassLookup [ type LxmlElementClassLookupType, +cdef public class ElementClassLookup [ type LxmlElementClassLookupType, object LxmlElementClassLookup ]: """Superclass of Element class lookups. """ @@ -1750,6 +1750,29 @@ def __init__(self): self._lookup_function = NULL # use default lookup +cdef public class FallbackElementClassLookup(ElementClassLookup) \ + [ type LxmlFallbackElementClassLookupType, + object LxmlFallbackElementClassLookup ]: + """Superclass of Element class lookups with additional fallback. 
+ """ + cdef readonly ElementClassLookup fallback + cdef _element_class_lookup_function _fallback_function + def __init__(self, ElementClassLookup fallback=None): + self._lookup_function = NULL # use default lookup + if fallback is not None: + self.setFallback(fallback) + else: + self._fallback_function = DEFAULT_ELEMENT_CLASS_LOOKUP + + def setFallback(self, ElementClassLookup lookup not None): + """Sets the fallback scheme for this lookup method. + """ + self.fallback = lookup + self._fallback_function = lookup._lookup_function + + cdef object _callFallback(self, doc, tree.xmlNode* c_node): + return self._fallback_function(self.fallback, doc, c_node) + # default: Namespace classes cdef _element_class_lookup_function DEFAULT_ELEMENT_CLASS_LOOKUP DEFAULT_ELEMENT_CLASS_LOOKUP = _find_nselement_class @@ -1769,7 +1792,7 @@ LOOKUP_ELEMENT_CLASS = function ELEMENT_CLASS_LOOKUP_STATE = state -def setElementClassLookup(_ElementClassLookup lookup = None): +def setElementClassLookup(ElementClassLookup lookup = None): if lookup is None or lookup._lookup_function is NULL: _setElementClassLookupFunction(NULL, None) else: Modified: lxml/branch/capi/src/lxml/etreepublic.pxd ============================================================================== --- lxml/branch/capi/src/lxml/etreepublic.pxd (original) +++ lxml/branch/capi/src/lxml/etreepublic.pxd Sat Jul 29 11:49:16 2006 @@ -41,9 +41,14 @@ cdef _Document _doc cdef _Element _element - cdef class lxml.etree._ElementClassLookup [ object LxmlElementClassLookup ]: + cdef class lxml.etree.ElementClassLookup [ object LxmlElementClassLookup ]: cdef object (*_lookup_function)(object, _Document, tree.xmlNode*) + cdef class lxml.etree.FallbackElementClassLookup(ElementClassLookup) \ + [ object LxmlFallbackElementClassLookup ]: + cdef ElementClassLookup fallback + cdef object (*_fallback_function)(object, _Document, tree.xmlNode*) + ########################################################################## # creating Element objects 
@@ -74,7 +79,11 @@ tree.xmlNode* c_node) # return the element class lookup registered for the parser of this document - cdef object getParserElementLookupFromDocument(_Document doc) + cdef ElementClassLookup getParserElementLookupFromDocument(_Document doc) + + # call the fallback lookup function of an FallbackElementClassLookup + cdef object callLookupFallback(FallbackElementClassLookup lookup, + _Document doc, tree.xmlNode* c_node) ########################################################################## # XML attribute access Modified: lxml/branch/capi/src/lxml/parser.pxi ============================================================================== --- lxml/branch/capi/src/lxml/parser.pxi (original) +++ lxml/branch/capi/src/lxml/parser.pxi Sat Jul 29 11:49:16 2006 @@ -334,7 +334,7 @@ cdef _ResolverContext _context cdef LxmlParserType _parser_type cdef xmlParserCtxt* _parser_ctxt - cdef _ElementClassLookup _class_lookup + cdef ElementClassLookup _class_lookup cdef object _lockParser cdef object _unlockParser @@ -381,7 +381,7 @@ def __dummy(self): pass - def setElementClassLookup(self, _ElementClassLookup lookup not None): + def setElementClassLookup(self, ElementClassLookup lookup not None): self._class_lookup = lookup cdef _BaseParser _copy(self): Modified: lxml/branch/capi/src/lxml/public-api.pxi ============================================================================== --- lxml/branch/capi/src/lxml/public-api.pxi (original) +++ lxml/branch/capi/src/lxml/public-api.pxi Sat Jul 29 11:49:16 2006 @@ -31,9 +31,13 @@ cdef public object lookupNamespaceElementClass(state, doc, xmlNode* c_node): return _find_nselement_class(state, doc, c_node) -cdef public object getParserElementLookupFromDocument(_Document doc): +cdef public ElementClassLookup getParserElementLookupFromDocument(_Document doc): return doc._parser._class_lookup +cdef public object callLookupFallback(FallbackElementClassLookup lookup, + _Document doc, xmlNode* c_node): + return 
lookup._callFallback(doc, c_node) + cdef public int tagMatches(xmlNode* c_node, char* c_href, char* c_name): if c_node is NULL: return -1 From scoder at codespeak.net Sat Jul 29 12:00:56 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 29 Jul 2006 12:00:56 +0200 (CEST) Subject: [Lxml-checkins] r30721 - lxml/pyrex/Pyrex/PC Message-ID: <20060729100056.D215C10074@code0.codespeak.net> Author: scoder Date: Sat Jul 29 12:00:55 2006 New Revision: 30721 Added: lxml/pyrex/Pyrex/PC/LinuxSystem.py lxml/pyrex/Pyrex/PC/__init__.py Log: added missing Pyrex/PC/LinuxSystem.py Added: lxml/pyrex/Pyrex/PC/LinuxSystem.py ============================================================================== --- (empty file) +++ lxml/pyrex/Pyrex/PC/LinuxSystem.py Sat Jul 29 12:00:55 2006 @@ -0,0 +1,34 @@ +import os, sys +from Pyrex.Utils import replace_suffix +from Pyrex.Compiler.Errors import PyrexError + +class CCompilerError(PyrexError): + pass + +def call(args): + return os.spawnvp(os.P_WAIT, args[0], args) + +def c_compile(c_file, verbose_flag=False, cplus=False, obj_suffix=".o"): + out_file = replace_suffix(c_file, obj_suffix) + version = sys.version_info[:2] + args = [cplus and 'c++' or 'cc', + '-I' + sys.exec_prefix + '/include/python%d.%d/' % version, + '-fPIC', '-c', c_file, '-o', out_file] + if verbose_flag: + args += ['-v', '-Wall'] + #print 'Calling %r ...' % str.join(' ', args) + status = call(args) + if status: + raise CCompilerError("C compiler returned status %s" % status) + return out_file + +def c_link(obj_file, verbose_flag=False, extra_objects=[], cplus=False): + out_file = replace_suffix(obj_file, ".so") + args = ['ld', '-shared', obj_file, '-o', out_file] + if verbose_flag: + args += ['-v'] + #print 'Calling %r ...' 
% str.join(' ', args) + status = call(args) + if status: + raise CCompilerError("Linker returned status %s" % status) + return out_file Added: lxml/pyrex/Pyrex/PC/__init__.py ============================================================================== From scoder at codespeak.net Sat Jul 29 12:14:26 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 29 Jul 2006 12:14:26 +0200 (CEST) Subject: [Lxml-checkins] r30722 - in lxml/branch/capi/src/lxml: . tests Message-ID: <20060729101426.F04361007A@code0.codespeak.net> Author: scoder Date: Sat Jul 29 12:14:25 2006 New Revision: 30722 Modified: lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/python.pxd lxml/branch/capi/src/lxml/tests/test_objectify.py Log: no longer changes default element class in objectify.register(), defaults to ObjectifiedElement instead of StringElement, support for dir()/vars() in ObjectifiedElement, let ObjectifiedElement return self for .pyval Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Sat Jul 29 12:14:25 2006 @@ -1,10 +1,11 @@ from etreepublic cimport _Document, _Element, ElementBase -from etreepublic cimport _ElementIterator, _ElementClassLookup +from etreepublic cimport _ElementIterator, ElementClassLookup from etreepublic cimport elementFactory, import_etree, textOf from python cimport isinstance, issubclass, callable, getattr, _cstr, Py_ssize_t cimport etreepublic as cetree cimport python cimport tree +cimport cstd cdef object etree from lxml import etree @@ -40,6 +41,11 @@ cdef object list list = __builtin__.list +cdef object set +try: + set = __builtin__.set +except AttributeError: + from sets import Set as set cdef object islice from itertools import islice @@ -115,6 +121,33 @@ def __get__(self): return textOf(self._c_node) + property pyval: + def __get__(self): + 
return self # i.e. the 'list' of elements + + property __dict__: + """A fake implementation for __dict__ to support dir() etc. + + Note that this only considers the first child with a given name. + """ + def __get__(self): + cdef char* c_ns + cdef char* c_child_ns + cdef _Element child + c_ns = tree._getNs(self._c_node) + if c_ns is NULL: + tag = None + else: + tag = "{%s}*" % c_ns + children = {} + for child in etree.ElementChildIterator(self, tag=tag): + if c_ns is NULL and tree._getNs(child._c_node) is not NULL: + continue + name = child._c_node.name + if not python.PyDict_Contains(children, name): + python.PyDict_SetItem(children, name, child) + return children + def __len__(self): """Count self and siblings with the same tag. """ @@ -734,8 +767,8 @@ cdef object _guessElementClass(tree.xmlNode* c_node): value = textOf(c_node) if value is None: - # default to string class - return StringElement + # default to ObjectifiedElement class + return ObjectifiedElement if value == '': return StringElement errors = (ValueError, TypeError) @@ -748,6 +781,33 @@ return StringElement +def getRegisteredTypes(): + """Returns a list of the currently registered PyType objects. + + To add a new type, retrieve this list and call unregister() for all + entries. Then add the new type at a suitable position (possibly replacing + an existing one) and call register() for all entries. + + This is necessary if the new type interferes with the type check functions + of existing ones (normally only int/float/bool) and must the tried before + other types. To add a type that is not yet parsable by the current type + check functions, you can simply register() it, which will append it to the + end of the type list. 
+ """ + types = [] + known = set() + for check, pytype in _TYPE_CHECKS: + name = pytype.name + if name not in known: + known.add(name) + types.append(pytype) + for pytype in _PYTYPE_DICT.itervalues(): + name = pytype.name + if name not in known: + known.add(name) + types.append(pytype) + return types + ################################################################################ # Recursive element dumping @@ -770,6 +830,9 @@ indentstr = " " * indent if hasattr(element, "pyval"): value = element.pyval + if isinstance(value, ObjectifiedElement): + # ObjectifiedElement returns itself for pyval + value = None else: value = textOf(element._c_node) if value and not value.strip(): @@ -794,7 +857,7 @@ ################################################################################ # Element class lookup -cdef class ObjectifyElementClassLookup(_ElementClassLookup): +cdef class ObjectifyElementClassLookup(ElementClassLookup): """Element class lookup method that uses the objectify classes. The constructor accepts a keyword argument 'default_to_nsclasses'. 
You can @@ -811,10 +874,11 @@ cdef python.PyObject* dict_result if state is None or \ not isinstance(state, ObjectifyElementClassLookup) or \ - (state._default_to_nsclasses): + (state)._default_to_nsclasses: # default to namespace specific classes nsclass = cetree.lookupNamespaceElementClass(state, doc, c_node) - if nsclass is not ObjectifiedElement: + default = cetree.lookupDefaultElementClass(state, doc, c_node) + if nsclass is not default: return nsclass # if element has children => no data class @@ -939,9 +1003,9 @@ # Module setup def register(): - etree.setDefaultElementClass(ObjectifiedElement) + #etree.setDefaultElementClass(ObjectifiedElement) cetree.setElementClassLookupFunction(_lookupElementClass, None) def unregister(): - etree.setDefaultElementClass() + #etree.setDefaultElementClass() cetree.setElementClassLookupFunction(NULL, None) Modified: lxml/branch/capi/src/lxml/python.pxd ============================================================================== --- lxml/branch/capi/src/lxml/python.pxd (original) +++ lxml/branch/capi/src/lxml/python.pxd Sat Jul 29 12:14:25 2006 @@ -45,6 +45,7 @@ cdef void PyDict_Clear(object d) cdef object PyDict_Copy(object d) cdef Py_ssize_t PyDict_Size(object d) + cdef int PyDict_Contains(object d, object key) cdef object PyList_AsTuple(object o) cdef object PySequence_List(object o) cdef object PySequence_Tuple(object o) Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Sat Jul 29 12:14:25 2006 @@ -43,6 +43,10 @@ root = self.etree.Element("test") self.assertEquals('', str(root)) + def test_root_pyval(self): + root = self.etree.Element("test") + self.assertEquals(root, root.pyval) + def test_child(self): root = self.etree.XML(xml_str) self.assertEquals("0", root.c1.c2.text) @@ -78,6 +82,21 @@ 
self.assertEquals([root.c1.c2[0], root.c1.c2[1]], list(iter((root.c1.c2)))) + def test_dir(self): + root = self.etree.XML(xml_str) + dir_c1 = dir(objectify.ObjectifiedElement) + ['c1'] + dir_c1.sort() + dir_c2 = dir(objectify.ObjectifiedElement) + ['c2'] + dir_c2.sort() + + self.assertEquals(dir_c1, dir(root)) + self.assertEquals(dir_c2, dir(root.c1)) + + def test_vars(self): + root = self.etree.XML(xml_str) + self.assertEquals({'c1' : root.c1}, vars(root)) + self.assertEquals({'c2' : root.c1.c2}, vars(root.c1)) + def test_child_set_ro(self): root = self.etree.XML(xml_str) self.assertRaises(TypeError, setattr, root.c1.c2, 'text', "test") @@ -136,6 +155,14 @@ self.assertEquals(3, len(root.findall(".//b"))) self.assertEquals(2, len(root.findall("b"))) + def test_build_tree(self): + root = self.etree.Element('root') + root.a = 5 + root.b = 6 + self.assert_(isinstance(root, objectify.ObjectifiedElement)) + self.assert_(isinstance(root.a, objectify.IntElement)) + self.assert_(isinstance(root.b, objectify.IntElement)) + def test_type_none(self): Element = self.etree.Element SubElement = self.etree.SubElement @@ -319,7 +346,6 @@ self.assertEquals(7, len(attribs)) - def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ObjectifyTestCase)]) From scoder at codespeak.net Sat Jul 29 12:26:53 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 29 Jul 2006 12:26:53 +0200 (CEST) Subject: [Lxml-checkins] r30723 - in lxml/branch/capi/src/lxml: . 
tests Message-ID: <20060729102653.A158A10074@code0.codespeak.net> Author: scoder Date: Sat Jul 29 12:26:52 2006 New Revision: 30723 Modified: lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: removed .pyval from ObjectifiedElement: better be able to call hatattr() to see if an element has a Python value Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Sat Jul 29 12:26:52 2006 @@ -121,10 +121,6 @@ def __get__(self): return textOf(self._c_node) - property pyval: - def __get__(self): - return self # i.e. the 'list' of elements - property __dict__: """A fake implementation for __dict__ to support dir() etc. @@ -830,9 +826,6 @@ indentstr = " " * indent if hasattr(element, "pyval"): value = element.pyval - if isinstance(value, ObjectifiedElement): - # ObjectifiedElement returns itself for pyval - value = None else: value = textOf(element._c_node) if value and not value.strip(): Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Sat Jul 29 12:26:52 2006 @@ -43,10 +43,6 @@ root = self.etree.Element("test") self.assertEquals('', str(root)) - def test_root_pyval(self): - root = self.etree.Element("test") - self.assertEquals(root, root.pyval) - def test_child(self): root = self.etree.XML(xml_str) self.assertEquals("0", root.c1.c2.text) From scoder at codespeak.net Sat Jul 29 12:40:15 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 29 Jul 2006 12:40:15 +0200 (CEST) Subject: [Lxml-checkins] r30724 - lxml/pyrex Message-ID: <20060729104015.1A4CD10074@code0.codespeak.net> Author: scoder Date: Sat Jul 29 12:40:14 2006 New Revision: 
30724 Modified: lxml/pyrex/setup.py Log: added Pyrex.PC to the list of modules in setup.py Modified: lxml/pyrex/setup.py ============================================================================== --- lxml/pyrex/setup.py (original) +++ lxml/pyrex/setup.py Sat Jul 29 12:40:14 2006 @@ -22,6 +22,7 @@ 'Pyrex.Compiler', 'Pyrex.Distutils', 'Pyrex.Mac', + 'Pyrex.PC', 'Pyrex.Plex' ], data_files=[ From scoder at codespeak.net Sat Jul 29 13:22:33 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 29 Jul 2006 13:22:33 +0200 (CEST) Subject: [Lxml-checkins] r30725 - in lxml/branch/capi/src/lxml: . tests Message-ID: <20060729112233.E477610069@code0.codespeak.net> Author: scoder Date: Sat Jul 29 13:22:31 2006 New Revision: 30725 Modified: lxml/branch/capi/src/lxml/objectify.pyx lxml/branch/capi/src/lxml/tests/test_objectify.py Log: before/after keywords in PyType.register(): allow specifying 'dependencies' between registered types Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Sat Jul 29 13:22:31 2006 @@ -688,17 +688,42 @@ self._type_check = type_check self._schema_types = [] - def register(self): + def register(self, before=None, after=None): + """Register the type. + + The additional keyword arguments 'before' and 'after' accept a + sequence of type names that must appear before/after the new type in + the type list. If any of them is not currently known, it is simply + ignored. Raises ValueError if the dependencies cannot be fulfilled. 
+ """ + if self._type_check is not None: + for item in _TYPE_CHECKS: + if item[0] is self._type_check: + _TYPE_CHECKS.remove(item) + break + entry = (self._type_check, self) + first_pos = 0 + last_pos = -1 + if before or after: + if before is None: + before = () + elif after is None: + after = () + for i, (check, pytype) in enumerate(_TYPE_CHECKS): + if last_pos == -1 and pytype.name in before: + last_pos = i + if pytype.name in after: + first_pos = i+1 + if last_pos == -1: + _TYPE_CHECKS.append(entry) + elif first_pos > last_pos: + raise ValueError, "inconsistent before/after dependencies" + else: + _TYPE_CHECKS.insert(last_pos, entry) + _PYTYPE_DICT[self.name] = self for xs_type in self._schema_types: _SCHEMA_TYPE_DICT[xs_type] = self - if self._type_check is None: - return - for item in _TYPE_CHECKS: - if item[0] is self._type_check: - _TYPE_CHECKS.remove(item) - break - _TYPE_CHECKS.append( (self._type_check, self) ) def unregister(self): if _PYTYPE_DICT.get(self.name) is self: Modified: lxml/branch/capi/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/capi/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/capi/src/lxml/tests/test_objectify.py Sat Jul 29 13:22:31 2006 @@ -341,6 +341,42 @@ attribs = root.xpath("//@py:%s" % pytype_name, {"py" : pytype_ns}) self.assertEquals(7, len(attribs)) + def test_registered_types(self): + orig_types = objectify.getRegisteredTypes() + + try: + orig_types[0].unregister() + self.assertEquals(orig_types[1:], objectify.getRegisteredTypes()) + + class NewType(objectify.ObjectifiedElement): + pass + + def checkMyType(s): + return True + + pytype = objectify.PyType("mytype", checkMyType, NewType) + pytype.register() + self.assert_(pytype in objectify.getRegisteredTypes()) + pytype.unregister() + + pytype.register(before = [objectify.getRegisteredTypes()[0].name]) + self.assertEquals(pytype, objectify.getRegisteredTypes()[0]) + 
pytype.unregister() + + pytype.register(after = [objectify.getRegisteredTypes()[0].name]) + self.assertNotEqual(pytype, objectify.getRegisteredTypes()[0]) + pytype.unregister() + + self.assertRaises(ValueError, pytype.register, + before = [objectify.getRegisteredTypes()[0].name], + after = [objectify.getRegisteredTypes()[1].name]) + + finally: + for pytype in objectify.getRegisteredTypes(): + pytype.unregister() + for pytype in orig_types: + pytype.register() + def test_suite(): suite = unittest.TestSuite() From scoder at codespeak.net Sat Jul 29 13:30:10 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 29 Jul 2006 13:30:10 +0200 (CEST) Subject: [Lxml-checkins] r30726 - lxml/branch/capi/doc Message-ID: <20060729113010.7FAAF10069@code0.codespeak.net> Author: scoder Date: Sat Jul 29 13:30:09 2006 New Revision: 30726 Modified: lxml/branch/capi/doc/objectify.txt Log: doc note on registering PyTypes with before/after keyword Modified: lxml/branch/capi/doc/objectify.txt ============================================================================== --- lxml/branch/capi/doc/objectify.txt (original) +++ lxml/branch/capi/doc/objectify.txt Sat Jul 29 13:30:09 2006 @@ -464,6 +464,14 @@ ... AttributeError: no such child: callSanta +If you need to specify dependencies between the type check functions, you can +pass a sequence of type names through the ``before`` and ``after`` keyword +arguments of the ``register()`` method. The PyType will then try to register +itself before or after the respective types, as long as they are currently +registered. Note that this only impacts the currently registered types at the +time of registration. Types that are registered later on will not care about +the dependencies of already registered types. 
+ If you provide XML Schema type information, this will override the type check function defined above:: From scoder at codespeak.net Sat Jul 29 17:50:48 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 29 Jul 2006 17:50:48 +0200 (CEST) Subject: [Lxml-checkins] r30732 - in lxml/branch/capi: . benchmark Message-ID: <20060729155048.90FF51007A@code0.codespeak.net> Author: scoder Date: Sat Jul 29 17:50:45 2006 New Revision: 30732 Added: lxml/branch/capi/benchmark/ lxml/branch/capi/benchmark/bench.py - copied, changed from r30633, lxml/branch/capi/bench.py lxml/branch/capi/benchmark/benchbase.py - copied, changed from r30633, lxml/branch/capi/bench.py Removed: lxml/branch/capi/bench.py Log: split: bench.py -> benchmark/benchbase.py + benchmark/bench.py Deleted: /lxml/branch/capi/bench.py ============================================================================== --- /lxml/branch/capi/bench.py Sat Jul 29 17:50:45 2006 +++ (empty file) @@ -1,779 +0,0 @@ -import sys, string, time, copy, gc -from itertools import * -from StringIO import StringIO - -TREE_FACTOR = 1 # increase tree size with '-l / '-L' cmd option - -_TEXT = "some ASCII text" * TREE_FACTOR -_UTEXT = u"some klingon: \F8D2" * TREE_FACTOR -_ATTRIBUTES = { - '{attr}test1' : _TEXT, - '{attr}test2' : _TEXT, - 'bla1' : _TEXT, - 'bla2' : _TEXT, - 'bla3' : _TEXT - } - -def with_attributes(*use_attributes): - "Decorator for benchmarks that use attributes" - vmap = {False : 0, True : 1} - values = [ vmap[bool(v)] for v in use_attributes ] - def set_value(function): - try: - function.ATTRIBUTES.update(values) - except AttributeError: - function.ATTRIBUTES = set(values) - return function - return set_value - -def with_text(no_text=False, text=False, utext=False): - "Decorator for benchmarks that use text" - values = [] - if no_text: - values.append(0) - if text: - values.append(1) - if utext: - values.append(2) - def set_value(function): - try: - function.TEXT.add(values) - except AttributeError: 
- function.TEXT = set(values) - return function - return set_value - -def onlylib(*libs): - "Decorator to restrict benchmarks to specific libraries" - def set_libs(function): - if libs: - function.LIBS = libs - return function - return set_libs - -def serialized(function): - "Decorator for benchmarks that require serialized XML data" - function.STRING = True - return function - -class SkippedTest(Exception): - pass - -class BenchMarkBase(object): - atoz = string.ascii_lowercase - - _LIB_NAME_MAP = { - 'etree' : 'lxe', - 'ElementTree' : 'ET', - 'cElementTree' : 'cET' - } - - SEARCH_TAG = "{cdefg}a00001" - - def __init__(self, etree): - self.etree = etree - libname = etree.__name__.split('.')[-1] - self.lib_name = self._LIB_NAME_MAP.get(libname, libname) - - if libname == 'etree': - deepcopy = copy.deepcopy - def set_property(root, fname): - setattr(self, fname, lambda : deepcopy(root)) - xml = self._serialize_tree(root) - setattr(self, fname + '_xml', lambda : xml) - else: - def set_property(root, fname): - setattr(self, fname, self.et_make_clone_factory(root)) - xml = self._serialize_tree(root) - setattr(self, fname + '_xml', lambda : xml) - - attribute_list = list(izip(count(), ({}, _ATTRIBUTES))) - text_list = list(izip(count(), (None, _TEXT, _UTEXT))) - build_name = self._tree_builder_name - - self.setup_times = [] - for tree in self._all_trees(): - times = [] - self.setup_times.append(times) - setup = getattr(self, '_setup_tree%d' % tree) - for an, attributes in attribute_list: - for tn, text in text_list: - root, t = setup(text, attributes) - times.append(t) - set_property(root, build_name(tree, tn, an)) - - def _tree_builder_name(self, tree, tn, an): - return '_root%d_T%d_A%d' % (tree, tn, an) - - def tree_builder(self, tree, tn, an, serial): - name = self._tree_builder_name(tree, tn, an) - if serial: - name += '_xml' - return getattr(self, name) - - def _serialize_tree(self, root): - return self.etree.tostring(root, 'UTF-8') - - def 
et_make_clone_factory(self, elem): - def generate_elem(append, elem, level): - var = "e" + str(level) - arg = repr(elem.tag) - if elem.attrib: - arg += ", **%r" % elem.attrib - if level == 1: - append(" e1 = Element(%s)" % arg) - else: - append(" %s = SubElement(e%d, %s)" % (var, level-1, arg)) - if elem.text: - append(" %s.text = %r" % (var, elem.text)) - if elem.tail: - append(" %s.tail = %r" % (var, elem.tail)) - for e in elem: - generate_elem(append, e, level+1) - # generate code for a function that creates a tree - output = ["def element_factory():"] - generate_elem(output.append, elem, 1) - output.append(" return e1") - # setup global function namespace - namespace = { - "Element" : self.etree.Element, - "SubElement" : self.etree.SubElement - } - # create function object - exec "\n".join(output) in namespace - return namespace["element_factory"] - - def _all_trees(self): - all_trees = [] - for name in dir(self): - if name.startswith('_setup_tree'): - all_trees.append(int(name[11:])) - return all_trees - - def _setup_tree1(self, text, attributes): - "tree with 26 2nd level and 520 * TREE_FACTOR 3rd level children" - atoz = self.atoz - SubElement = self.etree.SubElement - current_time = time.time - t = current_time() - root = self.etree.Element('{abc}rootnode') - for ch1 in atoz: - el = SubElement(root, "{bcd}"+ch1*5, attributes) - el.text = text - for ch2 in atoz: - for i in range(20 * TREE_FACTOR): - SubElement(el, "{cdefg}%s%05d" % (ch2, i)) - t = current_time() - t - return (root, t) - - def _setup_tree2(self, text, attributes): - "tree with 520 * TREE_FACTOR 2nd level and 26 3rd level children" - atoz = self.atoz - SubElement = self.etree.SubElement - current_time = time.time - t = current_time() - root = self.etree.Element('{abc}rootnode') - for ch1 in atoz: - for i in range(20 * TREE_FACTOR): - el = SubElement(root, "{bcd}"+ch1*5, attributes) - el.text = text - for ch2 in atoz: - SubElement(el, "{cdefg}%s%05d" % (ch2, i)) - t = current_time() - t - 
return (root, t) - - def _setup_tree3(self, text, attributes): - "tree of depth 8 + TREE_FACTOR with 3 children per node" - SubElement = self.etree.SubElement - current_time = time.time - t = current_time() - root = self.etree.Element('{abc}rootnode') - children = [root] - for i in range(6 + TREE_FACTOR): - tag_no = count().next - children = [ SubElement(c, "{cdefg}a%05d" % i, attributes) - for i,c in enumerate(chain(children, children, children)) ] - for child in root: - child.text = text - t = current_time() - t - return (root, t) - - def _setup_tree4(self, text, attributes): - "small tree with 26 2nd level and 2 3rd level children" - SubElement = self.etree.SubElement - current_time = time.time - t = current_time() - root = self.etree.Element('{abc}rootnode') - children = [root] - for ch1 in self.atoz: - el = SubElement(root, "{bcd}"+ch1*5, attributes) - el.text = text - SubElement(el, "{cdefg}a00001", attributes) - SubElement(el, "{cdefg}a00002", attributes) - t = current_time() - t - return (root, t) - - def benchmarks(self): - """Returns a list of all benchmarks. - - A benchmark is a tuple containing a method name and a list of tree - numbers. Trees are prepared by the setup function. 
- """ - all_trees = self._all_trees() - benchmarks = [] - for name in dir(self): - if not name.startswith('bench_'): - continue - method = getattr(self, name) - if hasattr(method, 'LIBS') and self.lib_name not in method.LIBS: - method_call = None - else: - method_call = method - if method.__doc__: - tree_sets = method.__doc__.split() - else: - tree_sets = () - if tree_sets: - tree_tuples = [ map(int, tree_set.split(',')) - for tree_set in tree_sets ] - else: - try: - function = getattr(method, 'im_func', method) - arg_count = method.func_code.co_argcount - 1 - except AttributeError: - arg_count = 1 - tree_tuples = self._permutations(all_trees, arg_count) - - serialized = getattr(method, 'STRING', False) - - for tree_tuple in tree_tuples: - for tn in sorted(getattr(method, 'TEXT', (0,))): - for an in sorted(getattr(method, 'ATTRIBUTES', (0,))): - benchmarks.append((name, method_call, tree_tuple, - tn, an, serialized)) - - return benchmarks - - def _permutations(self, seq, count): - def _permutations(prefix, remainder, count): - if count == 0: - return [ prefix[:] ] - count -= 1 - perms = [] - prefix.append(None) - for pos, el in enumerate(remainder): - new_remainder = remainder[:pos] + remainder[pos+1:] - prefix[-1] = el - perms.extend( _permutations(prefix, new_remainder, count) ) - prefix.pop() - return perms - return _permutations([], seq, count) - - -############################################################ -# Benchmarks -############################################################ - -class BenchMark(BenchMarkBase): - def bench_iter_children(self, root): - for child in root: - pass - - def bench_iter_children_reversed(self, root): - for child in reversed(root): - pass - - @with_attributes(True, False) - @with_text(text=True, utext=True) - def bench_tostring_utf8(self, root): - self.etree.tostring(root, 'UTF-8') - - @with_attributes(True, False) - @with_text(text=True, utext=True) - def bench_tostring_utf16(self, root): - self.etree.tostring(root, 'UTF-16') - 
- @with_attributes(True, False) - @with_text(text=True, utext=True) - def bench_tostring_utf8_unicode_XML(self, root): - xml = unicode(self.etree.tostring(root, 'UTF-8'), 'UTF-8') - self.etree.XML(xml) - - @with_attributes(True, False) - @with_text(text=True, utext=True) - def bench_write_utf8_parse_stringIO(self, root): - f = StringIO() - self.etree.ElementTree(root).write(f, 'UTF-8') - f.seek(0) - self.etree.parse(f) - - @with_attributes(True, False) - @with_text(text=True, utext=True) - @serialized - def bench_parse_stringIO(self, root_xml): - f = StringIO(root_xml) - self.etree.parse(f) - - @with_attributes(True, False) - @with_text(text=True, utext=True) - @serialized - def bench_XML(self, root_xml): - self.etree.XML(root_xml) - - @with_attributes(True, False) - @with_text(text=True, utext=True) - @serialized - def bench_iterparse_stringIO(self, root_xml): - f = StringIO(root_xml) - for event, element in self.etree.iterparse(f): - pass - - @with_attributes(True, False) - @with_text(text=True, utext=True) - @serialized - def bench_iterparse_stringIO_clear(self, root_xml): - f = StringIO(root_xml) - for event, element in self.etree.iterparse(f): - element.clear() - - def bench_append_from_document(self, root1, root2): - # == "1,2 2,3 1,3 3,1 3,2 2,1" # trees 1 and 2, or 2 and 3, or ... 
- for el in root2: - root1.append(el) - - def bench_insert_from_document(self, root1, root2): - for el in root2: - root1.insert(len(root1)/2, el) - - def bench_rotate_children(self, root): - # == "1 2 3" # runs on any single tree independently - for i in range(100): - el = root[0] - del root[0] - root.append(el) - - def bench_reorder(self, root): - for i in range(1,len(root)/2): - el = root[0] - del root[0] - root[-i:-i] = [ el ] - - def bench_reorder_slice(self, root): - for i in range(1,len(root)/2): - els = root[0:1] - del root[0] - root[-i:-i] = els - - def bench_clear(self, root): - root.clear() - - def bench_has_children(self, root): - for child in root: - if child and child and child and child and child: - pass - - def bench_len(self, root): - for child in root: - map(len, repeat(child, 20)) - - def bench_create_subelements(self, root): - SubElement = self.etree.SubElement - for child in root: - SubElement(child, '{test}test') - - def bench_append_elements(self, root): - Element = self.etree.Element - for child in root: - el = Element('{test}test') - child.append(el) - - def bench_makeelement(self, root): - empty_attrib = {} - for child in root: - child.makeelement('{test}test', empty_attrib) - - def bench_create_elements(self, root): - Element = self.etree.Element - for child in root: - Element('{test}test') - - def bench_replace_children_element(self, root): - Element = self.etree.Element - for child in root: - el = Element('{test}test') - child[:] = [el] - - def bench_replace_children(self, root): - Element = self.etree.Element - for child in root: - child[:] = [ child[0] ] - - def bench_remove_children(self, root): - for child in root: - root.remove(child) - - def bench_remove_children_reversed(self, root): - for child in reversed(root[:]): - root.remove(child) - - def bench_set_attributes(self, root): - for child in root: - child.set('a', 'bla') - - @with_attributes(True) - def bench_get_attributes(self, root): - for child in root: - child.get('bla1') - 
child.get('{attr}test1') - - def bench_setget_attributes(self, root): - for child in root: - child.set('a', 'bla') - for child in root: - child.get('a') - - def bench_root_getchildren(self, root): - root.getchildren() - - def bench_getchildren(self, root): - for child in root: - child.getchildren() - - def bench_get_children_slice(self, root): - for child in root: - child[:] - - def bench_get_children_slice_2x(self, root): - for child in root: - children = child[:] - child[:] - - def bench_deepcopy(self, root): - for child in root: - copy.deepcopy(child) - - def bench_deepcopy_all(self, root): - copy.deepcopy(root) - - def bench_tag(self, root): - for child in root: - child.tag - - def bench_tag_repeat(self, root): - for child in root: - for i in repeat(0, 100): - child.tag - - @with_text(utext=True, text=True, no_text=True) - def bench_text(self, root): - for child in root: - child.text - - @with_text(utext=True, text=True, no_text=True) - def bench_text_repeat(self, root): - repeat = range(500) - for child in root: - for i in repeat: - child.text - - def bench_set_text(self, root): - text = _TEXT - for child in root: - child.text = text - - def bench_set_utext(self, root): - text = _UTEXT - for child in root: - child.text = text - - @onlylib('lxe') - def bench_index(self, root): - for child in root: - root.index(child) - - @onlylib('lxe') - def bench_index_slice(self, root): - for child in root[5:100]: - root.index(child, 5, 100) - - @onlylib('lxe') - def bench_index_slice_neg(self, root): - for child in root[-100:-5]: - root.index(child, start=-100, stop=-5) - - def bench_getiterator_all(self, root): - list(root.getiterator()) - - def bench_getiterator_islice(self, root): - list(islice(root.getiterator(), 10, 110)) - - def bench_getiterator_tag(self, root): - list(islice(root.getiterator(self.SEARCH_TAG), 3, 10)) - - def bench_getiterator_tag_all(self, root): - list(root.getiterator(self.SEARCH_TAG)) - - def bench_getiterator_tag_text(self, root): - [ e.text for 
e in root.getiterator(self.SEARCH_TAG) ] - - def bench_findall(self, root): - root.findall(".//*") - - def bench_findall_tag(self, root): - root.findall(".//" + self.SEARCH_TAG) - - @onlylib('lxe') - def bench_xpath_class(self, root): - xpath = self.etree.XPath("./*[0]") - for child in root: - xpath(child) - - @onlylib('lxe') - def bench_xpath_class_repeat(self, root): - for child in root: - xpath = self.etree.XPath("./*[0]") - xpath(child) - - @onlylib('lxe') - def bench_xpath_element(self, root): - xpath = self.etree.XPathElementEvaluator(root) - for child in root: - xpath.evaluate("./*[0]") - - @onlylib('lxe') - def bench_xpath_method(self, root): - for child in root: - child.xpath("./*[0]") - - @onlylib('lxe') - def bench_xpath_extensions_old(self, root): - def return_child(_, element): - if element: - return element[0] - else: - return () - extensions = {(None, 'child') : return_child} - xpath = self.etree.XPath("child(.)", extensions=extensions) - for child in root: - xpath(child) - - @onlylib('lxe') - def bench_xslt_extensions_old(self, root): - tree = self.etree.XML("""\ - - TEST - - - - - - - - -""") - def return_child(_, elements): - return elements[0][0] - - extensions = {('testns', 'child') : return_child} - - transform = self.etree.XSLT(tree, extensions) - for i in range(10): - transform(root) - - @onlylib('lxe') - def bench_xslt_document(self, root): - transform = self.etree.XSLT(self.etree.XML("""\ - - TEST - - - - - - - - -""")) - transform(root) - - -############################################################ -# Main program -############################################################ - -if __name__ == '__main__': - import_lxml = True - callgrind_zero = False - if len(sys.argv) > 1: - try: - sys.argv.remove('-i') - # run benchmark 'inplace' - sys.path.insert(0, 'src') - except ValueError: - pass - - try: - sys.argv.remove('-nolxml') - # run without lxml - import_lxml = False - except ValueError: - pass - - try: - sys.argv.remove('-z') - # reset 
callgrind after tree setup - callgrind_zero = True - except ValueError: - pass - - try: - sys.argv.remove('-l') - # use large trees - TREE_FACTOR *= 2 - except ValueError: - pass - - try: - sys.argv.remove('-L') - # use LARGE trees - TREE_FACTOR *= 2 - except ValueError: - pass - - _etrees = [] - if import_lxml: - from lxml import etree - _etrees.append(etree) - - try: - sys.argv.remove('-fel') - except ValueError: - pass - else: - # use fast element creation in lxml.etree - from lxml.elements import classlookup - classlookup.setElementClassLookup( - classlookup.ElementDefaultClassLookup()) - - if len(sys.argv) > 1: - if '-a' in sys.argv or '-c' in sys.argv: - # 'all' or 'C-implementations' ? - try: - import cElementTree as cET - _etrees.append(cET) - except ImportError: - pass - - try: - # 'all' ? - sys.argv.remove('-a') - from elementtree import ElementTree as ET - _etrees.append(ET) - except (ValueError, ImportError): - pass - - if not _etrees: - print "No library to test. Exiting." - sys.exit(1) - - print "Preparing test suites and trees ..." 
- - benchmark_suites = map(BenchMark, _etrees) - - # sorted by name and tree tuple - benchmarks = [ sorted(b.benchmarks()) for b in benchmark_suites ] - - if len(sys.argv) > 1: - selected = [] - for name in sys.argv[1:]: - selected.append(name) - benchmarks = [ [ b for b in bs - if [ match for match in selected - if match in b[0] ] ] - for bs in benchmarks ] - - import time - def run_bench(suite, method_name, method_call, tree_set, tn, an, serial): - if method_call is None: - raise SkippedTest - - current_time = time.time - call_repeat = range(10) - - tree_builders = [ suite.tree_builder(tree, tn, an, serial) - for tree in tree_set ] - - times = [] - args = () - for i in range(3): - gc.collect() - gc.disable() - t = 0 - for i in call_repeat: - args = [ build() for build in tree_builders ] - t_one_call = current_time() - method_call(*args) - t += current_time() - t_one_call - t = 1000.0 * t / len(call_repeat) - times.append(t) - gc.enable() - del args - return times - - def build_treeset_name(trees, tn, an, serialized): - text = {0:'-', 1:'S', 2:'U'}[tn] - attr = {0:'-', 1:'A'}[an] - ser = {True:'X', False:'T'}[serialized] - return "%s%s%s T%s" % (text, attr, ser, ',T'.join(imap(str, trees))[:6]) - - - print "Running benchmark on", ', '.join(b.lib_name - for b in benchmark_suites) - print - - print "Setup times for trees in seconds:" - for b in benchmark_suites: - print "%-3s: " % b.lib_name, - for an in (0,1): - for tn in (0,1,2): - print ' %s ' % build_treeset_name((), tn, an, False)[:2], - print - for i, tree_times in enumerate(b.setup_times): - print " T%d:" % (i+1), ' '.join("%6.4f" % t for t in tree_times) - print - - if callgrind_zero: - cmd = open("callgrind.cmd", 'w') - cmd.write('Zero\n') - cmd.close() - - for bench_calls in izip(*benchmarks): - for lib, (bench, benchmark_setup) in enumerate(izip(benchmark_suites, bench_calls)): - bench_name = benchmark_setup[0] - tree_set_name = build_treeset_name(*benchmark_setup[-4:]) - print "%-3s: %-28s" % 
(bench.lib_name, bench_name[6:34]), - print "(%-10s)" % tree_set_name, - sys.stdout.flush() - - try: - result = run_bench(bench, *benchmark_setup) - except SkippedTest: - print "skipped" - except KeyboardInterrupt: - print "interrupted by user" - sys.exit(1) - except Exception, e: - print "failed: %s: %s" % (e.__class__.__name__, e) - else: - print "%9.4f msec/pass, best of (" % min(result), - for t in result: - print "%9.4f" % t, - print ")" - - if len(benchmark_suites) > 1: - print # empty line between different benchmarks Copied: lxml/branch/capi/benchmark/bench.py (from r30633, lxml/branch/capi/bench.py) ============================================================================== --- lxml/branch/capi/bench.py (original) +++ lxml/branch/capi/benchmark/bench.py Sat Jul 29 17:50:45 2006 @@ -1,280 +1,15 @@ -import sys, string, time, copy, gc +import sys, copy from itertools import * from StringIO import StringIO -TREE_FACTOR = 1 # increase tree size with '-l / '-L' cmd option - -_TEXT = "some ASCII text" * TREE_FACTOR -_UTEXT = u"some klingon: \F8D2" * TREE_FACTOR -_ATTRIBUTES = { - '{attr}test1' : _TEXT, - '{attr}test2' : _TEXT, - 'bla1' : _TEXT, - 'bla2' : _TEXT, - 'bla3' : _TEXT - } - -def with_attributes(*use_attributes): - "Decorator for benchmarks that use attributes" - vmap = {False : 0, True : 1} - values = [ vmap[bool(v)] for v in use_attributes ] - def set_value(function): - try: - function.ATTRIBUTES.update(values) - except AttributeError: - function.ATTRIBUTES = set(values) - return function - return set_value - -def with_text(no_text=False, text=False, utext=False): - "Decorator for benchmarks that use text" - values = [] - if no_text: - values.append(0) - if text: - values.append(1) - if utext: - values.append(2) - def set_value(function): - try: - function.TEXT.add(values) - except AttributeError: - function.TEXT = set(values) - return function - return set_value - -def onlylib(*libs): - "Decorator to restrict benchmarks to specific libraries" - 
def set_libs(function): - if libs: - function.LIBS = libs - return function - return set_libs - -def serialized(function): - "Decorator for benchmarks that require serialized XML data" - function.STRING = True - return function - -class SkippedTest(Exception): - pass - -class BenchMarkBase(object): - atoz = string.ascii_lowercase - - _LIB_NAME_MAP = { - 'etree' : 'lxe', - 'ElementTree' : 'ET', - 'cElementTree' : 'cET' - } - - SEARCH_TAG = "{cdefg}a00001" - - def __init__(self, etree): - self.etree = etree - libname = etree.__name__.split('.')[-1] - self.lib_name = self._LIB_NAME_MAP.get(libname, libname) - - if libname == 'etree': - deepcopy = copy.deepcopy - def set_property(root, fname): - setattr(self, fname, lambda : deepcopy(root)) - xml = self._serialize_tree(root) - setattr(self, fname + '_xml', lambda : xml) - else: - def set_property(root, fname): - setattr(self, fname, self.et_make_clone_factory(root)) - xml = self._serialize_tree(root) - setattr(self, fname + '_xml', lambda : xml) - - attribute_list = list(izip(count(), ({}, _ATTRIBUTES))) - text_list = list(izip(count(), (None, _TEXT, _UTEXT))) - build_name = self._tree_builder_name - - self.setup_times = [] - for tree in self._all_trees(): - times = [] - self.setup_times.append(times) - setup = getattr(self, '_setup_tree%d' % tree) - for an, attributes in attribute_list: - for tn, text in text_list: - root, t = setup(text, attributes) - times.append(t) - set_property(root, build_name(tree, tn, an)) - - def _tree_builder_name(self, tree, tn, an): - return '_root%d_T%d_A%d' % (tree, tn, an) - - def tree_builder(self, tree, tn, an, serial): - name = self._tree_builder_name(tree, tn, an) - if serial: - name += '_xml' - return getattr(self, name) - - def _serialize_tree(self, root): - return self.etree.tostring(root, 'UTF-8') - - def et_make_clone_factory(self, elem): - def generate_elem(append, elem, level): - var = "e" + str(level) - arg = repr(elem.tag) - if elem.attrib: - arg += ", **%r" % elem.attrib - 
if level == 1: - append(" e1 = Element(%s)" % arg) - else: - append(" %s = SubElement(e%d, %s)" % (var, level-1, arg)) - if elem.text: - append(" %s.text = %r" % (var, elem.text)) - if elem.tail: - append(" %s.tail = %r" % (var, elem.tail)) - for e in elem: - generate_elem(append, e, level+1) - # generate code for a function that creates a tree - output = ["def element_factory():"] - generate_elem(output.append, elem, 1) - output.append(" return e1") - # setup global function namespace - namespace = { - "Element" : self.etree.Element, - "SubElement" : self.etree.SubElement - } - # create function object - exec "\n".join(output) in namespace - return namespace["element_factory"] - - def _all_trees(self): - all_trees = [] - for name in dir(self): - if name.startswith('_setup_tree'): - all_trees.append(int(name[11:])) - return all_trees - - def _setup_tree1(self, text, attributes): - "tree with 26 2nd level and 520 * TREE_FACTOR 3rd level children" - atoz = self.atoz - SubElement = self.etree.SubElement - current_time = time.time - t = current_time() - root = self.etree.Element('{abc}rootnode') - for ch1 in atoz: - el = SubElement(root, "{bcd}"+ch1*5, attributes) - el.text = text - for ch2 in atoz: - for i in range(20 * TREE_FACTOR): - SubElement(el, "{cdefg}%s%05d" % (ch2, i)) - t = current_time() - t - return (root, t) - - def _setup_tree2(self, text, attributes): - "tree with 520 * TREE_FACTOR 2nd level and 26 3rd level children" - atoz = self.atoz - SubElement = self.etree.SubElement - current_time = time.time - t = current_time() - root = self.etree.Element('{abc}rootnode') - for ch1 in atoz: - for i in range(20 * TREE_FACTOR): - el = SubElement(root, "{bcd}"+ch1*5, attributes) - el.text = text - for ch2 in atoz: - SubElement(el, "{cdefg}%s%05d" % (ch2, i)) - t = current_time() - t - return (root, t) - - def _setup_tree3(self, text, attributes): - "tree of depth 8 + TREE_FACTOR with 3 children per node" - SubElement = self.etree.SubElement - current_time = 
time.time - t = current_time() - root = self.etree.Element('{abc}rootnode') - children = [root] - for i in range(6 + TREE_FACTOR): - tag_no = count().next - children = [ SubElement(c, "{cdefg}a%05d" % i, attributes) - for i,c in enumerate(chain(children, children, children)) ] - for child in root: - child.text = text - t = current_time() - t - return (root, t) - - def _setup_tree4(self, text, attributes): - "small tree with 26 2nd level and 2 3rd level children" - SubElement = self.etree.SubElement - current_time = time.time - t = current_time() - root = self.etree.Element('{abc}rootnode') - children = [root] - for ch1 in self.atoz: - el = SubElement(root, "{bcd}"+ch1*5, attributes) - el.text = text - SubElement(el, "{cdefg}a00001", attributes) - SubElement(el, "{cdefg}a00002", attributes) - t = current_time() - t - return (root, t) - - def benchmarks(self): - """Returns a list of all benchmarks. - - A benchmark is a tuple containing a method name and a list of tree - numbers. Trees are prepared by the setup function. 
- """ - all_trees = self._all_trees() - benchmarks = [] - for name in dir(self): - if not name.startswith('bench_'): - continue - method = getattr(self, name) - if hasattr(method, 'LIBS') and self.lib_name not in method.LIBS: - method_call = None - else: - method_call = method - if method.__doc__: - tree_sets = method.__doc__.split() - else: - tree_sets = () - if tree_sets: - tree_tuples = [ map(int, tree_set.split(',')) - for tree_set in tree_sets ] - else: - try: - function = getattr(method, 'im_func', method) - arg_count = method.func_code.co_argcount - 1 - except AttributeError: - arg_count = 1 - tree_tuples = self._permutations(all_trees, arg_count) - - serialized = getattr(method, 'STRING', False) - - for tree_tuple in tree_tuples: - for tn in sorted(getattr(method, 'TEXT', (0,))): - for an in sorted(getattr(method, 'ATTRIBUTES', (0,))): - benchmarks.append((name, method_call, tree_tuple, - tn, an, serialized)) - - return benchmarks - - def _permutations(self, seq, count): - def _permutations(prefix, remainder, count): - if count == 0: - return [ prefix[:] ] - count -= 1 - perms = [] - prefix.append(None) - for pos, el in enumerate(remainder): - new_remainder = remainder[:pos] + remainder[pos+1:] - prefix[-1] = el - perms.extend( _permutations(prefix, new_remainder, count) ) - prefix.pop() - return perms - return _permutations([], seq, count) - +import benchbase +from benchbase import with_attributes, with_text, onlylib, serialized ############################################################ # Benchmarks ############################################################ -class BenchMark(BenchMarkBase): +class BenchMark(benchbase.BenchMarkBase): def bench_iter_children(self, root): for child in root: pass @@ -631,19 +366,7 @@ except ValueError: pass - try: - sys.argv.remove('-l') - # use large trees - TREE_FACTOR *= 2 - except ValueError: - pass - - try: - sys.argv.remove('-L') - # use LARGE trees - TREE_FACTOR *= 2 - except ValueError: - pass + 
benchbase.initArgs(sys.argv) _etrees = [] if import_lxml: @@ -682,98 +405,19 @@ sys.exit(1) print "Preparing test suites and trees ..." - - benchmark_suites = map(BenchMark, _etrees) - - # sorted by name and tree tuple - benchmarks = [ sorted(b.benchmarks()) for b in benchmark_suites ] - - if len(sys.argv) > 1: - selected = [] - for name in sys.argv[1:]: - selected.append(name) - benchmarks = [ [ b for b in bs - if [ match for match in selected - if match in b[0] ] ] - for bs in benchmarks ] - - import time - def run_bench(suite, method_name, method_call, tree_set, tn, an, serial): - if method_call is None: - raise SkippedTest - - current_time = time.time - call_repeat = range(10) - - tree_builders = [ suite.tree_builder(tree, tn, an, serial) - for tree in tree_set ] - - times = [] - args = () - for i in range(3): - gc.collect() - gc.disable() - t = 0 - for i in call_repeat: - args = [ build() for build in tree_builders ] - t_one_call = current_time() - method_call(*args) - t += current_time() - t_one_call - t = 1000.0 * t / len(call_repeat) - times.append(t) - gc.enable() - del args - return times - - def build_treeset_name(trees, tn, an, serialized): - text = {0:'-', 1:'S', 2:'U'}[tn] - attr = {0:'-', 1:'A'}[an] - ser = {True:'X', False:'T'}[serialized] - return "%s%s%s T%s" % (text, attr, ser, ',T'.join(imap(str, trees))[:6]) - + selected = set( sys.argv[1:] ) + benchmark_suites, benchmarks = \ + benchbase.buildSuites(BenchMark, _etrees, selected) print "Running benchmark on", ', '.join(b.lib_name for b in benchmark_suites) print - print "Setup times for trees in seconds:" - for b in benchmark_suites: - print "%-3s: " % b.lib_name, - for an in (0,1): - for tn in (0,1,2): - print ' %s ' % build_treeset_name((), tn, an, False)[:2], - print - for i, tree_times in enumerate(b.setup_times): - print " T%d:" % (i+1), ' '.join("%6.4f" % t for t in tree_times) - print + benchbase.printSetupTimes(benchmark_suites) if callgrind_zero: cmd = open("callgrind.cmd", 'w') 
cmd.write('Zero\n') cmd.close() - for bench_calls in izip(*benchmarks): - for lib, (bench, benchmark_setup) in enumerate(izip(benchmark_suites, bench_calls)): - bench_name = benchmark_setup[0] - tree_set_name = build_treeset_name(*benchmark_setup[-4:]) - print "%-3s: %-28s" % (bench.lib_name, bench_name[6:34]), - print "(%-10s)" % tree_set_name, - sys.stdout.flush() - - try: - result = run_bench(bench, *benchmark_setup) - except SkippedTest: - print "skipped" - except KeyboardInterrupt: - print "interrupted by user" - sys.exit(1) - except Exception, e: - print "failed: %s: %s" % (e.__class__.__name__, e) - else: - print "%9.4f msec/pass, best of (" % min(result), - for t in result: - print "%9.4f" % t, - print ")" - - if len(benchmark_suites) > 1: - print # empty line between different benchmarks + benchbase.runBenchmarks(benchmark_suites, benchmarks) Copied: lxml/branch/capi/benchmark/benchbase.py (from r30633, lxml/branch/capi/bench.py) ============================================================================== --- lxml/branch/capi/bench.py (original) +++ lxml/branch/capi/benchmark/benchbase.py Sat Jul 29 17:50:45 2006 @@ -1,6 +1,8 @@ import sys, string, time, copy, gc from itertools import * from StringIO import StringIO +import time + TREE_FACTOR = 1 # increase tree size with '-l / '-L' cmd option @@ -14,6 +16,26 @@ 'bla3' : _TEXT } + +def initArgs(argv): + try: + argv.remove('-l') + # use large trees + TREE_FACTOR *= 2 + except ValueError: + pass + + try: + argv.remove('-L') + # use LARGE trees + TREE_FACTOR *= 2 + except ValueError: + pass + +############################################################ +# benchmark decorators +############################################################ + def with_attributes(*use_attributes): "Decorator for benchmarks that use attributes" vmap = {False : 0, True : 1} @@ -56,6 +78,10 @@ function.STRING = True return function +############################################################ +# benchmark baseclass 
+############################################################ + class SkippedTest(Exception): pass @@ -269,473 +295,31 @@ return perms return _permutations([], seq, count) - ############################################################ -# Benchmarks +# Prepare and run benchmark suites ############################################################ -class BenchMark(BenchMarkBase): - def bench_iter_children(self, root): - for child in root: - pass - - def bench_iter_children_reversed(self, root): - for child in reversed(root): - pass - - @with_attributes(True, False) - @with_text(text=True, utext=True) - def bench_tostring_utf8(self, root): - self.etree.tostring(root, 'UTF-8') - - @with_attributes(True, False) - @with_text(text=True, utext=True) - def bench_tostring_utf16(self, root): - self.etree.tostring(root, 'UTF-16') - - @with_attributes(True, False) - @with_text(text=True, utext=True) - def bench_tostring_utf8_unicode_XML(self, root): - xml = unicode(self.etree.tostring(root, 'UTF-8'), 'UTF-8') - self.etree.XML(xml) - - @with_attributes(True, False) - @with_text(text=True, utext=True) - def bench_write_utf8_parse_stringIO(self, root): - f = StringIO() - self.etree.ElementTree(root).write(f, 'UTF-8') - f.seek(0) - self.etree.parse(f) - - @with_attributes(True, False) - @with_text(text=True, utext=True) - @serialized - def bench_parse_stringIO(self, root_xml): - f = StringIO(root_xml) - self.etree.parse(f) - - @with_attributes(True, False) - @with_text(text=True, utext=True) - @serialized - def bench_XML(self, root_xml): - self.etree.XML(root_xml) - - @with_attributes(True, False) - @with_text(text=True, utext=True) - @serialized - def bench_iterparse_stringIO(self, root_xml): - f = StringIO(root_xml) - for event, element in self.etree.iterparse(f): - pass - - @with_attributes(True, False) - @with_text(text=True, utext=True) - @serialized - def bench_iterparse_stringIO_clear(self, root_xml): - f = StringIO(root_xml) - for event, element in self.etree.iterparse(f): - 
element.clear() - - def bench_append_from_document(self, root1, root2): - # == "1,2 2,3 1,3 3,1 3,2 2,1" # trees 1 and 2, or 2 and 3, or ... - for el in root2: - root1.append(el) - - def bench_insert_from_document(self, root1, root2): - for el in root2: - root1.insert(len(root1)/2, el) - - def bench_rotate_children(self, root): - # == "1 2 3" # runs on any single tree independently - for i in range(100): - el = root[0] - del root[0] - root.append(el) - - def bench_reorder(self, root): - for i in range(1,len(root)/2): - el = root[0] - del root[0] - root[-i:-i] = [ el ] - - def bench_reorder_slice(self, root): - for i in range(1,len(root)/2): - els = root[0:1] - del root[0] - root[-i:-i] = els - - def bench_clear(self, root): - root.clear() - - def bench_has_children(self, root): - for child in root: - if child and child and child and child and child: - pass - - def bench_len(self, root): - for child in root: - map(len, repeat(child, 20)) - - def bench_create_subelements(self, root): - SubElement = self.etree.SubElement - for child in root: - SubElement(child, '{test}test') - - def bench_append_elements(self, root): - Element = self.etree.Element - for child in root: - el = Element('{test}test') - child.append(el) - - def bench_makeelement(self, root): - empty_attrib = {} - for child in root: - child.makeelement('{test}test', empty_attrib) - - def bench_create_elements(self, root): - Element = self.etree.Element - for child in root: - Element('{test}test') - - def bench_replace_children_element(self, root): - Element = self.etree.Element - for child in root: - el = Element('{test}test') - child[:] = [el] - - def bench_replace_children(self, root): - Element = self.etree.Element - for child in root: - child[:] = [ child[0] ] - - def bench_remove_children(self, root): - for child in root: - root.remove(child) - - def bench_remove_children_reversed(self, root): - for child in reversed(root[:]): - root.remove(child) - - def bench_set_attributes(self, root): - for child 
in root: - child.set('a', 'bla') - - @with_attributes(True) - def bench_get_attributes(self, root): - for child in root: - child.get('bla1') - child.get('{attr}test1') - - def bench_setget_attributes(self, root): - for child in root: - child.set('a', 'bla') - for child in root: - child.get('a') - - def bench_root_getchildren(self, root): - root.getchildren() - - def bench_getchildren(self, root): - for child in root: - child.getchildren() - - def bench_get_children_slice(self, root): - for child in root: - child[:] - - def bench_get_children_slice_2x(self, root): - for child in root: - children = child[:] - child[:] - - def bench_deepcopy(self, root): - for child in root: - copy.deepcopy(child) - - def bench_deepcopy_all(self, root): - copy.deepcopy(root) - - def bench_tag(self, root): - for child in root: - child.tag - - def bench_tag_repeat(self, root): - for child in root: - for i in repeat(0, 100): - child.tag - - @with_text(utext=True, text=True, no_text=True) - def bench_text(self, root): - for child in root: - child.text - - @with_text(utext=True, text=True, no_text=True) - def bench_text_repeat(self, root): - repeat = range(500) - for child in root: - for i in repeat: - child.text - - def bench_set_text(self, root): - text = _TEXT - for child in root: - child.text = text - - def bench_set_utext(self, root): - text = _UTEXT - for child in root: - child.text = text - - @onlylib('lxe') - def bench_index(self, root): - for child in root: - root.index(child) - - @onlylib('lxe') - def bench_index_slice(self, root): - for child in root[5:100]: - root.index(child, 5, 100) - - @onlylib('lxe') - def bench_index_slice_neg(self, root): - for child in root[-100:-5]: - root.index(child, start=-100, stop=-5) - - def bench_getiterator_all(self, root): - list(root.getiterator()) - - def bench_getiterator_islice(self, root): - list(islice(root.getiterator(), 10, 110)) - - def bench_getiterator_tag(self, root): - list(islice(root.getiterator(self.SEARCH_TAG), 3, 10)) - - def 
bench_getiterator_tag_all(self, root): - list(root.getiterator(self.SEARCH_TAG)) - - def bench_getiterator_tag_text(self, root): - [ e.text for e in root.getiterator(self.SEARCH_TAG) ] - - def bench_findall(self, root): - root.findall(".//*") - - def bench_findall_tag(self, root): - root.findall(".//" + self.SEARCH_TAG) - - @onlylib('lxe') - def bench_xpath_class(self, root): - xpath = self.etree.XPath("./*[0]") - for child in root: - xpath(child) - - @onlylib('lxe') - def bench_xpath_class_repeat(self, root): - for child in root: - xpath = self.etree.XPath("./*[0]") - xpath(child) - - @onlylib('lxe') - def bench_xpath_element(self, root): - xpath = self.etree.XPathElementEvaluator(root) - for child in root: - xpath.evaluate("./*[0]") - - @onlylib('lxe') - def bench_xpath_method(self, root): - for child in root: - child.xpath("./*[0]") - - @onlylib('lxe') - def bench_xpath_extensions_old(self, root): - def return_child(_, element): - if element: - return element[0] - else: - return () - extensions = {(None, 'child') : return_child} - xpath = self.etree.XPath("child(.)", extensions=extensions) - for child in root: - xpath(child) - - @onlylib('lxe') - def bench_xslt_extensions_old(self, root): - tree = self.etree.XML("""\ - - TEST - - - - - - - - -""") - def return_child(_, elements): - return elements[0][0] - - extensions = {('testns', 'child') : return_child} - - transform = self.etree.XSLT(tree, extensions) - for i in range(10): - transform(root) - - @onlylib('lxe') - def bench_xslt_document(self, root): - transform = self.etree.XSLT(self.etree.XML("""\ - - TEST - - - - - - - - -""")) - transform(root) - - -############################################################ -# Main program -############################################################ - -if __name__ == '__main__': - import_lxml = True - callgrind_zero = False - if len(sys.argv) > 1: - try: - sys.argv.remove('-i') - # run benchmark 'inplace' - sys.path.insert(0, 'src') - except ValueError: - pass - - try: 
- sys.argv.remove('-nolxml') - # run without lxml - import_lxml = False - except ValueError: - pass - - try: - sys.argv.remove('-z') - # reset callgrind after tree setup - callgrind_zero = True - except ValueError: - pass - - try: - sys.argv.remove('-l') - # use large trees - TREE_FACTOR *= 2 - except ValueError: - pass - - try: - sys.argv.remove('-L') - # use LARGE trees - TREE_FACTOR *= 2 - except ValueError: - pass - - _etrees = [] - if import_lxml: - from lxml import etree - _etrees.append(etree) - - try: - sys.argv.remove('-fel') - except ValueError: - pass - else: - # use fast element creation in lxml.etree - from lxml.elements import classlookup - classlookup.setElementClassLookup( - classlookup.ElementDefaultClassLookup()) - - if len(sys.argv) > 1: - if '-a' in sys.argv or '-c' in sys.argv: - # 'all' or 'C-implementations' ? - try: - import cElementTree as cET - _etrees.append(cET) - except ImportError: - pass - - try: - # 'all' ? - sys.argv.remove('-a') - from elementtree import ElementTree as ET - _etrees.append(ET) - except (ValueError, ImportError): - pass - - if not _etrees: - print "No library to test. Exiting." - sys.exit(1) - - print "Preparing test suites and trees ..." 
- - benchmark_suites = map(BenchMark, _etrees) +def buildSuites(benchmark_class, etrees, selected): + benchmark_suites = map(benchmark_class, etrees) # sorted by name and tree tuple benchmarks = [ sorted(b.benchmarks()) for b in benchmark_suites ] - if len(sys.argv) > 1: - selected = [] - for name in sys.argv[1:]: - selected.append(name) + if selected: benchmarks = [ [ b for b in bs if [ match for match in selected if match in b[0] ] ] for bs in benchmarks ] - import time - def run_bench(suite, method_name, method_call, tree_set, tn, an, serial): - if method_call is None: - raise SkippedTest - - current_time = time.time - call_repeat = range(10) - - tree_builders = [ suite.tree_builder(tree, tn, an, serial) - for tree in tree_set ] + return (benchmark_suites, benchmarks) - times = [] - args = () - for i in range(3): - gc.collect() - gc.disable() - t = 0 - for i in call_repeat: - args = [ build() for build in tree_builders ] - t_one_call = current_time() - method_call(*args) - t += current_time() - t_one_call - t = 1000.0 * t / len(call_repeat) - times.append(t) - gc.enable() - del args - return times - - def build_treeset_name(trees, tn, an, serialized): - text = {0:'-', 1:'S', 2:'U'}[tn] - attr = {0:'-', 1:'A'}[an] - ser = {True:'X', False:'T'}[serialized] - return "%s%s%s T%s" % (text, attr, ser, ',T'.join(imap(str, trees))[:6]) - - - print "Running benchmark on", ', '.join(b.lib_name - for b in benchmark_suites) - print +def build_treeset_name(trees, tn, an, serialized): + text = {0:'-', 1:'S', 2:'U'}[tn] + attr = {0:'-', 1:'A'}[an] + ser = {True:'X', False:'T'}[serialized] + return "%s%s%s T%s" % (text, attr, ser, ',T'.join(imap(str, trees))[:6]) +def printSetupTimes(benchmark_suites): print "Setup times for trees in seconds:" for b in benchmark_suites: print "%-3s: " % b.lib_name, @@ -747,11 +331,34 @@ print " T%d:" % (i+1), ' '.join("%6.4f" % t for t in tree_times) print - if callgrind_zero: - cmd = open("callgrind.cmd", 'w') - cmd.write('Zero\n') - 
cmd.close() +def runBench(suite, method_name, method_call, tree_set, tn, an, serial): + if method_call is None: + raise SkippedTest + + current_time = time.time + call_repeat = range(10) + + tree_builders = [ suite.tree_builder(tree, tn, an, serial) + for tree in tree_set ] + + times = [] + args = () + for i in range(3): + gc.collect() + gc.disable() + t = 0 + for i in call_repeat: + args = [ build() for build in tree_builders ] + t_one_call = current_time() + method_call(*args) + t += current_time() - t_one_call + t = 1000.0 * t / len(call_repeat) + times.append(t) + gc.enable() + del args + return times +def runBenchmarks(benchmark_suites, benchmarks): for bench_calls in izip(*benchmarks): for lib, (bench, benchmark_setup) in enumerate(izip(benchmark_suites, bench_calls)): bench_name = benchmark_setup[0] @@ -761,7 +368,7 @@ sys.stdout.flush() try: - result = run_bench(bench, *benchmark_setup) + result = runBench(bench, *benchmark_setup) except SkippedTest: print "skipped" except KeyboardInterrupt: From scoder at codespeak.net Sat Jul 29 17:59:08 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 29 Jul 2006 17:59:08 +0200 (CEST) Subject: [Lxml-checkins] r30733 - in lxml/branch/capi: . 
benchmark Message-ID: <20060729155908.5106F1007C@code0.codespeak.net> Author: scoder Date: Sat Jul 29 17:59:05 2006 New Revision: 30733 Added: lxml/branch/capi/benchmark/bench_etree.py - copied, changed from r30732, lxml/branch/capi/benchmark/bench.py lxml/branch/capi/benchmark/bench_xpath.py lxml/branch/capi/benchmark/bench_xslt.py Removed: lxml/branch/capi/benchmark/bench.py Modified: lxml/branch/capi/MANIFEST.in lxml/branch/capi/benchmark/benchbase.py Log: split: bench.py -> bench_etree.py, bench_xpath.py, bench_xslt.py Modified: lxml/branch/capi/MANIFEST.in ============================================================================== --- lxml/branch/capi/MANIFEST.in (original) +++ lxml/branch/capi/MANIFEST.in Sat Jul 29 17:59:05 2006 @@ -5,6 +5,7 @@ include CHANGES.txt CREDITS.txt INSTALL.txt LICENSES.txt README.txt TODO.txt recursive-include src *.pyx *.pxd *.pxi *.py etree.c etree.h etree_defs.h recursive-include src/lxml/tests *.rng *.xslt *.xml +recursive-include benchmark *.py recursive-include doc *.txt *.html *.css *.xml *.mgp pubkey.asc recursive-include doc mkhtml.py rest2html.py exclude doc/pyrex.txt src/lxml/etree.pxi Deleted: /lxml/branch/capi/benchmark/bench.py ============================================================================== --- /lxml/branch/capi/benchmark/bench.py Sat Jul 29 17:59:05 2006 +++ (empty file) @@ -1,423 +0,0 @@ -import sys, copy -from itertools import * -from StringIO import StringIO - -import benchbase -from benchbase import with_attributes, with_text, onlylib, serialized - -############################################################ -# Benchmarks -############################################################ - -class BenchMark(benchbase.BenchMarkBase): - def bench_iter_children(self, root): - for child in root: - pass - - def bench_iter_children_reversed(self, root): - for child in reversed(root): - pass - - @with_attributes(True, False) - @with_text(text=True, utext=True) - def bench_tostring_utf8(self, root): - 
self.etree.tostring(root, 'UTF-8') - - @with_attributes(True, False) - @with_text(text=True, utext=True) - def bench_tostring_utf16(self, root): - self.etree.tostring(root, 'UTF-16') - - @with_attributes(True, False) - @with_text(text=True, utext=True) - def bench_tostring_utf8_unicode_XML(self, root): - xml = unicode(self.etree.tostring(root, 'UTF-8'), 'UTF-8') - self.etree.XML(xml) - - @with_attributes(True, False) - @with_text(text=True, utext=True) - def bench_write_utf8_parse_stringIO(self, root): - f = StringIO() - self.etree.ElementTree(root).write(f, 'UTF-8') - f.seek(0) - self.etree.parse(f) - - @with_attributes(True, False) - @with_text(text=True, utext=True) - @serialized - def bench_parse_stringIO(self, root_xml): - f = StringIO(root_xml) - self.etree.parse(f) - - @with_attributes(True, False) - @with_text(text=True, utext=True) - @serialized - def bench_XML(self, root_xml): - self.etree.XML(root_xml) - - @with_attributes(True, False) - @with_text(text=True, utext=True) - @serialized - def bench_iterparse_stringIO(self, root_xml): - f = StringIO(root_xml) - for event, element in self.etree.iterparse(f): - pass - - @with_attributes(True, False) - @with_text(text=True, utext=True) - @serialized - def bench_iterparse_stringIO_clear(self, root_xml): - f = StringIO(root_xml) - for event, element in self.etree.iterparse(f): - element.clear() - - def bench_append_from_document(self, root1, root2): - # == "1,2 2,3 1,3 3,1 3,2 2,1" # trees 1 and 2, or 2 and 3, or ... 
- for el in root2: - root1.append(el) - - def bench_insert_from_document(self, root1, root2): - for el in root2: - root1.insert(len(root1)/2, el) - - def bench_rotate_children(self, root): - # == "1 2 3" # runs on any single tree independently - for i in range(100): - el = root[0] - del root[0] - root.append(el) - - def bench_reorder(self, root): - for i in range(1,len(root)/2): - el = root[0] - del root[0] - root[-i:-i] = [ el ] - - def bench_reorder_slice(self, root): - for i in range(1,len(root)/2): - els = root[0:1] - del root[0] - root[-i:-i] = els - - def bench_clear(self, root): - root.clear() - - def bench_has_children(self, root): - for child in root: - if child and child and child and child and child: - pass - - def bench_len(self, root): - for child in root: - map(len, repeat(child, 20)) - - def bench_create_subelements(self, root): - SubElement = self.etree.SubElement - for child in root: - SubElement(child, '{test}test') - - def bench_append_elements(self, root): - Element = self.etree.Element - for child in root: - el = Element('{test}test') - child.append(el) - - def bench_makeelement(self, root): - empty_attrib = {} - for child in root: - child.makeelement('{test}test', empty_attrib) - - def bench_create_elements(self, root): - Element = self.etree.Element - for child in root: - Element('{test}test') - - def bench_replace_children_element(self, root): - Element = self.etree.Element - for child in root: - el = Element('{test}test') - child[:] = [el] - - def bench_replace_children(self, root): - Element = self.etree.Element - for child in root: - child[:] = [ child[0] ] - - def bench_remove_children(self, root): - for child in root: - root.remove(child) - - def bench_remove_children_reversed(self, root): - for child in reversed(root[:]): - root.remove(child) - - def bench_set_attributes(self, root): - for child in root: - child.set('a', 'bla') - - @with_attributes(True) - def bench_get_attributes(self, root): - for child in root: - child.get('bla1') - 
child.get('{attr}test1') - - def bench_setget_attributes(self, root): - for child in root: - child.set('a', 'bla') - for child in root: - child.get('a') - - def bench_root_getchildren(self, root): - root.getchildren() - - def bench_getchildren(self, root): - for child in root: - child.getchildren() - - def bench_get_children_slice(self, root): - for child in root: - child[:] - - def bench_get_children_slice_2x(self, root): - for child in root: - children = child[:] - child[:] - - def bench_deepcopy(self, root): - for child in root: - copy.deepcopy(child) - - def bench_deepcopy_all(self, root): - copy.deepcopy(root) - - def bench_tag(self, root): - for child in root: - child.tag - - def bench_tag_repeat(self, root): - for child in root: - for i in repeat(0, 100): - child.tag - - @with_text(utext=True, text=True, no_text=True) - def bench_text(self, root): - for child in root: - child.text - - @with_text(utext=True, text=True, no_text=True) - def bench_text_repeat(self, root): - repeat = range(500) - for child in root: - for i in repeat: - child.text - - def bench_set_text(self, root): - text = _TEXT - for child in root: - child.text = text - - def bench_set_utext(self, root): - text = _UTEXT - for child in root: - child.text = text - - @onlylib('lxe') - def bench_index(self, root): - for child in root: - root.index(child) - - @onlylib('lxe') - def bench_index_slice(self, root): - for child in root[5:100]: - root.index(child, 5, 100) - - @onlylib('lxe') - def bench_index_slice_neg(self, root): - for child in root[-100:-5]: - root.index(child, start=-100, stop=-5) - - def bench_getiterator_all(self, root): - list(root.getiterator()) - - def bench_getiterator_islice(self, root): - list(islice(root.getiterator(), 10, 110)) - - def bench_getiterator_tag(self, root): - list(islice(root.getiterator(self.SEARCH_TAG), 3, 10)) - - def bench_getiterator_tag_all(self, root): - list(root.getiterator(self.SEARCH_TAG)) - - def bench_getiterator_tag_text(self, root): - [ e.text for 
e in root.getiterator(self.SEARCH_TAG) ] - - def bench_findall(self, root): - root.findall(".//*") - - def bench_findall_tag(self, root): - root.findall(".//" + self.SEARCH_TAG) - - @onlylib('lxe') - def bench_xpath_class(self, root): - xpath = self.etree.XPath("./*[0]") - for child in root: - xpath(child) - - @onlylib('lxe') - def bench_xpath_class_repeat(self, root): - for child in root: - xpath = self.etree.XPath("./*[0]") - xpath(child) - - @onlylib('lxe') - def bench_xpath_element(self, root): - xpath = self.etree.XPathElementEvaluator(root) - for child in root: - xpath.evaluate("./*[0]") - - @onlylib('lxe') - def bench_xpath_method(self, root): - for child in root: - child.xpath("./*[0]") - - @onlylib('lxe') - def bench_xpath_extensions_old(self, root): - def return_child(_, element): - if element: - return element[0] - else: - return () - extensions = {(None, 'child') : return_child} - xpath = self.etree.XPath("child(.)", extensions=extensions) - for child in root: - xpath(child) - - @onlylib('lxe') - def bench_xslt_extensions_old(self, root): - tree = self.etree.XML("""\ - - TEST - - - - - - - - -""") - def return_child(_, elements): - return elements[0][0] - - extensions = {('testns', 'child') : return_child} - - transform = self.etree.XSLT(tree, extensions) - for i in range(10): - transform(root) - - @onlylib('lxe') - def bench_xslt_document(self, root): - transform = self.etree.XSLT(self.etree.XML("""\ - - TEST - - - - - - - - -""")) - transform(root) - - -############################################################ -# Main program -############################################################ - -if __name__ == '__main__': - import_lxml = True - callgrind_zero = False - if len(sys.argv) > 1: - try: - sys.argv.remove('-i') - # run benchmark 'inplace' - sys.path.insert(0, 'src') - except ValueError: - pass - - try: - sys.argv.remove('-nolxml') - # run without lxml - import_lxml = False - except ValueError: - pass - - try: - sys.argv.remove('-z') - # reset 
callgrind after tree setup - callgrind_zero = True - except ValueError: - pass - - benchbase.initArgs(sys.argv) - - _etrees = [] - if import_lxml: - from lxml import etree - _etrees.append(etree) - - try: - sys.argv.remove('-fel') - except ValueError: - pass - else: - # use fast element creation in lxml.etree - from lxml.elements import classlookup - classlookup.setElementClassLookup( - classlookup.ElementDefaultClassLookup()) - - if len(sys.argv) > 1: - if '-a' in sys.argv or '-c' in sys.argv: - # 'all' or 'C-implementations' ? - try: - import cElementTree as cET - _etrees.append(cET) - except ImportError: - pass - - try: - # 'all' ? - sys.argv.remove('-a') - from elementtree import ElementTree as ET - _etrees.append(ET) - except (ValueError, ImportError): - pass - - if not _etrees: - print "No library to test. Exiting." - sys.exit(1) - - print "Preparing test suites and trees ..." - selected = set( sys.argv[1:] ) - benchmark_suites, benchmarks = \ - benchbase.buildSuites(BenchMark, _etrees, selected) - - print "Running benchmark on", ', '.join(b.lib_name - for b in benchmark_suites) - print - - benchbase.printSetupTimes(benchmark_suites) - - if callgrind_zero: - cmd = open("callgrind.cmd", 'w') - cmd.write('Zero\n') - cmd.close() - - benchbase.runBenchmarks(benchmark_suites, benchmarks) Copied: lxml/branch/capi/benchmark/bench_etree.py (from r30732, lxml/branch/capi/benchmark/bench.py) ============================================================================== --- lxml/branch/capi/benchmark/bench.py (original) +++ lxml/branch/capi/benchmark/bench_etree.py Sat Jul 29 17:59:05 2006 @@ -257,167 +257,5 @@ def bench_findall_tag(self, root): root.findall(".//" + self.SEARCH_TAG) - @onlylib('lxe') - def bench_xpath_class(self, root): - xpath = self.etree.XPath("./*[0]") - for child in root: - xpath(child) - - @onlylib('lxe') - def bench_xpath_class_repeat(self, root): - for child in root: - xpath = self.etree.XPath("./*[0]") - xpath(child) - - @onlylib('lxe') - def 
bench_xpath_element(self, root): - xpath = self.etree.XPathElementEvaluator(root) - for child in root: - xpath.evaluate("./*[0]") - - @onlylib('lxe') - def bench_xpath_method(self, root): - for child in root: - child.xpath("./*[0]") - - @onlylib('lxe') - def bench_xpath_extensions_old(self, root): - def return_child(_, element): - if element: - return element[0] - else: - return () - extensions = {(None, 'child') : return_child} - xpath = self.etree.XPath("child(.)", extensions=extensions) - for child in root: - xpath(child) - - @onlylib('lxe') - def bench_xslt_extensions_old(self, root): - tree = self.etree.XML("""\ - - TEST - - - - - - - - -""") - def return_child(_, elements): - return elements[0][0] - - extensions = {('testns', 'child') : return_child} - - transform = self.etree.XSLT(tree, extensions) - for i in range(10): - transform(root) - - @onlylib('lxe') - def bench_xslt_document(self, root): - transform = self.etree.XSLT(self.etree.XML("""\ - - TEST - - - - - - - - -""")) - transform(root) - - -############################################################ -# Main program -############################################################ - if __name__ == '__main__': - import_lxml = True - callgrind_zero = False - if len(sys.argv) > 1: - try: - sys.argv.remove('-i') - # run benchmark 'inplace' - sys.path.insert(0, 'src') - except ValueError: - pass - - try: - sys.argv.remove('-nolxml') - # run without lxml - import_lxml = False - except ValueError: - pass - - try: - sys.argv.remove('-z') - # reset callgrind after tree setup - callgrind_zero = True - except ValueError: - pass - - benchbase.initArgs(sys.argv) - - _etrees = [] - if import_lxml: - from lxml import etree - _etrees.append(etree) - - try: - sys.argv.remove('-fel') - except ValueError: - pass - else: - # use fast element creation in lxml.etree - from lxml.elements import classlookup - classlookup.setElementClassLookup( - classlookup.ElementDefaultClassLookup()) - - if len(sys.argv) > 1: - if '-a' in 
sys.argv or '-c' in sys.argv: - # 'all' or 'C-implementations' ? - try: - import cElementTree as cET - _etrees.append(cET) - except ImportError: - pass - - try: - # 'all' ? - sys.argv.remove('-a') - from elementtree import ElementTree as ET - _etrees.append(ET) - except (ValueError, ImportError): - pass - - if not _etrees: - print "No library to test. Exiting." - sys.exit(1) - - print "Preparing test suites and trees ..." - selected = set( sys.argv[1:] ) - benchmark_suites, benchmarks = \ - benchbase.buildSuites(BenchMark, _etrees, selected) - - print "Running benchmark on", ', '.join(b.lib_name - for b in benchmark_suites) - print - - benchbase.printSetupTimes(benchmark_suites) - - if callgrind_zero: - cmd = open("callgrind.cmd", 'w') - cmd.write('Zero\n') - cmd.close() - - benchbase.runBenchmarks(benchmark_suites, benchmarks) + benchbase.main(BenchMark) Added: lxml/branch/capi/benchmark/bench_xpath.py ============================================================================== --- (empty file) +++ lxml/branch/capi/benchmark/bench_xpath.py Sat Jul 29 17:59:05 2006 @@ -0,0 +1,49 @@ +import sys, copy +from itertools import * +from StringIO import StringIO + +import benchbase +from benchbase import with_attributes, with_text, onlylib, serialized + +############################################################ +# Benchmarks +############################################################ + +class XPathBenchMark(benchbase.BenchMarkBase): + @onlylib('lxe') + def bench_xpath_class(self, root): + xpath = self.etree.XPath("./*[0]") + for child in root: + xpath(child) + + @onlylib('lxe') + def bench_xpath_class_repeat(self, root): + for child in root: + xpath = self.etree.XPath("./*[0]") + xpath(child) + + @onlylib('lxe') + def bench_xpath_element(self, root): + xpath = self.etree.XPathElementEvaluator(root) + for child in root: + xpath.evaluate("./*[0]") + + @onlylib('lxe') + def bench_xpath_method(self, root): + for child in root: + child.xpath("./*[0]") + + @onlylib('lxe') 
+ def bench_xpath_extensions_old(self, root): + def return_child(_, element): + if element: + return element[0] + else: + return () + extensions = {(None, 'child') : return_child} + xpath = self.etree.XPath("child(.)", extensions=extensions) + for child in root: + xpath(child) + +if __name__ == '__main__': + benchbase.main(XPathBenchMark) Added: lxml/branch/capi/benchmark/bench_xslt.py ============================================================================== --- (empty file) +++ lxml/branch/capi/benchmark/bench_xslt.py Sat Jul 29 17:59:05 2006 @@ -0,0 +1,58 @@ +import sys, copy +from itertools import * +from StringIO import StringIO + +import benchbase +from benchbase import with_attributes, with_text, onlylib, serialized + +############################################################ +# Benchmarks +############################################################ + +class XSLTBenchMark(benchbase.BenchMarkBase): + @onlylib('lxe') + def bench_xslt_extensions_old(self, root): + tree = self.etree.XML("""\ + + TEST + + + + + + + + +""") + def return_child(_, elements): + return elements[0][0] + + extensions = {('testns', 'child') : return_child} + + transform = self.etree.XSLT(tree, extensions) + for i in range(10): + transform(root) + + @onlylib('lxe') + def bench_xslt_document(self, root): + transform = self.etree.XSLT(self.etree.XML("""\ + + TEST + + + + + + + + +""")) + transform(root) + +if __name__ == '__main__': + benchbase.main(XSLTBenchMark) Modified: lxml/branch/capi/benchmark/benchbase.py ============================================================================== --- lxml/branch/capi/benchmark/benchbase.py (original) +++ lxml/branch/capi/benchmark/benchbase.py Sat Jul 29 17:59:05 2006 @@ -384,3 +384,88 @@ if len(benchmark_suites) > 1: print # empty line between different benchmarks + +############################################################ +# Main program +############################################################ + +def main(benchmark_class): + 
import_lxml = True + callgrind_zero = False + if len(sys.argv) > 1: + try: + sys.argv.remove('-i') + # run benchmark 'inplace' + sys.path.insert(0, 'src') + except ValueError: + pass + + try: + sys.argv.remove('-nolxml') + # run without lxml + import_lxml = False + except ValueError: + pass + + try: + sys.argv.remove('-z') + # reset callgrind after tree setup + callgrind_zero = True + except ValueError: + pass + + initArgs(sys.argv) + + _etrees = [] + if import_lxml: + from lxml import etree + _etrees.append(etree) + + try: + sys.argv.remove('-fel') + except ValueError: + pass + else: + # use fast element creation in lxml.etree + from lxml.elements import classlookup + classlookup.setElementClassLookup( + classlookup.ElementDefaultClassLookup()) + + if len(sys.argv) > 1: + if '-a' in sys.argv or '-c' in sys.argv: + # 'all' or 'C-implementations' ? + try: + import cElementTree as cET + _etrees.append(cET) + except ImportError: + pass + + try: + # 'all' ? + sys.argv.remove('-a') + from elementtree import ElementTree as ET + _etrees.append(ET) + except (ValueError, ImportError): + pass + + if not _etrees: + print "No library to test. Exiting." + sys.exit(1) + + print "Preparing test suites and trees ..." 
+ selected = set( sys.argv[1:] ) + benchmark_suites, benchmarks = \ + buildSuites(benchmark_class, _etrees, selected) + + print "Running benchmark on", ', '.join(b.lib_name + for b in benchmark_suites) + print + + printSetupTimes(benchmark_suites) + + if callgrind_zero: + cmd = open("callgrind.cmd", 'w') + cmd.write('Zero\n') + cmd.close() + + runBenchmarks(benchmark_suites, benchmarks) From scoder at codespeak.net Mon Jul 31 11:40:22 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 31 Jul 2006 11:40:22 +0200 (CEST) Subject: [Lxml-checkins] r30762 - lxml/branch/capi/src/lxml Message-ID: <20060731094022.7F0EE1007A@code0.codespeak.net> Author: scoder Date: Mon Jul 31 11:40:20 2006 New Revision: 30762 Modified: lxml/branch/capi/src/lxml/objectify.pyx Log: renamed setPytypeAttribute -> setPytypeAttributeTag Modified: lxml/branch/capi/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/capi/src/lxml/objectify.pyx (original) +++ lxml/branch/capi/src/lxml/objectify.pyx Mon Jul 31 11:40:20 2006 @@ -60,13 +60,13 @@ PYTYPE_ATTRIBUTE = None -def setPytypeAttribute(attribute_tag=None): +def setPytypeAttributeTag(attribute_tag=None): """Changes name and namespace of the XML attribute that holds Python type information. Reset by calling without argument. - Default: {http://codespeak.net/lxml/objectify/pytype}pytype + Default: "{http://codespeak.net/lxml/objectify/pytype}pytype" """ global PYTYPE_ATTRIBUTE, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME global PYTYPE_NAMESPACE, PYTYPE_ATTRIBUTE_NAME @@ -95,7 +95,7 @@ # element class for the main API cdef class ObjectifiedElement(ElementBase): - """Element class with an Amara-like API. + """Main XML Element class. Element children are accessed as object attributes. Multiple children with the same name are available through a list index. 
Example: From scoder at codespeak.net Mon Jul 31 12:23:02 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 31 Jul 2006 12:23:02 +0200 (CEST) Subject: [Lxml-checkins] r30768 - lxml/trunk/doc Message-ID: <20060731102302.84B561007A@code0.codespeak.net> Author: scoder Date: Mon Jul 31 12:23:00 2006 New Revision: 30768 Modified: lxml/trunk/doc/FAQ.txt Log: FAQ entry on missing symbol errors for eggs compiled on UCS4 platforms but installed on a UCS2 Python Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Mon Jul 31 12:23:00 2006 @@ -95,7 +95,7 @@ requires the tree to be intact to finish parsing. If you delete or modify parents of the current node, chances are you modify the structure in a way that breaks the parser. Normally, this will result in a segfault. Please - refer to the `iterparse section` of the lxml API documentation to find out + refer to the `iterparse section`_ of the lxml API documentation to find out what you can do and what you can't do. .. _`iterparse section`: api.html#iterparse-and-iterwalk @@ -137,6 +137,23 @@ parsable data in a valid encoding. +#) Why do I get errors about missing UCS4 symbols when installing lxml? + + Most likely, you use a Python installation that was configured for internal + use of UCS2 unicode, meaning 16-bit unicode. The lxml egg distributions + are generally compiled on platforms that use UCS4, a 32-bit unicode + encoding, as this is used on the majority of platforms. Sadly, both are + not compatible, so the eggs can only support the one they were compiled + with. + + This means that you have to compile lxml from sources for your system. + Note that you do not need Pyrex for this, the lxml source distribution is + directly compilable on both platform types. See the `build instructions`_ + on how to do this. + + .. 
_`build instructions`: build.html + + #) How can I find out which namespace prefixes are used in a document? You can traverse the document (``getiterator()``) and collect the prefix From scoder at codespeak.net Mon Jul 31 20:09:29 2006 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 31 Jul 2006 20:09:29 +0200 (CEST) Subject: [Lxml-checkins] r30815 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20060731180929.E8B0510077@code0.codespeak.net> Author: scoder Date: Mon Jul 31 20:09:26 2006 New Revision: 30815 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/tests/test_xslt.py lxml/trunk/src/lxml/xslt.pxi Log: fix for EXSLT regexp:match function: handle groups of non-global expressions as defined by the standard Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Jul 31 20:09:26 2006 @@ -13,6 +13,9 @@ Bugs fixed ---------- +* The EXSLT ``regexp:match`` function now works as defined (except for some + differences in the regular expression syntax) + * Setting element.text to '' returned None on request, not the empty string * ``iterparse()`` could crash on long XML files Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Mon Jul 31 20:09:26 2006 @@ -54,6 +54,7 @@ cdef int PyBool_Check(object instance) cdef int PySequence_Check(object instance) cdef int PyType_Check(object instance) + cdef int PyTuple_CheckExact(object instance) cdef int PyObject_SetAttr(object o, object name, object value) Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Mon Jul 31 20:09:26 2006 @@ 
-589,6 +589,150 @@ self.assertEquals(root[2][2].tag, 'match') self.assertEquals(root[2][2].text, 'De') + def test_exslt_regexp_match_groups(self): + xslt = etree.XSLT(etree.XML("""\ + + + + + + + + + +""")) + result = xslt(etree.XML('')) + root = result.getroot() + self.assertEquals(root.tag, 'test') + self.assertEquals(len(root), 4) + + self.assertEquals(root[0].text, "123abc567") + self.assertEquals(root[1].text, "123") + self.assertEquals(root[2].text, "abc") + self.assertEquals(root[3].text, "567") + + def test_exslt_regexp_match1(self): + # taken from http://www.exslt.org/regexp/functions/match/index.html + xslt = etree.XSLT(etree.XML("""\ + + + + + + + + + +""")) + result = xslt(etree.XML('')) + root = result.getroot() + self.assertEquals(root.tag, 'test') + self.assertEquals(len(root), 5) + + self.assertEquals( + root[0].text, + "http://www.bayes.co.uk/xml/index.xml?/xml/utils/rechecker.xml") + self.assertEquals( + root[1].text, + "http") + self.assertEquals( + root[2].text, + "www.bayes.co.uk") + self.assertEquals( + root[3].text, + "") + self.assertEquals( + root[4].text, + "/xml/index.xml?/xml/utils/rechecker.xml") + + def test_exslt_regexp_match2(self): + # taken from http://www.exslt.org/regexp/functions/match/index.html + xslt = etree.XSLT(etree.XML("""\ + + + + + + + + + +""")) + result = xslt(etree.XML('')) + root = result.getroot() + self.assertEquals(root.tag, 'test') + self.assertEquals(len(root), 5) + + self.assertEquals(root[0].text, "This") + self.assertEquals(root[1].text, "is") + self.assertEquals(root[2].text, "a") + self.assertEquals(root[3].text, "test") + self.assertEquals(root[4].text, "string") + + def _test_exslt_regexp_match3(self): + # taken from http://www.exslt.org/regexp/functions/match/index.html + # THIS IS NOT SUPPORTED! 
+ xslt = etree.XSLT(etree.XML("""\ + + + + + + + + + +""")) + result = xslt(etree.XML('')) + root = result.getroot() + self.assertEquals(root.tag, 'test') + self.assertEquals(len(root), 4) + + self.assertEquals(root[0].text, "his") + self.assertEquals(root[1].text, "is") + self.assertEquals(root[2].text, "a") + self.assertEquals(root[3].text, "test") + + def _test_exslt_regexp_match4(self): + # taken from http://www.exslt.org/regexp/functions/match/index.html + # THIS IS NOT SUPPORTED! + xslt = etree.XSLT(etree.XML("""\ + + + + + + + + + +""")) + result = xslt(etree.XML('')) + root = result.getroot() + self.assertEquals(root.tag, 'test') + self.assertEquals(len(root), 4) + + self.assertEquals(root[0].text, "This") + self.assertEquals(root[1].text, "is") + self.assertEquals(root[2].text, "a") + self.assertEquals(root[3].text, "test") + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeXSLTTestCase)]) Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Mon Jul 31 20:09:26 2006 @@ -559,20 +559,22 @@ results = rexpc.findall(s) if not results: return () - result_list = [] - root = Element('matches') - for s_match in results: - elem = SubElement(root, 'match') - elem.text = s_match - python.PyList_Append(result_list, elem) - return result_list else: result = rexpc.search(s) - if result is None: + if not result: return () - root = Element('match') - root.text = result.group() - return (root,) + results = [ result.group() ] + results.extend( result.groups('') ) + result_list = [] + root = Element('matches') + join_groups = ''.join + for s_match in results: + if python.PyTuple_CheckExact(s_match): + s_match = join_groups(s_match) + elem = SubElement(root, 'match') + elem.text = s_match + python.PyList_Append(result_list, elem) + return result_list def replace(self, ctxt, s, rexp, flags, 
replacement): replacement = self._make_string(replacement)