From lxml-checkins at codespeak.net Sat Nov 3 04:29:26 2007 From: lxml-checkins at codespeak.net (VIAGRA ® Official Site) Date: Sat, 3 Nov 2007 04:29:26 +0100 (CET) Subject: [Lxml-checkins] November 72% OFF Message-ID: <20051103153036.6825.qmail@b-internet.90.189.174.12.snt.ru> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20071103/b32745be/attachment.htm From scoder at codespeak.net Sun Nov 4 19:42:26 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 4 Nov 2007 19:42:26 +0100 (CET) Subject: [Lxml-checkins] r48300 - in lxml/trunk: doc src/lxml Message-ID: <20071104184226.7282381AA@code0.codespeak.net> Author: scoder Date: Sun Nov 4 19:42:25 2007 New Revision: 48300 Modified: lxml/trunk/doc/build.txt lxml/trunk/src/lxml/etreepublic.pxd lxml/trunk/src/lxml/extensions.pxi lxml/trunk/src/lxml/lxml.objectify.pyx lxml/trunk/src/lxml/lxml.pyclasslookup.pyx lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/parsertarget.pxi lxml/trunk/src/lxml/public-api.pxi Log: make it build with Cython 0.9.6.8 (now required!) Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Sun Nov 4 19:42:25 2007 @@ -33,12 +33,12 @@ be an lxml developer, you do need a working Cython installation. You can use EasyInstall_ to install it:: - easy_install Cython==0.9.6.7 + easy_install Cython==0.9.6.8 .. _EasyInstall: http://peak.telecommunity.com/DevCenter/EasyInstall -lxml currently requires Cython 0.9.6.7, but it should work with later -versions. +lxml currently requires at least Cython 0.9.6.8, but later versions +should work. 
Subversion Modified: lxml/trunk/src/lxml/etreepublic.pxd ============================================================================== --- lxml/trunk/src/lxml/etreepublic.pxd (original) +++ lxml/trunk/src/lxml/etreepublic.pxd Sun Nov 4 19:42:25 2007 @@ -16,10 +16,10 @@ int start_node_inclusive) cdef void END_FOR_EACH_ELEMENT_FROM(tree.xmlNode* start_node) -cdef extern from "lxml.etree.h": +cdef extern from "lxml.etree_api.h": # first function to call! - cdef int import_etree(etree_module) except -1 + cdef int import_lxml__etree() except -1 ########################################################################## # public ElementTree API classes Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Sun Nov 4 19:42:25 2007 @@ -583,7 +583,7 @@ # lookup the function by name and call it cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt, - int nargs) with GIL: + int nargs) with gil: cdef xpath.xmlXPathContext* rctxt cdef _BaseContext context rctxt = ctxt.context Modified: lxml/trunk/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.objectify.pyx (original) +++ lxml/trunk/src/lxml/lxml.objectify.pyx Sun Nov 4 19:42:25 2007 @@ -1,6 +1,6 @@ from etreepublic cimport _Document, _Element, ElementBase from etreepublic cimport _ElementIterator, ElementClassLookup -from etreepublic cimport elementFactory, import_etree, textOf +from etreepublic cimport elementFactory, import_lxml__etree, textOf from python cimport callable, _cstr cimport etreepublic as cetree cimport python @@ -10,7 +10,7 @@ cdef object etree from lxml import etree # initialize C-API of lxml.etree -import_etree(etree) +import_lxml__etree() __version__ = etree.__version__ Modified: lxml/trunk/src/lxml/lxml.pyclasslookup.pyx 
============================================================================== --- lxml/trunk/src/lxml/lxml.pyclasslookup.pyx (original) +++ lxml/trunk/src/lxml/lxml.pyclasslookup.pyx Sun Nov 4 19:42:25 2007 @@ -1,6 +1,6 @@ from etreepublic cimport _Document, _Element, ElementBase from etreepublic cimport ElementClassLookup, FallbackElementClassLookup -from etreepublic cimport elementFactory, import_etree +from etreepublic cimport elementFactory, import_lxml__etree from python cimport str, repr, isinstance, issubclass, iter from python cimport _cstr cimport etreepublic as cetree @@ -13,7 +13,7 @@ cdef object etree from lxml import etree # initialize C-API of lxml.etree -import_etree(etree) +import_lxml__etree() __version__ = etree.__version__ Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Sun Nov 4 19:42:25 2007 @@ -288,7 +288,7 @@ self._exc_context._store_raised() return -1 -cdef int _readFilelikeParser(void* ctxt, char* c_buffer, int c_size) with GIL: +cdef int _readFilelikeParser(void* ctxt, char* c_buffer, int c_size) with gil: return (<_FileReaderContext>ctxt).copyToBuffer(c_buffer, c_size) ############################################################ @@ -297,7 +297,7 @@ cdef xmlparser.xmlParserInput* _parser_resolve_from_python( char* c_url, char* c_pubid, xmlparser.xmlParserCtxt* c_context, - int* error) with GIL: + int* error) with gil: # call the Python document loaders cdef xmlparser.xmlParserInput* c_input cdef _ResolverContext context Modified: lxml/trunk/src/lxml/parsertarget.pxi ============================================================================== --- lxml/trunk/src/lxml/parsertarget.pxi (original) +++ lxml/trunk/src/lxml/parsertarget.pxi Sun Nov 4 19:42:25 2007 @@ -87,7 +87,7 @@ char* c_namespace, int c_nb_namespaces, char** c_namespaces, int c_nb_attributes, int c_nb_defaulted, - char** 
c_attributes) with GIL: + char** c_attributes) with gil: cdef _TargetParserContext context cdef xmlparser.xmlParserCtxt* c_ctxt cdef int i @@ -118,7 +118,7 @@ _handleSaxTargetException(context, c_ctxt) cdef void _targetSaxEnd(void* ctxt, char* c_localname, char* c_prefix, - char* c_namespace) with GIL: + char* c_namespace) with gil: cdef _TargetParserContext context cdef xmlparser.xmlParserCtxt* c_ctxt c_ctxt = ctxt @@ -131,7 +131,7 @@ except: _handleSaxTargetException(context, c_ctxt) -cdef void _targetSaxData(void* ctxt, char* c_data, int data_len) with GIL: +cdef void _targetSaxData(void* ctxt, char* c_data, int data_len) with gil: cdef _TargetParserContext context cdef xmlparser.xmlParserCtxt* c_ctxt c_ctxt = ctxt @@ -145,7 +145,7 @@ _handleSaxTargetException(context, c_ctxt) cdef void _targetSaxDoctype(void* ctxt, char* c_name, char* c_public, - char* c_system) with GIL: + char* c_system) with gil: cdef _TargetParserContext context cdef xmlparser.xmlParserCtxt* c_ctxt c_ctxt = ctxt @@ -162,7 +162,7 @@ except: _handleSaxTargetException(context, c_ctxt) -cdef void _targetSaxPI(void* ctxt, char* c_target, char* c_data) with GIL: +cdef void _targetSaxPI(void* ctxt, char* c_target, char* c_data) with gil: cdef _TargetParserContext context cdef xmlparser.xmlParserCtxt* c_ctxt c_ctxt = ctxt @@ -176,7 +176,7 @@ except: _handleSaxTargetException(context, c_ctxt) -cdef void _targetSaxComment(void* ctxt, char* c_data, int data_len) with GIL: +cdef void _targetSaxComment(void* ctxt, char* c_data, int data_len) with gil: cdef _TargetParserContext context cdef xmlparser.xmlParserCtxt* c_ctxt c_ctxt = ctxt Modified: lxml/trunk/src/lxml/public-api.pxi ============================================================================== --- lxml/trunk/src/lxml/public-api.pxi (original) +++ lxml/trunk/src/lxml/public-api.pxi Sun Nov 4 19:42:25 2007 @@ -1,156 +1,156 @@ # Public C API for lxml.etree -cdef public _Element deepcopyNodeToDocument(_Document doc, xmlNode* c_root): +cdef 
public api _Element deepcopyNodeToDocument(_Document doc, xmlNode* c_root): "Recursively copy the element into the document. doc is not modified." cdef xmlNode* c_node c_node = _copyNodeToDoc(c_root, doc._c_doc) return _elementFactory(doc, c_node) -cdef public _ElementTree elementTreeFactory(_Element context_node): +cdef public api _ElementTree elementTreeFactory(_Element context_node): return newElementTree(context_node, _ElementTree) -cdef public _ElementTree newElementTree(_Element context_node, +cdef public api _ElementTree newElementTree(_Element context_node, object subclass): if context_node is NULL or context_node is None: raise TypeError return _newElementTree(context_node._doc, context_node, subclass) -cdef public _Element elementFactory(_Document doc, xmlNode* c_node): +cdef public api _Element elementFactory(_Document doc, xmlNode* c_node): if c_node is NULL or doc is None: raise TypeError return _elementFactory(doc, c_node) -cdef public _Element makeElement(tag, _Document doc, parser, +cdef public api _Element makeElement(tag, _Document doc, parser, text, tail, attrib, nsmap): return _makeElement(tag, NULL, doc, parser, text, tail, attrib, nsmap, None) -cdef public _Element makeSubElement(_Element parent, tag, text, tail, +cdef public api _Element makeSubElement(_Element parent, tag, text, tail, attrib, nsmap): return _makeSubElement(parent, tag, text, tail, attrib, nsmap, None) -cdef public void setElementClassLookupFunction( +cdef public api void setElementClassLookupFunction( _element_class_lookup_function function, state): _setElementClassLookupFunction(function, state) -cdef public object lookupDefaultElementClass(state, doc, xmlNode* c_node): +cdef public api object lookupDefaultElementClass(state, doc, xmlNode* c_node): return _lookupDefaultElementClass(state, doc, c_node) -cdef public object lookupNamespaceElementClass(state, doc, xmlNode* c_node): +cdef public api object lookupNamespaceElementClass(state, doc, xmlNode* c_node): return 
_find_nselement_class(state, doc, c_node) -cdef public object callLookupFallback(FallbackElementClassLookup lookup, +cdef public api object callLookupFallback(FallbackElementClassLookup lookup, _Document doc, xmlNode* c_node): return lookup._callFallback(doc, c_node) -cdef public int tagMatches(xmlNode* c_node, char* c_href, char* c_name): +cdef public api int tagMatches(xmlNode* c_node, char* c_href, char* c_name): if c_node is NULL: return -1 return _tagMatches(c_node, c_href, c_name) -cdef public _Document documentOrRaise(object input): +cdef public api _Document documentOrRaise(object input): return _documentOrRaise(input) -cdef public _Element rootNodeOrRaise(object input): +cdef public api _Element rootNodeOrRaise(object input): return _rootNodeOrRaise(input) -cdef public bint hasText(xmlNode* c_node): +cdef public api bint hasText(xmlNode* c_node): return _hasText(c_node) -cdef public bint hasTail(xmlNode* c_node): +cdef public api bint hasTail(xmlNode* c_node): return _hasTail(c_node) -cdef public object textOf(xmlNode* c_node): +cdef public api object textOf(xmlNode* c_node): if c_node is NULL: return None return _collectText(c_node.children) -cdef public object tailOf(xmlNode* c_node): +cdef public api object tailOf(xmlNode* c_node): if c_node is NULL: return None return _collectText(c_node.next) -cdef public int setNodeText(xmlNode* c_node, text) except -1: +cdef public api int setNodeText(xmlNode* c_node, text) except -1: if c_node is NULL: raise ValueError return _setNodeText(c_node, text) -cdef public int setTailText(xmlNode* c_node, text) except -1: +cdef public api int setTailText(xmlNode* c_node, text) except -1: if c_node is NULL: raise ValueError return _setTailText(c_node, text) -cdef public object attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node): +cdef public api object attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node): return _attributeValue(c_element, c_attrib_node) -cdef public object attributeValueFromNsName(xmlNode* 
c_element, +cdef public api object attributeValueFromNsName(xmlNode* c_element, char* ns, char* name): return _attributeValueFromNsName(c_element, ns, name) -cdef public object getAttributeValue(_Element element, key, default): +cdef public api object getAttributeValue(_Element element, key, default): return _getAttributeValue(element, key, default) -cdef public object iterattributes(_Element element, int keysvalues): +cdef public api object iterattributes(_Element element, int keysvalues): return _attributeIteratorFactory(element, keysvalues) -cdef public object collectAttributes(xmlNode* c_element, int keysvalues): +cdef public api object collectAttributes(xmlNode* c_element, int keysvalues): return _collectAttributes(c_element, keysvalues) -cdef public int setAttributeValue(_Element element, key, value) except -1: +cdef public api int setAttributeValue(_Element element, key, value) except -1: return _setAttributeValue(element, key, value) -cdef public int delAttribute(_Element element, key) except -1: +cdef public api int delAttribute(_Element element, key) except -1: return _delAttribute(element, key) -cdef public int delAttributeFromNsName(tree.xmlNode* c_element, +cdef public api int delAttributeFromNsName(tree.xmlNode* c_element, char* c_href, char* c_name): return _delAttributeFromNsName(c_element, c_href, c_name) -cdef public bint hasChild(xmlNode* c_node): +cdef public api bint hasChild(xmlNode* c_node): return _hasChild(c_node) -cdef public xmlNode* findChild(xmlNode* c_node, Py_ssize_t index): +cdef public api xmlNode* findChild(xmlNode* c_node, Py_ssize_t index): return _findChild(c_node, index) -cdef public xmlNode* findChildForwards(xmlNode* c_node, Py_ssize_t index): +cdef public api xmlNode* findChildForwards(xmlNode* c_node, Py_ssize_t index): return _findChildForwards(c_node, index) -cdef public xmlNode* findChildBackwards(xmlNode* c_node, Py_ssize_t index): +cdef public api xmlNode* findChildBackwards(xmlNode* c_node, Py_ssize_t index): return 
_findChildBackwards(c_node, index) -cdef public xmlNode* nextElement(xmlNode* c_node): +cdef public api xmlNode* nextElement(xmlNode* c_node): return _nextElement(c_node) -cdef public xmlNode* previousElement(xmlNode* c_node): +cdef public api xmlNode* previousElement(xmlNode* c_node): return _previousElement(c_node) -cdef public void appendChild(_Element parent, _Element child): +cdef public api void appendChild(_Element parent, _Element child): _appendChild(parent, child) -cdef public object pyunicode(char* s): +cdef public api object pyunicode(char* s): if s is NULL: raise TypeError return funicode(s) -cdef public object utf8(object s): +cdef public api object utf8(object s): return _utf8(s) -cdef public object getNsTag(object tag): +cdef public api object getNsTag(object tag): return _getNsTag(tag) -cdef public object namespacedName(xmlNode* c_node): +cdef public api object namespacedName(xmlNode* c_node): return _namespacedName(c_node) -cdef public object namespacedNameFromNsName(char* href, char* name): +cdef public api object namespacedNameFromNsName(char* href, char* name): return _namespacedNameFromNsName(href, name) -cdef public void iteratorStoreNext(_ElementIterator iterator, _Element node): +cdef public api void iteratorStoreNext(_ElementIterator iterator, _Element node): iterator._storeNext(node) -cdef public void initTagMatch(_ElementTagMatcher matcher, tag): +cdef public api void initTagMatch(_ElementTagMatcher matcher, tag): matcher._initTagMatch(tag) -cdef public tree.xmlNs* findOrBuildNodeNsPrefix( +cdef public api tree.xmlNs* findOrBuildNodeNsPrefix( _Document doc, xmlNode* c_node, char* href, char* prefix) except NULL: if doc is None: raise TypeError From ianb at codespeak.net Mon Nov 5 03:04:47 2007 From: ianb at codespeak.net (ianb at codespeak.net) Date: Mon, 5 Nov 2007 03:04:47 +0100 (CET) Subject: [Lxml-checkins] r48303 - lxml/trunk/src/lxml/html Message-ID: <20071105020447.31F9A8243@code0.codespeak.net> Author: ianb Date: Mon Nov 5 
03:04:45 2007 New Revision: 48303 Modified: lxml/trunk/src/lxml/html/clean.py Log: Handle the case of in lxml.html.clean, where more than one attribute can contain a link Modified: lxml/trunk/src/lxml/html/clean.py ============================================================================== --- lxml/trunk/src/lxml/html/clean.py (original) +++ lxml/trunk/src/lxml/html/clean.py Mon Nov 5 03:04:45 2007 @@ -368,10 +368,20 @@ def allow_element(self, el): if el.tag not in self._tag_link_attrs: return False - url = el.get(self._tag_link_attrs[el.tag]) - if not url: - return False - return self.allow_embedded_url(el, url) + attr = self._tag_link_attrs[el.tag] + if isinstance(attr, (list, tuple)): + for one_attr in attr: + url = el.get(one_attr) + if not url: + return False + if not self.allow_embedded_url(el, url): + return False + return True + else: + url = el.get(attr) + if not url: + return False + return self.allow_embedded_url(el, url) def allow_embedded_url(self, el, url): if (self.whitelist_tags is not None From scoder at codespeak.net Mon Nov 5 12:25:12 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 5 Nov 2007 12:25:12 +0100 (CET) Subject: [Lxml-checkins] r48314 - lxml/trunk Message-ID: <20071105112512.9DCE281B4@code0.codespeak.net> Author: scoder Date: Mon Nov 5 12:25:11 2007 New Revision: 48314 Modified: lxml/trunk/selftest2.py Log: API usage fix Modified: lxml/trunk/selftest2.py ============================================================================== --- lxml/trunk/selftest2.py (original) +++ lxml/trunk/selftest2.py Mon Nov 5 12:25:11 2007 @@ -19,7 +19,7 @@ file = StringIO.StringIO() tree = ElementTree.ElementTree(elem) if encoding: - tree.write(file, encoding) + tree.write(file, encoding=encoding) else: tree.write(file) return file.getvalue().replace(' />', '/>') From scoder at codespeak.net Mon Nov 5 15:27:50 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 5 Nov 2007 15:27:50 +0100 (CET) Subject: 
[Lxml-checkins] r48316 - in lxml/trunk: . doc src/lxml Message-ID: <20071105142750.3C24F81E9@code0.codespeak.net> Author: scoder Date: Mon Nov 5 15:27:46 2007 New Revision: 48316 Modified: lxml/trunk/CHANGES.txt lxml/trunk/doc/objectify.txt lxml/trunk/doc/tutorial.txt lxml/trunk/src/lxml/lxml.etree.pyx Log: use default prefixes for common namespaces (following ET 1.3) Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Nov 5 15:27:46 2007 @@ -8,6 +8,8 @@ Features added -------------- +* Use default prefixes for some common XML namespaces + * ``lxml.html.clean.Cleaner`` now allows for a ``host_whitelist``, and two overridable methods: ``allow_embedded_url(el, url)`` and the more general ``allow_element(el)``. Modified: lxml/trunk/doc/objectify.txt ============================================================================== --- lxml/trunk/doc/objectify.txt (original) +++ lxml/trunk/doc/objectify.txt Mon Nov 5 15:27:46 2007 @@ -956,12 +956,15 @@ >>> for prefix, namespace in el.nsmap.items(): ... print prefix, '-', namespace ns0 - http://codespeak.net/lxml/objectify/pytype - ns1 - http://www.w3.org/2001/XMLSchema-instance foo - http://www.w3.org/2001/XMLSchema + xsi - http://www.w3.org/2001/XMLSchema-instance >>> print el.get("{http://www.w3.org/2001/XMLSchema-instance}type") foo:string +Note how lxml chose a default prefix for the XML Schema Instance +namespace. We can override it as in the following example:: + >>> el = objectify.DataElement('5', _xsi='foo:string', ... nsmap={'foo': 'http://www.w3.org/2001/XMLSchema', ... 
'myxsi': 'http://www.w3.org/2001/XMLSchema-instance'}) Modified: lxml/trunk/doc/tutorial.txt ============================================================================== --- lxml/trunk/doc/tutorial.txt (original) +++ lxml/trunk/doc/tutorial.txt Mon Nov 5 15:27:46 2007 @@ -658,9 +658,9 @@ >>> body.text = "Hello World" >>> print etree.tostring(xhtml, pretty_print=True) - - Hello World - + + Hello World + .. _`namespace prefixes`: http://www.w3.org/TR/xml-names/#ns-qualnames Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Mon Nov 5 15:27:46 2007 @@ -68,6 +68,17 @@ cdef char* _C_FILENAME_ENCODING _C_FILENAME_ENCODING = _cstr(_FILENAME_ENCODING) +# set up some default namespace prefixes +_DEFAULT_NAMESPACE_PREFIXES = { + "http://www.w3.org/1999/xhtml": "html", + "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", + "http://schemas.xmlsoap.org/wsdl/": "wsdl", + # xml schema + "http://www.w3.org/2001/XMLSchema": "xs", + "http://www.w3.org/2001/XMLSchema-instance": "xsi", + # dublic core + "http://purl.org/dc/elements/1.1/": "dc", +} # Error superclass for ElementTree compatibility class Error(Exception): @@ -323,6 +334,7 @@ """ cdef xmlNs* c_ns cdef xmlNs* c_doc_ns + cdef python.PyObject* dict_result if c_node.type != tree.XML_ELEMENT_NODE: assert c_node.type == tree.XML_ELEMENT_NODE, \ "invalid node type %d, expected %d" % ( @@ -332,6 +344,12 @@ if c_ns is not NULL: return c_ns + if c_prefix is NULL: + dict_result = python.PyDict_GetItemString( + _DEFAULT_NAMESPACE_PREFIXES, c_href) + if dict_result is not NULL: + c_prefix = _cstr(dict_result) + if c_prefix is NULL or \ tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is not NULL: # try to simulate ElementTree's namespace prefix creation From scoder at codespeak.net Mon Nov 5 17:34:19 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 
5 Nov 2007 17:34:19 +0100 (CET) Subject: [Lxml-checkins] r48322 - lxml/trunk/src/lxml Message-ID: <20071105163419.7D4AD81BF@code0.codespeak.net> Author: scoder Date: Mon Nov 5 17:34:19 2007 New Revision: 48322 Modified: lxml/trunk/src/lxml/lxml.etree.pyx Log: cleanup Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Mon Nov 5 17:34:19 2007 @@ -69,6 +69,7 @@ _C_FILENAME_ENCODING = _cstr(_FILENAME_ENCODING) # set up some default namespace prefixes +cdef object _DEFAULT_NAMESPACE_PREFIXES _DEFAULT_NAMESPACE_PREFIXES = { "http://www.w3.org/1999/xhtml": "html", "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", From lxml-checkins at codespeak.net Tue Nov 6 14:39:34 2007 From: lxml-checkins at codespeak.net (VIAGRA ® Official Site) Date: Tue, 6 Nov 2007 14:39:34 +0100 (CET) Subject: [Lxml-checkins] November 73% OFF Message-ID: <20071106153928.3131.qmail@ppp-124.120.166.237.revip2.asianet.co.th> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20071106/8183df19/attachment.htm From lxml-checkins at codespeak.net Wed Nov 7 14:36:43 2007 From: lxml-checkins at codespeak.net (VIAGRA ® Official Site) Date: Wed, 7 Nov 2007 14:36:43 +0100 (CET) Subject: [Lxml-checkins] November 75% OFF Message-ID: <20071107033637.5926.qmail@host92-74-dynamic.1-79-r.retail.telecomitalia.it> An HTML attachment was scrubbed... 
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20071107/c1025415/attachment.htm From scoder at codespeak.net Fri Nov 9 08:25:45 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 9 Nov 2007 08:25:45 +0100 (CET) Subject: [Lxml-checkins] r48460 - lxml/trunk/benchmark Message-ID: <20071109072545.7B65981A8@code0.codespeak.net> Author: scoder Date: Fri Nov 9 08:25:44 2007 New Revision: 48460 Modified: lxml/trunk/benchmark/bench_objectify.py Log: benchmark on attribute assignment Modified: lxml/trunk/benchmark/bench_objectify.py ============================================================================== --- lxml/trunk/benchmark/bench_objectify.py (original) +++ lxml/trunk/benchmark/bench_objectify.py Fri Nov 9 08:25:44 2007 @@ -26,6 +26,16 @@ for i in self.repeat3000: root.zzzzz + def bench_attribute_assign_int(self, root): + "1 2 4" + for i in self.repeat3000: + root.XYZ = 5 + + def bench_attribute_assign_string(self, root): + "1 2 4" + for i in self.repeat3000: + root.XYZ = "5" + def bench_attribute_cached(self, root): "1 2 4" cache = root.zzzzz From scoder at codespeak.net Sun Nov 11 16:31:01 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 11 Nov 2007 16:31:01 +0100 (CET) Subject: [Lxml-checkins] r48572 - lxml/trunk/src/lxml Message-ID: <20071111153101.65F428259@code0.codespeak.net> Author: scoder Date: Sun Nov 11 16:31:01 2007 New Revision: 48572 Modified: lxml/trunk/src/lxml/parser.pxi Log: cleanup Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Sun Nov 11 16:31:01 2007 @@ -648,9 +648,6 @@ c_ctxt, self._default_encoding_int) return c_ctxt - def __dealloc__(self): - pass - property error_log: """The error log of the last parser run. 
""" From scoder at codespeak.net Sun Nov 11 16:31:25 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 11 Nov 2007 16:31:25 +0100 (CET) Subject: [Lxml-checkins] r48573 - lxml/trunk Message-ID: <20071111153125.A635B8259@code0.codespeak.net> Author: scoder Date: Sun Nov 11 16:31:25 2007 New Revision: 48573 Modified: lxml/trunk/CHANGES.txt Log: changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Nov 11 16:31:25 2007 @@ -50,6 +50,9 @@ classes now know about their fully qualified class name, including the package name of their module. +* Keyword-only arguments in some API functions, especially in the + parsers and serialisers. + 1.3.6 (2007-10-29) ================== From scoder at codespeak.net Sat Nov 17 12:44:46 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 17 Nov 2007 12:44:46 +0100 (CET) Subject: [Lxml-checkins] r48749 - lxml/trunk/src/lxml Message-ID: <20071117114446.E08F0815D@code0.codespeak.net> Author: scoder Date: Sat Nov 17 12:44:45 2007 New Revision: 48749 Modified: lxml/trunk/src/lxml/python.pxd Log: Python function declared Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Sat Nov 17 12:44:45 2007 @@ -62,6 +62,7 @@ cdef int PyDict_DelItem(object d, object key) except -1 cdef void PyDict_Clear(object d) cdef object PyDict_Copy(object d) + cdef object PyDictProxy_New(object d) cdef int PyDict_Contains(object d, object key) except -1 cdef Py_ssize_t PyDict_Size(object d) cdef object PySequence_List(object o) From scoder at codespeak.net Sat Nov 17 12:52:19 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 17 Nov 2007 12:52:19 +0100 (CET) Subject: [Lxml-checkins] r48750 - lxml/trunk/src/lxml Message-ID: 
<20071117115219.9C8088161@code0.codespeak.net> Author: scoder Date: Sat Nov 17 12:52:19 2007 New Revision: 48750 Modified: lxml/trunk/src/lxml/iterparse.pxi Log: cleanup Modified: lxml/trunk/src/lxml/iterparse.pxi ============================================================================== --- lxml/trunk/src/lxml/iterparse.pxi (original) +++ lxml/trunk/src/lxml/iterparse.pxi Sat Nov 17 12:52:19 2007 @@ -348,7 +348,6 @@ cdef _IterparseContext context cdef xmlparser.xmlParserCtxt* pctxt cdef int error - cdef char* c_filename if self._source is None: raise StopIteration @@ -383,6 +382,7 @@ break if error != 0: self._source = None + del context._events[:] _raiseParseError(pctxt, self._filename, context._error_log) if python.PyList_GET_SIZE(context._events) == 0: self.root = context._root From scoder at codespeak.net Sat Nov 17 12:53:15 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 17 Nov 2007 12:53:15 +0100 (CET) Subject: [Lxml-checkins] r48751 - lxml/trunk/src/lxml Message-ID: <20071117115315.5F2748161@code0.codespeak.net> Author: scoder Date: Sat Nov 17 12:53:15 2007 New Revision: 48751 Modified: lxml/trunk/src/lxml/parser.pxi Log: small setup fixes for feed and target parser Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Sat Nov 17 12:53:15 2007 @@ -686,6 +686,7 @@ parser._remove_pis = self._remove_pis parser._filename = self._filename parser._resolvers = self._resolvers + parser._target = self._target parser._class_lookup = self._class_lookup return parser @@ -878,7 +879,10 @@ cdef int buffer_len cdef int error if python.PyString_Check(data): - c_encoding = NULL + if self._default_encoding is None: + c_encoding = NULL + else: + c_encoding = self._default_encoding c_data = _cstr(data) py_buffer_len = python.PyString_GET_SIZE(data) elif python.PyUnicode_Check(data): From lxml-checkins at 
codespeak.net Thu Nov 22 14:12:08 2007 From: lxml-checkins at codespeak.net (Canadian Doctor Art Moore) Date: Thu, 22 Nov 2007 14:12:08 +0100 (CET) Subject: [Lxml-checkins] November 78% OFF Message-ID: <20071122151232.3166.qmail@adsl-pool-222.123.33-140.tttmaxnet.com> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20071122/ae86e700/attachment.htm From scoder at codespeak.net Fri Nov 23 09:26:07 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 23 Nov 2007 09:26:07 +0100 (CET) Subject: [Lxml-checkins] r48969 - lxml/trunk/src/lxml/tests Message-ID: <20071123082607.3D6CF81FA@code0.codespeak.net> Author: scoder Date: Fri Nov 23 09:26:05 2007 New Revision: 48969 Modified: lxml/trunk/src/lxml/tests/test_elementtree.py Log: new attrib test case Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Fri Nov 23 09:26:05 2007 @@ -317,6 +317,24 @@ self.assertEquals(None, root.get('one')) self.assertEquals(None, root.get('two')) + def test_attrib_ns_clear(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + attribNS = '{http://foo/bar}x' + + parent = Element('parent') + parent.set(attribNS, 'a') + child = SubElement(parent, 'child') + child.set(attribNS, 'b') + + self.assertEquals('a', parent.get(attribNS)) + self.assertEquals('b', child.get(attribNS)) + + parent.clear() + self.assertEquals(None, parent.get(attribNS)) + self.assertEquals('b', child.get(attribNS)) + def test_attribute_update_dict(self): XML = self.etree.XML From scoder at codespeak.net Fri Nov 23 09:27:51 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 23 Nov 2007 09:27:51 +0100 (CET) Subject: [Lxml-checkins] r48970 - in lxml/trunk: . 
src/lxml Message-ID: <20071123082751.C909581FB@code0.codespeak.net> Author: scoder Date: Fri Nov 23 09:27:51 2007 New Revision: 48970 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/parsertarget.pxi lxml/trunk/src/lxml/xmlparser.pxd Log: new SAX parser framework + TreeBuilder class implementation Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Nov 23 09:27:51 2007 @@ -8,6 +8,8 @@ Features added -------------- +* ElementTree compatible TreeBuilder class. + * Use default prefixes for some common XML namespaces * ``lxml.html.clean.Cleaner`` now allows for a ``host_whitelist``, and @@ -27,7 +29,7 @@ Bugs fixed ---------- -* Well hidden free-while-in-use crash bug in ObjectPath +* Target parser failed to report comments. * In the ``lxml.html`` ``iter_links`` method, links in ```` tags weren't recognized. 
(Note: plugin-specific link parameters Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Nov 23 09:27:51 2007 @@ -31,6 +31,9 @@ cdef object ITER_EMPTY ITER_EMPTY = iter(()) +cdef object EMPTY_READ_ONLY_DICT +EMPTY_READ_ONLY_DICT = python.PyDictProxy_New({}) + # the rules # any libxml C argument/variable is prefixed with c_ # any non-public function/class is prefixed with an underscore Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Fri Nov 23 09:27:51 2007 @@ -376,7 +376,8 @@ ############################################################ cdef class _ParserContext(_ResolverContext) -cdef class _TargetParserContext(_ParserContext) +cdef class _SaxParserContext(_ParserContext) +cdef class _TargetParserContext(_SaxParserContext) cdef class _ParserContext(_ResolverContext): cdef _ErrorLog _error_log @@ -577,39 +578,33 @@ cdef xmlparser.xmlParserCtxt* pctxt if self._parser_context is None: self._parser_context = self._createContext(self._target) - pctxt = self._newParserCtxt() if pctxt is NULL: python.PyErr_NoMemory() - + _initParserContext(self._parser_context, self._resolvers, pctxt) if self._remove_comments: pctxt.sax.comment = NULL if self._remove_pis: pctxt.sax.processingInstruction = NULL # hard switch-off for CDATA nodes => makes them plain text pctxt.sax.cdataBlock = NULL - - _initParserContext(self._parser_context, self._resolvers, pctxt) return self._parser_context cdef _ParserContext _getPushParserContext(self): cdef xmlparser.xmlParserCtxt* pctxt if self._push_parser_context is None: self._push_parser_context = self._createContext(self._target) - pctxt = self._newPushParserCtxt() if pctxt is NULL: python.PyErr_NoMemory() - + _initParserContext( + 
self._push_parser_context, self._resolvers, pctxt) if self._remove_comments: pctxt.sax.comment = NULL if self._remove_pis: pctxt.sax.processingInstruction = NULL # hard switch-off for CDATA nodes => makes them plain text pctxt.sax.cdataBlock = NULL - - _initParserContext( - self._push_parser_context, self._resolvers, pctxt) return self._push_parser_context cdef _ParserContext _createContext(self, target): @@ -992,7 +987,422 @@ if c_data is not NULL and buffer_len > 0: return htmlparser.htmlParseChunk(c_ctxt, c_data, buffer_len, 0) return 0 - + + +############################################################ +## SAX event handler +############################################################ + +ctypedef enum _SaxParserEvents: + SAX_EVENT_START = 1 + SAX_EVENT_END = 2 + SAX_EVENT_DATA = 4 + SAX_EVENT_DOCTYPE = 8 + SAX_EVENT_PI = 16 + SAX_EVENT_COMMENT = 32 + +cdef class _SaxParserTarget: + cdef int _sax_event_filter + cdef int _sax_event_propagate + cdef _handleSaxStart(self, tag, attrib, nsmap): + return None + cdef _handleSaxEnd(self, tag): + return None + cdef int _handleSaxData(self, data) except -1: + return 0 + cdef int _handleSaxDoctype(self, root_tag, public_id, system_id) except -1: + return 0 + cdef _handleSaxPi(self, target, data): + return None + cdef _handleSaxComment(self, comment): + return None + +cdef class _SaxParserContext(_ParserContext): + """This class maps SAX2 events to method calls. 
+ """ + cdef _SaxParserTarget _target + cdef xmlparser.startElementNsSAX2Func _origSaxStart + cdef xmlparser.endElementNsSAX2Func _origSaxEnd + cdef xmlparser.startElementSAXFunc _origSaxStartNoNs + cdef xmlparser.endElementSAXFunc _origSaxEndNoNs + cdef xmlparser.charactersSAXFunc _origSaxData + cdef xmlparser.internalSubsetSAXFunc _origSaxDoctype + cdef xmlparser.commentSAXFunc _origSaxComment + cdef xmlparser.processingInstructionSAXFunc _origSaxPi + + cdef void _setSaxParserTarget(self, _SaxParserTarget target): + self._target = target + + cdef void _initParserContext(self, xmlparser.xmlParserCtxt* c_ctxt): + "wrap original SAX2 callbacks" + cdef xmlparser.xmlSAXHandler* sax + _ParserContext._initParserContext(self, c_ctxt) + sax = c_ctxt.sax + if self._target._sax_event_propagate & SAX_EVENT_START: + # propagate => keep orig callback + self._origSaxStart = sax.startElementNs + self._origSaxStartNoNs = sax.startElement + else: + # otherwise: never call orig callback + self._origSaxStart = sax.startElementNs = NULL + self._origSaxStartNoNs = sax.startElement = NULL + if self._target._sax_event_filter & SAX_EVENT_START: + # intercept => overwrite orig callback + if sax.initialized == xmlparser.XML_SAX2_MAGIC: + sax.startElementNs = _handleSaxStart + sax.startElement = _handleSaxStartNoNs + + if self._target._sax_event_propagate & SAX_EVENT_END: + self._origSaxEnd = sax.endElementNs + self._origSaxEndNoNs = sax.endElement + else: + self._origSaxEnd = sax.endElementNs = NULL + self._origSaxEndNoNs = sax.endElement = NULL + if self._target._sax_event_filter & SAX_EVENT_END: + if sax.initialized == xmlparser.XML_SAX2_MAGIC: + sax.endElementNs = _handleSaxEnd + sax.endElement = _handleSaxEndNoNs + + if self._target._sax_event_propagate & SAX_EVENT_DATA: + self._origSaxData = sax.characters + else: + self._origSaxData = sax.characters = NULL + if self._target._sax_event_filter & SAX_EVENT_DATA: + sax.characters = _handleSaxData + + if self._target._sax_event_propagate 
& SAX_EVENT_DOCTYPE: + self._origSaxDoctype = sax.internalSubset + else: + self._origSaxDoctype = sax.internalSubset = NULL + if self._target._sax_event_filter & SAX_EVENT_DOCTYPE: + sax.internalSubset = _handleSaxDoctype + + if self._target._sax_event_propagate & SAX_EVENT_PI: + self._origSaxPi = sax.processingInstruction + else: + self._origSaxPi = sax.processingInstruction = NULL + if self._target._sax_event_filter & SAX_EVENT_PI: + sax.processingInstruction = _handleSaxPI + + if self._target._sax_event_propagate & SAX_EVENT_COMMENT: + self._origSaxComment = sax.comment + else: + self._origSaxComment = sax.comment = NULL + if self._target._sax_event_filter & SAX_EVENT_COMMENT: + sax.comment = _handleSaxComment + + cdef void _handleSaxException(self, xmlparser.xmlParserCtxt* c_ctxt): + self._store_raised() + if c_ctxt.errNo == xmlerror.XML_ERR_OK: + c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR + c_ctxt.disableSAX = 1 + +cdef void _handleSaxStart(void* ctxt, char* c_localname, char* c_prefix, + char* c_namespace, int c_nb_namespaces, + char** c_namespaces, + int c_nb_attributes, int c_nb_defaulted, + char** c_attributes) with gil: + cdef _SaxParserContext context + cdef xmlparser.xmlParserCtxt* c_ctxt + cdef _Element element + cdef int i + c_ctxt = ctxt + if c_ctxt._private is NULL: + return + context = <_SaxParserContext>c_ctxt._private + if context._origSaxStart is not NULL: + context._origSaxStart(c_ctxt, c_localname, c_prefix, c_namespace, + c_nb_namespaces, c_namespaces, c_nb_attributes, + c_nb_defaulted, c_attributes) + try: + tag = _namespacedNameFromNsName(c_namespace, c_localname) + if c_nb_defaulted > 0: + # only add default attributes if we asked for them + if c_ctxt.loadsubset & xmlparser.XML_COMPLETE_ATTRS == 0: + c_nb_attributes = c_nb_attributes - c_nb_defaulted + if c_nb_attributes == 0: + attrib = EMPTY_READ_ONLY_DICT + else: + attrib = {} + for i from 0 <= i < c_nb_attributes: + name = _namespacedNameFromNsName( + c_attributes[2], 
c_attributes[0]) + if c_attributes[3] is NULL: + value = "" + else: + value = python.PyUnicode_DecodeUTF8( + c_attributes[3], c_attributes[4] - c_attributes[3], + "strict") + python.PyDict_SetItem(attrib, name, value) + c_attributes = c_attributes + 5 + if c_nb_namespaces == 0: + nsmap = EMPTY_READ_ONLY_DICT + else: + nsmap = {} + for i from 0 <= i < c_nb_namespaces: + if c_namespaces[0] is NULL: + prefix = None + else: + prefix = funicode(c_namespaces[0]) + python.PyDict_SetItem( + nsmap, prefix, funicode(c_namespaces[1])) + c_namespaces = c_namespaces + 2 + element = context._target._handleSaxStart(tag, attrib, nsmap) + if element is not None and c_ctxt.input is not NULL: + if c_ctxt.input.line < 65535: + element._c_node.line = c_ctxt.input.line + else: + element._c_node.line = 65535 + except: + context._handleSaxException(c_ctxt) + +cdef void _handleSaxStartNoNs(void* ctxt, char* c_name, + char** c_attributes) with gil: + cdef _SaxParserContext context + cdef xmlparser.xmlParserCtxt* c_ctxt + cdef _Element element + c_ctxt = ctxt + if c_ctxt._private is NULL: + return + context = <_SaxParserContext>c_ctxt._private + if context._origSaxStartNoNs is not NULL: + context._origSaxStartNoNs(c_ctxt, c_name, c_attributes) + try: + tag = funicode(c_name) + if c_attributes is NULL: + attrib = EMPTY_READ_ONLY_DICT + else: + attrib = {} + while c_attributes[0] is not NULL: + name = funicode(c_attributes[0]) + if c_attributes[1] is NULL: + value = "" + else: + value = funicode(c_attributes[1]) + c_attributes = c_attributes + 2 + python.PyDict_SetItem(attrib, name, value) + element = context._target._handleSaxStart( + tag, attrib, EMPTY_READ_ONLY_DICT) + if element is not None and c_ctxt.input is not NULL: + if c_ctxt.input.line < 65535: + element._c_node.line = c_ctxt.input.line + else: + element._c_node.line = 65535 + except: + context._handleSaxException(c_ctxt) + +cdef void _handleSaxEnd(void* ctxt, char* c_localname, char* c_prefix, + char* c_namespace) with gil: + cdef 
_SaxParserContext context + cdef xmlparser.xmlParserCtxt* c_ctxt + c_ctxt = ctxt + if c_ctxt._private is NULL: + return + context = <_SaxParserContext>c_ctxt._private + if context._origSaxEnd is not NULL: + context._origSaxEnd(c_ctxt, c_localname, c_prefix, c_namespace) + try: + tag = _namespacedNameFromNsName(c_namespace, c_localname) + context._target._handleSaxEnd(tag) + except: + context._handleSaxException(c_ctxt) + +cdef void _handleSaxEndNoNs(void* ctxt, char* c_name) with gil: + cdef _SaxParserContext context + cdef xmlparser.xmlParserCtxt* c_ctxt + c_ctxt = ctxt + if c_ctxt._private is NULL: + return + context = <_SaxParserContext>c_ctxt._private + if context._origSaxEndNoNs is not NULL: + context._origSaxEndNoNs(c_ctxt, c_name) + try: + context._target._handleSaxEnd(funicode(c_name)) + except: + context._handleSaxException(c_ctxt) + +cdef void _handleSaxData(void* ctxt, char* c_data, int data_len) with gil: + cdef _SaxParserContext context + cdef xmlparser.xmlParserCtxt* c_ctxt + c_ctxt = ctxt + if c_ctxt._private is NULL: + return + context = <_SaxParserContext>c_ctxt._private + if context._origSaxData is not NULL: + context._origSaxData(c_ctxt, c_data, data_len) + try: + context._target._handleSaxData( + python.PyUnicode_DecodeUTF8(c_data, data_len, NULL)) + except: + context._handleSaxException(c_ctxt) + +cdef void _handleSaxDoctype(void* ctxt, char* c_name, char* c_public, + char* c_system) with gil: + cdef _SaxParserContext context + cdef xmlparser.xmlParserCtxt* c_ctxt + c_ctxt = ctxt + if c_ctxt._private is NULL: + return + context = <_SaxParserContext>c_ctxt._private + if context._origSaxDoctype is not NULL: + context._origSaxDoctype(c_ctxt, c_name, c_public, c_system) + try: + if c_public is not NULL: + public_id = funicode(c_public) + if c_system is not NULL: + system_id = funicode(c_system) + context._target._handleSaxDoctype( + funicode(c_name), public_id, system_id) + except: + context._handleSaxException(c_ctxt) + +cdef void 
_handleSaxPI(void* ctxt, char* c_target, char* c_data) with gil: + cdef _SaxParserContext context + cdef xmlparser.xmlParserCtxt* c_ctxt + c_ctxt = ctxt + if c_ctxt._private is NULL: + return + context = <_SaxParserContext>c_ctxt._private + if context._origSaxPi is not NULL: + context._origSaxPi(c_ctxt, c_target, c_data) + try: + if c_data is not NULL: + data = funicode(c_data) + context._target._handleSaxPi(funicode(c_target), data) + except: + context._handleSaxException(c_ctxt) + +cdef void _handleSaxComment(void* ctxt, char* c_data) with gil: + cdef _SaxParserContext context + cdef xmlparser.xmlParserCtxt* c_ctxt + c_ctxt = ctxt + if c_ctxt._private is NULL: + return + context = <_SaxParserContext>c_ctxt._private + if context._origSaxComment is not NULL: + context._origSaxComment(c_ctxt, c_data) + try: + context._target._handleSaxComment(funicode(c_data)) + except: + context._handleSaxException(c_ctxt) + + +############################################################ +## ET compatible XML tree builder +############################################################ + +cdef class TreeBuilder(_SaxParserTarget): + cdef _BaseParser _parser + cdef object _factory + cdef object _data + cdef object _element_stack + cdef object _element_stack_pop + cdef _Element _last + cdef bint _in_tail + + def __init__(self, *, element_factory=None, parser=None): + self._sax_event_filter = \ + SAX_EVENT_START | SAX_EVENT_END | SAX_EVENT_DATA | \ + SAX_EVENT_PI | SAX_EVENT_COMMENT + self._data = [] # data collector + self._element_stack = [] # element stack + self._element_stack_pop = self._element_stack.pop + self._last = None # last element + self._in_tail = 0 # true if we're after an end tag + self._factory = element_factory + self._parser = parser + + cdef int _flush(self) except -1: + if python.PyList_GET_SIZE(self._data) > 0: + if self._last is not None: + text = "".join(self._data) + if self._in_tail: + assert self._last.tail is None, "internal error (tail)" + self._last.tail = 
text + else: + assert self._last.text is None, "internal error (text)" + self._last.text = text + del self._data[:] + return 0 + + # Python level event handlers + + def close(self): + """Flushes the builder buffers, and returns the toplevel document + element. + """ + assert python.PyList_GET_SIZE(self._element_stack) == 0, "missing end tags" + assert self._last is not None, "missing toplevel element" + return self._last + + def data(self, data): + """Adds text to the current element. The value should be either an + 8-bit string containing ASCII text, or a Unicode string. + """ + self._handleSaxData(data) + + def start(self, tag, attrs, nsmap=None): + "Opens a new element." + if nsmap is None: + nsmap = EMPTY_READ_ONLY_DICT + self._handleSaxStart(tag, attrs, nsmap) + + def end(self, tag): + "Closes the current element." + element = self._handleSaxEnd(tag) + assert self._last.tag == tag,\ + "end tag mismatch (expected %s, got %s)" % ( + self._last.tag, tag) + return element + + def pi(self, target, data): + return self._handleSaxPi(target, data) + + def comment(self, comment): + return self._handleSaxComment(comment) + + # internal SAX event handlers + + cdef _handleSaxStart(self, tag, attrib, nsmap): + self._flush() + if self._factory is not None: + self._last = self._factory(tag, attrib) + if python.PyList_GET_SIZE(self._element_stack) > 0: + _appendChild(self._element_stack[-1], self._last) + elif python.PyList_GET_SIZE(self._element_stack) > 0: + self._last = _makeSubElement( + self._element_stack[-1], tag, None, None, attrib, nsmap, None) + else: + self._last = _makeElement( + tag, NULL, None, self._parser, None, None, attrib, nsmap, None) + python.PyList_Append(self._element_stack, self._last) + self._in_tail = 0 + return self._last + + cdef _handleSaxEnd(self, tag): + self._flush() + self._last = self._element_stack_pop() + self._in_tail = 1 + return self._last + + cdef int _handleSaxData(self, data) except -1: + python.PyList_Append(self._data, data) + + 
cdef _handleSaxPi(self, target, data): + self._flush() + self._last = ProcessingInstruction(target, data) + if python.PyList_GET_SIZE(self._element_stack) > 0: + _appendChild(self._element_stack[-1], self._last) + self._in_tail = 1 + return self._last + + cdef _handleSaxComment(self, comment): + self._flush() + self._last = Comment(comment) + if python.PyList_GET_SIZE(self._element_stack) > 0: + _appendChild(self._element_stack[-1], self._last) + self._in_tail = 1 + return self._last ############################################################ ## XML parser Modified: lxml/trunk/src/lxml/parsertarget.pxi ============================================================================== --- lxml/trunk/src/lxml/parsertarget.pxi (original) +++ lxml/trunk/src/lxml/parsertarget.pxi Fri Nov 23 09:27:51 2007 @@ -1,5 +1,8 @@ # Parser target context (ET target interface) +cdef object inspect_getargspec +from inspect import getargspec as inspect_getargspec + class _TargetParserResult(Exception): # Admittedly, this is somewhat ugly, but it's the easiest way # to push the Python level parser result through the parser @@ -7,191 +10,110 @@ def __init__(self, result): self.result = result -cdef class _TargetParserContext(_ParserContext): - """This class maps SAX2 events to the ET parser target interface. 
- """ - cdef object _target +cdef class _PythonSaxParserTarget(_SaxParserTarget): cdef object _target_start cdef object _target_end cdef object _target_data cdef object _target_doctype cdef object _target_pi cdef object _target_comment + cdef bint _start_takes_nsmap - cdef void _setTarget(self, target): - self._target = target - - cdef _ParserContext _copy(self): - cdef _TargetParserContext context - context = _ParserContext._copy(self) - context._setTarget(self._target) - return context - - cdef void _initParserContext(self, xmlparser.xmlParserCtxt* c_ctxt): - "wrap original SAX2 callbacks" - cdef xmlparser.xmlSAXHandler* sax - _ParserContext._initParserContext(self, c_ctxt) - sax = c_ctxt.sax - cstd.memset(sax, 0, sizeof(xmlparser.xmlSAXHandler)) + def __init__(self, target): + cdef int event_filter + event_filter = 0 + self._start_takes_nsmap = 0 try: - self._target_start = self._target.start + self._target_start = target.start if self._target_start is not None: - sax.startElementNs = _targetSaxStart + event_filter = event_filter | SAX_EVENT_START except AttributeError: pass + else: + try: + arguments = inspect_getargspec(self._target_start) + if len(arguments[0]) > 3 or arguments[1] is not None: + self._start_takes_nsmap = 1 + except TypeError: + pass try: - self._target_end = self._target.end + self._target_end = target.end if self._target_end is not None: - sax.endElementNs = _targetSaxEnd + event_filter = event_filter | SAX_EVENT_END except AttributeError: pass try: - self._target_data = self._target.data + self._target_data = target.data if self._target_data is not None: - sax.characters = _targetSaxData + event_filter = event_filter | SAX_EVENT_DATA except AttributeError: pass try: - self._target_doctype = self._target.doctype + self._target_doctype = target.doctype if self._target_doctype is not None: - sax.internalSubset = _targetSaxDoctype + event_filter = event_filter | SAX_EVENT_DOCTYPE except AttributeError: pass try: - self._target_pi = 
self._target.pi + self._target_pi = target.pi if self._target_pi is not None: - sax.processingInstruction = _targetSaxPI + event_filter = event_filter | SAX_EVENT_PI except AttributeError: pass try: - self._target_comment = self._target.comment + self._target_comment = target.comment if self._target_comment is not None: - sax.startElementNs = _targetSaxStart + event_filter = event_filter | SAX_EVENT_COMMENT except AttributeError: pass + self._sax_event_filter = event_filter + + cdef _handleSaxStart(self, tag, attrib, nsmap): + if self._start_takes_nsmap: + return self._target_start(tag, attrib, nsmap) + else: + return self._target_start(tag, attrib) + + cdef _handleSaxEnd(self, tag): + return self._target_end(tag) + + cdef int _handleSaxData(self, data) except -1: + self._target_data(data) + + cdef int _handleSaxDoctype(self, root_tag, public_id, system_id) except -1: + self._target_doctype(root_tag, public_id, system_id) - sax.initialized = xmlparser.XML_SAX2_MAGIC + cdef _handleSaxPi(self, target, data): + return self._target_pi(target, data) + + cdef _handleSaxComment(self, comment): + return self._target_comment(comment) + + +cdef class _TargetParserContext(_SaxParserContext): + """This class maps SAX2 events to the ET parser target interface. 
+ """ + cdef object _python_target + cdef int _setTarget(self, target) except -1: + self._python_target = target + if not isinstance(target, _SaxParserTarget) or \ + hasattr(target, '__dict__'): + target = _PythonSaxParserTarget(target) + self._setSaxParserTarget(target) + return 0 + + cdef _ParserContext _copy(self): + cdef _TargetParserContext context + context = _ParserContext._copy(self) + context._setTarget(self._python_target) + return context cdef object _handleParseResult(self, _BaseParser parser, xmlDoc* result, filename): self._raise_if_stored() - return self._target.close() + return self._python_target.close() cdef xmlDoc* _handleParseResultDoc(self, _BaseParser parser, xmlDoc* result, filename) except NULL: self._raise_if_stored() - raise _TargetParserResult(self._target.close()) - - -cdef void _targetSaxStart(void* ctxt, char* c_localname, char* c_prefix, - char* c_namespace, int c_nb_namespaces, - char** c_namespaces, - int c_nb_attributes, int c_nb_defaulted, - char** c_attributes) with gil: - cdef _TargetParserContext context - cdef xmlparser.xmlParserCtxt* c_ctxt - cdef int i - c_ctxt = ctxt - if c_ctxt._private is NULL: - return - context = <_TargetParserContext>c_ctxt._private - try: - tag = _namespacedNameFromNsName(c_namespace, c_localname) - if c_nb_defaulted > 0: - # only add default attributes if we asked for them - if c_ctxt.loadsubset & xmlparser.XML_COMPLETE_ATTRS == 0: - c_nb_attributes = c_nb_attributes - c_nb_defaulted - attrib = {} - for i from 0 <= i < c_nb_attributes: - name = _namespacedNameFromNsName( - c_attributes[2], c_attributes[0]) - if c_attributes[3] is NULL: - value = "" - else: - value = python.PyUnicode_DecodeUTF8( - c_attributes[3], c_attributes[4] - c_attributes[3], - "strict") - python.PyDict_SetItem(attrib, name, value) - c_attributes = c_attributes + 5 - context._target_start(tag, attrib) - except: - _handleSaxTargetException(context, c_ctxt) - -cdef void _targetSaxEnd(void* ctxt, char* c_localname, char* c_prefix, 
- char* c_namespace) with gil: - cdef _TargetParserContext context - cdef xmlparser.xmlParserCtxt* c_ctxt - c_ctxt = ctxt - if c_ctxt._private is NULL: - return - context = <_TargetParserContext>c_ctxt._private - try: - tag = _namespacedNameFromNsName(c_namespace, c_localname) - context._target_end(tag) - except: - _handleSaxTargetException(context, c_ctxt) - -cdef void _targetSaxData(void* ctxt, char* c_data, int data_len) with gil: - cdef _TargetParserContext context - cdef xmlparser.xmlParserCtxt* c_ctxt - c_ctxt = ctxt - if c_ctxt._private is NULL: - return - context = <_TargetParserContext>c_ctxt._private - try: - context._target_data( - python.PyUnicode_DecodeUTF8(c_data, data_len, NULL)) - except: - _handleSaxTargetException(context, c_ctxt) - -cdef void _targetSaxDoctype(void* ctxt, char* c_name, char* c_public, - char* c_system) with gil: - cdef _TargetParserContext context - cdef xmlparser.xmlParserCtxt* c_ctxt - c_ctxt = ctxt - if c_ctxt._private is NULL: - return - context = <_TargetParserContext>c_ctxt._private - try: - if c_public is not NULL: - public_id = funicode(c_public) - if c_system is not NULL: - system_id = funicode(c_system) - context._target_doctype( - funicode(c_name), public_id, system_id) - except: - _handleSaxTargetException(context, c_ctxt) - -cdef void _targetSaxPI(void* ctxt, char* c_target, char* c_data) with gil: - cdef _TargetParserContext context - cdef xmlparser.xmlParserCtxt* c_ctxt - c_ctxt = ctxt - if c_ctxt._private is NULL: - return - context = <_TargetParserContext>c_ctxt._private - try: - if c_data is not NULL: - data = funicode(c_data) - context._target_pi(funicode(c_target), data) - except: - _handleSaxTargetException(context, c_ctxt) - -cdef void _targetSaxComment(void* ctxt, char* c_data, int data_len) with gil: - cdef _TargetParserContext context - cdef xmlparser.xmlParserCtxt* c_ctxt - c_ctxt = ctxt - if c_ctxt._private is NULL: - return - context = <_TargetParserContext>c_ctxt._private - try: - 
context._target_comment( - python.PyUnicode_DecodeUTF8(c_data, data_len, NULL)) - except: - _handleSaxTargetException(context, c_ctxt) - -cdef void _handleSaxTargetException(_TargetParserContext context, - xmlparser.xmlParserCtxt* c_ctxt): - context._store_raised() - if c_ctxt.errNo == xmlerror.XML_ERR_OK: - c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR - c_ctxt.disableSAX = 1 + raise _TargetParserResult(self._python_target.close()) Modified: lxml/trunk/src/lxml/xmlparser.pxd ============================================================================== --- lxml/trunk/src/lxml/xmlparser.pxd (original) +++ lxml/trunk/src/lxml/xmlparser.pxd Fri Nov 23 09:27:51 2007 @@ -43,7 +43,9 @@ cdef int XML_SAX2_MAGIC cdef extern from "libxml/tree.h": - ctypedef struct xmlParserInput + ctypedef struct xmlParserInput: + int line + ctypedef struct xmlParserInputBuffer: void* context xmlInputReadCallback readcallback @@ -94,7 +96,8 @@ bint html bint progressive int charset - + xmlParserInput* input + ctypedef enum xmlParserOption: XML_PARSE_RECOVER = 1 # recover on errors XML_PARSE_NOENT = 2 # substitute entities From scoder at codespeak.net Sat Nov 24 08:30:11 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 24 Nov 2007 08:30:11 +0100 (CET) Subject: [Lxml-checkins] r49014 - lxml/trunk/src/lxml/tests Message-ID: <20071124073011.9E763822B@code0.codespeak.net> Author: scoder Date: Sat Nov 24 08:30:09 2007 New Revision: 49014 Modified: lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_io.py Log: test cleanup Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Sat Nov 24 08:30:09 2007 @@ -9,7 +9,7 @@ """ import unittest, doctest -import os, re, shutil, tempfile, copy, operator, gc +import os, re, tempfile, copy, operator, gc from common_imports 
import StringIO, etree, ElementTree, cElementTree from common_imports import fileInTestDir, canonicalize @@ -21,54 +21,16 @@ class ETreeTestCaseBase(unittest.TestCase): etree = None - - def setUp(self): - self._temp_dir = tempfile.mkdtemp() - + def tearDown(self): gc.collect() - shutil.rmtree(self._temp_dir) - def getTestFilePath(self, name): - return os.path.join(self._temp_dir, name) - def test_element(self): for i in range(10): e = self.etree.Element('foo') - - def test_tree(self): - Element = self.etree.Element - ElementTree = self.etree.ElementTree - - element = Element('top') - tree = ElementTree(element) - self.buildNodes(element, 10, 3) - f = open(self.getTestFilePath('testdump.xml'), 'w') - tree.write(f, encoding='UTF-8') - f.close() - f = open(self.getTestFilePath('testdump.xml'), 'r') - tree = ElementTree(file=f) - f.close() - f = open(self.getTestFilePath('testdump2.xml'), 'w') - tree.write(f, encoding='UTF-8') - f.close() - f = open(self.getTestFilePath('testdump.xml'), 'r') - data1 = f.read() - f.close() - f = open(self.getTestFilePath('testdump2.xml'), 'r') - data2 = f.read() - f.close() - self.assertEquals(data1, data2) - - def buildNodes(self, element, children, depth): - Element = self.etree.Element - - if depth == 0: - return - for i in range(children): - new_element = Element('element_%s_%s' % (depth, i)) - self.buildNodes(new_element, children, depth - 1) - element.append(new_element) + self.assertEquals(e.tag, 'foo') + self.assertEquals(e.text, None) + self.assertEquals(e.tail, None) def test_simple(self): Element = self.etree.Element Modified: lxml/trunk/src/lxml/tests/test_io.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_io.py (original) +++ lxml/trunk/src/lxml/tests/test_io.py Sat Nov 24 08:30:09 2007 @@ -5,7 +5,7 @@ """ import unittest -import tempfile, gzip, os, gc +import tempfile, gzip, os, gc, shutil from common_imports import etree, ElementTree, fileInTestDir from 
common_imports import SillyFileLike, LargeFileLike @@ -21,10 +21,49 @@ self.root = self.etree.Element('a') self.root_str = self.etree.tostring(self.root) self.tree = self.etree.ElementTree(self.root) - + self._temp_dir = tempfile.mkdtemp() + def tearDown(self): gc.collect() + shutil.rmtree(self._temp_dir) + + def getTestFilePath(self, name): + return os.path.join(self._temp_dir, name) + def buildNodes(self, element, children, depth): + Element = self.etree.Element + + if depth == 0: + return + for i in range(children): + new_element = Element('element_%s_%s' % (depth, i)) + self.buildNodes(new_element, children, depth - 1) + element.append(new_element) + + def test_tree_io(self): + Element = self.etree.Element + ElementTree = self.etree.ElementTree + + element = Element('top') + tree = ElementTree(element) + self.buildNodes(element, 10, 3) + f = open(self.getTestFilePath('testdump.xml'), 'w') + tree.write(f, encoding='UTF-8') + f.close() + f = open(self.getTestFilePath('testdump.xml'), 'r') + tree = ElementTree(file=f) + f.close() + f = open(self.getTestFilePath('testdump2.xml'), 'w') + tree.write(f, encoding='UTF-8') + f.close() + f = open(self.getTestFilePath('testdump.xml'), 'r') + data1 = f.read() + f.close() + f = open(self.getTestFilePath('testdump2.xml'), 'r') + data2 = f.read() + f.close() + self.assertEquals(data1, data2) + def test_write_filename(self): # (c)ElementTree supports filename strings as write argument From scoder at codespeak.net Sat Nov 24 10:26:38 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 24 Nov 2007 10:26:38 +0100 (CET) Subject: [Lxml-checkins] r49015 - in lxml/trunk: . 
src/lxml Message-ID: <20071124092638.907EA822B@code0.codespeak.net> Author: scoder Date: Sat Nov 24 10:26:37 2007 New Revision: 49015 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/lxml.etree.pyx Log: rich comparison of _Attrib objects Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sat Nov 24 10:26:37 2007 @@ -8,9 +8,11 @@ Features added -------------- +* Rich comparison of ``element.attrib`` proxies. + * ElementTree compatible TreeBuilder class. -* Use default prefixes for some common XML namespaces +* Use default prefixes for some common XML namespaces. * ``lxml.html.clean.Cleaner`` now allows for a ``host_whitelist``, and two overridable methods: ``allow_embedded_url(el, url)`` and the Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Sat Nov 24 10:26:37 2007 @@ -1749,6 +1749,9 @@ tree.xmlFree(c_result) return 1 + def __richcmp__(self, other, int op): + return python.PyObject_RichCompare(dict(self), other_items, op) + cdef class _AttribIterator: """Attribute iterator - for internal use only! 
""" From scoder at codespeak.net Sat Nov 24 10:45:18 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 24 Nov 2007 10:45:18 +0100 (CET) Subject: [Lxml-checkins] r49016 - lxml/trunk/src/lxml Message-ID: <20071124094518.14C6081FE@code0.codespeak.net> Author: scoder Date: Sat Nov 24 10:45:17 2007 New Revision: 49016 Modified: lxml/trunk/src/lxml/lxml.etree.pyx Log: compile fix Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Sat Nov 24 10:45:17 2007 @@ -1750,7 +1750,7 @@ return 1 def __richcmp__(self, other, int op): - return python.PyObject_RichCompare(dict(self), other_items, op) + return python.PyObject_RichCompare(dict(self), other, op) cdef class _AttribIterator: """Attribute iterator - for internal use only! From scoder at codespeak.net Sat Nov 24 10:45:49 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 24 Nov 2007 10:45:49 +0100 (CET) Subject: [Lxml-checkins] r49017 - lxml/trunk/src/lxml Message-ID: <20071124094549.CA69980DD@code0.codespeak.net> Author: scoder Date: Sat Nov 24 10:45:49 2007 New Revision: 49017 Modified: lxml/trunk/src/lxml/parser.pxi Log: fix: return element from SAX method Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Sat Nov 24 10:45:49 2007 @@ -1345,7 +1345,7 @@ "Opens a new element." if nsmap is None: nsmap = EMPTY_READ_ONLY_DICT - self._handleSaxStart(tag, attrs, nsmap) + return self._handleSaxStart(tag, attrs, nsmap) def end(self, tag): "Closes the current element." 
From scoder at codespeak.net Sat Nov 24 10:46:05 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 24 Nov 2007 10:46:05 +0100 (CET) Subject: [Lxml-checkins] r49018 - lxml/trunk/src/lxml/tests Message-ID: <20071124094605.2C6558111@code0.codespeak.net> Author: scoder Date: Sat Nov 24 10:46:04 2007 New Revision: 49018 Modified: lxml/trunk/src/lxml/tests/test_elementtree.py Log: TreeBuilder tests Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Sat Nov 24 10:46:04 2007 @@ -3036,7 +3036,6 @@ def test_parser_target_attrib(self): assertEquals = self.assertEquals - assertFalse = self.assertFalse events = [] class Target(object): @@ -3059,9 +3058,6 @@ events) def test_parser_target_data(self): - assertEquals = self.assertEquals - assertFalse = self.assertFalse - events = [] class Target(object): def start(self, tag, attrib): @@ -3083,6 +3079,39 @@ "end-sub", "data-B", "end-root"], events) + def test_treebuilder(self): + builder = self.etree.TreeBuilder() + el = builder.start("root", {'a':'A', 'b':'B'}) + self.assertEquals("root", el.tag) + self.assertEquals({'a':'A', 'b':'B'}, el.attrib) + builder.data("ROOTTEXT") + el = builder.start("child", {'x':'X', 'y':'Y'}) + self.assertEquals("child", el.tag) + self.assertEquals({'x':'X', 'y':'Y'}, el.attrib) + builder.data("CHILDTEXT") + el = builder.end("child") + self.assertEquals("child", el.tag) + self.assertEquals({'x':'X', 'y':'Y'}, el.attrib) + self.assertEquals("CHILDTEXT", el.text) + self.assertEquals(None, el.tail) + builder.data("CHILDTAIL") + root = builder.end("root") + + self.assertEquals("root", root.tag) + self.assertEquals("ROOTTEXT", root.text) + self.assertEquals("CHILDTEXT", root[0].text) + self.assertEquals("CHILDTAIL", root[0].tail) + + def test_treebuilder_target(self): + parser = 
self.etree.XMLParser(target=self.etree.TreeBuilder()) + parser.feed('<root>ROOTTEXT<child>CHILDTEXT</child>CHILDTAIL</root>') + root = parser.close() + + self.assertEquals("root", root.tag) + self.assertEquals("ROOTTEXT", root.text) + self.assertEquals("CHILDTEXT", root[0].text) + self.assertEquals("CHILDTAIL", root[0].tail) + # helper methods def _writeElement(self, element, encoding='us-ascii'): From scoder at codespeak.net Sat Nov 24 11:00:38 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 24 Nov 2007 11:00:38 +0100 (CET) Subject: [Lxml-checkins] r49019 - lxml/trunk/src/lxml/html/tests Message-ID: <20071124100038.BF6E0822B@code0.codespeak.net> Author: scoder Date: Sat Nov 24 11:00:38 2007 New Revision: 49019 Added: lxml/trunk/src/lxml/html/tests/test_clean_embed.txt Modified: lxml/trunk/src/lxml/html/tests/test_clean.py lxml/trunk/src/lxml/html/tests/test_clean.txt Log: split of 'clean embed' test case which fails in libxml2 2.6.29/30 Modified: lxml/trunk/src/lxml/html/tests/test_clean.py ============================================================================== --- lxml/trunk/src/lxml/html/tests/test_clean.py (original) +++ lxml/trunk/src/lxml/html/tests/test_clean.py Sat Nov 24 11:00:38 2007 @@ -1,7 +1,10 @@ import unittest from lxml.tests.common_imports import doctest +from lxml.etree import LIBXML_VERSION def test_suite(): suite = unittest.TestSuite() suite.addTests([doctest.DocFileSuite('test_clean.txt')]) + if LIBXML_VERSION <= (2,6,28): + suite.addTests([doctest.DocFileSuite('test_clean_embed.txt')]) return suite Modified: lxml/trunk/src/lxml/html/tests/test_clean.txt ============================================================================== --- lxml/trunk/src/lxml/html/tests/test_clean.txt (original) +++ lxml/trunk/src/lxml/html/tests/test_clean.txt Sat Nov 24 11:00:38 2007 @@ -117,29 +117,3 @@ - ->>> doc_embed = '''
-... -... -... -... -...
''' ->>> print tostring(fromstring(doc_embed)) -
- - - - -
->>> print Cleaner().clean_html(doc_embed) -
-
->>> print Cleaner(host_whitelist=['www.youtube.com']).clean_html(doc_embed) -
- -
->>> print Cleaner(host_whitelist=['www.youtube.com'], whitelist_tags=None).clean_html(doc_embed) -
- - -
Added: lxml/trunk/src/lxml/html/tests/test_clean_embed.txt ============================================================================== --- (empty file) +++ lxml/trunk/src/lxml/html/tests/test_clean_embed.txt Sat Nov 24 11:00:38 2007 @@ -0,0 +1,32 @@ +THIS FAILS IN libxml2 2.6.29 AND 2.6.30 !! + + +>>> from lxml.html import fromstring, tostring +>>> from lxml.html.clean import clean, clean_html, Cleaner +>>> from lxml.html import usedoctest + +>>> doc_embed = '''
+... +... +... +... +...
''' +>>> print tostring(fromstring(doc_embed)) +
+ + + + +
+>>> print Cleaner().clean_html(doc_embed) +
+
+>>> print Cleaner(host_whitelist=['www.youtube.com']).clean_html(doc_embed) +
+ +
+>>> print Cleaner(host_whitelist=['www.youtube.com'], whitelist_tags=None).clean_html(doc_embed) +
+ + +
From scoder at codespeak.net Sat Nov 24 12:08:23 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 24 Nov 2007 12:08:23 +0100 (CET) Subject: [Lxml-checkins] r49023 - in lxml/trunk: . doc Message-ID: <20071124110823.8F38981FE@code0.codespeak.net> Author: scoder Date: Sat Nov 24 12:08:23 2007 New Revision: 49023 Modified: lxml/trunk/CHANGES.txt lxml/trunk/doc/main.txt Log: 2.0alpha5 Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sat Nov 24 12:08:23 2007 @@ -2,8 +2,8 @@ lxml changelog ============== -Under development -================= +2.0alpha5 (2007-11-24) +====================== Features added -------------- Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Sat Nov 24 12:08:23 2007 @@ -138,8 +138,8 @@ .. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/ .. _`this key`: pubkey.asc -The latest version is `lxml 2.0alpha4`_, released 2007-10-07 -(`changes for 2.0alpha4`_). `Older versions`_ are listed below. +The latest version is `lxml 2.0alpha5`_, released 2007-11-24 +(`changes for 2.0alpha5`_). `Older versions`_ are listed below. .. _`Older versions`: #old-versions @@ -199,6 +199,8 @@ Old Versions ------------ +* `lxml 2.0alpha4`_, released 2007-10-07 (`changes for 2.0alpha4`_) + * `lxml 2.0alpha3`_, released 2007-09-26 (`changes for 2.0alpha3`_) * `lxml 2.0alpha2`_, released 2007-09-15 (`changes for 2.0alpha2`_) @@ -255,6 +257,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.0alpha5`: lxml-2.0alpha5.tgz .. _`lxml 2.0alpha4`: lxml-2.0alpha4.tgz .. _`lxml 2.0alpha3`: lxml-2.0alpha3.tgz .. _`lxml 2.0alpha2`: lxml-2.0alpha2.tgz @@ -284,6 +287,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.0alpha5`: changes-2.0alpha5.html .. 
_`changes for 2.0alpha4`: changes-2.0alpha4.html .. _`changes for 2.0alpha3`: changes-2.0alpha3.html .. _`changes for 2.0alpha2`: changes-2.0alpha2.html From scoder at codespeak.net Sun Nov 25 12:19:14 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 25 Nov 2007 12:19:14 +0100 (CET) Subject: [Lxml-checkins] r49066 - lxml/trunk Message-ID: <20071125111914.DC8978150@code0.codespeak.net> Author: scoder Date: Sun Nov 25 12:19:13 2007 New Revision: 49066 Modified: lxml/trunk/MANIFEST.in Log: fixed source archive content Modified: lxml/trunk/MANIFEST.in ============================================================================== --- lxml/trunk/MANIFEST.in (original) +++ lxml/trunk/MANIFEST.in Sun Nov 25 12:19:13 2007 @@ -5,7 +5,8 @@ include MANIFEST.in version.txt include CHANGES.txt CREDITS.txt INSTALL.txt LICENSES.txt README.txt TODO.txt recursive-include src *.pyx *.pxd *.pxi *.py -recursive-include src/lxml etree.c objectify.c pyclasslookup.c etree.h etree_defs.h +recursive-include src/lxml lxml.etree.c lxml.objectify.c lxml.pyclasslookup.c +recursive-include src/lxml lxml.etree.h lxml.etree_api.h etree_defs.h recursive-include src/lxml/tests *.rng *.xslt *.xml *.dtd recursive-include benchmark *.py recursive-include doc *.txt *.html *.css *.xml *.mgp pubkey.asc tagpython.png From scoder at codespeak.net Sun Nov 25 12:20:49 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 25 Nov 2007 12:20:49 +0100 (CET) Subject: [Lxml-checkins] r49067 - lxml/tag/lxml-2.0alpha5 Message-ID: <20071125112049.0E54781B2@code0.codespeak.net> Author: scoder Date: Sun Nov 25 12:20:48 2007 New Revision: 49067 Added: lxml/tag/lxml-2.0alpha5/ - copied from r49066, lxml/trunk/ Log: 2.0alpha5 From scoder at codespeak.net Sun Nov 25 12:27:22 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 25 Nov 2007 12:27:22 +0100 (CET) Subject: [Lxml-checkins] r49068 - lxml/tag/lxml-2.0alpha1 Message-ID: 
<20071125112722.5E52481B2@code0.codespeak.net> Author: scoder Date: Sun Nov 25 12:27:22 2007 New Revision: 49068 Added: lxml/tag/lxml-2.0alpha1/ - copied from r46242, lxml/trunk/ Log: lxml-2.0alpha1 From scoder at codespeak.net Sun Nov 25 12:30:18 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 25 Nov 2007 12:30:18 +0100 (CET) Subject: [Lxml-checkins] r49069 - lxml/tag/lxml-2.0alpha2 Message-ID: <20071125113018.017BE81B2@code0.codespeak.net> Author: scoder Date: Sun Nov 25 12:30:18 2007 New Revision: 49069 Added: lxml/tag/lxml-2.0alpha2/ - copied from r46666, lxml/trunk/ Log: lxml-2.0alpha2 From scoder at codespeak.net Sun Nov 25 13:01:18 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 25 Nov 2007 13:01:18 +0100 (CET) Subject: [Lxml-checkins] r49071 - lxml/tag/lxml-2.0alpha3 Message-ID: <20071125120118.CF942817F@code0.codespeak.net> Author: scoder Date: Sun Nov 25 13:01:18 2007 New Revision: 49071 Added: lxml/tag/lxml-2.0alpha3/ - copied from r46898, lxml/trunk/ Log: lxml-2.0alpha3 From scoder at codespeak.net Sun Nov 25 13:02:18 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 25 Nov 2007 13:02:18 +0100 (CET) Subject: [Lxml-checkins] r49072 - lxml/tag/lxml-2.0alpha4 Message-ID: <20071125120218.734DD817F@code0.codespeak.net> Author: scoder Date: Sun Nov 25 13:02:18 2007 New Revision: 49072 Added: lxml/tag/lxml-2.0alpha4/ - copied from r47270, lxml/trunk/ Log: lxml-2.0alpha4 From scoder at codespeak.net Sun Nov 25 17:15:10 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 25 Nov 2007 17:15:10 +0100 (CET) Subject: [Lxml-checkins] r49094 - lxml/tag/lxml-1.3.6 Message-ID: <20071125161510.475F281BC@code0.codespeak.net> Author: scoder Date: Sun Nov 25 17:15:09 2007 New Revision: 49094 Added: lxml/tag/lxml-1.3.6/ - copied from r48178, lxml/branch/lxml-1.3/ Log: lxml 1.3.6 From lxml-checkins at codespeak.net Wed Nov 28 11:05:54 2007 From: lxml-checkins at codespeak.net (VIAGRA ® 
Official Site) Date: Wed, 28 Nov 2007 11:05:54 +0100 (CET) Subject: [Lxml-checkins] November 73% OFF Message-ID: <20071128140640.4532.qmail@dsl88.242-33153.ttnet.net.tr> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20071128/42966ebc/attachment.htm