From ianb at codespeak.net Tue Oct 2 00:24:09 2007 From: ianb at codespeak.net (ianb at codespeak.net) Date: Tue, 2 Oct 2007 00:24:09 +0200 (CEST) Subject: [Lxml-checkins] r47079 - lxml/trunk/src/lxml/html Message-ID: <20071001222409.DD3168111@code0.codespeak.net> Author: ianb Date: Tue Oct 2 00:24:07 2007 New Revision: 47079 Modified: lxml/trunk/src/lxml/html/setmixin.py Log: fix the in-place operators in SetMixin Modified: lxml/trunk/src/lxml/html/setmixin.py ============================================================================== --- lxml/trunk/src/lxml/html/setmixin.py (original) +++ lxml/trunk/src/lxml/html/setmixin.py Tue Oct 2 00:24:07 2007 @@ -71,21 +71,27 @@ for item in other: self.add(item) - __ior__ = update + def __ior__(self, other): + self.update(other) + return self def intersection_update(self, other): for item in self: if item not in other: self.remove(item) - __iand__ = intersection_update + def __iand__(self, other): + self.intersection_update(other) + return self def difference_update(self, other): for item in other: if item in self: self.remove(item) - __isub__ = difference_update + def __isub__(self, other): + self.difference_update(other) + return self def symmetric_difference_update(self, other): for item in other: @@ -94,7 +100,9 @@ else: self.add(item) - __ixor__ = symmetric_difference_update + def __ixor__(self, other): + self.symmetric_difference_update(other) + return self def discard(self, item): try: From lxml-checkins at codespeak.net Thu Oct 4 23:42:44 2007 From: lxml-checkins at codespeak.net (VIAGRA ® Official Site) Date: Thu, 4 Oct 2007 23:42:44 +0200 (CEST) Subject: [Lxml-checkins] October 75% OFF Message-ID: <20071004034331.7317.qmail@host75.201-252-1.telecom.net.ar> An HTML attachment was scrubbed... 
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20071004/e946f154/attachment.htm From lxml-checkins at codespeak.net Fri Oct 5 17:54:08 2007 From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net) Date: Fri, 5 Oct 2007 17:54:08 +0200 (CEST) Subject: [Lxml-checkins] Check out what's new Message-ID: <74047053679.3673357686918@delivery.net> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20071005/e44a8a2b/attachment.htm From scoder at codespeak.net Sun Oct 7 06:30:36 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 7 Oct 2007 06:30:36 +0200 (CEST) Subject: [Lxml-checkins] r47250 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20071007043036.BD7A380FB@code0.codespeak.net> Author: scoder Date: Sun Oct 7 06:30:34 2007 New Revision: 47250 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tests/test_htmlparser.py lxml/trunk/src/lxml/tests/test_unicode.py Log: let tag name validation distinguish HTML/XML tags based on the related parser, allow ':' in HTML tags Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Oct 7 06:30:34 2007 @@ -16,9 +16,11 @@ Other changes ------------- -* lxml.etree no longer validates unicode characters in tag names to - avoid rejecting HTML tags. Only special characters like ':' and '>' - are rejected. +* Tag name validation in lxml.etree (and lxml.html) now distinguishes + between HTML tags and XML tags based on the parser that was used to + parse or create them. HTML tags no longer reject any non-ASCII + characters in tag names but only spaces and the special characters + '<>&/'. 
2.0alpha3 (2007-09-26) Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Sun Oct 7 06:30:34 2007 @@ -99,7 +99,10 @@ """ cdef xmlNode* c_node ns_utf, name_utf = _getNsTag(tag) - _tagValidOrRaise(name_utf) + if parser is not None and parser._for_html: + _htmlTagValidOrRaise(name_utf) + else: + _tagValidOrRaise(name_utf) if doc is not None: c_doc = doc._c_doc elif c_doc is NULL: @@ -147,16 +150,22 @@ If 'c_doc' is also NULL, a new xmlDoc will be created. """ + cdef _BaseParser parser cdef _Document doc cdef xmlNode* c_node cdef xmlDoc* c_doc if parent is None or parent._doc is None: return None ns_utf, name_utf = _getNsTag(tag) - _tagValidOrRaise(name_utf) doc = parent._doc c_doc = doc._c_doc + parser = doc._parser + if parser is not None and parser._for_html: + _htmlTagValidOrRaise(name_utf) + else: + _tagValidOrRaise(name_utf) + c_node = _createElement(c_doc, name_utf) if c_node is NULL: python.PyErr_NoMemory() @@ -175,6 +184,7 @@ cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, extra): """Initialise the attributes of an element node. 
""" + cdef bint is_html cdef xmlNs* c_ns # 'extra' is not checked here (expected to be a keyword dict) if attrib is not None and not hasattr(attrib, 'items'): @@ -185,9 +195,11 @@ else: attrib.update(extra) if attrib: + is_html = doc._parser._for_html for name, value in attrib.items(): attr_ns_utf, attr_name_utf = _getNsTag(name) - _attributeValidOrRaise(attr_name_utf) + if not is_html: + _attributeValidOrRaise(attr_name_utf) value_utf = _utf8(value) if attr_ns_utf is None: tree.xmlNewProp(c_node, _cstr(attr_name_utf), _cstr(value_utf)) @@ -242,7 +254,8 @@ cdef char* c_value cdef char* c_tag ns, tag = _getNsTag(key) - _attributeValidOrRaise(tag) + if not element._doc._parser._for_html: + _attributeValidOrRaise(tag) c_tag = _cstr(tag) if isinstance(value, QName): value = _resolveQNameText(element, value) @@ -790,13 +803,17 @@ cdef int _pyXmlNameIsValid(name_utf8): return _xmlNameIsValid(_cstr(name_utf8)) +cdef int _pyHtmlNameIsValid(name_utf8): + return _htmlNameIsValid(_cstr(name_utf8)) + cdef int _xmlNameIsValid(char* c_name): - #return tree.xmlValidateNCName(c_name, 0) == 0 + return tree.xmlValidateNCName(c_name, 0) == 0 + +cdef int _htmlNameIsValid(char* c_name): if c_name is NULL or c_name[0] == c'\0': return 0 while c_name[0] != c'\0': - if c_name[0] == c':' or \ - c_name[0] == c'&' or \ + if c_name[0] == c'&' or \ c_name[0] == c'<' or \ c_name[0] == c'>' or \ c_name[0] == c'/' or \ @@ -815,6 +832,12 @@ python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', 'strict') return 0 +cdef int _htmlTagValidOrRaise(tag_utf) except -1: + if not _pyHtmlNameIsValid(tag_utf): + raise ValueError, "Invalid HTML tag name %r" % \ + python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', 'strict') + return 0 + cdef int _attributeValidOrRaise(name_utf) except -1: if not _pyXmlNameIsValid(name_utf): raise ValueError, "Invalid attribute name %r" % \ Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- 
lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Oct 7 06:30:34 2007 @@ -707,8 +707,13 @@ return self._tag def __set__(self, value): + cdef _BaseParser parser ns, name = _getNsTag(value) - _tagValidOrRaise(name) + parser = self._doc._parser + if parser is not None and parser._for_html: + _htmlTagValidOrRaise(name) + else: + _tagValidOrRaise(name) self._tag = value tree.xmlNodeSetName(self._c_node, _cstr(name)) if ns is None: Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_htmlparser.py (original) +++ lxml/trunk/src/lxml/tests/test_htmlparser.py Sun Oct 7 06:30:34 2007 @@ -39,6 +39,74 @@ self.assertRaises(self.etree.XMLSyntaxError, parse, f, parser) + def test_html_element_name_empty(self): + parser = self.etree.HTMLParser() + Element = parser.makeelement + + el = Element('name') + self.assertRaises(ValueError, Element, '{}') + self.assertRaises(ValueError, setattr, el, 'tag', '{}') + + self.assertRaises(ValueError, Element, '{test}') + self.assertRaises(ValueError, setattr, el, 'tag', '{test}') + + def test_html_element_name_colon(self): + parser = self.etree.HTMLParser() + Element = parser.makeelement + + pname = Element('p:name') + self.assertEquals(pname.tag, 'p:name') + + pname = Element('{test}p:name') + self.assertEquals(pname.tag, '{test}p:name') + + pname = Element('name') + pname.tag = 'p:name' + self.assertEquals(pname.tag, 'p:name') + + def test_html_element_name_space(self): + parser = self.etree.HTMLParser() + Element = parser.makeelement + + self.assertRaises(ValueError, Element, ' name ') + self.assertRaises(ValueError, Element, 'na me') + self.assertRaises(ValueError, Element, '{test} name') + + el = Element('name') + self.assertRaises(ValueError, setattr, el, 'tag', ' name ') + + def test_html_subelement_name_empty(self): + parser = self.etree.HTMLParser() + Element = parser.makeelement + + 
SubElement = self.etree.SubElement + + el = Element('name') + self.assertRaises(ValueError, SubElement, el, '{}') + self.assertRaises(ValueError, SubElement, el, '{test}') + + def test_html_subelement_name_colon(self): + parser = self.etree.HTMLParser() + Element = parser.makeelement + SubElement = self.etree.SubElement + + el = Element('name') + pname = SubElement(el, 'p:name') + self.assertEquals(pname.tag, 'p:name') + + pname = SubElement(el, '{test}p:name') + self.assertEquals(pname.tag, '{test}p:name') + + def test_html_subelement_name_space(self): + parser = self.etree.HTMLParser() + Element = parser.makeelement + SubElement = self.etree.SubElement + + el = Element('name') + self.assertRaises(ValueError, SubElement, el, ' name ') + self.assertRaises(ValueError, SubElement, el, 'na me') + self.assertRaises(ValueError, SubElement, el, '{test} name') + def test_module_parse_html_norecover(self): parser = self.etree.HTMLParser(recover=False) parse = self.etree.parse Modified: lxml/trunk/src/lxml/tests/test_unicode.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_unicode.py (original) +++ lxml/trunk/src/lxml/tests/test_unicode.py Sun Oct 7 06:30:34 2007 @@ -5,9 +5,9 @@ ascii_uni = u'a' -# klingon = u"\uF8D2" # not valid for XML names +klingon = u"\uF8D2" # not valid for XML names -invalid_tag = "\u0680:\u3120" +invalid_tag = "test" + klingon uni = u'?\u0680\u3120' # some non-ASCII characters From scoder at codespeak.net Sun Oct 7 06:32:49 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 7 Oct 2007 06:32:49 +0200 (CEST) Subject: [Lxml-checkins] r47251 - in lxml/trunk: . 
doc Message-ID: <20071007043249.5136B80FB@code0.codespeak.net> Author: scoder Date: Sun Oct 7 06:32:49 2007 New Revision: 47251 Modified: lxml/trunk/CHANGES.txt lxml/trunk/doc/main.txt lxml/trunk/version.txt Log: 2.0alpha4 Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Oct 7 06:32:49 2007 @@ -2,8 +2,9 @@ lxml changelog ============== -Under development -================= + +2.0alpha3 (2007-10-07) +====================== Features added -------------- Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Sun Oct 7 06:32:49 2007 @@ -138,8 +138,8 @@ .. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/ .. _`this key`: pubkey.asc -The latest version is `lxml 2.0alpha3`_, released 2007-09-26 -(`changes for 2.0alpha3`_). `Older versions`_ are listed below. +The latest version is `lxml 2.0alpha4`_, released 2007-10-07 +(`changes for 2.0alpha4`_). `Older versions`_ are listed below. .. 
_`Older versions`: #old-versions @@ -199,6 +199,8 @@ Old Versions ------------ +* `lxml 2.0alpha3`_, released 2007-09-26 (`changes for 2.0alpha3`_) + * `lxml 2.0alpha2`_, released 2007-09-15 (`changes for 2.0alpha2`_) * `lxml 2.0alpha1`_, released 2007-09-02 (`changes for 2.0alpha1`_) Modified: lxml/trunk/version.txt ============================================================================== --- lxml/trunk/version.txt (original) +++ lxml/trunk/version.txt Sun Oct 7 06:32:49 2007 @@ -1 +1 @@ -2.0alpha3 +2.0alpha4 From scoder at codespeak.net Sun Oct 7 06:34:18 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 7 Oct 2007 06:34:18 +0200 (CEST) Subject: [Lxml-checkins] r47252 - lxml/trunk/doc Message-ID: <20071007043418.0354580FB@code0.codespeak.net> Author: scoder Date: Sun Oct 7 06:34:18 2007 New Revision: 47252 Modified: lxml/trunk/doc/main.txt Log: 2.0alpha4 Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Sun Oct 7 06:34:18 2007 @@ -251,6 +251,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.0alpha4`: lxml-2.0alpha4.tgz .. _`lxml 2.0alpha3`: lxml-2.0alpha3.tgz .. _`lxml 2.0alpha2`: lxml-2.0alpha2.tgz .. _`lxml 2.0alpha1`: lxml-2.0alpha1.tgz @@ -277,6 +278,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.0alpha4`: changes-2.0alpha4.html .. _`changes for 2.0alpha3`: changes-2.0alpha3.html .. _`changes for 2.0alpha2`: changes-2.0alpha2.html .. 
_`changes for 2.0alpha1`: changes-2.0alpha1.html From scoder at codespeak.net Sun Oct 7 06:34:44 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 7 Oct 2007 06:34:44 +0200 (CEST) Subject: [Lxml-checkins] r47253 - lxml/trunk Message-ID: <20071007043444.51E6E80FB@code0.codespeak.net> Author: scoder Date: Sun Oct 7 06:34:44 2007 New Revision: 47253 Modified: lxml/trunk/CHANGES.txt Log: 2.0alpha4 Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Oct 7 06:34:44 2007 @@ -3,7 +3,7 @@ ============== -2.0alpha3 (2007-10-07) +2.0alpha4 (2007-10-07) ====================== Features added From scoder at codespeak.net Sun Oct 7 07:03:33 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 7 Oct 2007 07:03:33 +0200 (CEST) Subject: [Lxml-checkins] r47254 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20071007050333.809498113@code0.codespeak.net> Author: scoder Date: Sun Oct 7 07:03:32 2007 New Revision: 47254 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/tests/test_etree.py lxml/trunk/src/lxml/tests/test_htmlparser.py Log: added " and ' to the list of invalid HTML tag characters Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Oct 7 07:03:32 2007 @@ -21,7 +21,7 @@ between HTML tags and XML tags based on the parser that was used to parse or create them. HTML tags no longer reject any non-ASCII characters in tag names but only spaces and the special characters - '<>&/'. + ``<>&/"'``. 
2.0alpha3 (2007-09-26) Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Sun Oct 7 07:03:32 2007 @@ -817,6 +817,8 @@ c_name[0] == c'<' or \ c_name[0] == c'>' or \ c_name[0] == c'/' or \ + c_name[0] == c'"' or \ + c_name[0] == c"'" or \ c_name[0] == c'\x09' or \ c_name[0] == c'\x0A' or \ c_name[0] == c'\x0B' or \ Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Sun Oct 7 07:03:32 2007 @@ -74,6 +74,18 @@ el = Element('name') self.assertRaises(ValueError, setattr, el, 'tag', 'p:name') + def test_element_name_quote(self): + Element = self.etree.Element + self.assertRaises(ValueError, Element, "p'name") + self.assertRaises(ValueError, Element, 'p"name') + + self.assertRaises(ValueError, Element, "{test}p'name") + self.assertRaises(ValueError, Element, '{test}p"name') + + el = Element('name') + self.assertRaises(ValueError, setattr, el, 'tag', "p'name") + self.assertRaises(ValueError, setattr, el, 'tag', 'p"name') + def test_element_name_space(self): Element = self.etree.Element self.assertRaises(ValueError, Element, ' name ') @@ -99,6 +111,17 @@ self.assertRaises(ValueError, SubElement, el, 'p:name') self.assertRaises(ValueError, SubElement, el, '{test}p:name') + def test_subelement_name_quote(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + el = Element('name') + self.assertRaises(ValueError, SubElement, el, "p'name") + self.assertRaises(ValueError, SubElement, el, "{test}p'name") + + self.assertRaises(ValueError, SubElement, el, 'p"name') + self.assertRaises(ValueError, SubElement, el, '{test}p"name') + def test_subelement_name_space(self): Element = self.etree.Element SubElement = self.etree.SubElement 
Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_htmlparser.py (original) +++ lxml/trunk/src/lxml/tests/test_htmlparser.py Sun Oct 7 07:03:32 2007 @@ -64,6 +64,20 @@ pname.tag = 'p:name' self.assertEquals(pname.tag, 'p:name') + def test_html_element_name_quote(self): + parser = self.etree.HTMLParser() + Element = parser.makeelement + + self.assertRaises(ValueError, Element, 'p"name') + self.assertRaises(ValueError, Element, "na'me") + self.assertRaises(ValueError, Element, '{test}"name') + self.assertRaises(ValueError, Element, "{test}name'") + + el = Element('name') + self.assertRaises(ValueError, setattr, el, 'tag', "pname'") + self.assertRaises(ValueError, setattr, el, 'tag', '"pname') + self.assertEquals(el.tag, "name") + def test_html_element_name_space(self): parser = self.etree.HTMLParser() Element = parser.makeelement @@ -74,6 +88,7 @@ el = Element('name') self.assertRaises(ValueError, setattr, el, 'tag', ' name ') + self.assertEquals(el.tag, "name") def test_html_subelement_name_empty(self): parser = self.etree.HTMLParser() @@ -97,6 +112,17 @@ pname = SubElement(el, '{test}p:name') self.assertEquals(pname.tag, '{test}p:name') + def test_html_subelement_name_quote(self): + parser = self.etree.HTMLParser() + Element = parser.makeelement + SubElement = self.etree.SubElement + + el = Element('name') + self.assertRaises(ValueError, SubElement, el, "name'") + self.assertRaises(ValueError, SubElement, el, 'na"me') + self.assertRaises(ValueError, SubElement, el, "{test}na'me") + self.assertRaises(ValueError, SubElement, el, '{test}"name') + def test_html_subelement_name_space(self): parser = self.etree.HTMLParser() Element = parser.makeelement From scoder at codespeak.net Sun Oct 7 22:12:25 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 7 Oct 2007 22:12:25 +0200 (CEST) Subject: [Lxml-checkins] r47270 - lxml/trunk/doc 
Message-ID: <20071007201225.4C1318188@code0.codespeak.net> Author: scoder Date: Sun Oct 7 22:12:23 2007 New Revision: 47270 Modified: lxml/trunk/doc/lxml2.txt Log: docs Modified: lxml/trunk/doc/lxml2.txt ============================================================================== --- lxml/trunk/doc/lxml2.txt (original) +++ lxml/trunk/doc/lxml2.txt Sun Oct 7 22:12:23 2007 @@ -78,7 +78,7 @@ type annotation on serialisation, you can use the ``deannotate()`` function. * The C-API function ``findOrBuildNodeNs()`` was replaced by the more generic - ``findOrBuildNodeNsPrefix()`` + ``findOrBuildNodeNsPrefix()`` that accepts an additional default prefix. Enhancements From scoder at codespeak.net Mon Oct 8 21:57:08 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 8 Oct 2007 21:57:08 +0200 (CEST) Subject: [Lxml-checkins] r47317 - lxml/trunk/src/lxml Message-ID: <20071008195708.A49C3815A@code0.codespeak.net> Author: scoder Date: Mon Oct 8 21:57:08 2007 New Revision: 47317 Modified: lxml/trunk/src/lxml/apihelpers.pxi Log: handle exceptions in subelement creation Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Mon Oct 8 21:57:08 2007 @@ -171,15 +171,24 @@ python.PyErr_NoMemory() tree.xmlAddChild(parent._c_node, c_node) - if text is not None: - _setNodeText(c_node, text) - if tail is not None: - _setTailText(c_node, tail) - - # add namespaces to node if necessary - doc._setNodeNamespaces(c_node, ns_utf, nsmap) - _initNodeAttributes(c_node, doc, attrib, extra_attrs) - return _elementFactory(doc, c_node) + try: + if text is not None: + _setNodeText(c_node, text) + if tail is not None: + _setTailText(c_node, tail) + + # add namespaces to node if necessary + doc._setNodeNamespaces(c_node, ns_utf, nsmap) + _initNodeAttributes(c_node, doc, attrib, extra_attrs) + return _elementFactory(doc, c_node) + 
except: + # free allocated c_node/c_doc unless Python does it for us + if c_node.doc is not c_doc: + # node not yet in document => will not be freed by document + if tail is not None: + _removeText(c_node.next) # tail + tree.xmlFreeNode(c_node) + raise cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, extra): """Initialise the attributes of an element node. From scoder at codespeak.net Mon Oct 8 22:00:20 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 8 Oct 2007 22:00:20 +0200 (CEST) Subject: [Lxml-checkins] r47318 - in lxml/trunk: . src/lxml Message-ID: <20071008200020.C380F80C9@code0.codespeak.net> Author: scoder Date: Mon Oct 8 22:00:20 2007 New Revision: 47318 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/etree.pyx Log: make namespace prefix counter a Python long to avoid crashes by counter overflow Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Oct 8 22:00:20 2007 @@ -2,6 +2,21 @@ lxml changelog ============== +Under development +================= + +Features added +-------------- + +Bugs fixed +---------- + +* lxml.etree could crash when adding more than 10000 namespaces to a + document + +Other changes +------------- + 2.0alpha4 (2007-10-07) ====================== Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Mon Oct 8 22:00:20 2007 @@ -229,7 +229,7 @@ When instances of this class are garbage collected, the libxml document is cleaned up. 
""" - cdef int _ns_counter + cdef object _ns_counter cdef xmlDoc* _c_doc cdef _BaseParser _parser @@ -295,7 +295,7 @@ return self._c_doc.URL cdef buildNewPrefix(self): - ns = python.PyString_FromFormat("ns%d", self._ns_counter) + ns = "ns%d" % self._ns_counter self._ns_counter = self._ns_counter + 1 return ns @@ -304,7 +304,6 @@ """Get or create namespace structure for a node. Reuses the prefix if possible. """ - cdef int i cdef xmlNs* c_ns cdef xmlNs* c_doc_ns # look for existing ns @@ -315,15 +314,12 @@ if c_prefix is NULL or \ tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is not NULL: # try to simulate ElementTree's namespace prefix creation - for i from 0 <= i < 10000: + while 1: prefix = self.buildNewPrefix() c_prefix = _cstr(prefix) # make sure it's not used already if tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is NULL: break - if i >= 10000: - # XXX too many prefixes in use - this is pretty bad! - return NULL return tree.xmlNewNs(c_node, c_href, c_prefix) @@ -333,8 +329,8 @@ c_ns = self._findOrBuildNodeNs(c_node, href, NULL) tree.xmlSetNs(c_node, c_ns) - cdef void _setNodeNamespaces(self, xmlNode* c_node, - object node_ns_utf, object nsmap): + cdef int _setNodeNamespaces(self, xmlNode* c_node, + object node_ns_utf, object nsmap) except -1: """Lookup current namespace prefixes, then set namespace structure for node and register new ns-prefix mappings. 
@@ -347,7 +343,7 @@ if not nsmap: if node_ns_utf is not None: self._setNodeNs(c_node, _cstr(node_ns_utf)) - return + return 0 c_doc = self._c_doc for prefix, href in nsmap.items(): @@ -368,6 +364,7 @@ if node_ns_utf is not None: self._setNodeNs(c_node, _cstr(node_ns_utf)) + return 0 cdef extern from "etree_defs.h": # macro call to 't->tp_new()' for fast instantiation @@ -377,7 +374,7 @@ cdef _Document result result = NEW_DOCUMENT(_Document) result._c_doc = c_doc - result._ns_counter = 0 + result._ns_counter = 0L if parser is None: parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() result._parser = parser From scoder at codespeak.net Mon Oct 8 22:59:19 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 8 Oct 2007 22:59:19 +0200 (CEST) Subject: [Lxml-checkins] r47319 - in lxml/branch/lxml-1.3: . src/lxml Message-ID: <20071008205919.961DE812D@code0.codespeak.net> Author: scoder Date: Mon Oct 8 22:59:18 2007 New Revision: 47319 Modified: lxml/branch/lxml-1.3/CHANGES.txt lxml/branch/lxml-1.3/src/lxml/etree.pyx Log: make namespace prefix counter a Python long to avoid crashes by counter overflow Modified: lxml/branch/lxml-1.3/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.3/CHANGES.txt (original) +++ lxml/branch/lxml-1.3/CHANGES.txt Mon Oct 8 22:59:18 2007 @@ -11,6 +11,9 @@ Bugs fixed ---------- +* lxml.etree could crash when adding more than 10000 namespaces to a + document + * lxml failed to serialise namespace declarations of elements other than the root node of a tree Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-1.3/src/lxml/etree.pyx Mon Oct 8 22:59:18 2007 @@ -231,7 +231,7 @@ When instances of this class are garbage collected, the libxml document is cleaned up. 
""" - cdef int _ns_counter + cdef object _ns_counter cdef xmlDoc* _c_doc cdef _BaseParser _parser @@ -297,7 +297,7 @@ return self._c_doc.URL cdef buildNewPrefix(self): - ns = python.PyString_FromFormat("ns%d", self._ns_counter) + ns = "ns%d" % self._ns_counter self._ns_counter = self._ns_counter + 1 return ns @@ -306,7 +306,6 @@ """Get or create namespace structure for a node. Reuses the prefix if possible. """ - cdef int i cdef xmlNs* c_ns cdef xmlNs* c_doc_ns # look for existing ns @@ -317,15 +316,12 @@ if c_prefix is NULL or \ tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is not NULL: # try to simulate ElementTree's namespace prefix creation - for i from 0 <= i < 10000: + while 1: prefix = self.buildNewPrefix() c_prefix = _cstr(prefix) # make sure it's not used already if tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is NULL: break - if i >= 10000: - # XXX too many prefixes in use - this is pretty bad! - return NULL return tree.xmlNewNs(c_node, c_href, c_prefix) @@ -335,8 +331,8 @@ c_ns = self._findOrBuildNodeNs(c_node, href, NULL) tree.xmlSetNs(c_node, c_ns) - cdef void _setNodeNamespaces(self, xmlNode* c_node, - object node_ns_utf, object nsmap): + cdef int _setNodeNamespaces(self, xmlNode* c_node, + object node_ns_utf, object nsmap) except -1: """Lookup current namespace prefixes, then set namespace structure for node and register new ns-prefix mappings. 
@@ -349,7 +345,7 @@ if not nsmap: if node_ns_utf is not None: self._setNodeNs(c_node, _cstr(node_ns_utf)) - return + return 0 c_doc = self._c_doc for prefix, href in nsmap.items(): @@ -370,12 +366,13 @@ if node_ns_utf is not None: self._setNodeNs(c_node, _cstr(node_ns_utf)) + return 0 cdef _Document _documentFactory(xmlDoc* c_doc, _BaseParser parser): cdef _Document result result = _Document() result._c_doc = c_doc - result._ns_counter = 0 + result._ns_counter = 0L if parser is None: parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() result._parser = parser From scoder at codespeak.net Tue Oct 9 11:20:55 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 9 Oct 2007 11:20:55 +0200 (CEST) Subject: [Lxml-checkins] r47326 - lxml/trunk/src/lxml Message-ID: <20071009092055.7A8F080DA@code0.codespeak.net> Author: scoder Date: Tue Oct 9 11:20:53 2007 New Revision: 47326 Modified: lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/config.pxd lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/etreepublic.pxd lxml/trunk/src/lxml/iterparse.pxi lxml/trunk/src/lxml/objectify.pyx lxml/trunk/src/lxml/objectpath.pxi lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/public-api.pxi lxml/trunk/src/lxml/serializer.pxi lxml/trunk/src/lxml/tree.pxd lxml/trunk/src/lxml/xmlerror.pxi Log: use 'bint' instead of 'int' Pyrex type where appropriate Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Tue Oct 9 11:20:53 2007 @@ -448,7 +448,7 @@ element._c_node, _cstr(ns), NULL) return '%s:%s' % (c_ns.prefix, tag) -cdef int _hasChild(xmlNode* c_node): +cdef bint _hasChild(xmlNode* c_node): return c_node is not NULL and _findChildForwards(c_node, 0) is not NULL cdef xmlNode* _findChild(xmlNode* c_node, Py_ssize_t index): @@ -534,7 +534,7 @@ return NULL return c_node -cdef int _tagMatches(xmlNode* c_node, char* 
c_href, char* c_name): +cdef bint _tagMatches(xmlNode* c_node, char* c_href, char* c_name): """Tests if the node matches namespace URI and tag name. A node matches if it matches both c_href and c_name. @@ -697,7 +697,7 @@ cdef char* s cdef char* c_end cdef char c - cdef int is_non_ascii + cdef bint is_non_ascii s = _cstr(pystring) c_end = s + python.PyString_GET_SIZE(pystring) is_non_ascii = 0 Modified: lxml/trunk/src/lxml/config.pxd ============================================================================== --- lxml/trunk/src/lxml/config.pxd (original) +++ lxml/trunk/src/lxml/config.pxd Tue Oct 9 11:20:53 2007 @@ -1,3 +1,3 @@ cdef extern from "etree_defs.h": - cdef int ENABLE_THREADING - cdef int ENABLE_SCHEMATRON + cdef bint ENABLE_THREADING + cdef bint ENABLE_SCHEMATRON Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Tue Oct 9 11:20:53 2007 @@ -853,7 +853,6 @@ return c def __nonzero__(self): - cdef xmlNode* c_node import warnings warnings.warn( "The behavior of this method will change in future versions. " @@ -861,7 +860,7 @@ FutureWarning ) # emulate old behaviour - return bool(_hasChild(self._c_node)) + return _hasChild(self._c_node) def __contains__(self, element): cdef xmlNode* c_node @@ -1384,22 +1383,22 @@ The keyword argument 'method' selects the output method: 'xml' or 'html'. """ - cdef int c_write_declaration + cdef bint write_declaration self._assertHasRoot() # suppress decl. 
in default case (purely for ElementTree compatibility) if xml_declaration is not None: - c_write_declaration = bool(xml_declaration) + write_declaration = xml_declaration if encoding is None: encoding = 'ASCII' elif encoding is None: encoding = 'ASCII' - c_write_declaration = 0 + write_declaration = 0 else: encoding = encoding.upper() - c_write_declaration = encoding not in \ + write_declaration = encoding not in \ ('US-ASCII', 'ASCII', 'UTF8', 'UTF-8') _tofilelike(file, self._context_node, encoding, method, - c_write_declaration, 1, bool(pretty_print)) + write_declaration, 1, pretty_print) def getpath(self, _Element element not None): """Returns a structural, absolute XPath expression to find that element. @@ -2164,7 +2163,7 @@ """Writes an element tree or element structure to sys.stdout. This function should be used for debugging only. """ - _dumpToFile(sys.stdout, elem._c_node, bool(pretty_print)) + _dumpToFile(sys.stdout, elem._c_node, pretty_print) def tostring(element_or_tree, encoding=None, method="xml", xml_declaration=None, pretty_print=False): @@ -2178,26 +2177,25 @@ The keyword argument 'pretty_print' (bool) enables formatted XML. - The keyword argument 'method' selects the output method: 'xml' or 'html'. + The keyword argument 'method' selects the output method: 'xml', + 'html' or plain 'text'. 
""" - cdef int write_declaration - cdef int c_pretty_print - c_pretty_print = bool(pretty_print) + cdef bint write_declaration if xml_declaration is None: # by default, write an XML declaration only for non-standard encodings write_declaration = encoding is not None and encoding.upper() not in \ ('ASCII', 'UTF-8', 'UTF8', 'US-ASCII') else: - write_declaration = bool(xml_declaration) + write_declaration = xml_declaration if encoding is None: encoding = 'ASCII' if isinstance(element_or_tree, _Element): return _tostring(<_Element>element_or_tree, encoding, method, - write_declaration, 0, c_pretty_print) + write_declaration, 0, pretty_print) elif isinstance(element_or_tree, _ElementTree): return _tostring((<_ElementTree>element_or_tree)._context_node, - encoding, method, write_declaration, 1, c_pretty_print) + encoding, method, write_declaration, 1, pretty_print) else: raise TypeError, "Type '%s' cannot be serialized." % type(element_or_tree) @@ -2218,17 +2216,16 @@ therefore not necessarily suited for serialization to byte streams without further treatment. - The keyword argument 'pretty_print' (bool) enables formatted XML. + The boolean keyword argument 'pretty_print' enables formatted XML. - The keyword argument 'method' selects the output method: 'xml' or 'html'. + The keyword argument 'method' selects the output method: 'xml', + 'html' or plain 'text'. """ - cdef int c_pretty_print - c_pretty_print = bool(pretty_print) if isinstance(element_or_tree, _Element): - return _tounicode(<_Element>element_or_tree, method, 0, c_pretty_print) + return _tounicode(<_Element>element_or_tree, method, 0, pretty_print) elif isinstance(element_or_tree, _ElementTree): return _tounicode((<_ElementTree>element_or_tree)._context_node, - method, 1, c_pretty_print) + method, 1, pretty_print) else: raise TypeError, "Type '%s' cannot be serialized." 
% type(element_or_tree) Modified: lxml/trunk/src/lxml/etreepublic.pxd ============================================================================== --- lxml/trunk/src/lxml/etreepublic.pxd (original) +++ lxml/trunk/src/lxml/etreepublic.pxd Tue Oct 9 11:20:53 2007 @@ -5,7 +5,7 @@ cdef extern from "etree_defs.h": # test if c_node is considered an Element (i.e. Element, Comment, etc.) - cdef int _isElement(tree.xmlNode* c_node) + cdef bint _isElement(tree.xmlNode* c_node) # return the namespace URI of the node or NULL cdef char* _getNs(tree.xmlNode* node) @@ -129,7 +129,7 @@ # XML node helper functions # check if the element has at least one child - cdef int hasChild(tree.xmlNode* c_node) + cdef bint hasChild(tree.xmlNode* c_node) # find child element number 'index' (supports negative indexes) cdef tree.xmlNode* findChild(tree.xmlNode* c_node, @@ -191,10 +191,10 @@ cdef object namespacedNameFromNsName(char* c_ns, char* c_tag) # check if the node has a text value (which may be '') - cdef int hasText(tree.xmlNode* c_node) + cdef bint hasText(tree.xmlNode* c_node) # check if the node has a tail value (which may be '') - cdef int hasTail(tree.xmlNode* c_node) + cdef bint hasTail(tree.xmlNode* c_node) # get the text content of an element (or None) cdef object textOf(tree.xmlNode* c_node) Modified: lxml/trunk/src/lxml/iterparse.pxi ============================================================================== --- lxml/trunk/src/lxml/iterparse.pxi (original) +++ lxml/trunk/src/lxml/iterparse.pxi Tue Oct 9 11:20:53 2007 @@ -292,7 +292,6 @@ filename = _encodeFilename(filename) self._source = source - html = bool(html) if html: # make sure we're not looking for namespaces if 'start' in events: Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Tue Oct 9 11:20:53 2007 @@ -1045,7 +1045,7 @@ cdef object _makeelement cdef 
object _namespace cdef object _nsmap - cdef int _annotate + cdef bint _annotate def __init__(self, namespace=None, nsmap=None, annotate=True, makeelement=None): if nsmap is None: @@ -1055,7 +1055,7 @@ self._namespace = None else: self._namespace = "{%s}" % namespace - self._annotate = bool(annotate) + self._annotate = annotate if makeelement is not None: assert callable(makeelement) self._makeelement = makeelement @@ -1077,15 +1077,15 @@ cdef object _tag cdef object _nsmap cdef object _element_factory - cdef int _annotate + cdef bint _annotate def __call__(self, *children, **attrib): cdef _ObjectifyElementMakerCaller elementMaker cdef python.PyObject* pytype cdef _Element element cdef _Element childElement - cdef int has_children - cdef int has_string_value + cdef bint has_children + cdef bint has_string_value if self._element_factory is None: element = _makeElement(self._tag, None, attrib, self._nsmap) else: @@ -1153,7 +1153,7 @@ ################################################################################ # Recursive element dumping -cdef int __RECURSIVE_STR +cdef bint __RECURSIVE_STR __RECURSIVE_STR = 0 # default: off def enableRecursiveStr(on=True): @@ -1161,7 +1161,7 @@ based on objectify.dump(element). """ global __RECURSIVE_STR - __RECURSIVE_STR = bool(on) + __RECURSIVE_STR = on def dump(_Element element not None): """Return a recursively generated string representation of an element. 
@@ -1323,8 +1323,7 @@ """ cdef _Element element element = cetree.rootNodeOrRaise(element_or_tree) - _annotate(element, 0, 1, bool(ignore_xsi), bool(ignore_old), - None, empty_pytype) + _annotate(element, 0, 1, ignore_xsi, ignore_old, None, empty_pytype) def xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, empty_type=None): @@ -1350,8 +1349,7 @@ """ cdef _Element element element = cetree.rootNodeOrRaise(element_or_tree) - _annotate(element, 1, 0, bool(ignore_old), bool(ignore_pytype), - empty_type, None) + _annotate(element, 1, 0, ignore_old, ignore_pytype, empty_type, None) def annotate(element_or_tree, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, @@ -1386,12 +1384,12 @@ """ cdef _Element element element = cetree.rootNodeOrRaise(element_or_tree) - _annotate(element, annotate_xsi, annotate_pytype, bool(ignore_xsi), - bool(ignore_old), empty_type, empty_pytype) + _annotate(element, annotate_xsi, annotate_pytype, ignore_xsi, + ignore_old, empty_type, empty_pytype) -cdef _annotate(_Element element, int annotate_xsi, int annotate_pytype, - int ignore_xsi, int ignore_pytype, +cdef _annotate(_Element element, bint annotate_xsi, bint annotate_pytype, + bint ignore_xsi, bint ignore_pytype, empty_type_name, empty_pytype_name): cdef _Document doc cdef tree.xmlNode* c_node Modified: lxml/trunk/src/lxml/objectpath.pxi ============================================================================== --- lxml/trunk/src/lxml/objectpath.pxi (original) +++ lxml/trunk/src/lxml/objectpath.pxi Tue Oct 9 11:20:53 2007 @@ -86,7 +86,7 @@ """Parse object path string into a 'hrefOnameOhrefOnameOOO' string and an index list. The index list is None if no index was used in the path. 
""" - cdef int has_dot + cdef bint has_dot new_path = [] path = cetree.utf8(path.strip()) if path == '.': Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Tue Oct 9 11:20:53 2007 @@ -431,7 +431,7 @@ cdef object _handleParseResult(self, _BaseParser parser, xmlDoc* result, filename): cdef xmlDoc* c_doc - cdef int recover + cdef bint recover recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER c_doc = _handleParseResult(self, self._c_ctxt, result, filename, recover) @@ -439,7 +439,7 @@ cdef xmlDoc* _handleParseResultDoc(self, _BaseParser parser, xmlDoc* result, filename) except NULL: - cdef int recover + cdef bint recover recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER return _handleParseResult(self, self._c_ctxt, result, filename, recover) @@ -481,8 +481,8 @@ cdef xmlDoc* _handleParseResult(_ParserContext context, xmlparser.xmlParserCtxt* c_ctxt, xmlDoc* result, filename, - int recover) except NULL: - cdef int well_formed + bint recover) except NULL: + cdef bint well_formed if c_ctxt.myDoc is not NULL: if c_ctxt.myDoc != result: tree.xmlFreeDoc(c_ctxt.myDoc) @@ -556,8 +556,8 @@ self._filename = filename self._target = target self._for_html = for_html - self._remove_comments = bool(remove_comments) - self._remove_pis = bool(remove_pis) + self._remove_comments = remove_comments + self._remove_pis = remove_pis self._resolvers = _ResolverRegistry() @@ -711,7 +711,6 @@ cdef python.PyThreadState* state cdef xmlDoc* result cdef xmlparser.xmlParserCtxt* pctxt - cdef int recover cdef Py_ssize_t py_buffer_len cdef int buffer_len cdef char* c_text @@ -752,7 +751,6 @@ cdef python.PyThreadState* state cdef xmlDoc* result cdef xmlparser.xmlParserCtxt* pctxt - cdef int recover cdef char* c_encoding if c_len > python.INT_MAX: raise ParserError, "string is too long to parse it with libxml2" @@ -788,7 +786,6 @@ 
cdef python.PyThreadState* state cdef xmlDoc* result cdef xmlparser.xmlParserCtxt* pctxt - cdef int recover cdef int orig_options cdef char* c_encoding result = NULL @@ -825,7 +822,6 @@ cdef xmlDoc* result cdef xmlparser.xmlParserCtxt* pctxt cdef char* c_filename - cdef int recover if not filename: filename = None @@ -884,7 +880,6 @@ cdef char* c_encoding cdef int buffer_len cdef int error - cdef int recover if python.PyString_Check(data): c_encoding = NULL c_data = _cstr(data) Modified: lxml/trunk/src/lxml/public-api.pxi ============================================================================== --- lxml/trunk/src/lxml/public-api.pxi (original) +++ lxml/trunk/src/lxml/public-api.pxi Tue Oct 9 11:20:53 2007 @@ -54,10 +54,10 @@ cdef public _Element rootNodeOrRaise(object input): return _rootNodeOrRaise(input) -cdef public int hasText(xmlNode* c_node): +cdef public bint hasText(xmlNode* c_node): return _hasText(c_node) -cdef public int hasTail(xmlNode* c_node): +cdef public bint hasTail(xmlNode* c_node): return _hasTail(c_node) cdef public object textOf(xmlNode* c_node): @@ -106,7 +106,7 @@ char* c_href, char* c_name): return _delAttributeFromNsName(c_element, c_href, c_name) -cdef public int hasChild(xmlNode* c_node): +cdef public bint hasChild(xmlNode* c_node): return _hasChild(c_node) cdef public xmlNode* findChild(xmlNode* c_node, Py_ssize_t index): Modified: lxml/trunk/src/lxml/serializer.pxi ============================================================================== --- lxml/trunk/src/lxml/serializer.pxi (original) +++ lxml/trunk/src/lxml/serializer.pxi Tue Oct 9 11:20:53 2007 @@ -44,8 +44,8 @@ return python.PyUnicode_AsEncodedString(text, encoding, 'strict') cdef _tostring(_Element element, encoding, method, - int write_xml_declaration, int write_complete_document, - int pretty_print): + bint write_xml_declaration, bint write_complete_document, + bint pretty_print): """Serialize an element to an encoded string representation of its XML tree. 
""" @@ -96,7 +96,7 @@ return result cdef _tounicode(_Element element, method, - int write_complete_document, int pretty_print): + bint write_complete_document, bint pretty_print): """Serialize an element to the Python unicode representation of its XML tree. """ @@ -133,9 +133,9 @@ cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node, char* encoding, int c_method, - int write_xml_declaration, - int write_complete_document, - int pretty_print): + bint write_xml_declaration, + bint write_complete_document, + bint pretty_print): cdef xmlDoc* c_doc cdef xmlNode* c_nsdecl_node c_doc = c_node.doc @@ -222,7 +222,7 @@ tree.xmlOutputBufferWrite(c_buffer, 3, "]>\n") cdef void _writeTail(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node, - char* encoding, int pretty_print): + char* encoding, bint pretty_print): "Write the element tail." c_node = c_node.next while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE: @@ -231,7 +231,7 @@ c_node = c_node.next cdef void _writePrevSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node, - char* encoding, int pretty_print): + char* encoding, bint pretty_print): cdef xmlNode* c_sibling if c_node.parent is not NULL and _isElement(c_node.parent): return @@ -247,7 +247,7 @@ c_sibling = c_sibling.next cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node, - char* encoding, int pretty_print): + char* encoding, bint pretty_print): cdef xmlNode* c_sibling if c_node.parent is not NULL and _isElement(c_node.parent): return @@ -307,8 +307,8 @@ return (<_FilelikeWriter>ctxt).close() cdef _tofilelike(f, _Element element, encoding, method, - int write_xml_declaration, int write_doctype, - int pretty_print): + bint write_xml_declaration, bint write_doctype, + bint pretty_print): cdef python.PyThreadState* state cdef _FilelikeWriter writer cdef tree.xmlOutputBuffer* c_buffer @@ -400,7 +400,7 @@ # dump node to file (mainly for debug) -cdef _dumpToFile(f, xmlNode* c_node, int pretty_print): +cdef 
_dumpToFile(f, xmlNode* c_node, bint pretty_print): cdef tree.xmlOutputBuffer* c_buffer if not python.PyFile_Check(f): raise ValueError, "Not a file" Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Tue Oct 9 11:20:53 2007 @@ -300,9 +300,9 @@ cdef void* xmlMalloc(size_t size) cdef extern from "etree_defs.h": - cdef int _isElement(xmlNode* node) - cdef int _isElementOrXInclude(xmlNode* node) + cdef bint _isElement(xmlNode* node) + cdef bint _isElementOrXInclude(xmlNode* node) cdef char* _getNs(xmlNode* node) cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* tree_top, - xmlNode* start_node, int inclusive) + xmlNode* start_node, bint inclusive) cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* start_node) Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Tue Oct 9 11:20:53 2007 @@ -98,7 +98,7 @@ return '' cdef void _receive(self, xmlerror.xmlError* error): - cdef int is_error + cdef bint is_error cdef _LogEntry entry entry = _LogEntry() entry._setError(error) @@ -114,6 +114,7 @@ cdef void _receiveGeneric(self, int domain, int type, int level, int line, message, filename): + cdef bint is_error cdef _LogEntry entry entry = _LogEntry() entry._setGeneric(domain, type, level, line, message, filename) @@ -184,7 +185,9 @@ return False def __nonzero__(self): - return bool(self._entries) + cdef bint result + result = self._entries + return result def filter_domains(self, domains): """Filter the errors by the given domains and return a new error log From scoder at codespeak.net Tue Oct 9 12:02:21 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 9 Oct 2007 12:02:21 +0200 (CEST) Subject: [Lxml-checkins] r47330 - lxml/trunk/src/lxml Message-ID: 
<20071009100221.2483081A8@code0.codespeak.net> Author: scoder Date: Tue Oct 9 12:02:20 2007 New Revision: 47330 Modified: lxml/trunk/src/lxml/etree.pyx Log: keep prefix counter a C integer, change formatting string instead Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Tue Oct 9 12:02:20 2007 @@ -229,7 +229,8 @@ When instances of this class are garbage collected, the libxml document is cleaned up. """ - cdef object _ns_counter + cdef unsigned int _ns_counter + cdef object _prefix_format cdef xmlDoc* _c_doc cdef _BaseParser _parser @@ -295,8 +296,12 @@ return self._c_doc.URL cdef buildNewPrefix(self): - ns = "ns%d" % self._ns_counter + ns = python.PyString_FromFormat( + _cstr(self._prefix_format), self._ns_counter) self._ns_counter = self._ns_counter + 1 + if self._ns_counter == 0: + # overflow! + self._prefix_format = self._prefix_format + "A" return ns cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, @@ -374,7 +379,8 @@ cdef _Document result result = NEW_DOCUMENT(_Document) result._c_doc = c_doc - result._ns_counter = 0L + result._ns_counter = 0 + result._prefix_format = "ns%lu" if parser is None: parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() result._parser = parser From scoder at codespeak.net Tue Oct 9 12:02:54 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 9 Oct 2007 12:02:54 +0200 (CEST) Subject: [Lxml-checkins] r47331 - lxml/branch/lxml-1.3/src/lxml Message-ID: <20071009100254.4B17581A8@code0.codespeak.net> Author: scoder Date: Tue Oct 9 12:02:53 2007 New Revision: 47331 Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx Log: keep prefix counter a C integer, change formatting string instead Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/etree.pyx (original) +++ 
lxml/branch/lxml-1.3/src/lxml/etree.pyx Tue Oct 9 12:02:53 2007 @@ -231,7 +231,8 @@ When instances of this class are garbage collected, the libxml document is cleaned up. """ - cdef object _ns_counter + cdef unsigned int _ns_counter + cdef object _prefix_format cdef xmlDoc* _c_doc cdef _BaseParser _parser @@ -297,8 +298,12 @@ return self._c_doc.URL cdef buildNewPrefix(self): - ns = "ns%d" % self._ns_counter + ns = python.PyString_FromFormat( + _cstr(self._prefix_format), self._ns_counter) self._ns_counter = self._ns_counter + 1 + if self._ns_counter == 0: + # overflow! + self._prefix_format = self._prefix_format + "A" return ns cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, @@ -372,7 +377,8 @@ cdef _Document result result = _Document() result._c_doc = c_doc - result._ns_counter = 0L + result._ns_counter = 0 + result._prefix_format = "ns%lu" if parser is None: parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() result._parser = parser From scoder at codespeak.net Tue Oct 9 14:56:29 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 9 Oct 2007 14:56:29 +0200 (CEST) Subject: [Lxml-checkins] r47343 - lxml/trunk/doc Message-ID: <20071009125629.8F4DE81B1@code0.codespeak.net> Author: scoder Date: Tue Oct 9 14:56:29 2007 New Revision: 47343 Modified: lxml/trunk/doc/build.txt Log: typo Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Tue Oct 9 14:56:29 2007 @@ -223,7 +223,7 @@ version numbers:: STATIC_INCLUDE_DIRS = [ - "..\\libxml2-2.6.23.win32\\include ", + "..\\libxml2-2.6.23.win32\\include", "..\\libxslt-1.1.15.win32\\include", "..\\zlib-1.2.3.win32\\include", "..\\iconv-1.9.1.win32\\include" From scoder at codespeak.net Tue Oct 9 14:57:07 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 9 Oct 2007 14:57:07 +0200 (CEST) Subject: [Lxml-checkins] r47344 - lxml/branch/lxml-1.3/doc 
Message-ID: <20071009125707.98E2280C7@code0.codespeak.net> Author: scoder Date: Tue Oct 9 14:57:07 2007 New Revision: 47344 Modified: lxml/branch/lxml-1.3/doc/build.txt Log: typo Modified: lxml/branch/lxml-1.3/doc/build.txt ============================================================================== --- lxml/branch/lxml-1.3/doc/build.txt (original) +++ lxml/branch/lxml-1.3/doc/build.txt Tue Oct 9 14:57:07 2007 @@ -250,7 +250,7 @@ version numbers:: STATIC_INCLUDE_DIRS = [ - "..\\libxml2-2.6.23.win32\\include ", + "..\\libxml2-2.6.23.win32\\include", "..\\libxslt-1.1.15.win32\\include", "..\\zlib-1.2.3.win32\\include", "..\\iconv-1.9.1.win32\\include" From scoder at codespeak.net Tue Oct 9 15:03:40 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 9 Oct 2007 15:03:40 +0200 (CEST) Subject: [Lxml-checkins] r47346 - lxml/trunk/doc Message-ID: <20071009130340.0048481B1@code0.codespeak.net> Author: scoder Date: Tue Oct 9 15:03:40 2007 New Revision: 47346 Modified: lxml/trunk/doc/build.txt Log: removed doc leftover Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Tue Oct 9 15:03:40 2007 @@ -232,7 +232,6 @@ STATIC_LIBRARY_DIRS = [ "..\\libxml2-2.6.23.win32\\lib", "..\\libxslt-1.1.15.win32\\lib", - "..\\libxslt-1.1.15.win32\\lib", "..\\zlib-1.2.3.win32\\lib", "..\\iconv-1.9.1.win32\\lib" ] From scoder at codespeak.net Tue Oct 9 15:03:54 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 9 Oct 2007 15:03:54 +0200 (CEST) Subject: [Lxml-checkins] r47347 - lxml/branch/lxml-1.3/doc Message-ID: <20071009130354.9DF7381B1@code0.codespeak.net> Author: scoder Date: Tue Oct 9 15:03:54 2007 New Revision: 47347 Modified: lxml/branch/lxml-1.3/doc/build.txt Log: removed doc leftover Modified: lxml/branch/lxml-1.3/doc/build.txt ============================================================================== --- 
lxml/branch/lxml-1.3/doc/build.txt (original) +++ lxml/branch/lxml-1.3/doc/build.txt Tue Oct 9 15:03:54 2007 @@ -259,7 +259,6 @@ STATIC_LIBRARY_DIRS = [ "..\\libxml2-2.6.23.win32\\lib", "..\\libxslt-1.1.15.win32\\lib", - "..\\libxslt-1.1.15.win32\\lib", "..\\zlib-1.2.3.win32\\lib", "..\\iconv-1.9.1.win32\\lib" ] From scoder at codespeak.net Wed Oct 10 09:13:13 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 10 Oct 2007 09:13:13 +0200 (CEST) Subject: [Lxml-checkins] r47366 - lxml/trunk/doc Message-ID: <20071010071313.983FC81E0@code0.codespeak.net> Author: scoder Date: Wed Oct 10 09:13:10 2007 New Revision: 47366 Modified: lxml/trunk/doc/lxmlhtml.txt Log: doc fix Modified: lxml/trunk/doc/lxmlhtml.txt ============================================================================== --- lxml/trunk/doc/lxmlhtml.txt (original) +++ lxml/trunk/doc/lxmlhtml.txt Wed Oct 10 09:13:10 2007 @@ -2,9 +2,9 @@ lxml.html ========= -Since version 2.0, lxml provides a dedicated package for dealing with HTML: -``lxml.html``. It provides a special Element API for HTML elements, as well -as a number of utilities for common tasks. +Since version 2.0, lxml comes with a dedicated package for dealing +with HTML: ``lxml.html``. It provides a special Element API for HTML +elements, as well as a number of utilities for common tasks. .. contents:: .. 
From scoder at codespeak.net Wed Oct 10 09:19:28 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 10 Oct 2007 09:19:28 +0200 (CEST) Subject: [Lxml-checkins] r47367 - lxml/trunk/doc Message-ID: <20071010071928.8AA9181E5@code0.codespeak.net> Author: scoder Date: Wed Oct 10 09:19:28 2007 New Revision: 47367 Modified: lxml/trunk/doc/lxmlhtml.txt Log: doc fix Modified: lxml/trunk/doc/lxmlhtml.txt ============================================================================== --- lxml/trunk/doc/lxmlhtml.txt (original) +++ lxml/trunk/doc/lxmlhtml.txt Wed Oct 10 09:19:28 2007 @@ -37,7 +37,8 @@ If you give a URL, or if the object has a ``.geturl()`` method (as file-like objects from ``urllib.urlopen()`` have), then that URL - is used as the base URL. + is used as the base URL. You can also provide an explicit + ``base_url`` keyword argument. ``document_fromstring(string)``: Parses a document from the given string. This always creates a From scoder at codespeak.net Wed Oct 10 11:22:38 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 10 Oct 2007 11:22:38 +0200 (CEST) Subject: [Lxml-checkins] r47375 - lxml/trunk Message-ID: <20071010092238.D7EDD81E6@code0.codespeak.net> Author: scoder Date: Wed Oct 10 11:22:37 2007 New Revision: 47375 Modified: lxml/trunk/setup.py lxml/trunk/setupinfo.py lxml/trunk/versioninfo.py Log: Py3 syntax fixes in build scripts Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Wed Oct 10 11:22:37 2007 @@ -38,7 +38,7 @@ # create lxml-version.h file svn_version = versioninfo.svn_version() versioninfo.create_version_h(svn_version) -print "Building lxml version", svn_version +print("Building lxml version %s." 
% svn_version) branch_link = """ Modified: lxml/trunk/setupinfo.py ============================================================================== --- lxml/trunk/setupinfo.py (original) +++ lxml/trunk/setupinfo.py Wed Oct 10 11:22:37 2007 @@ -3,7 +3,7 @@ try: from Cython.Distutils import build_ext as build_pyx - print "Building with Cython." + print("Building with Cython.") CYTHON_INSTALLED = True except ImportError: CYTHON_INSTALLED = False @@ -131,13 +131,21 @@ return macros def flags(cmd): - wf, rf, ef = os.popen3(cmd) + try: + import subprocess + except ImportError: + # Python 2.3 + _, rf, ef = os.popen3(cmd) + else: + # Python 2.4+ + p = subprocess.Popen(cmd, shell=True, close_fds=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + rf, ef = p.stdout, p.stderr errors = ef.read() if errors: - print "ERROR:", errors - print "** make sure the development packages of libxml2 and libxslt are installed **" - print - return rf.read().split() + print("ERROR: %s" % errors) + print("** make sure the development packages of libxml2 and libxslt are installed **\n") + return str(rf.read()).split() def has_option(name): try: Modified: lxml/trunk/versioninfo.py ============================================================================== --- lxml/trunk/versioninfo.py (original) +++ lxml/trunk/versioninfo.py Wed Oct 10 11:22:37 2007 @@ -35,7 +35,7 @@ if data.startswith('8'): # SVN >= 1.4 - data = map(str.splitlines, data.split('\n\x0c\n')) + data = [ d.splitlines() for d in data.split('\n\x0c\n') ] del data[0][0] # get rid of the '8' dirurl = data[0][3] try: From ianb at codespeak.net Sun Oct 14 02:12:32 2007 From: ianb at codespeak.net (ianb at codespeak.net) Date: Sun, 14 Oct 2007 02:12:32 +0200 (CEST) Subject: [Lxml-checkins] r47440 - in lxml/trunk: . 
src/lxml Message-ID: <20071014001232.1B0708165@code0.codespeak.net> Author: ianb Date: Sun Oct 14 02:12:31 2007 New Revision: 47440 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/doctestcompare.py Log: Improve doctestcompare a little: NOPARSE_MARKUP option to suppress its behavior, and xmlns=... now works Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Oct 14 02:12:31 2007 @@ -8,12 +8,20 @@ Features added -------------- +* When using ``lxml.doctestcompare`` you can give the doctest option + ``NOPARSE_MARKUP`` (like ``# doctest: +NOPARSE_MARKUP``) to suppress + the special checking for one test. + Bugs fixed ---------- * lxml.etree could crash when adding more than 10000 namespaces to a document +* With ``lxml.doctestcompare`` if you do ``<tag xmlns="...">`` in your + output, it will then be namespace-neutral (before the ellipsis was + treated as a real namespace). + Other changes ------------- Modified: lxml/trunk/src/lxml/doctestcompare.py ============================================================================== --- lxml/trunk/src/lxml/doctestcompare.py (original) +++ lxml/trunk/src/lxml/doctestcompare.py Sun Oct 14 02:12:31 2007 @@ -23,6 +23,8 @@ displayed (indented), and a rough diff-like output is given. Anything marked with ``-`` is in the output but wasn't supposed to be, and similarly ``+`` means its in the example but wasn't in the output. 
+ +You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP`` """ from lxml import etree @@ -36,6 +38,7 @@ PARSE_HTML = doctest.register_optionflag('PARSE_HTML') PARSE_XML = doctest.register_optionflag('PARSE_XML') +NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP') OutputChecker = doctest.OutputChecker @@ -84,6 +87,8 @@ def get_parser(self, want, got, optionflags): parser = None + if NOPARSE_MARKUP & optionflags: + return None if PARSE_HTML & optionflags: parser = document_fromstring elif PARSE_XML & optionflags: @@ -102,7 +107,7 @@ and not _repr_re.search(s)) def compare_docs(self, want, got): - if want.tag != got.tag and want.tag != 'any': + if not self.tag_compare(want.tag, got.tag): return False if not self.text_compare(want.text, got.text, True): return False @@ -143,6 +148,17 @@ else: return False + def tag_compare(self, want, got): + if want == 'any': + return True + want = want or '' + got = got or '' + if want.startswith('{...}'): + # Ellipsis on the namespace + return want.split('}')[-1] == got.split('}')[-1] + else: + return want == got + def output_difference(self, example, got, optionflags): want = example.want parser = self.get_parser(want, got, optionflags) @@ -282,7 +298,7 @@ return ''.join(parts) def collect_diff_tag(self, want, got): - if want.tag != got.tag and want.tag != 'any': + if not self.tag_compare(want.tag, got.tag): tag = '%s (got: %s)' % (want.tag, got.tag) else: tag = got.tag @@ -431,3 +447,21 @@ raise LookupError( "Could not find doctest (only use this function *inside* a doctest)") +__test__ = { + 'basic': ''' + >>> temp_install() + >>> print """stuff""" + ... 
+ >>> print """""" + + + + >>> print """blahblahblah""" # doctest: +NOPARSE_MARKUP, +ELLIPSIS + ...foo /> + '''} + +if __name__ == '__main__': + import doctest + doctest.testmod() + + From scoder at codespeak.net Mon Oct 15 18:28:00 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 15 Oct 2007 18:28:00 +0200 (CEST) Subject: [Lxml-checkins] r47468 - lxml/trunk/src/lxml Message-ID: <20071015162800.944FE8102@code0.codespeak.net> Author: scoder Date: Mon Oct 15 18:27:59 2007 New Revision: 47468 Modified: lxml/trunk/src/lxml/apihelpers.pxi Log: faster attribute collecting, _countElements() helper function Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Mon Oct 15 18:27:59 2007 @@ -307,9 +307,21 @@ """Collect all attributes of a node in a list. Depending on collecttype, it collects either the name (1), the value (2) or the name-value tuples. 
""" + cdef Py_ssize_t count cdef xmlAttr* c_attr c_attr = c_node.properties - attributes = [] + count = 0 + while c_attr is not NULL: + if c_attr.type == tree.XML_ATTRIBUTE_NODE: + count = count + 1 + c_attr = c_attr.next + + if count == 0: + return [] + + attributes = python.PyList_New(count) + c_attr = c_node.properties + count = 0 while c_attr is not NULL: if c_attr.type == tree.XML_ATTRIBUTE_NODE: if collecttype == 1: @@ -320,9 +332,9 @@ item = (_namespacedName(c_attr), _attributeValue(c_node, c_attr)) - ret = python.PyList_Append(attributes, item) - if ret: - raise + python.Py_INCREF(item) + python.PyList_SET_ITEM(attributes, count, item) + count = count + 1 c_attr = c_attr.next return attributes @@ -451,6 +463,16 @@ cdef bint _hasChild(xmlNode* c_node): return c_node is not NULL and _findChildForwards(c_node, 0) is not NULL +cdef Py_ssize_t _countElements(xmlNode* c_node): + "Counts the elements within the following siblings and the node itself." + cdef Py_ssize_t count + count = 0 + while c_node is not NULL: + if _isElement(c_node): + count = count + 1 + c_node = c_node.next + return count + cdef xmlNode* _findChild(xmlNode* c_node, Py_ssize_t index): if index < 0: return _findChildBackwards(c_node, -index - 1) From scoder at codespeak.net Mon Oct 15 18:28:26 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 15 Oct 2007 18:28:26 +0200 (CEST) Subject: [Lxml-checkins] r47469 - lxml/trunk/src/lxml Message-ID: <20071015162826.D27038102@code0.codespeak.net> Author: scoder Date: Mon Oct 15 18:28:26 2007 New Revision: 47469 Modified: lxml/trunk/src/lxml/python.pxd Log: forgotten PyAPI declarations Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Mon Oct 15 18:28:26 2007 @@ -41,8 +41,10 @@ cdef Py_ssize_t PyTuple_GET_SIZE(object t) cdef object PyTuple_GET_ITEM(object o, Py_ssize_t pos) + cdef 
object PyList_New(Py_ssize_t index) cdef Py_ssize_t PyList_GET_SIZE(object l) cdef object PyList_GET_ITEM(object l, Py_ssize_t index) + cdef void PyList_SET_ITEM(object l, Py_ssize_t index, object value) cdef int PyList_Append(object l, object obj) except -1 cdef int PyList_Reverse(object l) except -1 cdef int PyList_Insert(object l, Py_ssize_t index, object o) except -1 @@ -61,14 +63,15 @@ cdef object PySequence_List(object o) cdef object PySequence_Tuple(object o) - cdef int PyDict_Check(object instance) - cdef int PyList_Check(object instance) - cdef int PyTuple_Check(object instance) - cdef int PyNumber_Check(object instance) - cdef int PyBool_Check(object instance) - cdef int PySequence_Check(object instance) - cdef int PyType_Check(object instance) - cdef int PyTuple_CheckExact(object instance) + cdef bint PyDict_Check(object instance) + cdef bint PyList_Check(object instance) + cdef bint PyTuple_Check(object instance) + cdef bint PyNumber_Check(object instance) + cdef bint PyBool_Check(object instance) + cdef bint PySequence_Check(object instance) + cdef bint PyType_Check(object instance) + cdef bint PyTuple_CheckExact(object instance) + cdef bint PySlice_Check(object instance) cdef int PyObject_SetAttr(object o, object name, object value) cdef object PyObject_RichCompare(object o1, object o2, int op) From ianb at codespeak.net Tue Oct 16 05:35:34 2007 From: ianb at codespeak.net (ianb at codespeak.net) Date: Tue, 16 Oct 2007 05:35:34 +0200 (CEST) Subject: [Lxml-checkins] r47483 - lxml/trunk/src/lxml Message-ID: <20071016033534.43F7D80C2@code0.codespeak.net> Author: ianb Date: Tue Oct 16 05:35:33 2007 New Revision: 47483 Modified: lxml/trunk/src/lxml/doctestcompare.py Log: Oops, sometimes the tag we're comparing is a comment Modified: lxml/trunk/src/lxml/doctestcompare.py ============================================================================== --- lxml/trunk/src/lxml/doctestcompare.py (original) +++ lxml/trunk/src/lxml/doctestcompare.py Tue Oct 16 
05:35:33 2007 @@ -151,6 +151,9 @@ def tag_compare(self, want, got): if want == 'any': return True + if (not isinstance(want, basestring) + or not isinstance(got, basestring)): + return want == got want = want or '' got = got or '' if want.startswith('{...}'): From scoder at codespeak.net Fri Oct 19 11:45:23 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 19 Oct 2007 11:45:23 +0200 (CEST) Subject: [Lxml-checkins] r47573 - lxml/tag/lxml-1.3.4 Message-ID: <20071019094523.8E94B8121@code0.codespeak.net> Author: scoder Date: Fri Oct 19 11:45:22 2007 New Revision: 47573 Added: lxml/tag/lxml-1.3.4/ - copied from r46182, lxml/branch/lxml-1.3/ Log: lxml 1.3.4 tag From scoder at codespeak.net Fri Oct 19 12:54:43 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 19 Oct 2007 12:54:43 +0200 (CEST) Subject: [Lxml-checkins] r47574 - lxml/trunk Message-ID: <20071019105443.BB72D811F@code0.codespeak.net> Author: scoder Date: Fri Oct 19 12:54:42 2007 New Revision: 47574 Modified: lxml/trunk/CREDITS.txt Log: Holger Modified: lxml/trunk/CREDITS.txt ============================================================================== --- lxml/trunk/CREDITS.txt (original) +++ lxml/trunk/CREDITS.txt Fri Oct 19 12:54:42 2007 @@ -7,6 +7,8 @@ Ian Bicking - lxml.html +Holger Joukl - bug reports, feedback and development on lxml.objectify + Marc-Antoine Parent - XPath extension function help and patches Olivier Grisel - improved (c)ElementTree compatibility patches, From scoder at codespeak.net Fri Oct 19 16:42:20 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 19 Oct 2007 16:42:20 +0200 (CEST) Subject: [Lxml-checkins] r47591 - lxml/trunk/src/lxml Message-ID: <20071019144220.DE82D8145@code0.codespeak.net> Author: scoder Date: Fri Oct 19 16:42:18 2007 New Revision: 47591 Modified: lxml/trunk/src/lxml/etree.pyx Log: fix prefix name creation also for Python <= 2.4 Modified: lxml/trunk/src/lxml/etree.pyx 
============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Fri Oct 19 16:42:18 2007 @@ -229,8 +229,8 @@ When instances of this class are garbage collected, the libxml document is cleaned up. """ - cdef unsigned int _ns_counter - cdef object _prefix_format + cdef int _ns_counter + cdef object _prefix_tail cdef xmlDoc* _c_doc cdef _BaseParser _parser @@ -296,12 +296,17 @@ return self._c_doc.URL cdef buildNewPrefix(self): - ns = python.PyString_FromFormat( - _cstr(self._prefix_format), self._ns_counter) + ns = python.PyString_FromFormat("ns%d", self._ns_counter) + if self._prefix_tail is not None: + ns = ns + self._prefix_tail self._ns_counter = self._ns_counter + 1 - if self._ns_counter == 0: + if self._ns_counter < 0: # overflow! - self._prefix_format = self._prefix_format + "A" + self._ns_counter = 0 + if self._prefix_tail is None: + self._prefix_tail = "A" + else: + self._prefix_tail = self._prefix_tail + "A" return ns cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, @@ -380,7 +385,7 @@ result = NEW_DOCUMENT(_Document) result._c_doc = c_doc result._ns_counter = 0 - result._prefix_format = "ns%lu" + result._prefix_tail = None if parser is None: parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() result._parser = parser From scoder at codespeak.net Fri Oct 19 16:45:14 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 19 Oct 2007 16:45:14 +0200 (CEST) Subject: [Lxml-checkins] r47592 - lxml/branch/lxml-1.3/src/lxml Message-ID: <20071019144514.4626E813C@code0.codespeak.net> Author: scoder Date: Fri Oct 19 16:45:13 2007 New Revision: 47592 Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx Log: fix prefix name creation also for Python <= 2.4 Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/etree.pyx (original) +++ 
lxml/branch/lxml-1.3/src/lxml/etree.pyx Fri Oct 19 16:45:13 2007 @@ -231,8 +231,8 @@ When instances of this class are garbage collected, the libxml document is cleaned up. """ - cdef unsigned int _ns_counter - cdef object _prefix_format + cdef int _ns_counter + cdef object _prefix_tail cdef xmlDoc* _c_doc cdef _BaseParser _parser @@ -298,12 +298,17 @@ return self._c_doc.URL cdef buildNewPrefix(self): - ns = python.PyString_FromFormat( - _cstr(self._prefix_format), self._ns_counter) + ns = python.PyString_FromFormat("ns%d", self._ns_counter) + if self._prefix_tail is not None: + ns = ns + self._prefix_tail self._ns_counter = self._ns_counter + 1 - if self._ns_counter == 0: + if self._ns_counter < 0: # overflow! - self._prefix_format = self._prefix_format + "A" + self._ns_counter = 0 + if self._prefix_tail is None: + self._prefix_tail = "A" + else: + self._prefix_tail = self._prefix_tail + "A" return ns cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, @@ -378,7 +383,7 @@ result = _Document() result._c_doc = c_doc result._ns_counter = 0 - result._prefix_format = "ns%lu" + result._prefix_tail = None if parser is None: parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() result._parser = parser From scoder at codespeak.net Sat Oct 20 14:44:51 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 20 Oct 2007 14:44:51 +0200 (CEST) Subject: [Lxml-checkins] r47636 - lxml/trunk Message-ID: <20071020124451.A9E0B8143@code0.codespeak.net> Author: scoder Date: Sat Oct 20 14:44:50 2007 New Revision: 47636 Modified: lxml/trunk/setupinfo.py Log: print Cython version in setup.py Modified: lxml/trunk/setupinfo.py ============================================================================== --- lxml/trunk/setupinfo.py (original) +++ lxml/trunk/setupinfo.py Sat Oct 20 14:44:50 2007 @@ -3,7 +3,8 @@ try: from Cython.Distutils import build_ext as build_pyx - print("Building with Cython.") + import Cython.Compiler.Version + print("Building with Cython %s." 
% Cython.Compiler.Version.version) CYTHON_INSTALLED = True except ImportError: CYTHON_INSTALLED = False From scoder at codespeak.net Sat Oct 20 14:46:30 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 20 Oct 2007 14:46:30 +0200 (CEST) Subject: [Lxml-checkins] r47637 - in lxml/trunk: . src/lxml Message-ID: <20071020124630.E3081814B@code0.codespeak.net> Author: scoder Date: Sat Oct 20 14:46:30 2007 New Revision: 47637 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/docloader.pxi lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/xslt.pxi Log: improved interaction of custom resolvers and XSLT Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sat Oct 20 14:46:30 2007 @@ -8,6 +8,9 @@ Features added -------------- +* Resolvers can now provide a ``base_url`` keyword argument when + resolving a document as string data. + * When using ``lxml.doctestcompare`` you can give the doctest option ``NOPARSE_MARKUP`` (like ``# doctest: +NOPARSE_MARKUP``) to suppress the special checking for one test. @@ -15,6 +18,9 @@ Bugs fixed ---------- +* Using custom resolvers on XSLT stylesheets parsed from a string + could request ill-formed URLs. + * lxml.etree could crash when adding more than 10000 namespaces to a document Modified: lxml/trunk/src/lxml/docloader.pxi ============================================================================== --- lxml/trunk/src/lxml/docloader.pxi (original) +++ lxml/trunk/src/lxml/docloader.pxi Sat Oct 20 14:46:30 2007 @@ -9,43 +9,69 @@ cdef class _InputDocument: cdef _InputDocumentDataType _type cdef object _data_bytes + cdef object _filename cdef object _file cdef class Resolver: "This is the base class of all resolvers." def resolve(self, system_url, public_id, context): + """Override this method to resolve an external source by + ``system_url`` and ``public_id``. 
The third argument is an + opaque context object. + + Return the result of one of the ``resolve_*()`` methods. + """ return None def resolve_empty(self, context): - "Return an empty input document." + """Return an empty input document. + + Pass context as parameter. + """ cdef _InputDocument doc_ref doc_ref = _InputDocument() doc_ref._type = PARSER_DATA_EMPTY return doc_ref - def resolve_string(self, string, context): - "Return a parsable string as input document." + def resolve_string(self, string, context, base_url=None): + """Return a parsable string as input document. + + Pass data string and context as parameters. + + You can pass the source URL as 'base_url' keyword. + """ cdef _InputDocument doc_ref doc_ref = _InputDocument() doc_ref._type = PARSER_DATA_STRING doc_ref._data_bytes = _utf8(string) + if base_url is not None: + doc_ref._filename = _encodeFilename(base_url) return doc_ref def resolve_filename(self, filename, context): - "Return the name of a parsable file as input document." + """Return the name of a parsable file as input document. + + Pass filename and context as parameters. + """ cdef _InputDocument doc_ref doc_ref = _InputDocument() doc_ref._type = PARSER_DATA_FILENAME - doc_ref._data_bytes = _encodeFilename(filename) + doc_ref._filename = _encodeFilename(filename) return doc_ref def resolve_file(self, f, context): - "Return an open file-like object as input document." + """Return an open file-like object as input document. + + Pass open file and context as parameters. 
+ """ cdef _InputDocument doc_ref - if not hasattr(f, 'read'): + try: + f.read + except AttributeError: raise TypeError, "Argument is not a file-like object" doc_ref = _InputDocument() doc_ref._type = PARSER_DATA_FILE + doc_ref._filename = _getFilenameForFile(f) doc_ref._file = f return doc_ref Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sat Oct 20 14:46:30 2007 @@ -16,6 +16,9 @@ del __builtin__ +cdef object os_path_join +from os.path import join as os_path_join + cdef object _elementpath import _elementpath Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Sat Oct 20 14:46:30 2007 @@ -84,25 +84,26 @@ context = <_XSLTResolverContext>c_context try: resolvers = context._resolvers - uri = funicode(c_uri) + if cstd.strncmp('string://', c_uri, 9) == 0: + uri = funicode(c_uri + 9) + if cstd.strncmp('string://', context._c_style_doc.URL, 9) != 0 and \ + cstd.strcmp('', context._c_style_doc.URL) != 0: + # stylesheet URL known => make the target URL absolute + uri = os_path_join(context._c_style_doc.URL, uri) + else: + uri = funicode(c_uri) doc_ref = resolvers.resolve(uri, None, context) c_doc = NULL if doc_ref is not None: if doc_ref._type == PARSER_DATA_STRING: c_doc = _parseDoc( - doc_ref._data_bytes, None, context._parser) + doc_ref._data_bytes, doc_ref._filename, context._parser) elif doc_ref._type == PARSER_DATA_FILENAME: - if python.PyUnicode_Check(doc_ref._data_bytes): - filename = _utf8(doc_ref._data_bytes) - else: - filename = doc_ref._data_bytes - c_doc = _parseDocFromFile(filename, context._parser) + c_doc = _parseDocFromFile(doc_ref._filename, context._parser) elif doc_ref._type == PARSER_DATA_FILE: - filename = _getFilenameForFile(doc_ref._file) - data = 
doc_ref._file.read() - c_doc = _parseDoc( - data, filename, context._parser) + c_doc = _parseDocFromFilelike( + doc_ref._file, doc_ref._filename, context._parser) elif doc_ref._type == PARSER_DATA_EMPTY: c_doc = _newDoc() if c_doc is not NULL and c_doc.URL is NULL: @@ -115,7 +116,7 @@ cdef void _xslt_store_resolver_exception(char* c_uri, void* context, xslt.xsltLoadType c_type): - message = "Cannot resolve URI %s" % funicode(c_uri) + message = "Cannot resolve URI %s" % c_uri if c_type == xslt.XSLT_LOAD_DOCUMENT: exception = XSLTApplyError(message) else: @@ -299,7 +300,7 @@ # make sure we always have a stylesheet URL if c_doc.URL is NULL: - doc_url_utf = "XSLT:__STRING__XSLT__%s" % id(self) + doc_url_utf = "string://__STRING__XSLT__%s" % id(self) c_doc.URL = tree.xmlStrdup(_cstr(doc_url_utf)) self._error_log = _ErrorLog() From scoder at codespeak.net Sat Oct 20 14:50:20 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 20 Oct 2007 14:50:20 +0200 (CEST) Subject: [Lxml-checkins] r47638 - lxml/trunk/src/lxml Message-ID: <20071020125020.4842B80A4@code0.codespeak.net> Author: scoder Date: Sat Oct 20 14:50:19 2007 New Revision: 47638 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/objectify.pyx Log: fix error handling on PyList_Append() Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sat Oct 20 14:50:19 2007 @@ -845,10 +845,8 @@ result = [] while c_node is not NULL and c < stop: if _isElement(c_node): - ret = python.PyList_Append( + python.PyList_Append( result, _elementFactory(self._doc, c_node)) - if ret: - raise c = c + 1 c_node = c_node.next return result @@ -992,15 +990,12 @@ code should use ``list(element)`` or simply iterate over elements. 
""" cdef xmlNode* c_node - cdef int ret result = [] c_node = self._c_node.children while c_node is not NULL: if _isElement(c_node): - ret = python.PyList_Append( + python.PyList_Append( result, _elementFactory(self._doc, c_node)) - if ret: - raise c_node = c_node.next return result Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Sat Oct 20 14:50:19 2007 @@ -193,15 +193,12 @@ returned in document order. """ cdef tree.xmlNode* c_node - cdef int ret result = [] c_node = self._c_node.children while c_node is not NULL: if tree._isElement(c_node): - ret = python.PyList_Append( + python.PyList_Append( result, cetree.elementFactory(self._doc, c_node)) - if ret: - raise c_node = c_node.next return result From scoder at codespeak.net Sat Oct 20 14:52:33 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 20 Oct 2007 14:52:33 +0200 (CEST) Subject: [Lxml-checkins] r47639 - lxml/branch/lxml-1.3/src/lxml Message-ID: <20071020125233.676AE814E@code0.codespeak.net> Author: scoder Date: Sat Oct 20 14:52:33 2007 New Revision: 47639 Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx Log: fix error handling on PyList_Append() Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-1.3/src/lxml/etree.pyx Sat Oct 20 14:52:33 2007 @@ -829,10 +829,8 @@ result = [] while c_node is not NULL and c < stop: if _isElement(c_node): - ret = python.PyList_Append( + python.PyList_Append( result, _elementFactory(self._doc, c_node)) - if ret: - raise c = c + 1 c_node = c_node.next return result @@ -967,15 +965,12 @@ """Returns all subelements. The elements are returned in document order. 
""" cdef xmlNode* c_node - cdef int ret result = [] c_node = self._c_node.children while c_node is not NULL: if _isElement(c_node): - ret = python.PyList_Append( + python.PyList_Append( result, _elementFactory(self._doc, c_node)) - if ret: - raise c_node = c_node.next return result From scoder at codespeak.net Sat Oct 20 14:53:53 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 20 Oct 2007 14:53:53 +0200 (CEST) Subject: [Lxml-checkins] r47640 - lxml/trunk/src/lxml Message-ID: <20071020125353.547B28152@code0.codespeak.net> Author: scoder Date: Sat Oct 20 14:53:53 2007 New Revision: 47640 Modified: lxml/trunk/src/lxml/pyclasslookup.pyx Log: fix error handling on PyList_Append() Modified: lxml/trunk/src/lxml/pyclasslookup.pyx ============================================================================== --- lxml/trunk/src/lxml/pyclasslookup.pyx (original) +++ lxml/trunk/src/lxml/pyclasslookup.pyx Sat Oct 20 14:53:53 2007 @@ -103,10 +103,8 @@ result = [] while c_node is not NULL and c < stop: if tree._isElement(c_node): - ret = python.PyList_Append( + python.PyList_Append( result, _newProxy(self._source_proxy, c_node)) - if ret: - raise c = c + 1 c_node = c_node.next return result @@ -163,16 +161,13 @@ order. 
""" cdef tree.xmlNode* c_node - cdef int ret self._assertNode() result = [] c_node = self._c_node.children while c_node is not NULL: if tree._isElement(c_node): - ret = python.PyList_Append( + python.PyList_Append( result, _newProxy(self._source_proxy, c_node)) - if ret: - raise c_node = c_node.next return result From scoder at codespeak.net Sat Oct 20 14:54:25 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 20 Oct 2007 14:54:25 +0200 (CEST) Subject: [Lxml-checkins] r47641 - lxml/branch/lxml-1.3/src/lxml Message-ID: <20071020125425.2153C8154@code0.codespeak.net> Author: scoder Date: Sat Oct 20 14:54:24 2007 New Revision: 47641 Modified: lxml/branch/lxml-1.3/src/lxml/pyclasslookup.pyx Log: fix error handling on PyList_Append() Modified: lxml/branch/lxml-1.3/src/lxml/pyclasslookup.pyx ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/pyclasslookup.pyx (original) +++ lxml/branch/lxml-1.3/src/lxml/pyclasslookup.pyx Sat Oct 20 14:54:24 2007 @@ -103,10 +103,8 @@ result = [] while c_node is not NULL and c < stop: if tree._isElement(c_node): - ret = python.PyList_Append( + python.PyList_Append( result, _newProxy(self._source_proxy, c_node)) - if ret: - raise c = c + 1 c_node = c_node.next return result @@ -163,16 +161,13 @@ order. 
""" cdef tree.xmlNode* c_node - cdef int ret self._assertNode() result = [] c_node = self._c_node.children while c_node is not NULL: if tree._isElement(c_node): - ret = python.PyList_Append( + python.PyList_Append( result, _newProxy(self._source_proxy, c_node)) - if ret: - raise c_node = c_node.next return result From scoder at codespeak.net Sun Oct 21 09:22:42 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 21 Oct 2007 09:22:42 +0200 (CEST) Subject: [Lxml-checkins] r47664 - lxml/trunk/doc Message-ID: <20071021072242.63FC88160@code0.codespeak.net> Author: scoder Date: Sun Oct 21 09:22:40 2007 New Revision: 47664 Modified: lxml/trunk/doc/tutorial.txt Log: tutorial section on serialisation Modified: lxml/trunk/doc/tutorial.txt ============================================================================== --- lxml/trunk/doc/tutorial.txt (original) +++ lxml/trunk/doc/tutorial.txt Sun Oct 21 09:22:40 2007 @@ -334,13 +334,74 @@ .. _`further iterators`: api.html#iteration +Serialisation +------------- + +Serialisation commonly uses with the ``tostring()`` function that +returns a string, or the ``ElementTree.write()`` method that writes to +a file or file-like object. Both accept the same keyword arguments +like ``pretty_print`` for formatted output or ``encoding`` to select a +specific output encoding other than plain ASCII:: + + >>> root = etree.XML('') + + >>> print etree.tostring(root) + + + >>> print etree.tostring(root, xml_declaration=True) + + + + >>> print etree.tostring(root, encoding='iso-8859-1') + + + + >>> print etree.tostring(root, pretty_print=True) + + + + + + + +Since lxml 2.0 (and ElementTree 1.3), the serialisation functions can +do more than XML serialisation and optional pretty printing. You can +serialise to HTML or extract the text content by passing the +``method`` keyword:: + + >>> root = etree.XML('

Hello
World

') + + >>> print etree.tostring(root) # default: method = 'xml' +

Hello
World

+ + >>> print etree.tostring(root, method='xml') # same as above +

Hello
World

+ + >>> print etree.tostring(root, method='html') +

Hello
World

+ + >>> print etree.tostring(root, method='html', pretty_print=True) + + +

Hello
World

+ + + >>> print etree.tostring(root, method='text') + HelloWorld + +For the plain text output, the ``tounicode()`` function might become handy:: + + >>> etree.tounicode(root, method='text') + u'HelloWorld' + + The ElementTree class ===================== An ``ElementTree`` is mainly a document wrapper around a tree with a root node. It provides a couple of methods for parsing, serialisation and general document handling. One of the bigger differences is that it serialises as a -complete document, as opposed to a single Element. This includes top-level +complete document, as opposed to a single ``Element``. This includes top-level processing instructions and comments, as well as a DOCTYPE and other DTD content in the document:: From scoder at codespeak.net Sun Oct 21 09:23:56 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 21 Oct 2007 09:23:56 +0200 (CEST) Subject: [Lxml-checkins] r47665 - in lxml/trunk: doc src/lxml src/lxml/tests Message-ID: <20071021072356.BF8D88160@code0.codespeak.net> Author: scoder Date: Sun Oct 21 09:23:56 2007 New Revision: 47665 Modified: lxml/trunk/doc/parsing.txt lxml/trunk/src/lxml/docloader.pxi lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/objectify.pyx lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_etree.py lxml/trunk/src/lxml/tests/test_htmlparser.py Log: use keyword-only arguments in API Modified: lxml/trunk/doc/parsing.txt ============================================================================== --- lxml/trunk/doc/parsing.txt (original) +++ lxml/trunk/doc/parsing.txt Sun Oct 21 09:23:56 2007 @@ -495,7 +495,7 @@ >>> etree.tostring(root) '  +  ' - >>> etree.tostring(root, 'UTF-8', xml_declaration=False) + >>> etree.tostring(root, encoding='UTF-8', xml_declaration=False) ' \xef\xa3\x91 + \xef\xa3\x92 ' As an extension, lxml.etree has a new ``tounicode()`` function that you can Modified: lxml/trunk/src/lxml/docloader.pxi 
============================================================================== --- lxml/trunk/src/lxml/docloader.pxi (original) +++ lxml/trunk/src/lxml/docloader.pxi Sun Oct 21 09:23:56 2007 @@ -33,7 +33,7 @@ doc_ref._type = PARSER_DATA_EMPTY return doc_ref - def resolve_string(self, string, context, base_url=None): + def resolve_string(self, string, context, *, base_url=None): """Return a parsable string as input document. Pass data string and context as parameters. Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Oct 21 09:23:56 2007 @@ -1383,7 +1383,7 @@ return self._doc._parser return None - def write(self, file, encoding=None, method="xml", + def write(self, file, *, encoding=None, method="xml", pretty_print=False, xml_declaration=None): """Write the tree to a file or file-like object. @@ -2061,7 +2061,7 @@ """ return _makeSubElement(_parent, _tag, None, None, attrib, nsmap, _extra) -def ElementTree(_Element element=None, file=None, _BaseParser parser=None): +def ElementTree(_Element element=None, *, file=None, _BaseParser parser=None): """ElementTree wrapper class. """ cdef xmlNode* c_next @@ -2084,7 +2084,7 @@ return _elementTreeFactory(doc, element) -def HTML(text, _BaseParser parser=None, base_url=None): +def HTML(text, _BaseParser parser=None, *, base_url=None): """Parses an HTML document from a string constant. This function can be used to embed "HTML literals" in Python code. @@ -2106,7 +2106,7 @@ except _TargetParserResult, result_container: return result_container.result -def XML(text, _BaseParser parser=None, base_url=None): +def XML(text, _BaseParser parser=None, *, base_url=None): """Parses an XML document from a string constant. 
This function can be used to embed "XML literals" in Python code, like in @@ -2130,7 +2130,7 @@ except _TargetParserResult, result_container: return result_container.result -def fromstring(text, _BaseParser parser=None, base_url=None): +def fromstring(text, _BaseParser parser=None, *, base_url=None): """Parses an XML document from a string. To override the default parser with a different parser you can pass it to @@ -2168,13 +2168,13 @@ """ return isinstance(element, _Element) -def dump(_Element elem not None, pretty_print=True): +def dump(_Element elem not None, *, pretty_print=True): """Writes an element tree or element structure to sys.stdout. This function should be used for debugging only. """ _dumpToFile(sys.stdout, elem._c_node, pretty_print) -def tostring(element_or_tree, encoding=None, method="xml", +def tostring(element_or_tree, *, encoding=None, method="xml", xml_declaration=None, pretty_print=False): """Serialize an element to an encoded string representation of its XML tree. @@ -2217,7 +2217,7 @@ """ return [tostring(element_or_tree, *args, **kwargs)] -def tounicode(element_or_tree, method="xml", pretty_print=False): +def tounicode(element_or_tree, *, method="xml", pretty_print=False): """Serialize an element to the Python unicode representation of its XML tree. 
Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Sun Oct 21 09:23:56 2007 @@ -1043,7 +1043,7 @@ cdef object _namespace cdef object _nsmap cdef bint _annotate - def __init__(self, namespace=None, nsmap=None, annotate=True, + def __init__(self, *, namespace=None, nsmap=None, annotate=True, makeelement=None): if nsmap is None: nsmap = _DEFAULT_NSMAP @@ -1301,7 +1301,7 @@ pass return None -def pyannotate(element_or_tree, ignore_old=False, ignore_xsi=False, +def pyannotate(element_or_tree, *, ignore_old=False, ignore_xsi=False, empty_pytype=None): """Recursively annotates the elements of an XML tree with 'pytype' attributes. @@ -1322,7 +1322,7 @@ element = cetree.rootNodeOrRaise(element_or_tree) _annotate(element, 0, 1, ignore_xsi, ignore_old, None, empty_pytype) -def xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, +def xsiannotate(element_or_tree, *, ignore_old=False, ignore_pytype=False, empty_type=None): """Recursively annotates the elements of an XML tree with 'xsi:type' attributes. @@ -1348,7 +1348,7 @@ element = cetree.rootNodeOrRaise(element_or_tree) _annotate(element, 1, 0, ignore_old, ignore_pytype, empty_type, None) -def annotate(element_or_tree, ignore_old=True, ignore_xsi=False, +def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1): """Recursively annotates the elements of an XML tree with 'xsi:type' @@ -1536,7 +1536,7 @@ tree.xmlSetNsProp(c_node, c_ns, "nil", "true") tree.END_FOR_EACH_ELEMENT_FROM(c_node) -def deannotate(element_or_tree, pytype=True, xsi=True): +def deannotate(element_or_tree, *, pytype=True, xsi=True): """Recursively de-annotate the elements of an XML tree by removing 'pytype' and/or 'type' attributes. 
@@ -1640,7 +1640,7 @@ E = ElementMaker() -def Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes): +def Element(_tag, attrib=None, nsmap=None, *, _pytype=None, **_attributes): """Objectify specific version of the lxml.etree Element() factory that always creates a structural (tree) element. @@ -1657,7 +1657,7 @@ _attributes[PYTYPE_ATTRIBUTE] = _pytype return _makeElement(_tag, None, _attributes, nsmap) -def DataElement(_value, attrib=None, nsmap=None, _pytype=None, _xsi=None, +def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None, **_attributes): """Create a new element from a Python value and XML attributes taken from keyword arguments or a dictionary passed as second argument. Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Sun Oct 21 09:23:56 2007 @@ -1037,7 +1037,7 @@ not harmful, it is more efficient to use separate parsers. This does not apply to the default parser. """ - def __init__(self, attribute_defaults=False, dtd_validation=False, + def __init__(self, *, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, remove_blank_text=False, compact=True, resolve_entities=True, remove_comments=False, @@ -1076,7 +1076,7 @@ This parser has ``remove_comments`` and ``remove_pis`` enabled by default and thus ignores comments and processing instructions. 
""" - def __init__(self, attribute_defaults=False, dtd_validation=False, + def __init__(self, *, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, remove_blank_text=False, compact=True, resolve_entities=True, remove_comments=True, Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Sun Oct 21 09:23:56 2007 @@ -43,13 +43,13 @@ tree = ElementTree(element) self.buildNodes(element, 10, 3) f = open(self.getTestFilePath('testdump.xml'), 'w') - tree.write(f, 'UTF-8') + tree.write(f, encoding='UTF-8') f.close() f = open(self.getTestFilePath('testdump.xml'), 'r') tree = ElementTree(file=f) f.close() f = open(self.getTestFilePath('testdump2.xml'), 'w') - tree.write(f, 'UTF-8') + tree.write(f, encoding='UTF-8') f.close() f = open(self.getTestFilePath('testdump.xml'), 'r') data1 = f.read() @@ -2358,7 +2358,7 @@ f = StringIO() tree = ElementTree(element=a) - tree.write(f, 'utf-8') + tree.write(f, encoding='utf-8') self.assertEquals(u'S?k p? nettet'.encode('UTF-8'), f.getvalue()) @@ -2389,7 +2389,7 @@ f = StringIO() tree = ElementTree(element=a) - tree.write(f, 'iso-8859-1') + tree.write(f, encoding='iso-8859-1') result = f.getvalue() declaration = "" self.assertEncodingDeclaration(result,'iso-8859-1') @@ -2460,7 +2460,7 @@ a = Element('a') a.text = u'S?k p? nettet' self.assertEquals(u'S?k p? nettet'.encode('UTF-8'), - tostring(a, 'utf-8')) + tostring(a, encoding='utf-8')) def test_encoding_tostring_unknown(self): Element = self.etree.Element @@ -2468,7 +2468,8 @@ a = Element('a') a.text = u'S?k p? 
nettet' - self.assertRaises(LookupError, tostring, a, 'Invalid Encoding') + self.assertRaises(LookupError, tostring, a, + encoding='Invalid Encoding') def test_encoding_tostring_sub(self): Element = self.etree.Element @@ -2479,7 +2480,7 @@ b = SubElement(a, 'b') b.text = u'S?k p? nettet' self.assertEquals(u'S?k p? nettet'.encode('UTF-8'), - tostring(b, 'utf-8')) + tostring(b, encoding='utf-8')) def test_encoding_tostring_sub_tail(self): Element = self.etree.Element @@ -2491,7 +2492,7 @@ b.text = u'S?k p? nettet' b.tail = u'S?k' self.assertEquals(u'S?k p? nettetS?k'.encode('UTF-8'), - tostring(b, 'utf-8')) + tostring(b, encoding='utf-8')) def test_encoding_tostring_default_encoding(self): Element = self.etree.Element @@ -2919,7 +2920,7 @@ try: f = open(filename, 'wb') tree = ElementTree(element=element) - tree.write(f, encoding) + tree.write(f, encoding=encoding) f.close() f = open(filename, 'rb') data = f.read() Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Sun Oct 21 09:23:56 2007 @@ -1777,7 +1777,7 @@ b = SubElement(a, 'b') c = SubElement(a, 'c') - result = unicode(tostring(a, 'UTF-16'), 'UTF-16') + result = unicode(tostring(a, encoding='UTF-16'), 'UTF-16') self.assertEquals('', canonicalize(result)) Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_htmlparser.py (original) +++ lxml/trunk/src/lxml/tests/test_htmlparser.py Sun Oct 21 09:23:56 2007 @@ -16,6 +16,11 @@ etree = etree html_str = "test

page title

" + html_str_pretty = """\ + +test +

page title

+""" broken_html_str = "test<body><h1>page title</h3></p></html>" uhtml_str = u"<html><head><title>test ??\uF8D2

page ??\uF8D2 title

" @@ -29,9 +34,14 @@ def test_module_HTML_unicode(self): element = self.etree.HTML(self.uhtml_str) - self.assertEqual(unicode(self.etree.tostring(element, 'UTF8'), 'UTF8'), + self.assertEqual(unicode(self.etree.tostring(element, encoding='UTF8'), 'UTF8'), unicode(self.uhtml_str.encode('UTF8'), 'UTF8')) + def test_module_HTML_pretty_print(self): + element = self.etree.HTML(self.html_str) + self.assertEqual(self.etree.tostring(element, method="html", pretty_print=True), + self.html_str_pretty) + def test_module_parse_html_error(self): parser = self.etree.HTMLParser(recover=False) parse = self.etree.parse @@ -202,14 +212,14 @@ parser = self.etree.HTMLParser() f = SillyFileLike(self.html_str) tree = self.etree.parse(f, parser) - html = self.etree.tostring(tree.getroot(), 'UTF-8') + html = self.etree.tostring(tree.getroot(), encoding='UTF-8') self.assertEqual(html, self.html_str) ## def test_module_parse_html_filelike_unicode(self): ## parser = self.etree.HTMLParser() ## f = SillyFileLike(self.uhtml_str) ## tree = self.etree.parse(f, parser) -## html = self.etree.tostring(tree.getroot(), 'UTF-8') +## html = self.etree.tostring(tree.getroot(), encoding='UTF-8') ## self.assertEqual(unicode(html, 'UTF-8'), self.uhtml_str) def test_html_file_error(self): From scoder at codespeak.net Sun Oct 21 14:25:47 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 21 Oct 2007 14:25:47 +0200 (CEST) Subject: [Lxml-checkins] r47669 - lxml/branch/pyrex-0.9.6.2-fixes Message-ID: <20071021122547.DB301814E@code0.codespeak.net> Author: scoder Date: Sun Oct 21 14:25:46 2007 New Revision: 47669 Added: lxml/branch/pyrex-0.9.6.2-fixes/ - copied from r47668, lxml/trunk/ Log: new branch for fixes to build with Pyrex 0.9.6 From scoder at codespeak.net Sun Oct 21 14:27:46 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 21 Oct 2007 14:27:46 +0200 (CEST) Subject: [Lxml-checkins] r47670 - lxml/branch/pyrex-0.9.6.2-fixes/src/lxml Message-ID: 
<20071021122746.1C6D08159@code0.codespeak.net> Author: scoder Date: Sun Oct 21 14:27:45 2007 New Revision: 47670 Modified: lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/classlookup.pxi lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/etree.pyx lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/etreepublic.pxd lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/extensions.pxi lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/objectify.pyx lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/parser.pxi lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/parsertarget.pxi lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/public-api.pxi lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/pyclasslookup.pyx Log: lower case 'gil', public API and import changes Modified: lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/classlookup.pxi ============================================================================== --- lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/classlookup.pxi (original) +++ lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/classlookup.pxi Sun Oct 21 14:27:45 2007 @@ -48,7 +48,7 @@ ################################################################################ # Element class lookup -ctypedef object (*_element_class_lookup_function)(object, _Document, xmlNode*) +ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*) # class to store element class lookup functions cdef public class ElementClassLookup [ type LxmlElementClassLookupType, Modified: lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/etree.pyx ============================================================================== --- lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/etree.pyx (original) +++ lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/etree.pyx Sun Oct 21 14:27:45 2007 @@ -1769,7 +1769,7 @@ return attribs -ctypedef xmlNode* (*_node_to_node_function)(xmlNode*) +ctypedef public xmlNode* (*_node_to_node_function)(xmlNode*) cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, type LxmlElementTagMatcherType ]: Modified: lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/etreepublic.pxd 
============================================================================== --- lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/etreepublic.pxd (original) +++ lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/etreepublic.pxd Sun Oct 21 14:27:45 2007 @@ -16,10 +16,10 @@ int start_node_inclusive) cdef void END_FOR_EACH_ELEMENT_FROM(tree.xmlNode* start_node) -cdef extern from "etree.h": +cdef extern from "etree_api.h": # first function to call! - cdef int import_etree(etree_module) except -1 + cdef int import_etree() except -1 ########################################################################## # public ElementTree API classes Modified: lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/extensions.pxi ============================================================================== --- lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/extensions.pxi (original) +++ lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/extensions.pxi Sun Oct 21 14:27:45 2007 @@ -583,7 +583,7 @@ # lookup the function by name and call it cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt, - int nargs) with GIL: + int nargs) with gil: cdef xpath.xmlXPathContext* rctxt cdef _BaseContext context rctxt = ctxt.context Modified: lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/objectify.pyx (original) +++ lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/objectify.pyx Sun Oct 21 14:27:45 2007 @@ -10,7 +10,7 @@ cdef object etree from lxml import etree # initialize C-API of lxml.etree -import_etree(etree) +import_etree() __version__ = etree.__version__ Modified: lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/parser.pxi ============================================================================== --- lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/parser.pxi (original) +++ lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/parser.pxi Sun Oct 21 14:27:45 2007 @@ -288,7 +288,7 @@ self._exc_context._store_raised() return -1 -cdef 
int _readFilelikeParser(void* ctxt, char* c_buffer, int c_size) with GIL: +cdef int _readFilelikeParser(void* ctxt, char* c_buffer, int c_size) with gil: return (<_FileReaderContext>ctxt).copyToBuffer(c_buffer, c_size) ############################################################ @@ -297,7 +297,7 @@ cdef xmlparser.xmlParserInput* _parser_resolve_from_python( char* c_url, char* c_pubid, xmlparser.xmlParserCtxt* c_context, - int* error) with GIL: + int* error) with gil: # call the Python document loaders cdef xmlparser.xmlParserInput* c_input cdef _ResolverContext context Modified: lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/parsertarget.pxi ============================================================================== --- lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/parsertarget.pxi (original) +++ lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/parsertarget.pxi Sun Oct 21 14:27:45 2007 @@ -87,7 +87,7 @@ char* c_namespace, int c_nb_namespaces, char** c_namespaces, int c_nb_attributes, int c_nb_defaulted, - char** c_attributes) with GIL: + char** c_attributes) with gil: cdef _TargetParserContext context cdef xmlparser.xmlParserCtxt* c_ctxt cdef int i @@ -118,7 +118,7 @@ _handleSaxTargetException(context, c_ctxt) cdef void _targetSaxEnd(void* ctxt, char* c_localname, char* c_prefix, - char* c_namespace) with GIL: + char* c_namespace) with gil: cdef _TargetParserContext context cdef xmlparser.xmlParserCtxt* c_ctxt c_ctxt = ctxt @@ -131,7 +131,7 @@ except: _handleSaxTargetException(context, c_ctxt) -cdef void _targetSaxData(void* ctxt, char* c_data, int data_len) with GIL: +cdef void _targetSaxData(void* ctxt, char* c_data, int data_len) with gil: cdef _TargetParserContext context cdef xmlparser.xmlParserCtxt* c_ctxt c_ctxt = ctxt @@ -145,7 +145,7 @@ _handleSaxTargetException(context, c_ctxt) cdef void _targetSaxDoctype(void* ctxt, char* c_name, char* c_public, - char* c_system) with GIL: + char* c_system) with gil: cdef _TargetParserContext context cdef xmlparser.xmlParserCtxt* c_ctxt 
c_ctxt = ctxt @@ -162,7 +162,7 @@ except: _handleSaxTargetException(context, c_ctxt) -cdef void _targetSaxPI(void* ctxt, char* c_target, char* c_data) with GIL: +cdef void _targetSaxPI(void* ctxt, char* c_target, char* c_data) with gil: cdef _TargetParserContext context cdef xmlparser.xmlParserCtxt* c_ctxt c_ctxt = ctxt @@ -176,7 +176,7 @@ except: _handleSaxTargetException(context, c_ctxt) -cdef void _targetSaxComment(void* ctxt, char* c_data, int data_len) with GIL: +cdef void _targetSaxComment(void* ctxt, char* c_data, int data_len) with gil: cdef _TargetParserContext context cdef xmlparser.xmlParserCtxt* c_ctxt c_ctxt = ctxt Modified: lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/public-api.pxi ============================================================================== --- lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/public-api.pxi (original) +++ lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/public-api.pxi Sun Oct 21 14:27:45 2007 @@ -1,156 +1,156 @@ # Public C API for lxml.etree -cdef public _Element deepcopyNodeToDocument(_Document doc, xmlNode* c_root): +cdef api _Element deepcopyNodeToDocument(_Document doc, xmlNode* c_root): "Recursively copy the element into the document. doc is not modified." 
cdef xmlNode* c_node c_node = _copyNodeToDoc(c_root, doc._c_doc) return _elementFactory(doc, c_node) -cdef public _ElementTree elementTreeFactory(_Element context_node): +cdef api _ElementTree elementTreeFactory(_Element context_node): return newElementTree(context_node, _ElementTree) -cdef public _ElementTree newElementTree(_Element context_node, +cdef api _ElementTree newElementTree(_Element context_node, object subclass): if context_node is NULL or context_node is None: raise TypeError return _newElementTree(context_node._doc, context_node, subclass) -cdef public _Element elementFactory(_Document doc, xmlNode* c_node): +cdef api _Element elementFactory(_Document doc, xmlNode* c_node): if c_node is NULL or doc is None: raise TypeError return _elementFactory(doc, c_node) -cdef public _Element makeElement(tag, _Document doc, parser, +cdef api _Element makeElement(tag, _Document doc, parser, text, tail, attrib, nsmap): return _makeElement(tag, NULL, doc, parser, text, tail, attrib, nsmap, None) -cdef public _Element makeSubElement(_Element parent, tag, text, tail, +cdef api _Element makeSubElement(_Element parent, tag, text, tail, attrib, nsmap): return _makeSubElement(parent, tag, text, tail, attrib, nsmap, None) -cdef public void setElementClassLookupFunction( +cdef api void setElementClassLookupFunction( _element_class_lookup_function function, state): _setElementClassLookupFunction(function, state) -cdef public object lookupDefaultElementClass(state, doc, xmlNode* c_node): +cdef api object lookupDefaultElementClass(state, doc, xmlNode* c_node): return _lookupDefaultElementClass(state, doc, c_node) -cdef public object lookupNamespaceElementClass(state, doc, xmlNode* c_node): +cdef api object lookupNamespaceElementClass(state, doc, xmlNode* c_node): return _find_nselement_class(state, doc, c_node) -cdef public object callLookupFallback(FallbackElementClassLookup lookup, +cdef api object callLookupFallback(FallbackElementClassLookup lookup, _Document doc, xmlNode* 
c_node): return lookup._callFallback(doc, c_node) -cdef public int tagMatches(xmlNode* c_node, char* c_href, char* c_name): +cdef api int tagMatches(xmlNode* c_node, char* c_href, char* c_name): if c_node is NULL: return -1 return _tagMatches(c_node, c_href, c_name) -cdef public _Document documentOrRaise(object input): +cdef api _Document documentOrRaise(object input): return _documentOrRaise(input) -cdef public _Element rootNodeOrRaise(object input): +cdef api _Element rootNodeOrRaise(object input): return _rootNodeOrRaise(input) -cdef public bint hasText(xmlNode* c_node): +cdef api bint hasText(xmlNode* c_node): return _hasText(c_node) -cdef public bint hasTail(xmlNode* c_node): +cdef api bint hasTail(xmlNode* c_node): return _hasTail(c_node) -cdef public object textOf(xmlNode* c_node): +cdef api object textOf(xmlNode* c_node): if c_node is NULL: return None return _collectText(c_node.children) -cdef public object tailOf(xmlNode* c_node): +cdef api object tailOf(xmlNode* c_node): if c_node is NULL: return None return _collectText(c_node.next) -cdef public int setNodeText(xmlNode* c_node, text) except -1: +cdef api int setNodeText(xmlNode* c_node, text) except -1: if c_node is NULL: raise ValueError return _setNodeText(c_node, text) -cdef public int setTailText(xmlNode* c_node, text) except -1: +cdef api int setTailText(xmlNode* c_node, text) except -1: if c_node is NULL: raise ValueError return _setTailText(c_node, text) -cdef public object attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node): +cdef api object attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node): return _attributeValue(c_element, c_attrib_node) -cdef public object attributeValueFromNsName(xmlNode* c_element, +cdef api object attributeValueFromNsName(xmlNode* c_element, char* ns, char* name): return _attributeValueFromNsName(c_element, ns, name) -cdef public object getAttributeValue(_Element element, key, default): +cdef api object getAttributeValue(_Element element, key, default): 
return _getAttributeValue(element, key, default) -cdef public object iterattributes(_Element element, int keysvalues): +cdef api object iterattributes(_Element element, int keysvalues): return _attributeIteratorFactory(element, keysvalues) -cdef public object collectAttributes(xmlNode* c_element, int keysvalues): +cdef api object collectAttributes(xmlNode* c_element, int keysvalues): return _collectAttributes(c_element, keysvalues) -cdef public int setAttributeValue(_Element element, key, value) except -1: +cdef api int setAttributeValue(_Element element, key, value) except -1: return _setAttributeValue(element, key, value) -cdef public int delAttribute(_Element element, key) except -1: +cdef api int delAttribute(_Element element, key) except -1: return _delAttribute(element, key) -cdef public int delAttributeFromNsName(tree.xmlNode* c_element, +cdef api int delAttributeFromNsName(tree.xmlNode* c_element, char* c_href, char* c_name): return _delAttributeFromNsName(c_element, c_href, c_name) -cdef public bint hasChild(xmlNode* c_node): +cdef api bint hasChild(xmlNode* c_node): return _hasChild(c_node) -cdef public xmlNode* findChild(xmlNode* c_node, Py_ssize_t index): +cdef api xmlNode* findChild(xmlNode* c_node, Py_ssize_t index): return _findChild(c_node, index) -cdef public xmlNode* findChildForwards(xmlNode* c_node, Py_ssize_t index): +cdef api xmlNode* findChildForwards(xmlNode* c_node, Py_ssize_t index): return _findChildForwards(c_node, index) -cdef public xmlNode* findChildBackwards(xmlNode* c_node, Py_ssize_t index): +cdef api xmlNode* findChildBackwards(xmlNode* c_node, Py_ssize_t index): return _findChildBackwards(c_node, index) -cdef public xmlNode* nextElement(xmlNode* c_node): +cdef api xmlNode* nextElement(xmlNode* c_node): return _nextElement(c_node) -cdef public xmlNode* previousElement(xmlNode* c_node): +cdef api xmlNode* previousElement(xmlNode* c_node): return _previousElement(c_node) -cdef public void appendChild(_Element parent, _Element 
child): +cdef api void appendChild(_Element parent, _Element child): _appendChild(parent, child) -cdef public object pyunicode(char* s): +cdef api object pyunicode(char* s): if s is NULL: raise TypeError return funicode(s) -cdef public object utf8(object s): +cdef api object utf8(object s): return _utf8(s) -cdef public object getNsTag(object tag): +cdef api object getNsTag(object tag): return _getNsTag(tag) -cdef public object namespacedName(xmlNode* c_node): +cdef api object namespacedName(xmlNode* c_node): return _namespacedName(c_node) -cdef public object namespacedNameFromNsName(char* href, char* name): +cdef api object namespacedNameFromNsName(char* href, char* name): return _namespacedNameFromNsName(href, name) -cdef public void iteratorStoreNext(_ElementIterator iterator, _Element node): +cdef api void iteratorStoreNext(_ElementIterator iterator, _Element node): iterator._storeNext(node) -cdef public void initTagMatch(_ElementTagMatcher matcher, tag): +cdef api void initTagMatch(_ElementTagMatcher matcher, tag): matcher._initTagMatch(tag) -cdef public tree.xmlNs* findOrBuildNodeNsPrefix( +cdef api tree.xmlNs* findOrBuildNodeNsPrefix( _Document doc, xmlNode* c_node, char* href, char* prefix) except NULL: if doc is None: raise TypeError Modified: lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/pyclasslookup.pyx ============================================================================== --- lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/pyclasslookup.pyx (original) +++ lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/pyclasslookup.pyx Sun Oct 21 14:27:45 2007 @@ -13,7 +13,7 @@ cdef object etree from lxml import etree # initialize C-API of lxml.etree -import_etree(etree) +import_etree() __version__ = etree.__version__ From scoder at codespeak.net Mon Oct 22 09:33:25 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 22 Oct 2007 09:33:25 +0200 (CEST) Subject: [Lxml-checkins] r47696 - lxml/trunk/src/lxml Message-ID: 
<20071022073325.98F968181@code0.codespeak.net> Author: scoder Date: Mon Oct 22 09:33:23 2007 New Revision: 47696 Added: lxml/trunk/src/lxml/lxml.etree.pyx - copied unchanged from r47695, lxml/trunk/src/lxml/etree.pyx Removed: lxml/trunk/src/lxml/etree.pyx Log: use FQMN in filename to make Pyrex' import mechanism happy Deleted: /lxml/trunk/src/lxml/etree.pyx ============================================================================== --- /lxml/trunk/src/lxml/etree.pyx Mon Oct 22 09:33:23 2007 +++ (empty file) @@ -1,2317 +0,0 @@ -cimport tree, python, config -from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement, _getNs -from python cimport callable, _cstr, _isString -cimport xpath -cimport xinclude -cimport c14n -cimport cstd - -import __builtin__ - -cdef object set -try: - set = __builtin__.set -except AttributeError: - from sets import Set as set - -del __builtin__ - -cdef object os_path_join -from os.path import join as os_path_join - -cdef object _elementpath -import _elementpath - -cdef object sys -import sys - -cdef object re -import re - -cdef object ITER_EMPTY -ITER_EMPTY = iter(()) - -# the rules -# any libxml C argument/variable is prefixed with c_ -# any non-public function/class is prefixed with an underscore -# instance creation is always through factories - -# what to do with libxml2/libxslt error messages? 
-# 0 : drop -# 1 : use log -cdef int __DEBUG -__DEBUG = 1 - -# maximum number of lines in the libxml2/xslt log if __DEBUG == 1 -cdef int __MAX_LOG_SIZE -__MAX_LOG_SIZE = 100 - -# make the compiled-in debug state publicly available -DEBUG = __DEBUG - -# global per-thread setup -tree.xmlThrDefIndentTreeOutput(1) -tree.xmlThrDefLineNumbersDefaultValue(1) - -_initThreadLogging() - -# initialize parser (and threading) -xmlparser.xmlInitParser() - -# filename encoding -cdef object _FILENAME_ENCODING -_FILENAME_ENCODING = sys.getfilesystemencoding() -if _FILENAME_ENCODING is None: - _FILENAME_ENCODING = sys.getdefaultencoding() -if _FILENAME_ENCODING is None: - _FILENAME_ENCODING = 'ascii' -cdef char* _C_FILENAME_ENCODING -_C_FILENAME_ENCODING = _cstr(_FILENAME_ENCODING) - - -# Error superclass for ElementTree compatibility -class Error(Exception): - pass - -# module level superclass for all exceptions -class LxmlError(Error): - """Main exception base class for lxml. All other exceptions inherit from - this one. - """ - def __init__(self, *args): - _initError(self, *args) - self.error_log = __copyGlobalErrorLog() - -cdef object _LxmlError -_LxmlError = LxmlError - -def _superError(obj, *args): - super(_LxmlError, obj).__init__(*args) - -cdef object _initError -if isinstance(_LxmlError, type): - _initError = _superError # Python >= 2.5 -else: - _initError = Error.__init__ # Python <= 2.4 - -del _superError - - -# superclass for all syntax errors -class LxmlSyntaxError(LxmlError, SyntaxError): - """Base class for all syntax errors. - """ - pass - -class XIncludeError(LxmlError): - """Error during XInclude processing. - """ - pass - -class C14NError(LxmlError): - """Error during C14N serialisation. 
- """ - pass - -# version information -cdef __unpackDottedVersion(version): - version_list = [] - l = (version.replace('-', '.').split('.') + [0]*4)[:4] - for item in l: - try: - item = int(item) - except ValueError: - if item.startswith('dev'): - count = item[3:] - item = -300 - elif item.startswith('alpha'): - count = item[5:] - item = -200 - elif item.startswith('beta'): - count = item[4:] - item = -100 - else: - count = 0 - if count: - item = item + int(count) - version_list.append(item) - return tuple(version_list) - -cdef __unpackIntVersion(int c_version): - return ( - ((c_version / (100*100)) % 100), - ((c_version / 100) % 100), - (c_version % 100) - ) - -cdef int _LIBXML_VERSION_INT -try: - _LIBXML_VERSION_INT = int(re.match('[0-9]+', tree.xmlParserVersion).group(0)) -except Exception: - print "Unknown libxml2 version:", tree.xmlParserVersion - _LIBXML_VERSION_INT = 0 - -LIBXML_VERSION = __unpackIntVersion(_LIBXML_VERSION_INT) -LIBXML_COMPILED_VERSION = __unpackIntVersion(tree.LIBXML_VERSION) -LXML_VERSION = __unpackDottedVersion(tree.LXML_VERSION_STRING) - -__version__ = tree.LXML_VERSION_STRING - - -# class for temporary storage of Python references -cdef class _TempStore: - cdef object _storage - def __init__(self): - self._storage = [] - - cdef void add(self, obj): - python.PyList_Append(self._storage, obj) - - cdef void clear(self): - del self._storage[:] - -# class for temporarily storing exceptions raised in extensions -cdef class _ExceptionContext: - cdef object _exc_info - cdef void clear(self): - self._exc_info = None - - cdef void _store_raised(self): - self._exc_info = sys.exc_info() - - cdef void _store_exception(self, exception): - self._exc_info = (exception, None, None) - - cdef int _has_raised(self): - return self._exc_info is not None - - cdef _raise_if_stored(self): - if self._exc_info is None: - return - type, value, traceback = self._exc_info - self._exc_info = None - if value is None and traceback is None: - raise type - else: - raise 
type, value, traceback - - -cdef class QName: - """QName wrapper. - """ - cdef readonly object text - def __init__(self, text_or_uri, tag=None): - if tag is not None: - _tagValidOrRaise(_utf8(tag)) - text_or_uri = "{%s}%s" % (text_or_uri, tag) - else: - if not _isString(text_or_uri): - text_or_uri = str(text_or_uri) - tag = _getNsTag(text_or_uri)[1] - _tagValidOrRaise(tag) - self.text = text_or_uri - def __str__(self): - return self.text - def __hash__(self): - return self.text.__hash__() - def __richcmp__(one, other, int op): - if not _isString(one): - one = str(one) - if not _isString(other): - other = str(other) - return python.PyObject_RichCompare(one, other, op) - - -# forward declaration of _BaseParser, see parser.pxi -cdef class _BaseParser - - -cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: - """Internal base class to reference a libxml document. - - When instances of this class are garbage collected, the libxml - document is cleaned up. - """ - cdef int _ns_counter - cdef object _prefix_tail - cdef xmlDoc* _c_doc - cdef _BaseParser _parser - - def __dealloc__(self): - # if there are no more references to the document, it is safe - # to clean the whole thing up, as all nodes have a reference to - # the document - #print "freeing document:", self._c_doc - #displayNode(self._c_doc, 0) - #print self._c_doc, self._c_doc.dict is __GLOBAL_PARSER_CONTEXT._c_dict - #print self._c_doc, canDeallocateChildNodes(self._c_doc) - tree.xmlFreeDoc(self._c_doc) - #_deallocDocument(self._c_doc) - - cdef getroot(self): - cdef xmlNode* c_node - c_node = tree.xmlDocGetRootElement(self._c_doc) - if c_node is NULL: - return None - return _elementFactory(self, c_node) - - cdef getdoctype(self): - cdef tree.xmlDtd* c_dtd - cdef xmlNode* c_root_node - public_id = None - sys_url = None - c_dtd = self._c_doc.intSubset - if c_dtd is not NULL: - if c_dtd.ExternalID is not NULL: - public_id = funicode(c_dtd.ExternalID) - if c_dtd.SystemID is not NULL: - 
sys_url = funicode(c_dtd.SystemID) - c_dtd = self._c_doc.extSubset - if c_dtd is not NULL: - if not public_id and c_dtd.ExternalID is not NULL: - public_id = funicode(c_dtd.ExternalID) - if not sys_url and c_dtd.SystemID is not NULL: - sys_url = funicode(c_dtd.SystemID) - c_root_node = tree.xmlDocGetRootElement(self._c_doc) - if c_root_node is NULL: - root_name = None - else: - root_name = funicode(c_root_node.name) - return (root_name, public_id, sys_url) - - cdef getxmlinfo(self): - cdef xmlDoc* c_doc - c_doc = self._c_doc - if c_doc.version is NULL: - version = None - else: - version = c_doc.version - if c_doc.encoding is NULL: - encoding = None - else: - encoding = c_doc.encoding - return (version, encoding) - - cdef getURL(self): - if self._c_doc.URL is NULL: - return None - else: - return self._c_doc.URL - - cdef buildNewPrefix(self): - ns = python.PyString_FromFormat("ns%d", self._ns_counter) - if self._prefix_tail is not None: - ns = ns + self._prefix_tail - self._ns_counter = self._ns_counter + 1 - if self._ns_counter < 0: - # overflow! - self._ns_counter = 0 - if self._prefix_tail is None: - self._prefix_tail = "A" - else: - self._prefix_tail = self._prefix_tail + "A" - return ns - - cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, - char* c_href, char* c_prefix): - """Get or create namespace structure for a node. Reuses the prefix if - possible. 
- """ - cdef xmlNs* c_ns - cdef xmlNs* c_doc_ns - # look for existing ns - c_ns = tree.xmlSearchNsByHref(self._c_doc, c_node, c_href) - if c_ns is not NULL: - return c_ns - - if c_prefix is NULL or \ - tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is not NULL: - # try to simulate ElementTree's namespace prefix creation - while 1: - prefix = self.buildNewPrefix() - c_prefix = _cstr(prefix) - # make sure it's not used already - if tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is NULL: - break - - return tree.xmlNewNs(c_node, c_href, c_prefix) - - cdef void _setNodeNs(self, xmlNode* c_node, char* href): - "Lookup namespace structure and set it for the node." - cdef xmlNs* c_ns - c_ns = self._findOrBuildNodeNs(c_node, href, NULL) - tree.xmlSetNs(c_node, c_ns) - - cdef int _setNodeNamespaces(self, xmlNode* c_node, - object node_ns_utf, object nsmap) except -1: - """Lookup current namespace prefixes, then set namespace structure for - node and register new ns-prefix mappings. - - This only works for a newly created node! 
- """ - cdef xmlNs* c_ns - cdef xmlDoc* c_doc - cdef char* c_prefix - cdef char* c_href - if not nsmap: - if node_ns_utf is not None: - self._setNodeNs(c_node, _cstr(node_ns_utf)) - return 0 - - c_doc = self._c_doc - for prefix, href in nsmap.items(): - href_utf = _utf8(href) - c_href = _cstr(href_utf) - if prefix is not None and prefix: - prefix_utf = _utf8(prefix) - c_prefix = _cstr(prefix_utf) - else: - c_prefix = NULL - # add namespace with prefix if ns is not already known - c_ns = tree.xmlSearchNsByHref(c_doc, c_node, c_href) - if c_ns is NULL: - c_ns = tree.xmlNewNs(c_node, c_href, c_prefix) - if href_utf == node_ns_utf: - tree.xmlSetNs(c_node, c_ns) - node_ns_utf = None - - if node_ns_utf is not None: - self._setNodeNs(c_node, _cstr(node_ns_utf)) - return 0 - -cdef extern from "etree_defs.h": - # macro call to 't->tp_new()' for fast instantiation - cdef _Document NEW_DOCUMENT "PY_NEW" (object t) - -cdef _Document _documentFactory(xmlDoc* c_doc, _BaseParser parser): - cdef _Document result - result = NEW_DOCUMENT(_Document) - result._c_doc = c_doc - result._ns_counter = 0 - result._prefix_tail = None - if parser is None: - parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() - result._parser = parser - return result - - -cdef class DocInfo: - "Document information provided by parser and DTD." - cdef _Document _doc - def __init__(self, tree): - "Create a DocInfo object for an ElementTree object or root Element." - self._doc = _documentOrRaise(tree) - root_name, public_id, system_url = self._doc.getdoctype() - if not root_name and (public_id or system_url): - raise ValueError, "Could not find root node" - - property root_name: - "Returns the name of the root node as defined by the DOCTYPE." - def __get__(self): - root_name, public_id, system_url = self._doc.getdoctype() - return root_name - - property public_id: - "Returns the public ID of the DOCTYPE." 
- def __get__(self): - root_name, public_id, system_url = self._doc.getdoctype() - return public_id - - property system_url: - "Returns the system ID of the DOCTYPE." - def __get__(self): - root_name, public_id, system_url = self._doc.getdoctype() - return system_url - - property xml_version: - "Returns the XML version as declared by the document." - def __get__(self): - xml_version, encoding = self._doc.getxmlinfo() - return xml_version - - property encoding: - "Returns the encoding name as declared by the document." - def __get__(self): - xml_version, encoding = self._doc.getxmlinfo() - return encoding - - property URL: - "Returns the source URL of the document (or None if unknown)." - def __get__(self): - return self._doc.getURL() - - property doctype: - "Returns a DOCTYPE declaration string for the document." - def __get__(self): - root_name, public_id, system_url = self._doc.getdoctype() - if public_id: - if system_url: - return '' % ( - root_name, public_id, system_url) - else: - return '' % ( - root_name, public_id) - elif system_url: - return '' % ( - root_name, system_url) - else: - return "" - - property internalDTD: - "Returns a DTD validator based on the internal subset of the document." - def __get__(self): - return _dtdFactory(self._doc._c_doc.intSubset) - - property externalDTD: - "Returns a DTD validator based on the external subset of the document." - def __get__(self): - return _dtdFactory(self._doc._c_doc.extSubset) - - -cdef public class _Element [ type LxmlElementType, object LxmlElement ]: - """Element class. References a document object and a libxml node. - - By pointing to a Document instance, a reference is kept to - _Document as long as there is some pointer to a node in it. - """ - cdef python.PyObject* _gc_doc - cdef _Document _doc - cdef xmlNode* _c_node - cdef object _tag - cdef object _attrib - - def _init(self): - """Called after object initialisation. 
Custom subclasses may override - this if they recursively call _init() in the superclasses. - """ - - def __dealloc__(self): - #print "trying to free node:", self._c_node - #displayNode(self._c_node, 0) - if self._c_node is not NULL: - _unregisterProxy(self) - attemptDeallocation(self._c_node) - _releaseProxy(self) - - # MANIPULATORS - - def __setitem__(self, Py_ssize_t index, _Element element not None): - """Replaces the given subelement. - """ - cdef xmlNode* c_node - cdef xmlNode* c_next - c_node = _findChild(self._c_node, index) - if c_node is NULL: - raise IndexError, index - c_next = element._c_node.next - _removeText(c_node.next) - tree.xmlReplaceNode(c_node, element._c_node) - _moveTail(c_next, element._c_node) - moveNodeToDocument(self._doc, element._c_node) - if not attemptDeallocation(c_node): - moveNodeToDocument(self._doc, c_node) - - def __delitem__(self, Py_ssize_t index): - """Deletes the given subelement. - """ - cdef xmlNode* c_node - c_node = _findChild(self._c_node, index) - if c_node is NULL: - raise IndexError, index - _removeText(c_node.next) - _removeNode(self._doc, c_node) - - def __delslice__(self, Py_ssize_t start, Py_ssize_t stop): - """Deletes a number of subelements. - """ - cdef xmlNode* c_node - c_node = _findChild(self._c_node, start) - _deleteSlice(self._doc, c_node, start, stop) - - def __setslice__(self, Py_ssize_t start, Py_ssize_t stop, value): - """Replaces a number of subelements with elements - from a sequence. 
- """ - cdef xmlNode* c_node - cdef xmlNode* c_next - cdef _Element element - # first, find start of slice - if start == python.PY_SSIZE_T_MAX: - c_node = NULL - else: - c_node = _findChild(self._c_node, start) - # now delete the slice - if c_node is not NULL and start != stop: - c_node = _deleteSlice(self._doc, c_node, start, stop) - # if the insertion point is at the end, append there - if c_node is NULL: - for element in value: - _appendChild(self, element) - return - # if the next element is in the list, insert before it - for element in value: - if element is None: - raise TypeError, "Node must not be None." - # store possible text tail - c_next = element._c_node.next - # now move node previous to insertion point - tree.xmlAddPrevSibling(c_node, element._c_node) - # and move tail just behind his node - _moveTail(c_next, element._c_node) - # move it into a new document - moveNodeToDocument(self._doc, element._c_node) - - def __deepcopy__(self, memo): - return self.__copy__() - - def __copy__(self): - cdef xmlDoc* c_doc - cdef xmlNode* c_node - cdef _Document new_doc - c_doc = _copyDocRoot(self._doc._c_doc, self._c_node) # recursive - new_doc = _documentFactory(c_doc, self._doc._parser) - root = new_doc.getroot() - if root is not None: - return root - # Comment/PI - c_node = c_doc.children - while c_node is not NULL and c_node.type != self._c_node.type: - c_node = c_node.next - if c_node is NULL: - return None - return _elementFactory(new_doc, c_node) - - def set(self, key, value): - """Sets an element attribute. - """ - _setAttributeValue(self, key, value) - - def append(self, _Element element not None): - """Adds a subelement to the end of this element. - """ - _appendChild(self, element) - - def addnext(self, _Element element): - """Adds the element as a following sibling directly after this - element. - - This is normally used to set a processing instruction or comment after - the root node of a document. 
Note that tail text is automatically - discarded when adding at the root level. - """ - if self._c_node.parent != NULL and not _isElement(self._c_node.parent): - if element._c_node.type != tree.XML_PI_NODE: - if element._c_node.type != tree.XML_COMMENT_NODE: - raise TypeError, "Only processing instructions and comments can be siblings of the root element" - element.tail = None - _appendSibling(self, element) - - def addprevious(self, _Element element): - """Adds the element as a preceding sibling directly before this - element. - - This is normally used to set a processing instruction or comment - before the root node of a document. Note that tail text is - automatically discarded when adding at the root level. - """ - if self._c_node.parent != NULL and not _isElement(self._c_node.parent): - if element._c_node.type != tree.XML_PI_NODE: - if element._c_node.type != tree.XML_COMMENT_NODE: - raise TypeError, "Only processing instructions and comments can be siblings of the root element" - element.tail = None - _prependSibling(self, element) - - def extend(self, elements): - """Extends the current children by the elements in the iterable. - """ - for element in elements: - _appendChild(self, element) - - def clear(self): - """Resets an element. This function removes all subelements, - clears all attributes and sets the text and tail - attributes to None. 
- """ - cdef xmlAttr* c_attr - cdef xmlAttr* c_attr_next - cdef xmlNode* c_node - cdef xmlNode* c_node_next - c_node = self._c_node - # remove self.text and self.tail - _removeText(c_node.children) - _removeText(c_node.next) - # remove all attributes - c_attr = c_node.properties - while c_attr is not NULL: - c_attr_next = c_attr.next - tree.xmlRemoveProp(c_attr) - c_attr = c_attr_next - # remove all subelements - c_node = c_node.children - while c_node is not NULL: - c_node_next = c_node.next - if _isElement(c_node): - while c_node_next is not NULL and not _isElement(c_node_next): - c_node_next = c_node_next.next - _removeNode(self._doc, c_node) - c_node = c_node_next - - def insert(self, index, _Element element not None): - """Inserts a subelement at the given position in this element - """ - cdef xmlNode* c_node - cdef xmlNode* c_next - c_node = _findChild(self._c_node, index) - if c_node is NULL: - _appendChild(self, element) - return - c_next = element._c_node.next - tree.xmlAddPrevSibling(c_node, element._c_node) - _moveTail(c_next, element._c_node) - moveNodeToDocument(self._doc, element._c_node) - - def remove(self, _Element element not None): - """Removes a matching subelement. Unlike the find methods, this - method compares elements based on identity, not on tag value - or contents. - """ - cdef xmlNode* c_node - cdef xmlNode* c_next - c_node = element._c_node - if c_node.parent is not self._c_node: - raise ValueError, "Element is not a child of this node." - c_next = element._c_node.next - tree.xmlUnlinkNode(c_node) - _moveTail(c_next, c_node) - # fix namespace declarations - moveNodeToDocument(self._doc, c_node) - - def replace(self, _Element old_element not None, - _Element new_element not None): - """Replaces a subelement with the element passed as second argument. 
- """ - cdef xmlNode* c_old_node - cdef xmlNode* c_old_next - cdef xmlNode* c_new_node - cdef xmlNode* c_new_next - c_old_node = old_element._c_node - if c_old_node.parent is not self._c_node: - raise ValueError, "Element is not a child of this node." - c_old_next = c_old_node.next - c_new_node = new_element._c_node - c_new_next = c_new_node.next - tree.xmlReplaceNode(c_old_node, c_new_node) - _moveTail(c_new_next, c_new_node) - _moveTail(c_old_next, c_old_node) - moveNodeToDocument(self._doc, c_new_node) - # fix namespace declarations - moveNodeToDocument(self._doc, c_old_node) - - # PROPERTIES - property tag: - """Element tag - """ - def __get__(self): - if self._tag is not None: - return self._tag - self._tag = _namespacedName(self._c_node) - return self._tag - - def __set__(self, value): - cdef _BaseParser parser - ns, name = _getNsTag(value) - parser = self._doc._parser - if parser is not None and parser._for_html: - _htmlTagValidOrRaise(name) - else: - _tagValidOrRaise(name) - self._tag = value - tree.xmlNodeSetName(self._c_node, _cstr(name)) - if ns is None: - self._c_node.ns = NULL - else: - self._doc._setNodeNs(self._c_node, _cstr(ns)) - - property attrib: - """Element attribute dictionary. Where possible, use get(), set(), - keys(), values() and items() to access element attributes. - """ - def __get__(self): - if self._attrib is None: - self._attrib = _Attrib(self) - return self._attrib - - property text: - """Text before the first subelement. This is either a string or - the value None, if there was no text. - """ - def __get__(self): - return _collectText(self._c_node.children) - - def __set__(self, value): - if isinstance(value, QName): - value = python.PyUnicode_FromEncodedObject( - _resolveQNameText(self, value), 'UTF-8', 'strict') - _setNodeText(self._c_node, value) - - property tail: - """Text after this element's end tag, but before the next sibling - element's start tag. This is either a string or the value None, if - there was no text. 
- """ - def __get__(self): - return _collectText(self._c_node.next) - - def __set__(self, value): - _setTailText(self._c_node, value) - - # not in ElementTree, read-only - property prefix: - """Namespace prefix or None. - """ - def __get__(self): - if self._c_node.ns is not NULL: - if self._c_node.ns.prefix is not NULL: - return funicode(self._c_node.ns.prefix) - return None - - # not in ElementTree, read-only - property sourceline: - """Original line number as found by the parser or None if unknown. - """ - def __get__(self): - cdef long line - line = tree.xmlGetLineNo(self._c_node) - if line > 0: - return line - else: - return None - - def __set__(self, line): - if line < 0: - self._c_node.line = 0 - else: - self._c_node.line = line - - # not in ElementTree, read-only - property nsmap: - """Namespace prefix->URI mapping known in the context of this Element. - """ - def __get__(self): - cdef xmlNode* c_node - cdef xmlNs* c_ns - nsmap = {} - c_node = self._c_node - while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE: - c_ns = c_node.nsDef - while c_ns is not NULL: - if c_ns.prefix is NULL: - prefix = None - else: - prefix = funicode(c_ns.prefix) - if not python.PyDict_Contains(nsmap, prefix): - python.PyDict_SetItem( - nsmap, prefix, funicode(c_ns.href)) - c_ns = c_ns.next - c_node = c_node.parent - return nsmap - - # ACCESSORS - def __repr__(self): - return "" % (self.tag, id(self)) - - def __getitem__(self, Py_ssize_t index): - """Returns the subelement at the given position. - """ - cdef xmlNode* c_node - c_node = _findChild(self._c_node, index) - if c_node is NULL: - raise IndexError, "list index out of range" - return _elementFactory(self._doc, c_node) - - def __getslice__(self, Py_ssize_t start, Py_ssize_t stop): - """Returns a list containing subelements in the given range. 
- """ - cdef xmlNode* c_node - cdef _Document doc - cdef Py_ssize_t c - # this does not work for negative start, stop, however, - # python seems to convert these to positive start, stop before - # calling, so this all works perfectly (at the cost of a len() call) - c_node = _findChild(self._c_node, start) - if c_node is NULL: - return [] - c = start - result = [] - while c_node is not NULL and c < stop: - if _isElement(c_node): - python.PyList_Append( - result, _elementFactory(self._doc, c_node)) - c = c + 1 - c_node = c_node.next - return result - - def __len__(self): - """Returns the number of subelements. - """ - cdef Py_ssize_t c - cdef xmlNode* c_node - c = 0 - c_node = self._c_node.children - while c_node is not NULL: - if _isElement(c_node): - c = c + 1 - c_node = c_node.next - return c - - def __nonzero__(self): - import warnings - warnings.warn( - "The behavior of this method will change in future versions. " - "Use specific 'len(elem)' or 'elem is not None' test instead.", - FutureWarning - ) - # emulate old behaviour - return _hasChild(self._c_node) - - def __contains__(self, element): - cdef xmlNode* c_node - if not isinstance(element, _Element): - return 0 - c_node = (<_Element>element)._c_node - return c_node is not NULL and c_node.parent is self._c_node - - def __iter__(self): - return ElementChildIterator(self) - - def __reversed__(self): - return ElementChildIterator(self, reversed=True) - - def index(self, _Element x not None, start=None, stop=None): - """Find the position of the child within the parent. - - This method is not part of the original ElementTree API. - """ - cdef Py_ssize_t k, l - cdef Py_ssize_t c_start, c_stop - cdef xmlNode* c_child - cdef xmlNode* c_start_node - c_child = x._c_node - if c_child.parent is not self._c_node: - raise ValueError, "Element is not a child of this node." 
- - if start is None: - c_start = 0 - else: - c_start = start - if stop is None: - c_stop = 0 - else: - c_stop = stop - if c_stop == 0 or \ - c_start >= c_stop and (c_stop > 0 or c_start < 0): - raise ValueError, "list.index(x): x not in slice" - - # for negative slice indices, check slice before searching index - if c_start < 0 or c_stop < 0: - # start from right, at most up to leftmost(c_start, c_stop) - if c_start < c_stop: - k = -c_start - else: - k = -c_stop - c_start_node = self._c_node.last - l = 1 - while c_start_node != c_child and l < k: - if _isElement(c_start_node): - l = l + 1 - c_start_node = c_start_node.prev - if c_start_node == c_child: - # found! before slice end? - if c_stop < 0 and l <= -c_stop: - raise ValueError, "list.index(x): x not in slice" - elif c_start < 0: - raise ValueError, "list.index(x): x not in slice" - - # now determine the index backwards from child - c_child = c_child.prev - k = 0 - if c_stop > 0: - # we can optimize: stop after c_stop elements if not found - while c_child != NULL and k < c_stop: - if _isElement(c_child): - k = k + 1 - c_child = c_child.prev - if k < c_stop: - return k - else: - # traverse all - while c_child != NULL: - if _isElement(c_child): - k = k + 1 - c_child = c_child.prev - if c_start > 0: - if k >= c_start: - return k - else: - return k - if c_start != 0 or c_stop != 0: - raise ValueError, "list.index(x): x not in slice" - else: - raise ValueError, "list.index(x): x not in list" - - def get(self, key, default=None): - """Gets an element attribute. - """ - return _getAttributeValue(self, key, default) - - def keys(self): - """Gets a list of attribute names. The names are returned in an - arbitrary order (just like for an ordinary Python dictionary). - """ - return _collectAttributes(self._c_node, 1) - - def values(self): - """Gets element attribute values as a sequence of strings. The - attributes are returned in an arbitrary order. 
- """ - return _collectAttributes(self._c_node, 2) - - def items(self): - """Gets element attributes, as a sequence. The attributes are returned in - an arbitrary order. - """ - return _collectAttributes(self._c_node, 3) - - def getchildren(self): - """Returns all direct children. The elements are returned in document - order. - - Note that this method has been deprecated as of ElementTree 1.3. New - code should use ``list(element)`` or simply iterate over elements. - """ - cdef xmlNode* c_node - result = [] - c_node = self._c_node.children - while c_node is not NULL: - if _isElement(c_node): - python.PyList_Append( - result, _elementFactory(self._doc, c_node)) - c_node = c_node.next - return result - - def getparent(self): - """Returns the parent of this element or None for the root element. - """ - cdef xmlNode* c_node - c_node = _parentElement(self._c_node) - if c_node is NULL: - return None - else: - return _elementFactory(self._doc, c_node) - - def getnext(self): - """Returns the following sibling of this element or None. - """ - cdef xmlNode* c_node - c_node = _nextElement(self._c_node) - if c_node is not NULL: - return _elementFactory(self._doc, c_node) - return None - - def getprevious(self): - """Returns the preceding sibling of this element or None. - """ - cdef xmlNode* c_node - c_node = _previousElement(self._c_node) - if c_node is not NULL: - return _elementFactory(self._doc, c_node) - return None - - def itersiblings(self, preceding=False, tag=None): - """Iterate over the following or preceding siblings of this element. - - The direction is determined by the 'preceding' keyword which defaults - to False, i.e. forward iteration over the following siblings. The - generated elements can be restricted to a specific tag name with the - 'tag' keyword. - """ - return SiblingsIterator(self, preceding, tag) - - def iterancestors(self, tag=None): - """Iterate over the ancestors of this element (from parent to parent). 
- - The generated elements can be restricted to a specific tag name with - the 'tag' keyword. - """ - return AncestorsIterator(self, tag) - - def iterdescendants(self, tag=None): - """Iterate over the descendants of this element in document order. - - As opposed to ``el.iter()``, this iterator does not yield the element - itself. The generated elements can be restricted to a specific tag - name with the 'tag' keyword. - """ - return ElementDepthFirstIterator(self, tag, False) - - def iterchildren(self, reversed=False, tag=None): - """Iterate over the children of this element. - - As opposed to using normal iteration on this element, the generated - elements can be restricted to a specific tag name with the 'tag' - keyword and reversed with the 'reversed' keyword. - """ - return ElementChildIterator(self, reversed, tag) - - def getroottree(self): - """Return an ElementTree for the root node of the document that - contains this element. - - This is the same as following element.getparent() up the tree until it - returns None (for the root element) and then build an ElementTree for - the last parent that was returned.""" - return _elementTreeFactory(self._doc, None) - - def getiterator(self, tag=None): - """Returns a sequence of all elements in the subtree in document order - (depth first pre-order), starting with this element. - - Can be restricted to find only elements with a specific tag or from a - namespace. - - You can also pass the Element, Comment, ProcessingInstruction and - Entity factory functions to look only for the specific element type. - - Note that this method previously returned an iterator, which diverged - from the original ElementTree behaviour. If you want an efficient - iterator, use the ``el.iter()`` method instead. - """ - return list(ElementDepthFirstIterator(self, tag)) - - def iter(self, tag=None): - """Iterate over all elements in the subtree in document order (depth - first pre-order), starting with this element. 
- - Can be restricted to find only elements with a specific tag or from a - namespace. - - You can also pass the Element, Comment, ProcessingInstruction and - Entity factory functions to look only for the specific element type. - """ - return ElementDepthFirstIterator(self, tag) - - def itertext(self, tag=None, with_tail=True): - """Iterates over the text content of a subtree. - - You can pass the ``tag`` keyword argument to restrict text content to - a specific tag name. - - You can set the ``with_tail`` keyword argument to ``False`` to skip - over tail text. - """ - return ElementTextIterator(self, tag, with_tail) - - def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): - """Creates a new element associated with the same document. - """ - return _makeElement(_tag, NULL, self._doc, None, None, None, - attrib, nsmap, _extra) - - def find(self, path): - """Finds the first matching subelement, by tag name or path. - """ - if isinstance(path, QName): - path = (path).text - return _elementpath.find(self, path) - - def findtext(self, path, default=None): - """Finds text for the first matching subelement, by tag name or path. - """ - if isinstance(path, QName): - path = (path).text - return _elementpath.findtext(self, path, default) - - def findall(self, path): - """Finds all matching subelements, by tag name or path. - """ - if isinstance(path, QName): - path = (path).text - return _elementpath.findall(self, path) - - def iterfind(self, path): - """Iterates over all matching subelements, by tag name or path. - """ - if isinstance(path, QName): - path = (path).text - return _elementpath.iterfind(self, path) - - def xpath(self, _path, namespaces=None, extensions=None, **_variables): - """Evaluate an xpath expression using the element as context node. 
- """ - evaluator = XPathElementEvaluator(self, namespaces, extensions) - return evaluator.evaluate(_path, **_variables) - - -cdef python.PyThread_type_lock ELEMENT_CREATION_LOCK -if config.ENABLE_THREADING: - ELEMENT_CREATION_LOCK = python.PyThread_allocate_lock() -else: - ELEMENT_CREATION_LOCK = NULL - -cdef extern from "etree_defs.h": - # macro call to 't->tp_new()' for fast instantiation - cdef _Element NEW_ELEMENT "PY_NEW" (object t) - -cdef _Element _elementFactory(_Document doc, xmlNode* c_node): - cdef python.PyThreadState* state - cdef _Element result - result = getProxy(c_node) - if result is not None: - return result - if c_node is NULL: - return None - - if config.ENABLE_THREADING: - state = python.PyEval_SaveThread() - python.PyThread_acquire_lock(ELEMENT_CREATION_LOCK, python.WAIT_LOCK) - python.PyEval_RestoreThread(state) - result = getProxy(c_node) - if result is not None: - python.PyThread_release_lock(ELEMENT_CREATION_LOCK) - return result - - element_class = LOOKUP_ELEMENT_CLASS( - ELEMENT_CLASS_LOOKUP_STATE, doc, c_node) - if element_class is _Element: - # fast path for standard _Element class - result = NEW_ELEMENT(_Element) - else: - result = element_class() - if hasProxy(c_node): - # prevent re-entry race condition - we just called into Python - result._c_node = NULL - return getProxy(c_node) - result._doc = doc - result._c_node = c_node - _registerProxy(result) - - if config.ENABLE_THREADING: - python.PyThread_release_lock(ELEMENT_CREATION_LOCK) - - if element_class is not _Element: - result._init() - return result - - -cdef class __ContentOnlyElement(_Element): - cdef int _raiseImmutable(self) except -1: - raise TypeError, "this element does not have children or attributes" - - def set(self, key, value): - self._raiseImmutable() - - def append(self, value): - self._raiseImmutable() - - def insert(self, index, value): - self._raiseImmutable() - - def __setitem__(self, index, value): - self._raiseImmutable() - - def __setslice__(self, start, 
end, value): - self._raiseImmutable() - - property attrib: - def __get__(self): - return {} - - property text: - def __get__(self): - if self._c_node.content is NULL: - return '' - else: - return funicode(self._c_node.content) - - def __set__(self, value): - cdef tree.xmlDict* c_dict - cdef char* c_text - if value is None: - c_text = NULL - else: - value = _utf8(value) - c_text = _cstr(value) - tree.xmlNodeSetContent(self._c_node, c_text) - - # ACCESSORS - def __getitem__(self, n): - raise IndexError - - def __len__(self): - return 0 - - def get(self, key, default=None): - return None - - def keys(self): - return [] - - def items(self): - return [] - - def values(self): - return [] - -cdef class _Comment(__ContentOnlyElement): - property tag: - def __get__(self): - return Comment - - def __repr__(self): - return "" % self.text - -cdef class _ProcessingInstruction(__ContentOnlyElement): - property tag: - def __get__(self): - return ProcessingInstruction - - property target: - # not in ElementTree - def __get__(self): - return funicode(self._c_node.name) - - def __set__(self, value): - value = _utf8(value) - c_text = _cstr(value) - tree.xmlNodeSetName(self._c_node, c_text) - - def __repr__(self): - text = self.text - if text: - return "" % (self.target, text) - else: - return "" % self.target - -cdef class _Entity(__ContentOnlyElement): - property tag: - def __get__(self): - return Entity - - property name: - # not in ElementTree - def __get__(self): - return funicode(self._c_node.name) - - def __set__(self, value): - value = _utf8(value) - c_text = _cstr(value) - tree.xmlNodeSetName(self._c_node, c_text) - - def __repr__(self): - return "&%s;" % self.name - - -cdef public class _ElementTree [ type LxmlElementTreeType, - object LxmlElementTree ]: - cdef _Document _doc - cdef _Element _context_node - - # Note that _doc is only used to store the original document if we do not - # have a _context_node. 
All methods should prefer self._context_node._doc - # to honour tree restructuring. _doc can happily be None! - - cdef _assertHasRoot(self): - """We have to take care here: the document may not have a root node! - This can happen if ElementTree() is called without any argument and - the caller 'forgets' to call parse() afterwards, so this is a bug in - the caller program. - """ - assert self._context_node is not None, \ - "ElementTree not initialized, missing root" - - def parse(self, source, _BaseParser parser=None): - """Updates self with the content of source and returns its root - """ - cdef _Document doc - doc = _parseDocument(source, parser) - self._context_node = doc.getroot() - if self._context_node is None: - self._doc = doc - else: - self._doc = None - return self._context_node - - def _setroot(self, _Element root not None): - """Relocate the ElementTree to a new root node. - """ - if root._c_node.type != tree.XML_ELEMENT_NODE: - raise TypeError, "Only elements can be the root of an ElementTree" - self._context_node = root - self._doc = None - - def getroot(self): - """Gets the root element for this tree. - """ - return self._context_node - - def __copy__(self): - return ElementTree(self._context_node) - - def __deepcopy__(self, memo): - if self._context_node is None: - return ElementTree() - else: - return ElementTree( self._context_node.__copy__() ) - - property docinfo: - """Information about the document provided by parser and DTD. This - value is only defined for ElementTree objects based on the root node - of a parsed document (e.g. those returned by the parse functions). - """ - def __get__(self): - self._assertHasRoot() - return DocInfo(self._context_node._doc) - - property parser: - """The parser that was used to parse the document in this ElementTree. 
- """ - def __get__(self): - if self._context_node is not None and \ - self._context_node._doc is not None: - return self._context_node._doc._parser - if self._doc is not None: - return self._doc._parser - return None - - def write(self, file, *, encoding=None, method="xml", - pretty_print=False, xml_declaration=None): - """Write the tree to a file or file-like object. - - Defaults to ASCII encoding and writing a declaration as needed. - - The keyword argument 'method' selects the output method: 'xml' or - 'html'. - """ - cdef bint write_declaration - self._assertHasRoot() - # suppress decl. in default case (purely for ElementTree compatibility) - if xml_declaration is not None: - write_declaration = xml_declaration - if encoding is None: - encoding = 'ASCII' - elif encoding is None: - encoding = 'ASCII' - write_declaration = 0 - else: - encoding = encoding.upper() - write_declaration = encoding not in \ - ('US-ASCII', 'ASCII', 'UTF8', 'UTF-8') - _tofilelike(file, self._context_node, encoding, method, - write_declaration, 1, pretty_print) - - def getpath(self, _Element element not None): - """Returns a structural, absolute XPath expression to find that element. - """ - cdef _Document doc - cdef xmlDoc* c_doc - cdef char* c_path - doc = self._context_node._doc - if element._doc is not doc: - raise ValueError, "Element is not in this tree." - c_doc = _fakeRootDoc(doc._c_doc, self._context_node._c_node) - c_path = tree.xmlGetNodePath(element._c_node) - _destroyFakeDoc(doc._c_doc, c_doc) - if c_path is NULL: - raise LxmlError, "Error creating node path." - path = c_path - tree.xmlFree(c_path) - return path - - def getiterator(self, tag=None): - """Creates an iterator for the root element. The iterator loops over all elements - in this tree, in document order. - - Note that this method is deprecated in favour of the ``el.iter()`` - method. In new code, use it only if you require backwards - compatibility. 
- """ - root = self.getroot() - if root is None: - return () - return root.iter(tag) - - def iter(self, tag=None): - """Creates an iterator for the root element. The iterator loops over - all elements in this tree, in document order. - """ - root = self.getroot() - if root is None: - return () - return root.iter(tag) - - def find(self, path): - """Finds the first toplevel element with given tag. Same as - getroot().find(path). - """ - self._assertHasRoot() - root = self.getroot() - if path[:1] == "/": - path = "." + path - return root.find(path) - - def findtext(self, path, default=None): - """Finds the text for the first element matching the ElementPath - expression. Same as getroot().findtext(path) - """ - self._assertHasRoot() - root = self.getroot() - if path[:1] == "/": - path = "." + path - return root.findtext(path, default) - - def findall(self, path): - """Finds all elements matching the ElementPath expression. Same as - getroot().findall(path). - """ - self._assertHasRoot() - root = self.getroot() - if path[:1] == "/": - path = "." + path - return root.findall(path) - - def iterfind(self, path): - """Iterates over all elements matching the ElementPath expression. - Same as getroot().finditer(path). - """ - self._assertHasRoot() - root = self.getroot() - if path[:1] == "/": - path = "." + path - return root.iterfind(path) - - def xpath(self, _path, namespaces=None, extensions=None, **_variables): - """XPath evaluate in context of document. - - ``namespaces`` is an optional dictionary with prefix to namespace URI - mappings, used by XPath. ``extensions`` defines additional extension - functions. - - Returns a list (nodeset), or bool, float or string. - - In case of a list result, return Element for element nodes, - string for text and attribute values. - - Note: if you are going to apply multiple XPath expressions - against the same document, it is more efficient to use - XPathEvaluator directly. 
- """ - self._assertHasRoot() - evaluator = XPathDocumentEvaluator(self, namespaces, extensions) - return evaluator.evaluate(_path, **_variables) - - def xslt(self, _xslt, extensions=None, access_control=None, **_kw): - """Transform this document using other document. - - xslt is a tree that should be XSLT - keyword parameters are XSLT transformation parameters. - - Returns the transformed tree. - - Note: if you are going to apply the same XSLT stylesheet against - multiple documents, it is more efficient to use the XSLT - class directly. - """ - self._assertHasRoot() - style = XSLT(_xslt, extensions=extensions, - access_control=access_control) - return style(self, **_kw) - - def relaxng(self, relaxng): - """Validate this document using other document. - - The relaxng argument is a tree that should contain a Relax NG schema. - - Returns True or False, depending on whether validation - succeeded. - - Note: if you are going to apply the same Relax NG schema against - multiple documents, it is more efficient to use the RelaxNG - class directly. - """ - self._assertHasRoot() - schema = RelaxNG(relaxng) - return schema.validate(self) - - def xmlschema(self, xmlschema): - """Validate this document using other document. - - The xmlschema argument is a tree that should contain an XML Schema. - - Returns True or False, depending on whether validation - succeeded. - - Note: If you are going to apply the same XML Schema against - multiple documents, it is more efficient to use the XMLSchema - class directly. - """ - self._assertHasRoot() - schema = XMLSchema(xmlschema) - return schema.validate(self) - - def xinclude(self): - """Process the XInclude nodes in this document and include the - referenced XML fragments. - - There is support for loading files through the file system, HTTP and - FTP. - - Note that XInclude does not support custom resolvers in Python space - due to restrictions of libxml2 <= 2.6.29. 
- """ - cdef python.PyThreadState* state - cdef int result - # We cannot pass the XML_PARSE_NOXINCNODE option as this would free - # the XInclude nodes - there may still be Python references to them! - # Therefore, we allow XInclude nodes to be converted to - # XML_XINCLUDE_START nodes. XML_XINCLUDE_END nodes are added as - # siblings. Tree traversal will simply ignore them as they are not - # typed as elements. The included fragment is added between the two, - # i.e. as a sibling, which does not conflict with traversal. - self._assertHasRoot() - state = python.PyEval_SaveThread() - if self._context_node._doc._parser is not None: - result = xinclude.xmlXIncludeProcessTreeFlags( - self._context_node._c_node, - self._context_node._doc._parser._parse_options) - else: - result = xinclude.xmlXIncludeProcessTree( - self._context_node._c_node) - python.PyEval_RestoreThread(state) - if result == -1: - raise XIncludeError, "XInclude processing failed" - - def write_c14n(self, file): - """C14N write of document. Always writes UTF-8. 
- """ - self._assertHasRoot() - _tofilelikeC14N(file, self._context_node) - -cdef _ElementTree _elementTreeFactory(_Document doc, _Element context_node): - return _newElementTree(doc, context_node, _ElementTree) - -cdef _ElementTree _newElementTree(_Document doc, _Element context_node, - object baseclass): - cdef _ElementTree result - result = baseclass() - if context_node is None and doc is not None: - context_node = doc.getroot() - if context_node is None: - result._doc = doc - result._context_node = context_node - return result - - -cdef class _Attrib: - cdef _Element _element - def __init__(self, _Element element not None): - self._element = element - - # MANIPULATORS - def __setitem__(self, key, value): - _setAttributeValue(self._element, key, value) - - def __delitem__(self, key): - _delAttribute(self._element, key) - - def update(self, sequence_or_dict): - if isinstance(sequence_or_dict, dict): - sequence_or_dict = sequence_or_dict.iteritems() - for key, value in sequence_or_dict: - _setAttributeValue(self._element, key, value) - - def pop(self, key, *default): - if python.PyTuple_GET_SIZE(default) > 1: - raise TypeError, "pop expected at most 2 arguments, got %d" % \ - (python.PyTuple_GET_SIZE(default)+1) - result = _getAttributeValue(self._element, key, None) - if result is None: - if python.PyTuple_GET_SIZE(default) == 0: - raise KeyError, key - else: - result = python.PyTuple_GET_ITEM(default, 0) - python.Py_INCREF(result) - else: - _delAttribute(self._element, key) - return result - - def clear(self): - cdef xmlNode* c_node - c_node = self._element._c_node - while c_node.properties is not NULL: - tree.xmlRemoveProp(c_node.properties) - - # ACCESSORS - def __repr__(self): - return repr(dict( _attributeIteratorFactory(self._element, 3) )) - - def __getitem__(self, key): - result = _getAttributeValue(self._element, key, None) - if result is None: - raise KeyError, key - else: - return result - - def __nonzero__(self): - cdef xmlAttr* c_attr - c_attr = 
self._element._c_node.properties - while c_attr is not NULL: - if c_attr.type == tree.XML_ATTRIBUTE_NODE: - return 1 - c_attr = c_attr.next - return 0 - - def __len__(self): - cdef xmlAttr* c_attr - cdef Py_ssize_t c - c = 0 - c_attr = self._element._c_node.properties - while c_attr is not NULL: - if c_attr.type == tree.XML_ATTRIBUTE_NODE: - c = c + 1 - c_attr = c_attr.next - return c - - def get(self, key, default=None): - return _getAttributeValue(self._element, key, default) - - def keys(self): - return _collectAttributes(self._element._c_node, 1) - - def __iter__(self): - return iter(_collectAttributes(self._element._c_node, 1)) - - def iterkeys(self): - return iter(_collectAttributes(self._element._c_node, 1)) - - def values(self): - return _collectAttributes(self._element._c_node, 2) - - def itervalues(self): - return iter(_collectAttributes(self._element._c_node, 2)) - - def items(self): - return _collectAttributes(self._element._c_node, 3) - - def iteritems(self): - return iter(_collectAttributes(self._element._c_node, 3)) - - def has_key(self, key): - if key in self: - return True - else: - return False - - def __contains__(self, key): - cdef xmlNode* c_node - cdef char* c_result - cdef char* c_tag - ns, tag = _getNsTag(key) - c_tag = _cstr(tag) - c_node = self._element._c_node - if ns is None: - c_result = tree.xmlGetNoNsProp(c_node, c_tag) - else: - c_result = tree.xmlGetNsProp(c_node, c_tag, _cstr(ns)) - if c_result is NULL: - return 0 - else: - tree.xmlFree(c_result) - return 1 - -cdef class _AttribIterator: - """Attribute iterator - for internal use only! - """ - # XML attributes must not be removed while running! 
- cdef _Element _node - cdef xmlAttr* _c_attr - cdef int _keysvalues # 1 - keys, 2 - values, 3 - items (key, value) - def __iter__(self): - return self - - def __next__(self): - cdef xmlAttr* c_attr - if self._node is None: - raise StopIteration - c_attr = self._c_attr - while c_attr is not NULL and c_attr.type != tree.XML_ATTRIBUTE_NODE: - c_attr = c_attr.next - if c_attr is NULL: - self._node = None - raise StopIteration - - self._c_attr = c_attr.next - if self._keysvalues == 1: - return _namespacedName(c_attr) - elif self._keysvalues == 2: - return _attributeValue(self._node._c_node, c_attr) - else: - return (_namespacedName(c_attr), - _attributeValue(self._node._c_node, c_attr)) - -cdef object _attributeIteratorFactory(_Element element, int keysvalues): - cdef _AttribIterator attribs - if element._c_node.properties is NULL: - return ITER_EMPTY - attribs = _AttribIterator() - attribs._node = element - attribs._c_attr = element._c_node.properties - attribs._keysvalues = keysvalues - return attribs - - -ctypedef xmlNode* (*_node_to_node_function)(xmlNode*) - -cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, - type LxmlElementTagMatcherType ]: - cdef object _pystrings - cdef int _node_type - cdef char* _href - cdef char* _name - cdef _initTagMatch(self, tag): - self._href = NULL - self._name = NULL - if tag is None: - self._node_type = 0 - elif tag is Comment: - self._node_type = tree.XML_COMMENT_NODE - elif tag is ProcessingInstruction: - self._node_type = tree.XML_PI_NODE - elif tag is Entity: - self._node_type = tree.XML_ENTITY_REF_NODE - elif tag is Element: - self._node_type = tree.XML_ELEMENT_NODE - else: - self._node_type = tree.XML_ELEMENT_NODE - self._pystrings = _getNsTag(tag) - if self._pystrings[0] is not None: - self._href = _cstr(self._pystrings[0]) - self._name = _cstr(self._pystrings[1]) - if self._name[0] == c'*' and self._name[1] == c'\0': - self._name = NULL - -cdef public class _ElementIterator(_ElementTagMatcher) [ - object 
LxmlElementIterator, type LxmlElementIteratorType ]: - # we keep Python references here to control GC - cdef _Element _node - cdef _node_to_node_function _next_element - def __iter__(self): - return self - - cdef void _storeNext(self, _Element node): - cdef xmlNode* c_node - c_node = self._next_element(node._c_node) - while c_node is not NULL and \ - self._node_type != 0 and \ - (self._node_type != c_node.type or - not _tagMatches(c_node, self._href, self._name)): - c_node = self._next_element(c_node) - if c_node is NULL: - self._node = None - else: - # Python ref: - self._node = _elementFactory(node._doc, c_node) - - def __next__(self): - cdef xmlNode* c_node - cdef _Element current_node - # Python ref: - current_node = self._node - if current_node is None: - raise StopIteration - self._storeNext(current_node) - return current_node - -cdef class ElementChildIterator(_ElementIterator): - "Iterates over the children of an element." - def __init__(self, _Element node not None, reversed=False, tag=None): - cdef xmlNode* c_node - self._initTagMatch(tag) - if reversed: - c_node = _findChildBackwards(node._c_node, 0) - self._next_element = _previousElement - else: - c_node = _findChildForwards(node._c_node, 0) - self._next_element = _nextElement - if tag is not None: - while c_node is not NULL and \ - self._node_type != 0 and \ - (self._node_type != c_node.type or - not _tagMatches(c_node, self._href, self._name)): - c_node = self._next_element(c_node) - if c_node is not NULL: - # store Python ref: - self._node = _elementFactory(node._doc, c_node) - -cdef class SiblingsIterator(_ElementIterator): - """Iterates over the siblings of an element. - - You can pass the boolean keyword ``preceding`` to specify the direction. 
- """ - def __init__(self, _Element node not None, preceding=False, tag=None): - self._initTagMatch(tag) - if preceding: - self._next_element = _previousElement - else: - self._next_element = _nextElement - self._storeNext(node) - -cdef class AncestorsIterator(_ElementIterator): - "Iterates over the ancestors of an element (from parent to parent)." - def __init__(self, _Element node not None, tag=None): - self._initTagMatch(tag) - self._next_element = _parentElement - self._storeNext(node) - -cdef class ElementDepthFirstIterator(_ElementTagMatcher): - """Iterates over an element and its sub-elements in document order (depth - first pre-order). Note that this also includes comments, entities and - processing instructions. To filter them out, check if the ``tag`` - property of the returned element is a string (i.e. not None and not a - factory function). - - If the optional 'tag' argument is not None, the iterator returns only the - elements that match the respective name and namespace. - - The optional boolean argument 'inclusive' defaults to True and can be set - to False to exclude the start element itself. - - Note that the behaviour of this iterator is completely undefined if the - tree it traverses is modified during iteration. 
- """ - # we keep Python references here to control GC - # keep next node to return and a depth counter in the tree - cdef _Element _next_node - cdef _Element _top_node - def __init__(self, _Element node not None, tag=None, inclusive=True): - self._top_node = node - self._next_node = node - self._initTagMatch(tag) - if not inclusive or \ - tag is not None and \ - self._node_type != 0 and \ - (self._node_type != node._c_node.type or - not _tagMatches(node._c_node, self._href, self._name)): - # this cannot raise StopIteration, self._next_node != None - self.next() - - def __iter__(self): - return self - - def __next__(self): - cdef xmlNode* c_node - cdef _Element current_node - current_node = self._next_node - if current_node is None: - raise StopIteration - c_node = self._next_node._c_node - if self._name is NULL and self._href is NULL: - c_node = self._nextNodeAnyTag(c_node) - else: - c_node = self._nextNodeMatchTag(c_node) - if c_node is NULL: - self._next_node = None - else: - self._next_node = _elementFactory(current_node._doc, c_node) - return current_node - - cdef xmlNode* _nextNodeAnyTag(self, xmlNode* c_node): - tree.BEGIN_FOR_EACH_ELEMENT_FROM(self._top_node._c_node, c_node, 0) - if self._node_type == 0 or self._node_type == c_node.type: - return c_node - tree.END_FOR_EACH_ELEMENT_FROM(c_node) - return NULL - - cdef xmlNode* _nextNodeMatchTag(self, xmlNode* c_node): - tree.BEGIN_FOR_EACH_ELEMENT_FROM(self._top_node._c_node, c_node, 0) - if c_node.type == tree.XML_ELEMENT_NODE: - if _tagMatches(c_node, self._href, self._name): - return c_node - tree.END_FOR_EACH_ELEMENT_FROM(c_node) - return NULL - -cdef class ElementTextIterator: - """Iterates over the text content of a subtree. - - You can pass the ``tag`` keyword argument to restrict text content to a - specific tag name. - - You can set the ``with_tail`` keyword argument to ``False`` to skip over - tail text. 
- """ - cdef object _nextEvent - cdef _Element _start_element - def __init__(self, _Element element not None, tag=None, with_tail=True): - if with_tail: - events = ("start", "end") - else: - events = ("start",) - self._start_element = element - self._nextEvent = iterwalk(element, events=events, tag=tag).next - - def __iter__(self): - return self - - def __next__(self): - cdef _Element element - while result is None: - event, element = self._nextEvent() # raises StopIteration - if event == "start": - result = element.text - elif element is not self._start_element: - result = element.tail - return result - -cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf) except NULL: - cdef xmlNode* c_node - c_node = tree.xmlNewDocNode(c_doc, NULL, _cstr(name_utf), NULL) - return c_node - -cdef xmlNode* _createComment(xmlDoc* c_doc, char* text): - cdef xmlNode* c_node - c_node = tree.xmlNewDocComment(c_doc, text) - return c_node - -cdef xmlNode* _createPI(xmlDoc* c_doc, char* target, char* text): - cdef xmlNode* c_node - c_node = tree.xmlNewDocPI(c_doc, target, text) - return c_node - -cdef xmlNode* _createEntity(xmlDoc* c_doc, char* name): - cdef xmlNode* c_node - c_node = tree.xmlNewReference(c_doc, name) - return c_node - -# module-level API for ElementTree - -def Element(_tag, attrib=None, nsmap=None, **_extra): - """Element factory. This function returns an object implementing the - Element interface. - """ - ### also look at _Element.makeelement() and _BaseParser.makeelement() ### - return _makeElement(_tag, NULL, None, None, None, None, - attrib, nsmap, _extra) - -def Comment(text=None): - """Comment element factory. This factory function creates a special element that will - be serialized as an XML comment. 
- """ - cdef _Document doc - cdef xmlNode* c_node - cdef xmlDoc* c_doc - if text is None: - text = '' - else: - text = _utf8(text) - c_doc = _newDoc() - doc = _documentFactory(c_doc, None) - c_node = _createComment(c_doc, _cstr(text)) - tree.xmlAddChild(c_doc, c_node) - return _elementFactory(doc, c_node) - -def ProcessingInstruction(target, text=None): - """ProcessingInstruction element factory. This factory function creates a - special element that will be serialized as an XML processing instruction. - """ - cdef _Document doc - cdef xmlNode* c_node - cdef xmlDoc* c_doc - target = _utf8(target) - if text is None: - text = '' - else: - text = _utf8(text) - c_doc = _newDoc() - doc = _documentFactory(c_doc, None) - c_node = _createPI(c_doc, _cstr(target), _cstr(text)) - tree.xmlAddChild(c_doc, c_node) - return _elementFactory(doc, c_node) - -PI = ProcessingInstruction - -def Entity(name): - """Entity factory. This factory function creates a special element that - will be serialized as an XML entity. Note, however, that the entity will - not be automatically declared in the document. A document that uses - entities requires a DTD. - """ - cdef _Document doc - cdef xmlNode* c_node - cdef xmlDoc* c_doc - name = _utf8(name) - c_doc = _newDoc() - doc = _documentFactory(c_doc, None) - c_node = _createEntity(c_doc, _cstr(name)) - tree.xmlAddChild(c_doc, c_node) - return _elementFactory(doc, c_node) - -def SubElement(_Element _parent not None, _tag, - attrib=None, nsmap=None, **_extra): - """Subelement factory. This function creates an element instance, and - appends it to an existing element. - """ - return _makeSubElement(_parent, _tag, None, None, attrib, nsmap, _extra) - -def ElementTree(_Element element=None, *, file=None, _BaseParser parser=None): - """ElementTree wrapper class. 
- """ - cdef xmlNode* c_next - cdef xmlNode* c_node - cdef xmlNode* c_node_copy - cdef xmlDoc* c_doc - cdef _ElementTree etree - cdef _Document doc - - if element is not None: - doc = element._doc - elif file is not None: - try: - doc = _parseDocument(file, parser) - except _TargetParserResult, result_container: - return result_container.result - else: - c_doc = _newDoc() - doc = _documentFactory(c_doc, parser) - - return _elementTreeFactory(doc, element) - -def HTML(text, _BaseParser parser=None, *, base_url=None): - """Parses an HTML document from a string constant. This function can be used - to embed "HTML literals" in Python code. - - To override the parser with a different ``HTMLParser`` you can pass it to - the ``parser`` keyword argument. - - The ``base_url`` keyword argument allows to set the original base URL of - the document to support relative Paths when looking up external entities - (DTD, XInclude, ...). - """ - cdef _Document doc - if parser is None: - parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() - if not isinstance(parser, HTMLParser): - parser = __DEFAULT_HTML_PARSER - try: - doc = _parseMemoryDocument(text, base_url, parser) - return doc.getroot() - except _TargetParserResult, result_container: - return result_container.result - -def XML(text, _BaseParser parser=None, *, base_url=None): - """Parses an XML document from a string constant. This function can be used - to embed "XML literals" in Python code, like in - - >>> root = etree.XML("") - - To override the parser with a different ``XMLParser`` you can pass it to - the ``parser`` keyword argument. - - The ``base_url`` keyword argument allows to set the original base URL of - the document to support relative Paths when looking up external entities - (DTD, XInclude, ...). 
- """ - cdef _Document doc - if parser is None: - parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() - if not isinstance(parser, XMLParser): - parser = __DEFAULT_XML_PARSER - try: - doc = _parseMemoryDocument(text, base_url, parser) - return doc.getroot() - except _TargetParserResult, result_container: - return result_container.result - -def fromstring(text, _BaseParser parser=None, *, base_url=None): - """Parses an XML document from a string. - - To override the default parser with a different parser you can pass it to - the ``parser`` keyword argument. - - The ``base_url`` keyword argument allows to set the original base URL of - the document to support relative Paths when looking up external entities - (DTD, XInclude, ...). - """ - cdef _Document doc - if parser is None: - parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() - try: - doc = _parseMemoryDocument(text, base_url, parser) - return doc.getroot() - except _TargetParserResult, result_container: - return result_container.result - -def fromstringlist(strings, _BaseParser parser=None): - """Parses an XML document from a sequence of strings. - - To override the default parser with a different parser you can pass it to - the ``parser`` keyword argument. - """ - cdef _Document doc - if parser is None: - parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() - feed = parser.feed - for data in strings: - feed(data) - return parser.close() - -def iselement(element): - """Checks if an object appears to be a valid element object. - """ - return isinstance(element, _Element) - -def dump(_Element elem not None, *, pretty_print=True): - """Writes an element tree or element structure to sys.stdout. This function - should be used for debugging only. - """ - _dumpToFile(sys.stdout, elem._c_node, pretty_print) - -def tostring(element_or_tree, *, encoding=None, method="xml", - xml_declaration=None, pretty_print=False): - """Serialize an element to an encoded string representation of its XML - tree. 
- - Defaults to ASCII encoding without XML declaration. This behaviour can be - configured with the keyword arguments 'encoding' (string) and - 'xml_declaration' (bool). Note that changing the encoding to a non UTF-8 - compatible encoding will enable a declaration by default. - - The keyword argument 'pretty_print' (bool) enables formatted XML. - - The keyword argument 'method' selects the output method: 'xml', - 'html' or plain 'text'. - """ - cdef bint write_declaration - if xml_declaration is None: - # by default, write an XML declaration only for non-standard encodings - write_declaration = encoding is not None and encoding.upper() not in \ - ('ASCII', 'UTF-8', 'UTF8', 'US-ASCII') - else: - write_declaration = xml_declaration - if encoding is None: - encoding = 'ASCII' - - if isinstance(element_or_tree, _Element): - return _tostring(<_Element>element_or_tree, encoding, method, - write_declaration, 0, pretty_print) - elif isinstance(element_or_tree, _ElementTree): - return _tostring((<_ElementTree>element_or_tree)._context_node, - encoding, method, write_declaration, 1, pretty_print) - else: - raise TypeError, "Type '%s' cannot be serialized." % type(element_or_tree) - -def tostringlist(element_or_tree, *args, **kwargs): - """Serialize an element to an encoded string representation of its XML - tree, stored in a list of partial strings. - - This is purely for ElementTree 1.3 compatibility. The result is a - single string wrapped in a list. - """ - return [tostring(element_or_tree, *args, **kwargs)] - -def tounicode(element_or_tree, *, method="xml", pretty_print=False): - """Serialize an element to the Python unicode representation of its XML - tree. - - Note that the result does not carry an XML encoding declaration and is - therefore not necessarily suited for serialization to byte streams without - further treatment. - - The boolean keyword argument 'pretty_print' enables formatted XML. 
- - The keyword argument 'method' selects the output method: 'xml', - 'html' or plain 'text'. - """ - if isinstance(element_or_tree, _Element): - return _tounicode(<_Element>element_or_tree, method, 0, pretty_print) - elif isinstance(element_or_tree, _ElementTree): - return _tounicode((<_ElementTree>element_or_tree)._context_node, - method, 1, pretty_print) - else: - raise TypeError, "Type '%s' cannot be serialized." % type(element_or_tree) - -def parse(source, _BaseParser parser=None): - """Return an ElementTree object loaded with source elements. If no parser - is provided as second argument, the default parser is used. - """ - cdef _Document doc - try: - doc = _parseDocument(source, parser) - return ElementTree(doc.getroot()) - except _TargetParserResult, result_container: - return result_container.result - - -################################################################################ -# Include submodules - -include "proxy.pxi" # Proxy handling (element backpointers/memory/etc.) -include "apihelpers.pxi" # Private helper functions -include "xmlerror.pxi" # Error and log handling -include "classlookup.pxi" # Element class lookup mechanisms -include "nsclasses.pxi" # Namespace implementation and registry -include "docloader.pxi" # Support for custom document loaders -include "parser.pxi" # XML Parser -include "parsertarget.pxi" # ET Parser target -include "serializer.pxi" # XML output functions -include "iterparse.pxi" # incremental XML parsing -include "xmlid.pxi" # XMLID and IDDict -include "extensions.pxi" # XPath/XSLT extension functions -include "xpath.pxi" # XPath evaluation -include "xslt.pxi" # XSL transformations - - -################################################################################ -# Validation - -class DocumentInvalid(LxmlError): - """Validation error. Raised by all document validators when their - ``assertValid(tree)`` method fails. - """ - pass - -cdef class _Validator: - "Base class for XML validators." 
- cdef _ErrorLog _error_log - def __init__(self): - self._error_log = _ErrorLog() - - def validate(self, etree): - """Validate the document using this schema. - - Returns true if document is valid, false if not.""" - return self(etree) - - def assertValid(self, etree): - "Raises DocumentInvalid if the document does not comply with the schema." - if not self(etree): - raise DocumentInvalid, self._error_log._buildExceptionMessage( - "Document does not comply with schema") - - def assert_(self, etree): - "Raises AssertionError if the document does not comply with the schema." - if not self(etree): - raise AssertionError, self._error_log._buildExceptionMessage( - "Document does not comply with schema") - - property error_log: - def __get__(self): - return self._error_log.copy() - -include "dtd.pxi" # DTD -include "relaxng.pxi" # RelaxNG -include "xmlschema.pxi" # XMLSchema -include "schematron.pxi" # Schematron (requires libxml2 2.6.21+) - -################################################################################ -# Public C API - -include "public-api.pxi" From scoder at codespeak.net Mon Oct 22 09:33:26 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 22 Oct 2007 09:33:26 +0200 (CEST) Subject: [Lxml-checkins] r47697 - lxml/trunk/src/lxml Message-ID: <20071022073326.3E3AA8181@code0.codespeak.net> Author: scoder Date: Mon Oct 22 09:33:25 2007 New Revision: 47697 Added: lxml/trunk/src/lxml/lxml.objectify.pyx - copied unchanged from r47696, lxml/trunk/src/lxml/objectify.pyx Removed: lxml/trunk/src/lxml/objectify.pyx Log: use FQMN in filename to make Pyrex' import mechanism happy Deleted: /lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- /lxml/trunk/src/lxml/objectify.pyx Mon Oct 22 09:33:25 2007 +++ (empty file) @@ -1,1773 +0,0 @@ -from etreepublic cimport _Document, _Element, ElementBase -from etreepublic cimport _ElementIterator, ElementClassLookup -from etreepublic cimport 
elementFactory, import_etree, textOf -from python cimport callable, _cstr -cimport etreepublic as cetree -cimport python -cimport tree -cimport cstd - -cdef object etree -from lxml import etree -# initialize C-API of lxml.etree -import_etree(etree) - -__version__ = etree.__version__ - -cdef object re -import re - -cdef object __builtin__ -import __builtin__ - -cdef object set -try: - set = __builtin__.set -except AttributeError: - from sets import Set as set - -cdef object IGNORABLE_ERRORS -IGNORABLE_ERRORS = (ValueError, TypeError) - -cdef object islice -from itertools import islice - -cdef object _typename(object t): - cdef char* c_name - cdef char* s - c_name = python._fqtypename(t) - s = cstd.strrchr(c_name, c'.') - if s == NULL: - return c_name - else: - return (s+1) - -# namespace/name for "pytype" hint attribute -cdef object PYTYPE_NAMESPACE -cdef char* _PYTYPE_NAMESPACE - -cdef object PYTYPE_ATTRIBUTE_NAME -cdef char* _PYTYPE_ATTRIBUTE_NAME - -PYTYPE_ATTRIBUTE = None - -cdef object TREE_PYTYPE_NAME -TREE_PYTYPE_NAME = "TREE" - -def setPytypeAttributeTag(attribute_tag=None): - """Changes name and namespace of the XML attribute that holds Python type - information. - - Reset by calling without argument. 
- - Default: "{http://codespeak.net/lxml/objectify/pytype}pytype" - """ - global PYTYPE_ATTRIBUTE, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME - global PYTYPE_NAMESPACE, PYTYPE_ATTRIBUTE_NAME - if attribute_tag is None: - PYTYPE_NAMESPACE = "http://codespeak.net/lxml/objectify/pytype" - PYTYPE_ATTRIBUTE_NAME = "pytype" - else: - PYTYPE_NAMESPACE, PYTYPE_ATTRIBUTE_NAME = cetree.getNsTag(attribute_tag) - _PYTYPE_NAMESPACE = _cstr(PYTYPE_NAMESPACE) - _PYTYPE_ATTRIBUTE_NAME = _cstr(PYTYPE_ATTRIBUTE_NAME) - PYTYPE_ATTRIBUTE = cetree.namespacedNameFromNsName( - _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) - -setPytypeAttributeTag() - - -# namespaces for XML Schema -cdef object XML_SCHEMA_NS -XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema" -cdef char* _XML_SCHEMA_NS -_XML_SCHEMA_NS = _cstr(XML_SCHEMA_NS) - -cdef object XML_SCHEMA_INSTANCE_NS -XML_SCHEMA_INSTANCE_NS = "http://www.w3.org/2001/XMLSchema-instance" -cdef char* _XML_SCHEMA_INSTANCE_NS -_XML_SCHEMA_INSTANCE_NS = _cstr(XML_SCHEMA_INSTANCE_NS) - -cdef object XML_SCHEMA_INSTANCE_NIL_ATTR -XML_SCHEMA_INSTANCE_NIL_ATTR = "{%s}nil" % XML_SCHEMA_INSTANCE_NS -cdef object XML_SCHEMA_INSTANCE_TYPE_ATTR -XML_SCHEMA_INSTANCE_TYPE_ATTR = "{%s}type" % XML_SCHEMA_INSTANCE_NS - - -################################################################################ -# Element class for the main API - -cdef class ObjectifiedElement(ElementBase): - """Main XML Element class. - - Element children are accessed as object attributes. Multiple children - with the same name are available through a list index. Example: - - >>> root = etree.XML("01") - >>> second_c2 = root.c1.c2[1] - """ - def __iter__(self): - """Iterate over self and all siblings with the same tag. 
- """ - parent = self.getparent() - if parent is None: - return iter([self]) - return etree.ElementChildIterator(parent, tag=self.tag) - - def __str__(self): - if __RECURSIVE_STR: - return _dump(self, 0) - else: - return textOf(self._c_node) or '' - - property text: - def __get__(self): - return textOf(self._c_node) - - property __dict__: - """A fake implementation for __dict__ to support dir() etc. - - Note that this only considers the first child with a given name. - """ - def __get__(self): - cdef char* c_ns - cdef char* c_child_ns - cdef _Element child - c_ns = tree._getNs(self._c_node) - if c_ns is NULL: - tag = None - else: - tag = "{%s}*" % c_ns - children = {} - for child in etree.ElementChildIterator(self, tag=tag): - if c_ns is NULL and tree._getNs(child._c_node) is not NULL: - continue - name = child._c_node.name - if python.PyDict_GetItem(children, name) is NULL: - python.PyDict_SetItem(children, name, child) - return children - - def __len__(self): - """Count self and siblings with the same tag. - """ - cdef tree.xmlNode* c_self_node - cdef tree.xmlNode* c_node - cdef char* c_href - cdef char* c_tag - cdef Py_ssize_t count - c_self_node = self._c_node - c_tag = c_self_node.name - c_href = tree._getNs(c_self_node) - count = 1 - c_node = c_self_node.next - while c_node is not NULL: - if c_node.type == tree.XML_ELEMENT_NODE and \ - cetree.tagMatches(c_node, c_href, c_tag): - count = count + 1 - c_node = c_node.next - c_node = c_self_node.prev - while c_node is not NULL: - if c_node.type == tree.XML_ELEMENT_NODE and \ - cetree.tagMatches(c_node, c_href, c_tag): - count = count + 1 - c_node = c_node.prev - return count - - def countchildren(self): - """Return the number of children of this element, regardless of their - name. 
- """ - # copied from etree - cdef Py_ssize_t c - cdef tree.xmlNode* c_node - c = 0 - c_node = self._c_node.children - while c_node is not NULL: - if tree._isElement(c_node): - c = c + 1 - c_node = c_node.next - return c - - def getchildren(self): - """Returns a sequence of all direct children. The elements are - returned in document order. - """ - cdef tree.xmlNode* c_node - result = [] - c_node = self._c_node.children - while c_node is not NULL: - if tree._isElement(c_node): - python.PyList_Append( - result, cetree.elementFactory(self._doc, c_node)) - c_node = c_node.next - return result - - def __getattr__(self, tag): - """Return the (first) child with the given tag name. If no namespace - is provided, the child will be looked up in the same one as self. - """ - return _lookupChildOrRaise(self, tag) - - def __setattr__(self, tag, value): - """Set the value of the (first) child with the given tag name. If no - namespace is provided, the child will be looked up in the same one as - self. - """ - cdef _Element element - # properties are looked up /after/ __setattr__, so we must emulate them - if tag == 'text' or tag == 'pyval': - # read-only ! - raise TypeError, "attribute '%s' of '%s' objects is not writable"% \ - (tag, _typename(self)) - elif tag == 'tail': - cetree.setTailText(self._c_node, value) - return - elif tag == 'tag': - ElementBase.tag.__set__(self, value) - return - - tag = _buildChildTag(self, tag) - element = _lookupChild(self, tag) - if element is None: - _appendValue(self, tag, value) - else: - _replaceElement(element, value) - - def __delattr__(self, tag): - child = _lookupChildOrRaise(self, tag) - self.remove(child) - - def addattr(self, tag, value): - """Add a child value to the element. - - As opposed to append(), it sets a data value, not an element. - """ - _appendValue(self, _buildChildTag(self, tag), value) - - def __getitem__(self, key): - """Return a sibling, counting from the first child of the parent. 
The - method behaves like both a dict and a sequence. - - * If argument is an integer, returns the sibling at that position. - - * If argument is a string, does the same as getattr(). This can be - used to provide namespaces for element lookup, or to look up - children with special names (``text`` etc.). - """ - cdef tree.xmlNode* c_self_node - cdef tree.xmlNode* c_parent - cdef tree.xmlNode* c_node - if python._isString(key): - return _lookupChildOrRaise(self, key) - c_self_node = self._c_node - c_parent = c_self_node.parent - if c_parent is NULL: - if key == 0: - return self - else: - raise IndexError, key - if key < 0: - c_node = c_parent.last - else: - c_node = c_parent.children - c_node = _findFollowingSibling( - c_node, tree._getNs(c_self_node), c_self_node.name, key) - if c_node is NULL: - raise IndexError, key - return elementFactory(self._doc, c_node) - - def __setitem__(self, key, value): - """Set the value of a sibling, counting from the first child of the - parent. - - * If argument is an integer, sets the sibling at that position. - - * If argument is a string, does the same as setattr(). This is used - to provide namespaces for element lookup. - - * If argument is a sequence (list, tuple, etc.), assign the contained - items to the siblings. - """ - cdef _Element element - cdef _Element new_element - cdef tree.xmlNode* c_self_node - cdef tree.xmlNode* c_parent - cdef tree.xmlNode* c_node - if python._isString(key): - key = _buildChildTag(self, key) - element = _lookupChild(self, key) - if element is None: - _appendValue(self, key, value) - else: - _replaceElement(element, value) - return - - c_self_node = self._c_node - c_parent = c_self_node.parent - if c_parent is NULL: - # the 'root[i] = ...' 
case - raise TypeError, "index assignment to root element is invalid" - if key < 0: - c_node = c_parent.last - else: - c_node = c_parent.children - c_node = _findFollowingSibling( - c_node, tree._getNs(c_self_node), c_self_node.name, key) - if c_node is NULL: - raise IndexError, key - element = elementFactory(self._doc, c_node) - _replaceElement(element, value) - - def __getslice__(self, Py_ssize_t start, Py_ssize_t end): - return list(islice(self, start, end)) - - def __setslice__(self, Py_ssize_t start, Py_ssize_t end, values): - cdef _Element el - parent = self.getparent() - if parent is None: - raise TypeError, "deleting slices of root element not supported" - # replace existing items - new_items = iter(values) - del_items = iter(list(islice(self, start, end))) - try: - for el in del_items: - item = new_items.next() - _replaceElement(el, item) - except StopIteration: - remove = parent.remove - remove(el) - for el in del_items: - remove(el) - return - - # append remaining new items - tag = self.tag - for item in new_items: - _appendValue(parent, tag, item) - - def __delslice__(self, Py_ssize_t start, Py_ssize_t end): - parent = self.getparent() - if parent is None: - raise TypeError, "deleting slices of root element not supported" - remove = parent.remove - for el in list(islice(self, start, end)): - remove(el) - - def __delitem__(self, key): - parent = self.getparent() - if parent is None: - raise TypeError, "deleting items not supported by root element" - sibling = self.__getitem__(key) - parent.remove(sibling) - - def iterfind(self, path): - # Reimplementation of Element.iterfind() to make it work without child - # iteration. - xpath = etree.ETXPath(path) - return iter(xpath(self)) - - def findall(self, path): - # Reimplementation of Element.findall() to make it work without child - # iteration. - xpath = etree.ETXPath(path) - return xpath(self) - - def find(self, path): - # Reimplementation of Element.find() to make it work without child - # iteration. 
- result = self.findall(path) - if isinstance(result, list) and len(result): - return result[0] - elif isinstance(result, _Element): - return result - else: - return None - - def findtext(self, path, default=None): - # Reimplementation of Element.findtext() to make it work without child - # iteration. - result = self.find(path) - if isinstance(result, _Element): - return result.text or "" - else: - return default - - def descendantpaths(self, prefix=None): - """Returns a list of object path expressions for all descendants. - """ - if prefix is not None and not python._isString(prefix): - prefix = '.'.join(prefix) - return _buildDescendantPaths(self._c_node, prefix) - -cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node, - char* href, char* name, - Py_ssize_t index): - cdef tree.xmlNode* (*next)(tree.xmlNode*) - if index >= 0: - next = cetree.nextElement - else: - index = -1 - index - next = cetree.previousElement - while c_node is not NULL: - if c_node.type == tree.XML_ELEMENT_NODE and \ - cetree.tagMatches(c_node, href, name): - index = index - 1 - if index < 0: - return c_node - c_node = next(c_node) - return NULL - -cdef object _lookupChild(_Element parent, tag): - cdef tree.xmlNode* c_result - cdef tree.xmlNode* c_node - cdef char* c_href - cdef char* c_tag - ns, tag = cetree.getNsTag(tag) - c_tag = _cstr(tag) - c_node = parent._c_node - if ns is None: - c_href = tree._getNs(c_node) - else: - c_href = _cstr(ns) - c_result = _findFollowingSibling(c_node.children, c_href, c_tag, 0) - if c_result is NULL: - return None - return elementFactory(parent._doc, c_result) - -cdef object _lookupChildOrRaise(_Element parent, tag): - element = _lookupChild(parent, tag) - if element is None: - raise AttributeError, "no such child: " + \ - _buildChildTag(parent, tag) - return element - -cdef object _buildChildTag(_Element parent, tag): - cdef char* c_href - cdef char* c_tag - ns, tag = cetree.getNsTag(tag) - c_tag = _cstr(tag) - if ns is None: - c_href = 
tree._getNs(parent._c_node) - else: - c_href = _cstr(ns) - return cetree.namespacedNameFromNsName(c_href, c_tag) - -cdef object _replaceElement(_Element element, value): - cdef _Element new_element - if isinstance(value, _Element): - # deep copy the new element - new_element = cetree.deepcopyNodeToDocument( - element._doc, (<_Element>value)._c_node) - new_element.tag = element.tag - elif python.PyList_Check(value) or python.PyTuple_Check(value): - element.__setslice__(0, python.PY_SSIZE_T_MAX, value) - return - else: - new_element = element.makeelement(element.tag) - _setElementValue(new_element, value) - element.getparent().replace(element, new_element) - -cdef object _appendValue(_Element parent, tag, value): - cdef _Element new_element - if isinstance(value, _Element): - # deep copy the new element - new_element = cetree.deepcopyNodeToDocument( - parent._doc, (<_Element>value)._c_node) - new_element.tag = tag - cetree.appendChild(parent, new_element) - elif python.PyList_Check(value) or python.PyTuple_Check(value): - for item in value: - _appendValue(parent, tag, item) - else: - new_element = cetree.makeSubElement( - parent, tag, None, None, None, None) - _setElementValue(new_element, value) - -cdef _setElementValue(_Element element, value): - cdef python.PyObject* dict_result - if value is None: - cetree.setAttributeValue( - element, XML_SCHEMA_INSTANCE_NIL_ATTR, "true") - elif isinstance(value, _Element): - _replaceElement(element, value) - else: - cetree.delAttributeFromNsName( - element._c_node, _XML_SCHEMA_INSTANCE_NS, "nil") - if python._isString(value): - pytype_name = "str" - else: - pytype_name = _typename(value) - if isinstance(value, bool): - value = _lower_bool(value) - else: - value = str(value) - dict_result = python.PyDict_GetItem(_PYTYPE_DICT, pytype_name) - if dict_result is not NULL: - cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name) - else: - cetree.delAttributeFromNsName(element._c_node, PYTYPE_NAMESPACE, - 
PYTYPE_ATTRIBUTE_NAME) - cetree.setNodeText(element._c_node, value) - -################################################################################ -# Data type support in subclasses - -cdef class ObjectifiedDataElement(ObjectifiedElement): - """This is the base class for all data type Elements. Subclasses should - override the 'pyval' property and possibly the __str__ method. - """ - property pyval: - def __get__(self): - return textOf(self._c_node) - - def __str__(self): - return textOf(self._c_node) or '' - - def __repr__(self): - return textOf(self._c_node) or '' - - def _setText(self, s): - """For use in subclasses only. Don't use unless you know what you are - doing. - """ - cetree.setNodeText(self._c_node, s) - -cdef class NumberElement(ObjectifiedDataElement): - cdef object _type - def _setValueParser(self, function): - "Set the function that parses the Python value from a string." - self._type = function - - cdef _value(self): - return self._type(textOf(self._c_node)) - - property pyval: - def __get__(self): - return self._value() - - def __int__(self): - return int(textOf(self._c_node)) - - def __long__(self): - return long(textOf(self._c_node)) - - def __float__(self): - return float(textOf(self._c_node)) - - def __str__(self): - return str(self._type(textOf(self._c_node))) - - def __repr__(self): - return repr(self._type(textOf(self._c_node))) - -# def __oct__(self): -# def __hex__(self): - - def __richcmp__(self, other, int op): - if hasattr(other, 'pyval'): - other = other.pyval - return python.PyObject_RichCompare( - _numericValueOf(self), other, op) - - def __add__(self, other): - return _numericValueOf(self) + _numericValueOf(other) - - def __sub__(self, other): - return _numericValueOf(self) - _numericValueOf(other) - - def __mul__(self, other): - return _numericValueOf(self) * _numericValueOf(other) - - def __div__(self, other): - return _numericValueOf(self) / _numericValueOf(other) - - def __truediv__(self, other): - return 
_numericValueOf(self) / _numericValueOf(other) - - def __mod__(self, other): - return _numericValueOf(self) % _numericValueOf(other) - - def __pow__(self, other, modulo): - if modulo is None: - return _numericValueOf(self) ** _numericValueOf(other) - else: - return pow(_numericValueOf(self), _numericValueOf(other), modulo) - - def __neg__(self): - return - _numericValueOf(self) - - def __pos__(self): - return + _numericValueOf(self) - - def __abs__(self): - return abs( _numericValueOf(self) ) - - def __nonzero__(self): - return _numericValueOf(self) != 0 - - def __invert__(self): - return ~ _numericValueOf(self) - - def __lshift__(self, other): - return _numericValueOf(self) << _numericValueOf(other) - - def __rshift__(self, other): - return _numericValueOf(self) >> _numericValueOf(other) - - def __and__(self, other): - return _numericValueOf(self) & _numericValueOf(other) - - def __or__(self, other): - return _numericValueOf(self) | _numericValueOf(other) - - def __xor__(self, other): - return _numericValueOf(self) ^ _numericValueOf(other) - -cdef class IntElement(NumberElement): - def _init(self): - self._type = int - -cdef class LongElement(NumberElement): - def _init(self): - self._type = long - -cdef class FloatElement(NumberElement): - def _init(self): - self._type = float - -cdef class StringElement(ObjectifiedDataElement): - """String data class. - - Note that this class does *not* support the sequence protocol of strings: - len(), iter(), str_attr[0], str_attr[0:1], etc. are *not* supported. - Instead, use the .text attribute to get a 'real' string. 
- """ - property pyval: - def __get__(self): - return textOf(self._c_node) or '' - - def __repr__(self): - return repr(textOf(self._c_node) or '') - - def strlen(self): - text = textOf(self._c_node) - if text is None: - return 0 - else: - return len(text) - - def __nonzero__(self): - text = textOf(self._c_node) - if text is None: - return False - return len(text) > 0 - - def __richcmp__(self, other, int op): - if hasattr(other, 'pyval'): - other = other.pyval - return python.PyObject_RichCompare( - _strValueOf(self), other, op) - - def __add__(self, other): - text = _strValueOf(self) - other = _strValueOf(other) - if text is None: - return other - if other is None: - return text - return text + other - - def __mul__(self, other): - if isinstance(self, StringElement): - return textOf((self)._c_node) * _numericValueOf(other) - elif isinstance(other, StringElement): - return _numericValueOf(self) * textOf((other)._c_node) - else: - raise TypeError, "invalid types for * operator" - - def __mod__(self, other): - if python.PyTuple_Check(other): - l = [] - for item in other: - python.PyList_Append(l, _strValueOf(item)) - other = tuple(l) - else: - other = _strValueOf(other) - return _strValueOf(self) % other - -cdef class NoneElement(ObjectifiedDataElement): - def __str__(self): - return "None" - - def __repr__(self): - return "None" - - def __nonzero__(self): - return False - - def __richcmp__(self, other, int op): - if other is None or self is None: - return python.PyObject_RichCompare(None, None, op) - if isinstance(self, NoneElement): - return python.PyObject_RichCompare(None, other, op) - else: - return python.PyObject_RichCompare(self, None, op) - - property pyval: - def __get__(self): - return None - -cdef class BoolElement(ObjectifiedDataElement): - """Boolean type base on string values: 'true' or 'false'. 
- """ - cdef int _boolval(self) except -1: - cdef char* c_str - text = textOf(self._c_node) - if text is None: - return 0 - c_str = _cstr(text) - if c_str[0] == c'0' or c_str[0] == c'f' or c_str[0] == c'F': - if c_str[1] == c'\0' or text == "false" or text.lower() == "false": - # '0' or 'f' or 'false' - return 0 - elif c_str[0] == c'1' or c_str[0] == c't' or c_str[0] == c'T': - if c_str[1] == c'\0' or text == "true" or text.lower() == "true": - # '1' or 't' or 'true' - return 1 - raise ValueError, "Invalid boolean value: '%s'" % text - - def __nonzero__(self): - if self._boolval(): - return True - else: - return False - - def __richcmp__(self, other, int op): - if hasattr(other, 'pyval'): - other = other.pyval - if hasattr(self, 'pyval'): - self_val = self.pyval - else: - self_val = bool(self) - return python.PyObject_RichCompare(self_val, other, op) - - def __str__(self): - if self._boolval(): - return "True" - else: - return "False" - - def __repr__(self): - if self._boolval(): - return "True" - else: - return "False" - - property pyval: - def __get__(self): - return self.__nonzero__() - -def __checkBool(s): - if s != 'true' and s != 'false' and s != '1' and s != '0': - raise ValueError - -cdef object _strValueOf(obj): - if python._isString(obj): - return obj - if isinstance(obj, _Element): - return textOf((<_Element>obj)._c_node) - if obj is None: - return '' - return str(obj) - -cdef object _numericValueOf(obj): - if isinstance(obj, NumberElement): - return (obj)._type( - textOf((obj)._c_node)) - elif hasattr(obj, 'pyval'): - # not always numeric, but Python will raise the right exception - return obj.pyval - return obj - -################################################################################ -# Python type registry - -cdef class PyType: - """User defined type. - - Named type that contains a type check function and a type class that - inherits from ObjectifiedDataElement. 
The type check must take a string - as argument and raise ValueError or TypeError if it cannot handle the - string value. It may be None in which case it is not considered for type - guessing. - - Example:: - PyType('int', int, MyIntClass).register() - - Note that the order in which types are registered matters. The first - matching type will be used. - """ - cdef readonly object name - cdef readonly object type_check - cdef object _add_text - cdef object _type - cdef object _schema_types - def __init__(self, name, type_check, type_class, stringify=None): - if not python._isString(name): - raise TypeError, "Type name must be a string" - if type_check is not None and not callable(type_check): - raise TypeError, "Type check function must be callable (or None)" - if name != TREE_PYTYPE_NAME and \ - not issubclass(type_class, ObjectifiedDataElement): - raise TypeError, \ - "Data classes must inherit from ObjectifiedDataElement" - self.name = name - self._type = type_class - self.type_check = type_check - if stringify is None: - self._add_text = _StringValueSetter(str) - else: - self._add_text = _StringValueSetter(stringify) - self._schema_types = [] - - def __repr__(self): - return "PyType(%s, %s)" % (self.name, self._type.__name__) - - def register(self, before=None, after=None): - """Register the type. - - The additional keyword arguments 'before' and 'after' accept a - sequence of type names that must appear before/after the new type in - the type list. If any of them is not currently known, it is simply - ignored. Raises ValueError if the dependencies cannot be fulfilled. 
- """ - if self.name == TREE_PYTYPE_NAME: - raise ValueError, "Cannot register tree type" - if self.type_check is not None: - for item in _TYPE_CHECKS: - if item[0] is self.type_check: - _TYPE_CHECKS.remove(item) - break - entry = (self.type_check, self) - first_pos = 0 - last_pos = -1 - if before or after: - if before is None: - before = () - elif after is None: - after = () - for i, (check, pytype) in enumerate(_TYPE_CHECKS): - if last_pos == -1 and pytype.name in before: - last_pos = i - if pytype.name in after: - first_pos = i+1 - if last_pos == -1: - _TYPE_CHECKS.append(entry) - elif first_pos > last_pos: - raise ValueError, "inconsistent before/after dependencies" - else: - _TYPE_CHECKS.insert(last_pos, entry) - - _PYTYPE_DICT[self.name] = self - for xs_type in self._schema_types: - _SCHEMA_TYPE_DICT[xs_type] = self - - def unregister(self): - if _PYTYPE_DICT.get(self.name) is self: - del _PYTYPE_DICT[self.name] - for xs_type, pytype in _SCHEMA_TYPE_DICT.items(): - if pytype is self: - del _SCHEMA_TYPE_DICT[xs_type] - if self.type_check is None: - return - try: - _TYPE_CHECKS.remove( (self.type_check, self) ) - except ValueError: - pass - - property xmlSchemaTypes: - """The list of XML Schema datatypes this Python type maps to. - - Note that this must be set before registering the type! 
- """ - def __get__(self): - return self._schema_types - def __set__(self, types): - self._schema_types = list(types) - -cdef class _StringValueSetter: - cdef object _stringify - def __init__(self, stringify): - self._stringify = stringify - - def __call__(self, elem, value): - _add_text(elem, self._stringify(value)) - - -cdef object _PYTYPE_DICT -_PYTYPE_DICT = {} - -cdef object _SCHEMA_TYPE_DICT -_SCHEMA_TYPE_DICT = {} - -cdef object _TYPE_CHECKS -_TYPE_CHECKS = [] - -cdef _lower_bool(b): - if b: - return "true" - else: - return "false" - -def __lower_bool(b): - return _lower_bool(b) - -cdef _pytypename(obj): - if python._isString(obj): - return "str" - else: - return _typename(obj) - -def pytypename(obj): - """Find the name of the corresponding PyType for a Python object. - """ - return _pytypename(obj) - -cdef _registerPyTypes(): - pytype = PyType('int', int, IntElement) - pytype.xmlSchemaTypes = ("int", "short", "byte", "unsignedShort", - "unsignedByte",) - - pytype.register() - - pytype = PyType('long', long, LongElement) - pytype.xmlSchemaTypes = ("integer", "nonPositiveInteger", "negativeInteger", - "long", "nonNegativeInteger", "unsignedLong", - "unsignedInt", "positiveInteger",) - pytype.register() - - pytype = PyType('float', float, FloatElement) - pytype.xmlSchemaTypes = ("double", "float") - pytype.register() - - pytype = PyType('bool', __checkBool, BoolElement, __lower_bool) - pytype.xmlSchemaTypes = ("boolean",) - pytype.register() - - pytype = PyType('str', None, StringElement) - pytype.xmlSchemaTypes = ("string", "normalizedString", "token", "language", - "Name", "NCName", "ID", "IDREF", "ENTITY", - "NMTOKEN", ) - pytype.register() - - # since lxml 2.0 - pytype = PyType('NoneType', None, NoneElement) - pytype.register() - - # backwards compatibility - pytype = PyType('none', None, NoneElement) - pytype.register() - -# non-registered PyType for inner tree elements -cdef object TREE_PYTYPE -TREE_PYTYPE = PyType(TREE_PYTYPE_NAME, None, 
ObjectifiedElement) - -_registerPyTypes() - -def getRegisteredTypes(): - """Returns a list of the currently registered PyType objects. - - To add a new type, retrieve this list and call unregister() for all - entries. Then add the new type at a suitable position (possibly replacing - an existing one) and call register() for all entries. - - This is necessary if the new type interferes with the type check functions - of existing ones (normally only int/float/bool) and must the tried before - other types. To add a type that is not yet parsable by the current type - check functions, you can simply register() it, which will append it to the - end of the type list. - """ - types = [] - known = set() - add_to_known = known.add - for check, pytype in _TYPE_CHECKS: - name = pytype.name - if name not in known: - add_to_known(name) - python.PyList_Append(types, pytype) - for pytype in _PYTYPE_DICT.itervalues(): - name = pytype.name - if name not in known: - add_to_known(name) - python.PyList_Append(types, pytype) - return types - -cdef PyType _guessPyType(value, PyType defaulttype): - if value is None: - return None - for type_check, tested_pytype in _TYPE_CHECKS: - try: - type_check(value) - return tested_pytype - except IGNORABLE_ERRORS: - # could not be parsed as the specififed type => ignore - pass - return defaulttype - -cdef object _guessElementClass(tree.xmlNode* c_node): - value = textOf(c_node) - if value is None: - return None - if value == '': - return StringElement - - for type_check, pytype in _TYPE_CHECKS: - try: - type_check(value) - return (pytype)._type - except IGNORABLE_ERRORS: - pass - return None - -################################################################################ -# adapted ElementMaker supports registered PyTypes - -cdef class _ObjectifyElementMakerCaller # forward declaration - -cdef extern from "etree_defs.h": - # macro call to 't->tp_new()' for fast instantiation - cdef _ObjectifyElementMakerCaller NEW_ELEMENT_MAKER "PY_NEW" (object 
t) - -cdef class ElementMaker: - cdef object _makeelement - cdef object _namespace - cdef object _nsmap - cdef bint _annotate - def __init__(self, *, namespace=None, nsmap=None, annotate=True, - makeelement=None): - if nsmap is None: - nsmap = _DEFAULT_NSMAP - self._nsmap = nsmap - if namespace is None: - self._namespace = None - else: - self._namespace = "{%s}" % namespace - self._annotate = annotate - if makeelement is not None: - assert callable(makeelement) - self._makeelement = makeelement - else: - self._makeelement = None - - def __getattr__(self, tag): - cdef _ObjectifyElementMakerCaller element_maker - if self._namespace is not None and tag[0] != "{": - tag = self._namespace + tag - element_maker = NEW_ELEMENT_MAKER(_ObjectifyElementMakerCaller) - element_maker._tag = tag - element_maker._nsmap = self._nsmap - element_maker._annotate = self._annotate - element_maker._element_factory = self._makeelement - return element_maker - -cdef class _ObjectifyElementMakerCaller: - cdef object _tag - cdef object _nsmap - cdef object _element_factory - cdef bint _annotate - - def __call__(self, *children, **attrib): - cdef _ObjectifyElementMakerCaller elementMaker - cdef python.PyObject* pytype - cdef _Element element - cdef _Element childElement - cdef bint has_children - cdef bint has_string_value - if self._element_factory is None: - element = _makeElement(self._tag, None, attrib, self._nsmap) - else: - element = self._element_factory(self._tag, attrib, self._nsmap) - - pytype_name = None - has_children = 0 - has_string_value = 0 - for child in children: - if child is None: - if python.PyTuple_GET_SIZE(children) == 1: - cetree.setAttributeValue( - element, XML_SCHEMA_INSTANCE_NIL_ATTR, "true") - elif python._isString(child): - _add_text(element, child) - has_string_value = 1 - elif isinstance(child, _Element): - cetree.appendChild(element, <_Element>child) - has_children = 1 - elif isinstance(child, _ObjectifyElementMakerCaller): - elementMaker = 
<_ObjectifyElementMakerCaller>child - if elementMaker._element_factory is None: - cetree.makeSubElement(element, elementMaker._tag, - None, None, None, None) - else: - childElement = elementMaker._element_factory( - elementMaker._tag) - cetree.appendChild(element, childElement) - has_children = 1 - else: - if pytype_name is not None: - # concatenation always makes the result a string - has_string_value = 1 - pytype_name = _typename(child) - pytype = python.PyDict_GetItem(_PYTYPE_DICT, pytype_name) - if pytype is not NULL: - (pytype)._add_text(element, child) - else: - has_string_value = 1 - child = str(child) - _add_text(element, child) - - if self._annotate and not has_children: - if has_string_value: - cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, "str") - elif pytype_name is not None: - cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name) - - return element - -cdef _add_text(_Element elem, text): - cdef tree.xmlNode* c_child - c_child = cetree.findChildBackwards(elem._c_node, 0) - if c_child is not NULL: - old = cetree.tailOf(c_child) - if old is not None: - text = old + text - cetree.setTailText(c_child, text) - else: - old = cetree.textOf(elem._c_node) - if old is not None: - text = old + text - cetree.setNodeText(elem._c_node, text) - -################################################################################ -# Recursive element dumping - -cdef bint __RECURSIVE_STR -__RECURSIVE_STR = 0 # default: off - -def enableRecursiveStr(on=True): - """Enable a recursively generated tree representation for str(element), - based on objectify.dump(element). - """ - global __RECURSIVE_STR - __RECURSIVE_STR = on - -def dump(_Element element not None): - """Return a recursively generated string representation of an element. 
- """ - return _dump(element, 0) - -cdef object _dump(_Element element, int indent): - indentstr = " " * indent - if isinstance(element, ObjectifiedDataElement): - value = repr(element) - else: - value = textOf(element._c_node) - if value is not None: - if python.PyString_GET_SIZE( value.strip() ) == 0: - value = None - else: - value = repr(value) - result = "%s%s = %s [%s]\n" % (indentstr, element.tag, - value, _typename(element)) - xsi_ns = "{%s}" % XML_SCHEMA_INSTANCE_NS - pytype_ns = "{%s}" % PYTYPE_NAMESPACE - for name, value in cetree.iterattributes(element, 3): - if '{' in name: - if name == PYTYPE_ATTRIBUTE: - if value == TREE_PYTYPE_NAME: - continue - else: - name = name.replace(pytype_ns, 'py:') - name = name.replace(xsi_ns, 'xsi:') - result = result + "%s * %s = %r\n" % (indentstr, name, value) - - indent = indent + 1 - for child in element.iterchildren(): - result = result + _dump(child, indent) - if indent == 1: - return result[:-1] # strip last '\n' - else: - return result - - -################################################################################ -# Pickle support - -cdef void _setupPickle(reduceFunction): - import copy_reg - copy_reg.constructor(fromstring) - copy_reg.pickle(ObjectifiedElement, reduceFunction, fromstring) - -def pickleReduce(obj): - return (fromstring, (etree.tostring(obj),)) - -_setupPickle(pickleReduce) -del pickleReduce - -################################################################################ -# Element class lookup - -cdef class ObjectifyElementClassLookup(ElementClassLookup): - """Element class lookup method that uses the objectify classes. - """ - cdef object empty_data_class - cdef object tree_class - def __init__(self, tree_class=None, empty_data_class=None): - """Lookup mechanism for objectify. - - The default Element classes can be replaced by passing subclasses of - ObjectifiedElement and ObjectifiedDataElement as keyword arguments. 
- 'tree_class' defines inner tree classes (defaults to - ObjectifiedElement), 'empty_data_class' defines the default class for - empty data elements (defauls to StringElement). - """ - self._lookup_function = _lookupElementClass - if tree_class is None: - tree_class = ObjectifiedElement - self.tree_class = tree_class - if empty_data_class is None: - empty_data_class = StringElement - self.empty_data_class = empty_data_class - -cdef object _lookupElementClass(state, _Document doc, tree.xmlNode* c_node): - cdef ObjectifyElementClassLookup lookup - cdef python.PyObject* dict_result - lookup = state - # if element has children => no data class - if cetree.hasChild(c_node): - return lookup.tree_class - - # if element is defined as xsi:nil, return NoneElement class - if "true" == cetree.attributeValueFromNsName( - c_node, _XML_SCHEMA_INSTANCE_NS, "nil"): - return NoneElement - - # check for Python type hint - value = cetree.attributeValueFromNsName( - c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) - if value is not None: - if value == TREE_PYTYPE_NAME: - return lookup.tree_class - dict_result = python.PyDict_GetItem(_PYTYPE_DICT, value) - if dict_result is not NULL: - return (dict_result)._type - # unknown 'pyval' => try to figure it out ourself, just go on - - # check for XML Schema type hint - value = cetree.attributeValueFromNsName( - c_node, _XML_SCHEMA_INSTANCE_NS, "type") - - if value is not None: - dict_result = python.PyDict_GetItem(_SCHEMA_TYPE_DICT, value) - if dict_result is NULL and ':' in value: - prefix, value = value.split(':', 1) - dict_result = python.PyDict_GetItem(_SCHEMA_TYPE_DICT, value) - if dict_result is not NULL: - return (dict_result)._type - - # otherwise determine class based on text content type - el_class = _guessElementClass(c_node) - if el_class is not None: - return el_class - - # if element is a root node => default to tree node - if c_node.parent is NULL or not tree._isElement(c_node.parent): - return lookup.tree_class - - return 
lookup.empty_data_class - - -################################################################################ -# Type annotations - -cdef PyType _check_type(tree.xmlNode* c_node, PyType pytype): - if pytype is None: - return None - value = textOf(c_node) - try: - pytype.type_check(value) - return pytype - except IGNORABLE_ERRORS: - # could not be parsed as the specified type => ignore - pass - return None - -def pyannotate(element_or_tree, *, ignore_old=False, ignore_xsi=False, - empty_pytype=None): - """Recursively annotates the elements of an XML tree with 'pytype' - attributes. - - If the 'ignore_old' keyword argument is True (the default), current 'pytype' - attributes will be ignored and replaced. Otherwise, they will be checked - and only replaced if they no longer fit the current text value. - - Setting the keyword argument ``ignore_xsi`` to True makes the function - additionally ignore existing ``xsi:type`` annotations. The default is to - use them as a type hint. - - The default annotation of empty elements can be set with the - ``empty_pytype`` keyword argument. The default is not to annotate empty - elements. Pass 'str', for example, to make string values the default. - """ - cdef _Element element - element = cetree.rootNodeOrRaise(element_or_tree) - _annotate(element, 0, 1, ignore_xsi, ignore_old, None, empty_pytype) - -def xsiannotate(element_or_tree, *, ignore_old=False, ignore_pytype=False, - empty_type=None): - """Recursively annotates the elements of an XML tree with 'xsi:type' - attributes. - - If the 'ignore_old' keyword argument is True (the default), current - 'xsi:type' attributes will be ignored and replaced. Otherwise, they will be - checked and only replaced if they no longer fit the current text value. - - Note that the mapping from Python types to XSI types is usually ambiguous. - Currently, only the first XSI type name in the corresponding PyType - definition will be used for annotation. 
Thus, you should consider naming - the widest type first if you define additional types. - - Setting the keyword argument ``ignore_pytype`` to True makes the function - additionally ignore existing ``pytype`` annotations. The default is to - use them as a type hint. - - The default annotation of empty elements can be set with the - ``empty_type`` keyword argument. The default is not to annotate empty - elements. Pass 'string', for example, to make string values the default. - """ - cdef _Element element - element = cetree.rootNodeOrRaise(element_or_tree) - _annotate(element, 1, 0, ignore_old, ignore_pytype, empty_type, None) - -def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False, - empty_pytype=None, empty_type=None, annotate_xsi=0, - annotate_pytype=1): - """Recursively annotates the elements of an XML tree with 'xsi:type' - and/or 'py:pytype' attributes. - - If the 'ignore_old' keyword argument is True (the default), current - 'py:pytype' attributes will be ignored for the type annotation. Set to False - if you want reuse existing 'py:pytype' information (iff appropriate for the - element text value). - - If the 'ignore_xsi' keyword argument is False (the default), existing - 'xsi:type' attributes will be used for the type annotation, if they fit the - element text values. - - Note that the mapping from Python types to XSI types is usually ambiguous. - Currently, only the first XSI type name in the corresponding PyType - definition will be used for annotation. Thus, you should consider naming - the widest type first if you define additional types. - - The default 'py:pytype' annotation of empty elements can be set with the - ``empty_pytype`` keyword argument. Pass 'str', for example, to make - string values the default. - - The default 'xsi:type' annotation of empty elements can be set with the - ``empty_type`` keyword argument. The default is not to annotate empty - elements. Pass 'string', for example, to make string values the default. 
- - The keyword arguments 'annotate_xsi' (default: 0) and 'annotate_pytype' - (default: 1) control which kind(s) of annotation to use. - """ - cdef _Element element - element = cetree.rootNodeOrRaise(element_or_tree) - _annotate(element, annotate_xsi, annotate_pytype, ignore_xsi, - ignore_old, empty_type, empty_pytype) - - -cdef _annotate(_Element element, bint annotate_xsi, bint annotate_pytype, - bint ignore_xsi, bint ignore_pytype, - empty_type_name, empty_pytype_name): - cdef _Document doc - cdef tree.xmlNode* c_node - cdef tree.xmlNs* c_ns - cdef python.PyObject* dict_result - cdef PyType pytype, empty_pytype, StrType, NoneType - - if not annotate_xsi and not annotate_pytype: - return - - doc = element._doc - - if empty_type_name is not None: - dict_result = python.PyDict_GetItem(_SCHEMA_TYPE_DICT, empty_type_name) - elif empty_pytype_name is not None: - dict_result = python.PyDict_GetItem(_PYTYPE_DICT, empty_pytype_name) - else: - dict_result = NULL - if dict_result is not NULL: - empty_pytype = dict_result - else: - empty_pytype = None - - StrType = _PYTYPE_DICT.get('str') - NoneType = _PYTYPE_DICT.get('NoneType') - c_node = element._c_node - tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) - if c_node.type == tree.XML_ELEMENT_NODE: - typename = None - pytype = None - value = None - istree = 0 - # if element is defined as xsi:nil, represent it as None - if cetree.attributeValueFromNsName( - c_node, _XML_SCHEMA_INSTANCE_NS, "nil") == "true": - pytype = NoneType - - if pytype is None and not ignore_xsi: - # check that old xsi type value is valid - typename = cetree.attributeValueFromNsName( - c_node, _XML_SCHEMA_INSTANCE_NS, "type") - if typename is not None: - dict_result = python.PyDict_GetItem( - _SCHEMA_TYPE_DICT, typename) - if dict_result is NULL and ':' in typename: - prefix, typename = typename.split(':', 1) - dict_result = python.PyDict_GetItem( - _SCHEMA_TYPE_DICT, typename) - if dict_result is not NULL: - pytype = dict_result - if pytype is not 
StrType: - # StrType does not have a typecheck but is the default - # anyway, so just accept it if given as type - # information - pytype = _check_type(c_node, pytype) - if pytype is None: - typename = None - - if pytype is None and not ignore_pytype: - # check that old pytype value is valid - old_pytypename = cetree.attributeValueFromNsName( - c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) - if old_pytypename is not None: - if old_pytypename == TREE_PYTYPE_NAME: - if not cetree.hasChild(c_node): - # only case where we should keep it, - # everything else is clear enough - pytype = TREE_PYTYPE - else: - if old_pytypename == 'none': - # transition from lxml 1.x - old_pytypename = "NoneType" - dict_result = python.PyDict_GetItem( - _PYTYPE_DICT, old_pytypename) - if dict_result is not NULL: - pytype = dict_result - if pytype is not StrType: - # StrType does not have a typecheck but is the - # default anyway, so just accept it if given as - # type information - pytype = _check_type(c_node, pytype) - - if pytype is None: - # try to guess type - if not cetree.hasChild(c_node): - # element has no children => data class - pytype = _guessPyType(textOf(c_node), StrType) - else: - istree = 1 - - if pytype is None: - # use default type for empty elements - if cetree.hasText(c_node): - pytype = StrType - else: - pytype = empty_pytype - if typename is None: - typename = empty_type_name - - if pytype is not None: - if typename is None: - if not istree: - if python.PyList_GET_SIZE(pytype._schema_types) > 0: - # pytype->xsi:type is a 1:n mapping - # simply take the first - typename = pytype._schema_types[0] - elif typename not in pytype._schema_types: - typename = pytype._schema_types[0] - - if annotate_xsi: - if typename is None or istree: - cetree.delAttributeFromNsName( - c_node, _XML_SCHEMA_INSTANCE_NS, "type") - else: - # update or create attribute - c_ns = cetree.findOrBuildNodeNsPrefix( - doc, c_node, _XML_SCHEMA_NS, 'xsd') - if c_ns is not NULL: - if ':' in typename: - 
prefix, name = typename.split(':', 1) - if c_ns.prefix is NULL or c_ns.prefix[0] == c'\0': - typename = name - elif cstd.strcmp(_cstr(prefix), c_ns.prefix) != 0: - prefix = c_ns.prefix - typename = prefix + ':' + name - elif c_ns.prefix is not NULL or c_ns.prefix[0] != c'\0': - prefix = c_ns.prefix - typename = prefix + ':' + typename - c_ns = cetree.findOrBuildNodeNsPrefix( - doc, c_node, _XML_SCHEMA_INSTANCE_NS, 'xsi') - tree.xmlSetNsProp(c_node, c_ns, "type", _cstr(typename)) - - if annotate_pytype: - if pytype is None: - # delete attribute if it exists - cetree.delAttributeFromNsName( - c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) - else: - # update or create attribute - c_ns = cetree.findOrBuildNodeNsPrefix( - doc, c_node, _PYTYPE_NAMESPACE, 'py') - tree.xmlSetNsProp(c_node, c_ns, _PYTYPE_ATTRIBUTE_NAME, - _cstr(pytype.name)) - if pytype is NoneType: - c_ns = cetree.findOrBuildNodeNsPrefix( - doc, c_node, _XML_SCHEMA_INSTANCE_NS, 'xsi') - tree.xmlSetNsProp(c_node, c_ns, "nil", "true") - tree.END_FOR_EACH_ELEMENT_FROM(c_node) - -def deannotate(element_or_tree, *, pytype=True, xsi=True): - """Recursively de-annotate the elements of an XML tree by removing 'pytype' - and/or 'type' attributes. - - If the 'pytype' keyword argument is True (the default), 'pytype' attributes - will be removed. If the 'xsi' keyword argument is True (the default), - 'xsi:type' attributes will be removed. 
- """ - cdef _Element element - cdef tree.xmlNode* c_node - - element = cetree.rootNodeOrRaise(element_or_tree) - c_node = element._c_node - if pytype and xsi: - tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) - if c_node.type == tree.XML_ELEMENT_NODE: - cetree.delAttributeFromNsName( - c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) - cetree.delAttributeFromNsName( - c_node, _XML_SCHEMA_INSTANCE_NS, "type") - tree.END_FOR_EACH_ELEMENT_FROM(c_node) - elif pytype: - tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) - if c_node.type == tree.XML_ELEMENT_NODE: - cetree.delAttributeFromNsName( - c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) - tree.END_FOR_EACH_ELEMENT_FROM(c_node) - else: - tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) - if c_node.type == tree.XML_ELEMENT_NODE: - cetree.delAttributeFromNsName( - c_node, _XML_SCHEMA_INSTANCE_NS, "type") - tree.END_FOR_EACH_ELEMENT_FROM(c_node) - - -################################################################################ -# Module level parser setup - -cdef object __DEFAULT_PARSER -__DEFAULT_PARSER = etree.XMLParser(remove_blank_text=True) -__DEFAULT_PARSER.setElementClassLookup( ObjectifyElementClassLookup() ) - -cdef object objectify_parser -objectify_parser = __DEFAULT_PARSER - -def setDefaultParser(new_parser = None): - set_default_parser(new_parser) - -def set_default_parser(new_parser = None): - """Replace the default parser used by objectify's Element() and - fromstring() functions. - - The new parser must be an etree.XMLParser. - - Call without arguments to reset to the original parser. 
- """ - global objectify_parser - if new_parser is None: - objectify_parser = __DEFAULT_PARSER - elif isinstance(new_parser, etree.XMLParser): - objectify_parser = new_parser - else: - raise TypeError, "parser must inherit from lxml.etree.XMLParser" - -cdef _Element _makeElement(tag, text, attrib, nsmap): - return cetree.makeElement(tag, None, objectify_parser, text, None, attrib, nsmap) - -################################################################################ -# Module level factory functions - -cdef object _fromstring -_fromstring = etree.fromstring - -def fromstring(xml, parser=None): - """Objectify specific version of the lxml.etree fromstring() function - that uses the objectify parser. - - You can pass a different parser as second argument. - """ - if parser is None: - parser = objectify_parser - return _fromstring(xml, parser) - -XML = fromstring - -cdef object _parse -_parse = etree.parse - -def parse(f, parser=None): - """Parse a file or file-like object with the objectify parser. - - You can pass a different parser as second argument. - """ - if parser is None: - parser = objectify_parser - return _parse(f, parser) - -cdef object _DEFAULT_NSMAP -_DEFAULT_NSMAP = { "py" : PYTYPE_NAMESPACE, - "xsi" : XML_SCHEMA_INSTANCE_NS, - "xsd" : XML_SCHEMA_NS} - -E = ElementMaker() - -def Element(_tag, attrib=None, nsmap=None, *, _pytype=None, **_attributes): - """Objectify specific version of the lxml.etree Element() factory that - always creates a structural (tree) element. - - NOTE: requires parser based element class lookup activated in lxml.etree! 
- """ - if attrib is not None: - if python.PyDict_Size(_attributes): - attrib.update(_attributes) - _attributes = attrib - if _pytype is None: - _pytype = TREE_PYTYPE_NAME - if nsmap is None: - nsmap = _DEFAULT_NSMAP - _attributes[PYTYPE_ATTRIBUTE] = _pytype - return _makeElement(_tag, None, _attributes, nsmap) - -def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None, - **_attributes): - """Create a new element from a Python value and XML attributes taken from - keyword arguments or a dictionary passed as second argument. - - Automatically adds a 'pytype' attribute for the Python type of the value, - if the type can be identified. If '_pytype' or '_xsi' are among the - keyword arguments, they will be used instead. - - If the _value argument is an ObjectifiedDataElement instance, its py:pytype, - xsi:type and other attributes and nsmap are reused unless they are redefined - in attrib and/or keyword arguments. - """ - cdef python.PyObject* dict_result - if nsmap is None: - nsmap = _DEFAULT_NSMAP - if attrib is not None and attrib: - if python.PyDict_Size(_attributes): - attrib = dict(attrib) - attrib.update(_attributes) - _attributes = attrib - if isinstance(_value, ObjectifiedElement): - if _pytype is None: - if _xsi is None and not _attributes and nsmap is _DEFAULT_NSMAP: - # special case: no change! 
- return _value.__copy__() - if isinstance(_value, ObjectifiedDataElement): - # reuse existing nsmap unless redefined in nsmap parameter - temp = _value.nsmap - if temp is not None and temp: - temp = dict(temp) - temp.update(nsmap) - nsmap = temp - # reuse existing attributes unless redefined in attrib/_attributes - temp = _value.attrib - if temp is not None and temp: - temp = dict(temp) - temp.update(_attributes) - _attributes = temp - # reuse existing xsi:type or py:pytype attributes, unless provided as - # arguments - if _xsi is None and _pytype is None: - dict_result = python.PyDict_GetItem(_attributes, - XML_SCHEMA_INSTANCE_TYPE_ATTR) - if dict_result is not NULL: - _xsi = dict_result - dict_result = python.PyDict_GetItem(_attributes, PYTYPE_ATTRIBUTE) - if dict_result is not NULL: - _pytype = dict_result - - if _xsi is not None: - if ':' in _xsi: - prefix, name = _xsi.split(':', 1) - ns = nsmap.get(prefix) - if ns != XML_SCHEMA_NS: - raise ValueError, "XSD types require the XSD namespace" - elif nsmap is _DEFAULT_NSMAP: - name = _xsi - _xsi = 'xsd:' + _xsi - else: - name = _xsi - for prefix, ns in nsmap.items(): - if ns == XML_SCHEMA_NS: - if prefix is not None and prefix: - _xsi = prefix + ':' + _xsi - break - else: - raise ValueError, "XSD types require the XSD namespace" - python.PyDict_SetItem(_attributes, XML_SCHEMA_INSTANCE_TYPE_ATTR, _xsi) - if _pytype is None: - # allow using unregistered or even wrong xsi:type names - dict_result = python.PyDict_GetItem(_SCHEMA_TYPE_DICT, _xsi) - if dict_result is NULL: - dict_result = python.PyDict_GetItem(_SCHEMA_TYPE_DICT, name) - if dict_result is not NULL: - _pytype = (dict_result).name - - if _value is None and _pytype != "str": - _pytype = _pytype or "NoneType" - strval = None - elif python._isString(_value): - strval = _value - elif python.PyBool_Check(_value): - if _value: - strval = "true" - else: - strval = "false" - else: - strval = str(_value) - - if _pytype is None: - _pytype = _pytypename(_value) - - 
if _pytype is not None: - if _pytype == "NoneType" or _pytype == "none": - strval = None - python.PyDict_SetItem(_attributes, XML_SCHEMA_INSTANCE_NIL_ATTR, "true") - else: - # check if type information from arguments is valid - dict_result = python.PyDict_GetItem(_PYTYPE_DICT, _pytype) - if dict_result is not NULL: - type_check = (dict_result).type_check - if type_check is not None: - type_check(strval) - - python.PyDict_SetItem(_attributes, PYTYPE_ATTRIBUTE, _pytype) - - return _makeElement("value", strval, _attributes, nsmap) - - -################################################################################ -# ObjectPath - -include "objectpath.pxi" From scoder at codespeak.net Mon Oct 22 09:33:28 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 22 Oct 2007 09:33:28 +0200 (CEST) Subject: [Lxml-checkins] r47698 - lxml/trunk/src/lxml Message-ID: <20071022073328.51C37818B@code0.codespeak.net> Author: scoder Date: Mon Oct 22 09:33:27 2007 New Revision: 47698 Added: lxml/trunk/src/lxml/lxml.pyclasslookup.pyx - copied unchanged from r47697, lxml/trunk/src/lxml/pyclasslookup.pyx Removed: lxml/trunk/src/lxml/pyclasslookup.pyx Log: use FQMN in filename to make Pyrex' import mechanism happy Deleted: /lxml/trunk/src/lxml/pyclasslookup.pyx ============================================================================== --- /lxml/trunk/src/lxml/pyclasslookup.pyx Mon Oct 22 09:33:27 2007 +++ (empty file) @@ -1,274 +0,0 @@ -from etreepublic cimport _Document, _Element, ElementBase -from etreepublic cimport ElementClassLookup, FallbackElementClassLookup -from etreepublic cimport elementFactory, import_etree -from python cimport str, repr, isinstance, issubclass, iter -from python cimport _cstr, Py_ssize_t -cimport etreepublic as cetree -cimport python -cimport tree -cimport cstd - -__all__ = ["PythonElementClassLookup"] - -cdef object etree -from lxml import etree -# initialize C-API of lxml.etree -import_etree(etree) - -__version__ = etree.__version__ - 
-cdef class _ElementProxy: - cdef tree.xmlNode* _c_node - cdef object _source_proxy - cdef object _dependent_proxies - - cdef int _assertNode(self) except -1: - """This is our way of saying: this proxy is invalid! - """ - assert self._c_node is not NULL, "Proxy invalidated!" - return 0 - - property tag: - """Element tag - """ - def __get__(self): - self._assertNode() - return cetree.namespacedName(self._c_node) - - property text: - """Text before the first subelement. This is either a string or - the value None, if there was no text. - """ - def __get__(self): - self._assertNode() - return cetree.textOf(self._c_node) - - property tail: - """Text after this element's end tag, but before the next sibling - element's start tag. This is either a string or the value None, if - there was no text. - """ - def __get__(self): - self._assertNode() - return cetree.tailOf(self._c_node) - - property attrib: - def __get__(self): - self._assertNode() - return dict(cetree.collectAttributes(self._c_node, 3)) - - property prefix: - """Namespace prefix or None. - """ - def __get__(self): - self._assertNode() - if self._c_node.ns is not NULL: - if self._c_node.ns.prefix is not NULL: - return cetree.pyunicode(self._c_node.ns.prefix) - return None - - property sourceline: - """Original line number as found by the parser or None if unknown. - """ - def __get__(self): - cdef long line - self._assertNode() - line = tree.xmlGetLineNo(self._c_node) - if line > 0: - return line - else: - return None - - def __repr__(self): - return "" % (self.tag, id(self)) - - def __getitem__(self, Py_ssize_t index): - """Returns the subelement at the given position. - """ - cdef tree.xmlNode* c_node - c_node = cetree.findChild(self._c_node, index) - if c_node is NULL: - raise IndexError, "list index out of range" - return _newProxy(self._source_proxy, c_node) - - def __getslice__(self, Py_ssize_t start, Py_ssize_t stop): - """Returns a list containing subelements in the given range. 
- """ - cdef tree.xmlNode* c_node - cdef Py_ssize_t c - c_node = cetree.findChild(self._c_node, start) - if c_node is NULL: - return [] - c = start - result = [] - while c_node is not NULL and c < stop: - if tree._isElement(c_node): - python.PyList_Append( - result, _newProxy(self._source_proxy, c_node)) - c = c + 1 - c_node = c_node.next - return result - - def __len__(self): - """Returns the number of subelements. - """ - cdef Py_ssize_t c - cdef tree.xmlNode* c_node - self._assertNode() - c = 0 - c_node = self._c_node.children - while c_node is not NULL: - if tree._isElement(c_node): - c = c + 1 - c_node = c_node.next - return c - - def __nonzero__(self): - cdef tree.xmlNode* c_node - self._assertNode() - c_node = cetree.findChildBackwards(self._c_node, 0) - return c_node != NULL - - def get(self, key, default=None): - """Gets an element attribute. - """ - self._assertNode() - return _getAttributeValue(self._c_node, key, default) - - def keys(self): - """Gets a list of attribute names. The names are returned in an - arbitrary order (just like for an ordinary Python dictionary). - """ - self._assertNode() - return cetree.collectAttributes(self._c_node, 1) - - def values(self): - """Gets element attributes, as a sequence. The attributes are returned - in an arbitrary order. - """ - self._assertNode() - return cetree.collectAttributes(self._c_node, 2) - - def items(self): - """Gets element attributes, as a sequence. The attributes are returned - in an arbitrary order. - """ - self._assertNode() - return cetree.collectAttributes(self._c_node, 3) - - def getchildren(self): - """Returns all subelements. The elements are returned in document - order. 
- """ - cdef tree.xmlNode* c_node - self._assertNode() - result = [] - c_node = self._c_node.children - while c_node is not NULL: - if tree._isElement(c_node): - python.PyList_Append( - result, _newProxy(self._source_proxy, c_node)) - c_node = c_node.next - return result - - def getparent(self): - """Returns the parent of this element or None for the root element. - """ - cdef tree.xmlNode* c_parent - self._assertNode() - c_parent = self._c_node.parent - if c_parent is NULL or not tree._isElement(c_parent): - return None - else: - return _newProxy(self._source_proxy, c_parent) - - def getnext(self): - """Returns the following sibling of this element or None. - """ - cdef tree.xmlNode* c_node - self._assertNode() - c_node = cetree.nextElement(self._c_node) - if c_node is not NULL: - return _newProxy(self._source_proxy, c_node) - return None - - def getprevious(self): - """Returns the preceding sibling of this element or None. - """ - cdef tree.xmlNode* c_node - self._assertNode() - c_node = cetree.previousElement(self._c_node) - if c_node is not NULL: - return _newProxy(self._source_proxy, c_node) - return None - -cdef _ElementProxy _newProxy(_ElementProxy sourceProxy, tree.xmlNode* c_node): - cdef _ElementProxy el - el = _ElementProxy() - el._c_node = c_node - if sourceProxy is None: - sourceProxy = el - el._dependent_proxies = [] - el._source_proxy = sourceProxy - python.PyList_Append(sourceProxy._dependent_proxies, el) - return el - -cdef _freeProxies(_ElementProxy sourceProxy): - cdef _ElementProxy el - if sourceProxy is None: - return - if sourceProxy._dependent_proxies is None: - return - for el in sourceProxy._dependent_proxies: - el._c_node = NULL - del sourceProxy._dependent_proxies[:] - -cdef object _getAttributeValue(tree.xmlNode* c_node, key, default): - cdef char* c_tag - cdef char* c_href - ns, tag = cetree.getNsTag(key) - c_tag = _cstr(tag) - if ns is None: - c_href = NULL - else: - c_href = _cstr(ns) - result = cetree.attributeValueFromNsName(c_node, 
c_href, c_tag) - if result is None: - return default - return result - - -cdef class PythonElementClassLookup(FallbackElementClassLookup): - """Element class lookup based on a subclass method. - - To use it, inherit from this class and override the lookup method to - lookup the element class for a node:: - - lookup(self, document, node_proxy) - - The first argument is the opaque document instance that contains the - Element. The second arguments is a lightweight Element proxy - implementation that is only valid during the lookup. Do not try to keep a - reference to it. Once the lookup is done, the proxy will be invalid. - - If you return None from this method, the fallback will be called. - """ - def __init__(self, ElementClassLookup fallback=None): - FallbackElementClassLookup.__init__(self, fallback) - self._lookup_function = _lookup_class - - def lookup(self, doc, element): - return None - -cdef object _lookup_class(state, _Document doc, tree.xmlNode* c_node): - cdef PythonElementClassLookup lookup - cdef _ElementProxy proxy - lookup = state - - proxy = _newProxy(None, c_node) - cls = lookup.lookup(doc, proxy) - _freeProxies(proxy) - - if cls is not None: - return cls - return cetree.callLookupFallback(lookup, doc, c_node) From scoder at codespeak.net Mon Oct 22 09:51:26 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 22 Oct 2007 09:51:26 +0200 (CEST) Subject: [Lxml-checkins] r47699 - in lxml/trunk: . 
src/lxml Message-ID: <20071022075126.9647F8181@code0.codespeak.net> Author: scoder Date: Mon Oct 22 09:51:25 2007 New Revision: 47699 Modified: lxml/trunk/setupinfo.py lxml/trunk/src/lxml/etreepublic.pxd Log: adapted to new module source names Modified: lxml/trunk/setupinfo.py ============================================================================== --- lxml/trunk/setupinfo.py (original) +++ lxml/trunk/setupinfo.py Mon Oct 22 09:51:25 2007 @@ -10,9 +10,9 @@ CYTHON_INSTALLED = False EXT_MODULES = [ - ("etree", "lxml.etree"), - ("objectify", "lxml.objectify"), - ("pyclasslookup", "lxml.pyclasslookup") + ("lxml.etree", "lxml.etree"), + ("lxml.objectify", "lxml.objectify"), + ("lxml.pyclasslookup", "lxml.pyclasslookup") ] def env_var(name): Modified: lxml/trunk/src/lxml/etreepublic.pxd ============================================================================== --- lxml/trunk/src/lxml/etreepublic.pxd (original) +++ lxml/trunk/src/lxml/etreepublic.pxd Mon Oct 22 09:51:25 2007 @@ -16,7 +16,7 @@ int start_node_inclusive) cdef void END_FOR_EACH_ELEMENT_FROM(tree.xmlNode* start_node) -cdef extern from "etree.h": +cdef extern from "lxml.etree.h": # first function to call! cdef int import_etree(etree_module) except -1 From scoder at codespeak.net Mon Oct 22 10:06:58 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 22 Oct 2007 10:06:58 +0200 (CEST) Subject: [Lxml-checkins] r47700 - in lxml/trunk: . doc Message-ID: <20071022080658.0796A816E@code0.codespeak.net> Author: scoder Date: Mon Oct 22 10:06:56 2007 New Revision: 47700 Modified: lxml/trunk/CHANGES.txt lxml/trunk/doc/tutorial.txt Log: doc fix Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Oct 22 10:06:56 2007 @@ -31,6 +31,10 @@ Other changes ------------- +* The module source files are now called "lxml.*.pyx", such as + "lxml.etree.pyx". 
This was changed for consistency with the way + Pyrex commonly handles package imports. + 2.0alpha4 (2007-10-07) ====================== Modified: lxml/trunk/doc/tutorial.txt ============================================================================== --- lxml/trunk/doc/tutorial.txt (original) +++ lxml/trunk/doc/tutorial.txt Mon Oct 22 10:06:56 2007 @@ -363,11 +363,9 @@ - Since lxml 2.0 (and ElementTree 1.3), the serialisation functions can -do more than XML serialisation and optional pretty printing. You can -serialise to HTML or extract the text content by passing the -``method`` keyword:: +do more than XML serialisation. You can serialise to HTML or extract +the text content by passing the ``method`` keyword:: >>> root = etree.XML('

Hello
World

') From scoder at codespeak.net Mon Oct 22 14:25:08 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 22 Oct 2007 14:25:08 +0200 (CEST) Subject: [Lxml-checkins] r47704 - lxml/trunk Message-ID: <20071022122508.35B2E817B@code0.codespeak.net> Author: scoder Date: Mon Oct 22 14:25:06 2007 New Revision: 47704 Modified: lxml/trunk/CHANGES.txt lxml/trunk/setupinfo.py Log: cleanup Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Oct 22 14:25:06 2007 @@ -31,9 +31,11 @@ Other changes ------------- -* The module source files are now called "lxml.*.pyx", such as +* The module source files were renamed to "lxml.*.pyx", such as "lxml.etree.pyx". This was changed for consistency with the way - Pyrex commonly handles package imports. + Pyrex commonly handles package imports. The main effect is that + classes now know about their fully qualified class name, including + the package name of their module. 
2.0alpha4 (2007-10-07) Modified: lxml/trunk/setupinfo.py ============================================================================== --- lxml/trunk/setupinfo.py (original) +++ lxml/trunk/setupinfo.py Mon Oct 22 14:25:06 2007 @@ -9,11 +9,7 @@ except ImportError: CYTHON_INSTALLED = False -EXT_MODULES = [ - ("lxml.etree", "lxml.etree"), - ("lxml.objectify", "lxml.objectify"), - ("lxml.pyclasslookup", "lxml.pyclasslookup") - ] +EXT_MODULES = ["lxml.etree", "lxml.objectify","lxml.pyclasslookup"] def env_var(name): value = os.getenv(name, '') @@ -44,10 +40,10 @@ runtime_library_dirs = [] result = [] - for module, package in modules: + for module in modules: result.append( Extension( - package, + module, sources = ["src/lxml/" + module + source_extension], extra_compile_args = ['-w'] + _cflags, define_macros = _define_macros, From scoder at codespeak.net Mon Oct 22 14:25:15 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 22 Oct 2007 14:25:15 +0200 (CEST) Subject: [Lxml-checkins] r47705 - lxml/trunk Message-ID: <20071022122515.82892817F@code0.codespeak.net> Author: scoder Date: Mon Oct 22 14:25:15 2007 New Revision: 47705 Modified: lxml/trunk/TODO.txt Log: todo Modified: lxml/trunk/TODO.txt ============================================================================== --- lxml/trunk/TODO.txt (original) +++ lxml/trunk/TODO.txt Mon Oct 22 14:25:15 2007 @@ -61,3 +61,5 @@ * clean support for entities (is the Entity element class enough?) * implement 'position' property on ParseError exception + +* rewrite iterparse() to accept a parser as argument instead of being one From scoder at codespeak.net Mon Oct 22 20:37:14 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 22 Oct 2007 20:37:14 +0200 (CEST) Subject: [Lxml-checkins] r47710 - in lxml/branch/lxml-1.3: . 
doc Message-ID: <20071022183714.460628106@code0.codespeak.net> Author: scoder Date: Mon Oct 22 20:37:14 2007 New Revision: 47710 Modified: lxml/branch/lxml-1.3/CHANGES.txt lxml/branch/lxml-1.3/doc/main.txt Log: prepare release of 1.3.5 Modified: lxml/branch/lxml-1.3/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.3/CHANGES.txt (original) +++ lxml/branch/lxml-1.3/CHANGES.txt Mon Oct 22 20:37:14 2007 @@ -2,8 +2,8 @@ lxml changelog ============== -Under development -================= +1.3.5 (2007-10-22) +================== Features added -------------- Modified: lxml/branch/lxml-1.3/doc/main.txt ============================================================================== --- lxml/branch/lxml-1.3/doc/main.txt (original) +++ lxml/branch/lxml-1.3/doc/main.txt Mon Oct 22 20:37:14 2007 @@ -130,7 +130,7 @@ .. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/ .. _`this key`: pubkey.asc -The latest version is `lxml 1.3.4`_, released 2007-08-30 (`changes for 1.3.4`_). +The latest version is `lxml 1.3.5`_, released 2007-10-2 (`changes for 1.3.5`_). `Older versions`_ are listed below. .. _`Older versions`: #old-versions @@ -191,6 +191,8 @@ Old Versions ------------ +* `lxml 1.3.4`_, released 2007-08-30 (`changes for 1.3.4`_) + * `lxml 1.3.3`_, released 2007-07-26 (`changes for 1.3.3`_) * `lxml 1.3.2`_, released 2007-07-03 (`changes for 1.3.2`_) @@ -235,6 +237,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 1.3.5`: lxml-1.3.5.tgz .. _`lxml 1.3.4`: lxml-1.3.4.tgz .. _`lxml 1.3.3`: lxml-1.3.3.tgz .. _`lxml 1.3.2`: lxml-1.3.2.tgz @@ -258,6 +261,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 1.3.5`: changes-1.3.5.html .. _`changes for 1.3.4`: changes-1.3.4.html .. _`changes for 1.3.3`: changes-1.3.3.html .. 
_`changes for 1.3.2`: changes-1.3.2.html From scoder at codespeak.net Mon Oct 22 20:37:44 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 22 Oct 2007 20:37:44 +0200 (CEST) Subject: [Lxml-checkins] r47711 - lxml/branch/lxml-1.3/doc Message-ID: <20071022183744.C85018106@code0.codespeak.net> Author: scoder Date: Mon Oct 22 20:37:44 2007 New Revision: 47711 Modified: lxml/branch/lxml-1.3/doc/main.txt Log: doc fix Modified: lxml/branch/lxml-1.3/doc/main.txt ============================================================================== --- lxml/branch/lxml-1.3/doc/main.txt (original) +++ lxml/branch/lxml-1.3/doc/main.txt Mon Oct 22 20:37:44 2007 @@ -130,7 +130,7 @@ .. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/ .. _`this key`: pubkey.asc -The latest version is `lxml 1.3.5`_, released 2007-10-2 (`changes for 1.3.5`_). +The latest version is `lxml 1.3.5`_, released 2007-10-22 (`changes for 1.3.5`_). `Older versions`_ are listed below. .. _`Older versions`: #old-versions From scoder at codespeak.net Mon Oct 22 20:39:24 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 22 Oct 2007 20:39:24 +0200 (CEST) Subject: [Lxml-checkins] r47712 - lxml/tag/lxml-1.3.5 Message-ID: <20071022183924.837DD8106@code0.codespeak.net> Author: scoder Date: Mon Oct 22 20:39:24 2007 New Revision: 47712 Added: lxml/tag/lxml-1.3.5/ - copied from r47711, lxml/branch/lxml-1.3/ Log: tag for lxml 1.3.5 From jholg at codespeak.net Wed Oct 24 14:18:21 2007 From: jholg at codespeak.net (jholg at codespeak.net) Date: Wed, 24 Oct 2007 14:18:21 +0200 (CEST) Subject: [Lxml-checkins] r47819 - in lxml/trunk/src/lxml: . tests Message-ID: <20071024121821.17B2D80DD@code0.codespeak.net> Author: jholg Date: Wed Oct 24 14:18:20 2007 New Revision: 47819 Modified: lxml/trunk/src/lxml/lxml.objectify.pyx lxml/trunk/src/lxml/tests/test_objectify.py Log: Fixed objctify StringElement __cmp__ and __mod__. 
Modified: lxml/trunk/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.objectify.pyx (original) +++ lxml/trunk/src/lxml/lxml.objectify.pyx Wed Oct 24 14:18:20 2007 @@ -686,13 +686,6 @@ raise TypeError, "invalid types for * operator" def __mod__(self, other): - if python.PyTuple_Check(other): - l = [] - for item in other: - python.PyList_Append(l, _strValueOf(item)) - other = tuple(l) - else: - other = _strValueOf(other) return _strValueOf(self) % other cdef class NoneElement(ObjectifiedDataElement): @@ -775,7 +768,7 @@ if python._isString(obj): return obj if isinstance(obj, _Element): - return textOf((<_Element>obj)._c_node) + return textOf((<_Element>obj)._c_node) or '' if obj is None: return '' return str(obj) Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Wed Oct 24 14:18:20 2007 @@ -916,7 +916,7 @@ def test_type_str_cmp(self): XML = self.XML - root = XML(u'testtaste') + root = XML(u'testtaste') self.assertFalse(root.b[0] < root.b[1]) self.assertFalse(root.b[0] <= root.b[1]) self.assertFalse(root.b[0] == root.b[1]) @@ -930,10 +930,18 @@ self.assert_(root.b[0] > 5) self.assert_(5 < root.b[0]) + self.assertEquals("", root.b[2]) + self.assertEquals(root.b[2], "") + self.assertEquals("", root.b[3]) + self.assertEquals(root.b[3], "") + self.assertEquals(root.b[2], root.b[3]) + root.b = "test" self.assert_(root.b) root.b = "" self.assertFalse(root.b) + self.assertEquals(root.b, "") + self.assertEquals("", root.b) def test_type_int_cmp(self): XML = self.XML @@ -955,6 +963,8 @@ self.assert_(root.b) root.b = 0 self.assertFalse(root.b) + + # float + long share the NumberElement implementation with int def test_type_bool_cmp(self): XML = self.XML @@ -980,6 +990,45 @@ root.b = False 
self.assertFalse(root.b) + def test_type_none_cmp(self): + XML = self.XML + root = XML(u""" + + + """) + self.assert_(root.b[0] == root.b[1]) + self.assertFalse(root.b[0]) + self.assertEquals(root.b[0], None) + self.assertEquals(None, root.b[0]) + + for comparison in ["abc", 5, 7.3, True, [], ()]: + none = root.b[1] + self.assert_(none < comparison, "%s (%s) should be < %s" % + (none, type(none), comparison) ) + self.assert_(comparison > none, "%s should be > %s (%s)" % + (comparison, none, type(none)) ) + + def test_type_str_mod(self): + s = "%d %f %s %r" + el = objectify.DataElement(s) + values = (1, 7.0, "abcd", None) + self.assertEquals(s % values, el % values) + + s = "%d" + el = objectify.DataElement(s) + val = 5 + self.assertEquals(s % val, el % val) + + s = "%d %s" + el = objectify.DataElement(s) + val = 5 + self.assertRaises(TypeError, el.__mod__, val) + + s = "" + el = objectify.DataElement(s) + val = 5 + self.assertRaises(TypeError, el.__mod__, val) + def test_dataelement_xsi(self): el = objectify.DataElement(1, _xsi="string") self.assertEquals( From jholg at codespeak.net Wed Oct 24 14:22:40 2007 From: jholg at codespeak.net (jholg at codespeak.net) Date: Wed, 24 Oct 2007 14:22:40 +0200 (CEST) Subject: [Lxml-checkins] r47820 - in lxml/branch/lxml-1.3/src/lxml: . tests Message-ID: <20071024122240.E1BDE80EE@code0.codespeak.net> Author: jholg Date: Wed Oct 24 14:22:40 2007 New Revision: 47820 Modified: lxml/branch/lxml-1.3/src/lxml/objectify.pyx lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py Log: Fixed StringElement __cmp__ and __mod__. 
Modified: lxml/branch/lxml-1.3/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/objectify.pyx (original) +++ lxml/branch/lxml-1.3/src/lxml/objectify.pyx Wed Oct 24 14:22:40 2007 @@ -681,13 +681,6 @@ raise TypeError, "invalid types for * operator" def __mod__(self, other): - if python.PyTuple_Check(other): - l = [] - for item in other: - python.PyList_Append(l, _strValueOf(item)) - other = tuple(l) - else: - other = _strValueOf(other) return _strValueOf(self) % other cdef class NoneElement(ObjectifiedDataElement): @@ -770,7 +763,7 @@ if python._isString(obj): return obj if isinstance(obj, _Element): - return textOf((<_Element>obj)._c_node) + return textOf((<_Element>obj)._c_node) or '' if obj is None: return '' return str(obj) Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py Wed Oct 24 14:22:40 2007 @@ -748,7 +748,7 @@ def test_type_str_cmp(self): XML = self.XML - root = XML(u'testtaste') + root = XML(u'testtaste') self.assertFalse(root.b[0] < root.b[1]) self.assertFalse(root.b[0] <= root.b[1]) self.assertFalse(root.b[0] == root.b[1]) @@ -762,10 +762,18 @@ self.assert_(root.b[0] > 5) self.assert_(5 < root.b[0]) + self.assertEquals("", root.b[2]) + self.assertEquals(root.b[2], "") + self.assertEquals("", root.b[3]) + self.assertEquals(root.b[3], "") + self.assertEquals(root.b[2], root.b[3]) + root.b = "test" self.assert_(root.b) root.b = "" self.assertFalse(root.b) + self.assertEquals(root.b, "") + self.assertEquals("", root.b) def test_type_int_cmp(self): XML = self.XML @@ -788,6 +796,8 @@ root.b = 0 self.assertFalse(root.b) + # float + long share the NumberElement implementation with int + def test_type_bool_cmp(self): XML = self.XML root = XML(u'falsetrue') @@ 
-812,6 +822,45 @@ root.b = False self.assertFalse(root.b) + def test_type_none_cmp(self): + XML = self.XML + root = XML(u""" + + + """) + self.assert_(root.b[0] == root.b[1]) + self.assertFalse(root.b[0]) + self.assertEquals(root.b[0], None) + self.assertEquals(None, root.b[0]) + + for comparison in ["abc", 5, 7.3, True, [], ()]: + none = root.b[1] + self.assert_(none < comparison, "%s (%s) should be < %s" % + (none, type(none), comparison) ) + self.assert_(comparison > none, "%s should be > %s (%s)" % + (comparison, none, type(none))) + + def test_type_str_mod(self): + s = "%d %f %s %r" + el = objectify.DataElement(s) + values = (1, 7.0, "abcd", None) + self.assertEquals(s % values, el % values) + + s = "%d" + el = objectify.DataElement(s) + val = 5 + self.assertEquals(s % val, el % val) + + s = "%d %s" + el = objectify.DataElement(s) + val = 5 + self.assertRaises(TypeError, el.__mod__, val) + + s = "" + el = objectify.DataElement(s) + val = 5 + self.assertRaises(TypeError, el.__mod__, val) + def test_pytype_annotation(self): XML = self.XML root = XML(u'''\ From scoder at codespeak.net Wed Oct 24 14:36:03 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 24 Oct 2007 14:36:03 +0200 (CEST) Subject: [Lxml-checkins] r47821 - lxml/trunk/src/lxml/tests Message-ID: <20071024123603.0B271811A@code0.codespeak.net> Author: scoder Date: Wed Oct 24 14:36:02 2007 New Revision: 47821 Modified: lxml/trunk/src/lxml/tests/test_objectify.py Log: test cleanup, string formatting must also work for DataElements Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Wed Oct 24 14:36:02 2007 @@ -598,6 +598,36 @@ s = "toast" self.assertEquals("test" + s, root.s + s) self.assertEquals(s + "test", s + root.s) + + def test_type_str_mod(self): + s = "%d %f %s %r" + el = 
objectify.DataElement(s) + values = (1, 7.0, "abcd", None) + self.assertEquals(s % values, el % values) + + s = "%d" + el = objectify.DataElement(s) + val = 5 + self.assertEquals(s % val, el % val) + + s = "%d %s" + el = objectify.DataElement(s) + val = 5 + self.assertRaises(TypeError, el.__mod__, val) + + s = "" + el = objectify.DataElement(s) + val = 5 + self.assertRaises(TypeError, el.__mod__, val) + + def test_type_str_mod_data_elements(self): + s = "%d %f %s %r" + el = objectify.DataElement(s) + values = (objectify.DataElement(1), + objectify.DataElement(7.0), + objectify.DataElement("abcd"), + objectify.DataElement(None)) + self.assertEquals(s % values, el % values) def test_data_element_str(self): value = objectify.DataElement("test") @@ -1007,27 +1037,6 @@ (none, type(none), comparison) ) self.assert_(comparison > none, "%s should be > %s (%s)" % (comparison, none, type(none)) ) - - def test_type_str_mod(self): - s = "%d %f %s %r" - el = objectify.DataElement(s) - values = (1, 7.0, "abcd", None) - self.assertEquals(s % values, el % values) - - s = "%d" - el = objectify.DataElement(s) - val = 5 - self.assertEquals(s % val, el % val) - - s = "%d %s" - el = objectify.DataElement(s) - val = 5 - self.assertRaises(TypeError, el.__mod__, val) - - s = "" - el = objectify.DataElement(s) - val = 5 - self.assertRaises(TypeError, el.__mod__, val) def test_dataelement_xsi(self): el = objectify.DataElement(1, _xsi="string") From scoder at codespeak.net Wed Oct 24 14:50:30 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 24 Oct 2007 14:50:30 +0200 (CEST) Subject: [Lxml-checkins] r47822 - lxml/trunk/src/lxml/html/tests Message-ID: <20071024125030.E07D98113@code0.codespeak.net> Author: scoder Date: Wed Oct 24 14:50:30 2007 New Revision: 47822 Modified: lxml/trunk/src/lxml/html/tests/test_elementsoup.py Log: skip ElementSoup test if BeautifulSoup is not installed Modified: lxml/trunk/src/lxml/html/tests/test_elementsoup.py 
============================================================================== --- lxml/trunk/src/lxml/html/tests/test_elementsoup.py (original) +++ lxml/trunk/src/lxml/html/tests/test_elementsoup.py Wed Oct 24 14:50:30 2007 @@ -1,9 +1,17 @@ import unittest from lxml.tests.common_imports import doctest +try: + import BeautifulSoup + BS_INSTALLED = True +except: + BS_INSTALLED = False + + def test_suite(): suite = unittest.TestSuite() - suite.addTests([doctest.DocFileSuite('../../../../doc/elementsoup.txt')]) + if BS_INSTALLED: + suite.addTests([doctest.DocFileSuite('../../../../doc/elementsoup.txt')]) return suite if __name__ == '__main__': From scoder at codespeak.net Wed Oct 24 14:51:29 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 24 Oct 2007 14:51:29 +0200 (CEST) Subject: [Lxml-checkins] r47823 - lxml/trunk/src/lxml/html/tests Message-ID: <20071024125129.B4C438113@code0.codespeak.net> Author: scoder Date: Wed Oct 24 14:51:29 2007 New Revision: 47823 Modified: lxml/trunk/src/lxml/html/tests/test_elementsoup.py Log: be explicit :) Modified: lxml/trunk/src/lxml/html/tests/test_elementsoup.py ============================================================================== --- lxml/trunk/src/lxml/html/tests/test_elementsoup.py (original) +++ lxml/trunk/src/lxml/html/tests/test_elementsoup.py Wed Oct 24 14:51:29 2007 @@ -4,7 +4,7 @@ try: import BeautifulSoup BS_INSTALLED = True -except: +except ImportError: BS_INSTALLED = False From scoder at codespeak.net Thu Oct 25 09:59:39 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 25 Oct 2007 09:59:39 +0200 (CEST) Subject: [Lxml-checkins] r47887 - lxml/trunk/benchmark Message-ID: <20071025075939.029908130@code0.codespeak.net> Author: scoder Date: Thu Oct 25 09:59:39 2007 New Revision: 47887 Modified: lxml/trunk/benchmark/benchbase.py Log: fix API usage Modified: lxml/trunk/benchmark/benchbase.py ============================================================================== --- 
lxml/trunk/benchmark/benchbase.py (original) +++ lxml/trunk/benchmark/benchbase.py Thu Oct 25 09:59:39 2007 @@ -150,7 +150,7 @@ return getattr(self, name) def _serialize_tree(self, root): - return self.etree.tostring(root, 'UTF-8') + return self.etree.tostring(root, encoding='UTF-8') def et_make_clone_factory(self, elem): def generate_elem(append, elem, level): From scoder at codespeak.net Thu Oct 25 10:02:06 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 25 Oct 2007 10:02:06 +0200 (CEST) Subject: [Lxml-checkins] r47888 - lxml/trunk/src/lxml Message-ID: <20071025080206.D75ED8127@code0.codespeak.net> Author: scoder Date: Thu Oct 25 10:02:06 2007 New Revision: 47888 Modified: lxml/trunk/src/lxml/lxml.pyclasslookup.pyx Log: broken import Modified: lxml/trunk/src/lxml/lxml.pyclasslookup.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.pyclasslookup.pyx (original) +++ lxml/trunk/src/lxml/lxml.pyclasslookup.pyx Thu Oct 25 10:02:06 2007 @@ -2,7 +2,7 @@ from etreepublic cimport ElementClassLookup, FallbackElementClassLookup from etreepublic cimport elementFactory, import_etree from python cimport str, repr, isinstance, issubclass, iter -from python cimport _cstr, Py_ssize_t +from python cimport _cstr cimport etreepublic as cetree cimport python cimport tree From scoder at codespeak.net Thu Oct 25 10:03:32 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 25 Oct 2007 10:03:32 +0200 (CEST) Subject: [Lxml-checkins] r47889 - lxml/trunk/src/lxml Message-ID: <20071025080332.E9FE78127@code0.codespeak.net> Author: scoder Date: Thu Oct 25 10:03:32 2007 New Revision: 47889 Modified: lxml/trunk/src/lxml/etreepublic.pxd Log: fixed public API description Modified: lxml/trunk/src/lxml/etreepublic.pxd ============================================================================== --- lxml/trunk/src/lxml/etreepublic.pxd (original) +++ lxml/trunk/src/lxml/etreepublic.pxd Thu Oct 25 
10:03:32 2007 @@ -133,15 +133,15 @@ # find child element number 'index' (supports negative indexes) cdef tree.xmlNode* findChild(tree.xmlNode* c_node, - python.Py_ssize_t index) + Py_ssize_t index) # find child element number 'index' starting at first one cdef tree.xmlNode* findChildForwards(tree.xmlNode* c_node, - python.Py_ssize_t index) + Py_ssize_t index) # find child element number 'index' starting at last one cdef tree.xmlNode* findChildBackwards(tree.xmlNode* c_node, - python.Py_ssize_t index) + Py_ssize_t index) # return next/previous sibling element of the node cdef tree.xmlNode* nextElement(tree.xmlNode* c_node) From scoder at codespeak.net Thu Oct 25 10:04:17 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 25 Oct 2007 10:04:17 +0200 (CEST) Subject: [Lxml-checkins] r47890 - lxml/trunk/src/lxml Message-ID: <20071025080417.4A8B48127@code0.codespeak.net> Author: scoder Date: Thu Oct 25 10:04:15 2007 New Revision: 47890 Modified: lxml/trunk/src/lxml/classlookup.pxi Log: forgotten export Modified: lxml/trunk/src/lxml/classlookup.pxi ============================================================================== --- lxml/trunk/src/lxml/classlookup.pxi (original) +++ lxml/trunk/src/lxml/classlookup.pxi Thu Oct 25 10:04:15 2007 @@ -48,7 +48,7 @@ ################################################################################ # Element class lookup -ctypedef object (*_element_class_lookup_function)(object, _Document, xmlNode*) +ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*) # class to store element class lookup functions cdef public class ElementClassLookup [ type LxmlElementClassLookupType, From scoder at codespeak.net Thu Oct 25 10:04:58 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 25 Oct 2007 10:04:58 +0200 (CEST) Subject: [Lxml-checkins] r47891 - lxml/trunk/src/lxml Message-ID: <20071025080458.112E38127@code0.codespeak.net> Author: scoder Date: Thu Oct 25 10:04:57 2007 New 
Revision: 47891 Modified: lxml/trunk/src/lxml/proxy.pxi Log: prevent exceptions from being dropped Modified: lxml/trunk/src/lxml/proxy.pxi ============================================================================== --- lxml/trunk/src/lxml/proxy.pxi (original) +++ lxml/trunk/src/lxml/proxy.pxi Thu Oct 25 10:04:57 2007 @@ -181,7 +181,7 @@ c_new_ns = c_new_ns.next c_parent = c_parent.parent -cdef void moveNodeToDocument(_Document doc, xmlNode* c_element): +cdef int moveNodeToDocument(_Document doc, xmlNode* c_element) except -1: """Fix the xmlNs pointers of a node and its subtree that were moved. Mainly copied from libxml2's xmlReconciliateNs(). Expects libxml2 doc @@ -200,7 +200,7 @@ cdef cstd.size_t i, c_cache_size, c_cache_last if not tree._isElementOrXInclude(c_element): - return + return 0 c_doc = c_element.doc c_start_node = c_element @@ -347,3 +347,5 @@ python.PyMem_Free(c_ns_new_cache) if c_ns_old_cache is not NULL: python.PyMem_Free(c_ns_old_cache) + + return 0 From scoder at codespeak.net Thu Oct 25 10:08:00 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 25 Oct 2007 10:08:00 +0200 (CEST) Subject: [Lxml-checkins] r47893 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20071025080800.E75308129@code0.codespeak.net> Author: scoder Date: Thu Oct 25 10:08:00 2007 New Revision: 47893 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_etree.py Log: replaced __getslice__() etc. 
by __*item__() equivalents and a custom slicing implementation Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Oct 25 10:08:00 2007 @@ -8,6 +8,8 @@ Features added -------------- +* Extended slicing of Elements as in ``element[1:-1:2]`` + * Resolvers can now provide a ``base_url`` keyword argument when resolving a document as string data. Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Thu Oct 25 10:08:00 2007 @@ -473,6 +473,60 @@ c_node = c_node.next return count +cdef int _findChildSlice( + python.slice sliceobject, xmlNode* c_parent, + xmlNode** c_start_node, Py_ssize_t* c_step, Py_ssize_t* c_length) except -1: + """Resolve a children slice. + + Returns the start node, step size and the slice length in the + pointer arguments. + """ + cdef Py_ssize_t start, stop, childcount + childcount = _countElements(c_parent.children) + if childcount == 0: + c_start_node[0] = NULL + c_length[0] = 0 + if sliceobject.step is None: + c_step[0] = 1 + else: + python._PyEval_SliceIndex(sliceobject.step, c_step) + return 0 + python.PySlice_GetIndicesEx( + sliceobject, childcount, &start, &stop, c_step, c_length) + if start > childcount / 2: + c_start_node[0] = _findChildBackwards(c_parent, childcount - start - 1) + else: + c_start_node[0] = _findChild(c_parent, start) + return 0 + +cdef bint _isFullSlice(python.slice sliceobject): + """Conservative guess if this slice is a full slice as in ``s[:]``. 
+ """ + cdef Py_ssize_t step + if sliceobject is None: + return 0 + if sliceobject.start is None and \ + sliceobject.stop is None: + if sliceobject.step is None: + return 1 + python._PyEval_SliceIndex(sliceobject.step, &step) + if step == 1: + return 1 + return 0 + return 0 + +cdef _collectChildren(_Element element): + cdef xmlNode* c_node + result = [] + c_node = element._c_node.children + if c_node is not NULL: + if not _isElement(c_node): + c_node = _nextElement(c_node) + while c_node is not NULL: + python.PyList_Append(result, _elementFactory(element._doc, c_node)) + c_node = _nextElement(c_node) + return result + cdef xmlNode* _findChild(xmlNode* c_node, Py_ssize_t index): if index < 0: return _findChildBackwards(c_node, -index - 1) @@ -530,6 +584,8 @@ cdef xmlNode* _nextElement(xmlNode* c_node): """Given a node, find the next sibling that is an element. """ + if c_node is NULL: + return NULL c_node = c_node.next while c_node is not NULL: if _isElement(c_node): @@ -540,6 +596,8 @@ cdef xmlNode* _previousElement(xmlNode* c_node): """Given a node, find the next sibling that is an element. """ + if c_node is NULL: + return NULL c_node = c_node.prev while c_node is not NULL: if _isElement(c_node): @@ -599,7 +657,7 @@ else: return 0 -cdef void _removeNode(_Document doc, xmlNode* c_node): +cdef int _removeNode(_Document doc, xmlNode* c_node) except -1: """Unlink and free a node and subnodes if possible. Otherwise, make sure it's self-contained. """ @@ -610,6 +668,7 @@ if not attemptDeallocation(c_node): # make namespaces absolute moveNodeToDocument(doc, c_node) + return 0 cdef void _moveTail(xmlNode* c_tail, xmlNode* c_target): cdef xmlNode* c_next @@ -637,27 +696,169 @@ c_target = c_new_tail c_tail = _textNodeOrSkip(c_tail.next) -cdef xmlNode* _deleteSlice(_Document doc, xmlNode* c_node, - Py_ssize_t start, Py_ssize_t stop): - """Delete slice, starting with c_node, start counting at start, end at stop. 
+cdef int _deleteSlice(_Document doc, xmlNode* c_node, + Py_ssize_t count, Py_ssize_t step) except -1: + """Delete slice, ``count`` items starting with ``c_node`` with a step + width of ``step``. """ cdef xmlNode* c_next - cdef Py_ssize_t c + cdef Py_ssize_t c, i + cdef _node_to_node_function next_element if c_node is NULL: - return NULL + return 0 + if step > 0: + next_element = _nextElement + else: + step = -step + next_element = _previousElement # now start deleting nodes - c = start - while c_node is not NULL and c < stop: - c_next = c_node.next - if _isElement(c_node): - while c_next is not NULL and not _isElement(c_next): - c_next = c_next.next - _removeNode(doc, c_node) - c = c + 1 + c = 0 + c_next = c_node + while c_node is not NULL and c < count: + for i from 0 <= i < step: + c_next = next_element(c_next) + _removeNode(doc, c_node) + c = c + 1 c_node = c_next - return c_node + return 0 + +cdef int _replaceSlice(_Element parent, xmlNode* c_node, + Py_ssize_t slicelength, Py_ssize_t step, + bint left_to_right, elements) except -1: + """Replace the slice of ``count`` elements starting at ``c_node`` with + positive step width ``step`` by the Elements in ``elements``. The + direction is given by the boolean argument ``left_to_right``. + + ``c_node`` may be NULL to indicate the end of the children list. + """ + cdef xmlNode* c_orig_neighbour + cdef xmlNode* c_next + cdef _Element element + cdef Py_ssize_t seqlength, i, c + cdef _node_to_node_function next_element + assert step > 0 + if left_to_right: + next_element = _nextElement + else: + next_element = _previousElement + + if not python.PyList_Check(elements) and \ + not python.PyTuple_Check(elements): + elements = list(elements) + + if step > 1: + # *replacing* children stepwise with list => check size! 
+ seqlength = len(elements) + if seqlength != slicelength: + raise ValueError( + "attempt to assign sequence of size %d " + "to extended slice of size %d" % (seqlength, c)) + + if c_node is NULL: + # no children yet => add all elements straight away + if left_to_right: + for element in elements: + assert element is not None, "Node must not be None" + _appendChild(parent, element) + else: + for element in elements: + assert element is not None, "Node must not be None" + _prependChild(parent, element) + return 0 + + # remove the elements first as some might be re-added + if left_to_right: + # L->R, remember left neighbour + c_orig_neighbour = _previousElement(c_node) + else: + # R->L, remember right neighbour + c_orig_neighbour = _nextElement(c_node) + + c = 0 + c_next = c_node + while c_node is not NULL and c < slicelength: + for i from 0 <= i < step: + c_next = next_element(c_next) + _removeNode(parent._doc, c_node) + c = c + 1 + c_node = c_next + + # make sure each element is inserted only once + elements = iter(elements) + + # find the first node right of the new insertion point + if left_to_right: + if c_orig_neighbour is not NULL: + c_node = next_element(c_orig_neighbour) + else: + # before the first element + c_node = _findChildForwards(parent._c_node, 0) + elif c_orig_neighbour is NULL: + # at the end, but reversed stepping + # append one element and go to the next insertion point + for element in elements: + assert element is not None, "Node must not be None" + _appendChild(parent, element) + c_node = element._c_node + if slicelength > 0: + slicelength = slicelength - 1 + for i from 1 <= i < step: + c_node = next_element(c_node) + break + + if left_to_right: + # adjust step size after removing slice as we are not stepping + # over the newly inserted elements + step = step - 1 + + # now insert elements where we removed them + if c_node is not NULL: + for element in elements: + assert element is not None, "Node must not be None" + + # move element and tail 
over + c_next = element._c_node.next + tree.xmlAddPrevSibling(c_node, element._c_node) + _moveTail(c_next, element._c_node) + + # integrate element into new document + moveNodeToDocument(parent._doc, element._c_node) + + # stop at the end of the slice + if slicelength > 0: + slicelength = slicelength - 1 + for i from 0 <= i < step: + c_node = next_element(c_node) + if c_node is NULL: + break + else: + # everything inserted + return 0 + + # append the remaining elements at the respective end + if left_to_right: + for element in elements: + _appendChild(parent, element) + else: + for element in elements: + _prependChild(parent, element) + + return 0 + +cdef _fillUpChildrenSlice(_Element sibling, elements, bint append_right): + cdef _Element element + if append_right: + for element in elements: + assert element is not None, "Node must not be None" + _appendSibling(sibling, element) + sibling = element + else: + for element in elements: + assert element is not None, "Node must not be None" + _prependSibling(sibling, element) + sibling = element -cdef void _appendChild(_Element parent, _Element child): +cdef int _appendChild(_Element parent, _Element child) except -1: """Append a new child to a parent element. """ cdef xmlNode* c_next @@ -665,15 +866,36 @@ c_node = child._c_node # store possible text node c_next = c_node.next - tree.xmlUnlinkNode(c_node) # move node itself + tree.xmlUnlinkNode(c_node) tree.xmlAddChild(parent._c_node, c_node) _moveTail(c_next, c_node) # uh oh, elements may be pointing to different doc when # parent element has moved; change them too.. moveNodeToDocument(parent._doc, c_node) -cdef void _appendSibling(_Element element, _Element sibling): +cdef int _prependChild(_Element parent, _Element child) except -1: + """Prepend a new child to a parent element. 
+ """ + cdef xmlNode* c_next + cdef xmlNode* c_child + cdef xmlNode* c_node + c_node = child._c_node + # store possible text node + c_next = c_node.next + # move node itself + c_child = _findChildForwards(parent._c_node, 0) + if c_child is NULL: + tree.xmlUnlinkNode(c_node) + tree.xmlAddChild(parent._c_node, c_node) + else: + tree.xmlAddPrevSibling(c_child, c_node) + _moveTail(c_next, c_node) + # uh oh, elements may be pointing to different doc when + # parent element has moved; change them too.. + moveNodeToDocument(parent._doc, c_node) + +cdef int _appendSibling(_Element element, _Element sibling) except -1: """Append a new child to a parent element. """ cdef xmlNode* c_next @@ -681,7 +903,6 @@ c_node = sibling._c_node # store possible text node c_next = c_node.next - tree.xmlUnlinkNode(c_node) # move node itself tree.xmlAddNextSibling(element._c_node, c_node) _moveTail(c_next, c_node) @@ -689,7 +910,7 @@ # parent element has moved; change them too.. moveNodeToDocument(element._doc, c_node) -cdef void _prependSibling(_Element element, _Element sibling): +cdef int _prependSibling(_Element element, _Element sibling) except -1: """Append a new child to a parent element. """ cdef xmlNode* c_next @@ -697,7 +918,6 @@ c_node = sibling._c_node # store possible text node c_next = c_node.next - tree.xmlUnlinkNode(c_node) # move node itself tree.xmlAddPrevSibling(element._c_node, c_node) _moveTail(c_next, c_node) Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Thu Oct 25 10:08:00 2007 @@ -225,6 +225,8 @@ # forward declaration of _BaseParser, see parser.pxi cdef class _BaseParser +ctypedef public xmlNode* (*_node_to_node_function)(xmlNode*) + cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: """Internal base class to reference a libxml document. 
@@ -495,71 +497,67 @@ # MANIPULATORS - def __setitem__(self, Py_ssize_t index, _Element element not None): - """Replaces the given subelement. - """ - cdef xmlNode* c_node - cdef xmlNode* c_next - c_node = _findChild(self._c_node, index) - if c_node is NULL: - raise IndexError, index - c_next = element._c_node.next - _removeText(c_node.next) - tree.xmlReplaceNode(c_node, element._c_node) - _moveTail(c_next, element._c_node) - moveNodeToDocument(self._doc, element._c_node) - if not attemptDeallocation(c_node): - moveNodeToDocument(self._doc, c_node) - - def __delitem__(self, Py_ssize_t index): - """Deletes the given subelement. - """ - cdef xmlNode* c_node - c_node = _findChild(self._c_node, index) - if c_node is NULL: - raise IndexError, index - _removeText(c_node.next) - _removeNode(self._doc, c_node) - - def __delslice__(self, Py_ssize_t start, Py_ssize_t stop): - """Deletes a number of subelements. - """ - cdef xmlNode* c_node - c_node = _findChild(self._c_node, start) - _deleteSlice(self._doc, c_node, start, stop) - - def __setslice__(self, Py_ssize_t start, Py_ssize_t stop, value): - """Replaces a number of subelements with elements - from a sequence. + def __setitem__(self, x, value): + """Replaces the given subelement index or slice. 
""" cdef xmlNode* c_node cdef xmlNode* c_next cdef _Element element - # first, find start of slice - if start == python.PY_SSIZE_T_MAX: - c_node = NULL - else: - c_node = _findChild(self._c_node, start) - # now delete the slice - if c_node is not NULL and start != stop: - c_node = _deleteSlice(self._doc, c_node, start, stop) - # if the insertion point is at the end, append there - if c_node is NULL: - for element in value: - _appendChild(self, element) + cdef bint left_to_right + cdef Py_ssize_t slicelength, step + if value is None: + raise ValueError("cannot assign None") + if python.PySlice_Check(x): + # slice assignment + _findChildSlice(x, self._c_node, &c_node, &step, &slicelength) + if step > 0: + left_to_right = 1 + else: + left_to_right = 0 + step = -step + _replaceSlice(self, c_node, slicelength, step, left_to_right, value) return - # if the next element is in the list, insert before it - for element in value: - if element is None: - raise TypeError, "Node must not be None." - # store possible text tail + else: + # otherwise: normal item assignment + element = value + c_node = _findChild(self._c_node, x) + if c_node is NULL: + raise IndexError, "list index out of range" c_next = element._c_node.next - # now move node previous to insertion point - tree.xmlAddPrevSibling(c_node, element._c_node) - # and move tail just behind his node + _removeText(c_node.next) + tree.xmlReplaceNode(c_node, element._c_node) _moveTail(c_next, element._c_node) - # move it into a new document moveNodeToDocument(self._doc, element._c_node) + if not attemptDeallocation(c_node): + moveNodeToDocument(self._doc, c_node) + + def __delitem__(self, x): + """Deletes the given subelement or a slice. 
+ """ + cdef xmlNode* c_node + cdef xmlNode* c_next + cdef Py_ssize_t index, step, slicelength + if python.PySlice_Check(x): + # slice deletion + if _isFullSlice(x): + c_node = self._c_node.children + if c_node is not NULL: + if not _isElement(c_node): + c_node = _nextElement(c_node) + while c_node is not NULL: + c_next = _nextElement(c_node) + _removeNode(self._doc, c_node) + c_node = c_next + else: + _findChildSlice(x, self._c_node, &c_node, &step, &slicelength) + _deleteSlice(self._doc, c_node, slicelength, step) + else: + # item deletion + c_node = _findChild(self._c_node, x) + if c_node is NULL: + raise IndexError, index + _removeText(c_node.next) + _removeNode(self._doc, c_node) def __deepcopy__(self, memo): return self.__copy__() @@ -648,14 +646,14 @@ c_attr = c_attr_next # remove all subelements c_node = c_node.children - while c_node is not NULL: - c_node_next = c_node.next - if _isElement(c_node): - while c_node_next is not NULL and not _isElement(c_node_next): - c_node_next = c_node_next.next + if c_node is not NULL: + if not _isElement(c_node): + c_node = _nextElement(c_node) + while c_node is not NULL: + c_node_next = _nextElement(c_node) _removeNode(self._doc, c_node) - c_node = c_node_next - + c_node = c_node_next + def insert(self, index, _Element element not None): """Inserts a subelement at the given position in this element """ @@ -820,49 +818,46 @@ def __repr__(self): return "" % (self.tag, id(self)) - def __getitem__(self, Py_ssize_t index): - """Returns the subelement at the given position. - """ - cdef xmlNode* c_node - c_node = _findChild(self._c_node, index) - if c_node is NULL: - raise IndexError, "list index out of range" - return _elementFactory(self._doc, c_node) - - def __getslice__(self, Py_ssize_t start, Py_ssize_t stop): - """Returns a list containing subelements in the given range. 
- """ - cdef xmlNode* c_node - cdef _Document doc - cdef Py_ssize_t c - # this does not work for negative start, stop, however, - # python seems to convert these to positive start, stop before - # calling, so this all works perfectly (at the cost of a len() call) - c_node = _findChild(self._c_node, start) - if c_node is NULL: - return [] - c = start - result = [] - while c_node is not NULL and c < stop: - if _isElement(c_node): + def __getitem__(self, x): + """Returns the subelement at the given position or the requested + slice. + """ + cdef xmlNode* c_node + cdef Py_ssize_t step, slicelength + cdef Py_ssize_t c, i + cdef _node_to_node_function next_element + if python.PySlice_Check(x): + # slicing + if _isFullSlice(x): + return _collectChildren(self) + _findChildSlice(x, self._c_node, &c_node, &step, &slicelength) + if c_node is NULL: + return [] + if step > 0: + next_element = _nextElement + else: + step = -step + next_element = _previousElement + result = [] + c = 0 + while c_node is not NULL and c < slicelength: python.PyList_Append( result, _elementFactory(self._doc, c_node)) c = c + 1 - c_node = c_node.next - return result + for i from 0 <= i < step: + c_node = next_element(c_node) + return result + else: + # indexing + c_node = _findChild(self._c_node, x) + if c_node is NULL: + raise IndexError, "list index out of range" + return _elementFactory(self._doc, c_node) def __len__(self): """Returns the number of subelements. """ - cdef Py_ssize_t c - cdef xmlNode* c_node - c = 0 - c_node = self._c_node.children - while c_node is not NULL: - if _isElement(c_node): - c = c + 1 - c_node = c_node.next - return c + return _countElements(self._c_node.children) def __nonzero__(self): import warnings @@ -989,15 +984,7 @@ Note that this method has been deprecated as of ElementTree 1.3. New code should use ``list(element)`` or simply iterate over elements. 
""" - cdef xmlNode* c_node - result = [] - c_node = self._c_node.children - while c_node is not NULL: - if _isElement(c_node): - python.PyList_Append( - result, _elementFactory(self._doc, c_node)) - c_node = c_node.next - return result + return _collectChildren(self) def getparent(self): """Returns the parent of this element or None for the root element. @@ -1205,7 +1192,7 @@ cdef class __ContentOnlyElement(_Element): cdef int _raiseImmutable(self) except -1: - raise TypeError, "this element does not have children or attributes" + raise TypeError("this element does not have children or attributes") def set(self, key, value): self._raiseImmutable() @@ -1244,8 +1231,11 @@ tree.xmlNodeSetContent(self._c_node, c_text) # ACCESSORS - def __getitem__(self, n): - raise IndexError + def __getitem__(self, x): + if python.PySlice_Check(x): + return [] + else: + raise IndexError("list index out of range") def __len__(self): return 0 @@ -1769,8 +1759,6 @@ return attribs -ctypedef xmlNode* (*_node_to_node_function)(xmlNode*) - cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, type LxmlElementTagMatcherType ]: cdef object _pystrings Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Thu Oct 25 10:08:00 2007 @@ -4,7 +4,6 @@ ctypedef struct PyObject ctypedef struct PyThreadState ctypedef int size_t - ctypedef int Py_ssize_t cdef int INT_MAX cdef int PY_SSIZE_T_MAX @@ -12,6 +11,11 @@ cdef void Py_DECREF(object o) cdef void Py_XDECREF(PyObject* o) + ctypedef class __builtin__.slice [object PySliceObject]: + cdef object start + cdef object stop + cdef object step + cdef FILE* PyFile_AsFile(object p) cdef int PyFile_Check(object p) cdef object PyFile_Name(object p) @@ -73,6 +77,11 @@ cdef bint PyTuple_CheckExact(object instance) cdef bint PySlice_Check(object instance) + cdef int _PyEval_SliceIndex(object value, 
Py_ssize_t* index) except 0 + cdef int PySlice_GetIndicesEx(object slice, Py_ssize_t length, + Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step, + Py_ssize_t *slicelength) except -1 + cdef int PyObject_SetAttr(object o, object name, object value) cdef object PyObject_RichCompare(object o1, object o2, int op) cdef int PyObject_RichCompareBool(object o1, object o2, int op) Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Thu Oct 25 10:08:00 2007 @@ -1244,16 +1244,6 @@ self.assertXML('', b) self.assertXML('', c) - def test_delslice_tail(self): - XML = self.etree.XML - a = XML('B2C2') - b, c = a - - del a[:] - - self.assertEquals("B2", b.tail) - self.assertEquals("C2", c.tail) - def test_replace_slice_tail(self): XML = self.etree.XML a = XML('B2C2') @@ -1718,6 +1708,32 @@ [b, c], a[-3:2]) + def test_getslice_step(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + e = SubElement(a, 'e') + + self.assertEquals( + [e,d,c,b], + a[::-1]) + self.assertEquals( + [b,d], + a[::2]) + self.assertEquals( + [e,c], + a[::-2]) + self.assertEquals( + [d,c], + a[-2:0:-1]) + self.assertEquals( + [e], + a[:1:-2]) + def test_getslice_text(self): ElementTree = self.etree.ElementTree @@ -1805,7 +1821,52 @@ [b, e], list(a)) - def test_delslice_tail(self): + def test_delslice_step(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + e = SubElement(a, 'e') + + del a[1::2] + self.assertEquals( + [b, d], + list(a)) + + def test_delslice_step_negative(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = 
SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + e = SubElement(a, 'e') + + del a[::-1] + self.assertEquals( + [], + list(a)) + + def test_delslice_step_negative2(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + e = SubElement(a, 'e') + + del a[::-2] + self.assertEquals( + [b, d], + list(a)) + + def test_delslice_child_tail(self): ElementTree = self.etree.ElementTree f = StringIO('B2C2D2E2') doc = ElementTree(file=f) @@ -1815,6 +1876,16 @@ 'B2E2', a) + def test_delslice_tail(self): + XML = self.etree.XML + a = XML('B2C2') + b, c = a + + del a[:] + + self.assertEquals("B2", b.tail) + self.assertEquals("C2", c.tail) + def test_delslice_memory(self): # this could trigger a crash Element = self.etree.Element @@ -1845,6 +1916,118 @@ [b, e, f, g, d], list(a)) + def test_setslice_all(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + + e = Element('e') + f = Element('f') + g = Element('g') + + s = [e, f, g] + a[:] = s + self.assertEquals( + [e, f, g], + list(a)) + + def test_setslice_all_empty(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + + e = Element('e') + f = Element('f') + g = Element('g') + + s = [e, f, g] + a[:] = s + self.assertEquals( + [e, f, g], + list(a)) + + def test_setslice_all_replace(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + + s = [b, c, d] + a[:] = s + self.assertEquals( + [b, c, d], + list(a)) + + def test_setslice_all_replace_reversed(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + + s = [d, c, b] + a[:] = s + 
self.assertEquals( + [d, c, b], + list(a)) + + def test_setslice_end(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + + e = Element('e') + f = Element('f') + g = Element('g') + h = Element('h') + + s = [e, f] + a[99:] = s + self.assertEquals( + [a, b, e, f], + list(a)) + + s = [g, h] + a[:0] = s + self.assertEquals( + [g, h, a, b, e, f], + list(a)) + + def test_setslice_single(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + + e = Element('e') + f = Element('f') + + s = [e] + a[0:1] = s + self.assertEquals( + [e, c], + list(a)) + + s = [f] + a[1:2] = s + self.assertEquals( + [e, f], + list(a)) + def test_setslice_tail(self): ElementTree = self.etree.ElementTree Element = self.etree.Element @@ -1861,7 +2044,7 @@ self.assertXML( 'B2X2Y2Z2E2', a) - + def test_setslice_negative(self): Element = self.etree.Element SubElement = self.etree.SubElement @@ -1879,6 +2062,23 @@ [b, x, y, d], list(a)) + def test_setslice_negative2(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + + x = Element('x') + y = Element('y') + + a[1:-2] = [x, y] + self.assertEquals( + [b, x, y, c, d], + list(a)) + def test_setslice_end(self): Element = self.etree.Element SubElement = self.etree.SubElement Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Thu Oct 25 10:08:00 2007 @@ -8,7 +8,7 @@ """ -import unittest, copy, sys +import unittest, copy, sys, operator from common_imports import etree, StringIO, HelperTestCase, fileInTestDir from common_imports import SillyFileLike, canonicalize, doctest @@ -1572,6 
+1572,103 @@ self.assertEquals( child1, e[1]) + def test_setslice_all_empty_reversed(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + + e = Element('e') + f = Element('f') + g = Element('g') + + s = [e, f, g] + a[::-1] = s + self.assertEquals( + [g, f, e], + list(a)) + + def test_setslice_step(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + e = SubElement(a, 'e') + + x = Element('x') + y = Element('y') + + a[1::2] = [x, y] + self.assertEquals( + [b, x, d, y], + list(a)) + + def test_setslice_step_negative(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + e = SubElement(a, 'e') + + x = Element('x') + y = Element('y') + + a[1::-1] = [x, y] + self.assertEquals( + [y, x, d, e], + list(a)) + + def test_setslice_step_negative2(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + e = SubElement(a, 'e') + + x = Element('x') + y = Element('y') + + a[::-2] = [x, y] + self.assertEquals( + [b, y, d, x], + list(a)) + + def test_setslice_step_overrun(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + try: + slice + except NameError: + print "slice() not found" + return + + a = Element('a') + b = SubElement(a, 'b') + c = SubElement(a, 'c') + d = SubElement(a, 'd') + e = SubElement(a, 'e') + + x = Element('x') + y = Element('y') + z = Element('z') + + self.assertRaises( + ValueError, + operator.setitem, a, slice(1,None,2), [x, y, z]) + + self.assertEquals( + [b, c, d, e], + list(a)) + def test_extend(self): etree = self.etree root = etree.Element('foo') From scoder at codespeak.net Thu Oct 25 10:14:06 2007 From: scoder at codespeak.net (scoder at 
codespeak.net) Date: Thu, 25 Oct 2007 10:14:06 +0200 (CEST) Subject: [Lxml-checkins] r47894 - lxml/trunk/src/lxml Message-ID: <20071025081406.86E948129@code0.codespeak.net> Author: scoder Date: Thu Oct 25 10:14:06 2007 New Revision: 47894 Modified: lxml/trunk/src/lxml/apihelpers.pxi Log: cleanup Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Thu Oct 25 10:14:06 2007 @@ -845,19 +845,6 @@ return 0 -cdef _fillUpChildrenSlice(_Element sibling, elements, bint append_right): - cdef _Element element - if append_right: - for element in elements: - assert element is not None, "Node must not be None" - _appendSibling(sibling, element) - sibling = element - else: - for element in elements: - assert element is not None, "Node must not be None" - _prependSibling(sibling, element) - sibling = element - cdef int _appendChild(_Element parent, _Element child) except -1: """Append a new child to a parent element. 
""" From scoder at codespeak.net Thu Oct 25 11:10:31 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 25 Oct 2007 11:10:31 +0200 (CEST) Subject: [Lxml-checkins] r47900 - lxml/trunk/src/lxml Message-ID: <20071025091031.99D5F812E@code0.codespeak.net> Author: scoder Date: Thu Oct 25 11:10:31 2007 New Revision: 47900 Modified: lxml/trunk/src/lxml/schematron.pxi Log: raise memory error in schematron where appropriate Modified: lxml/trunk/src/lxml/schematron.pxi ============================================================================== --- lxml/trunk/src/lxml/schematron.pxi (original) +++ lxml/trunk/src/lxml/schematron.pxi Thu Oct 25 11:10:31 2007 @@ -101,9 +101,8 @@ raise SchematronParseError, "No tree or file given" if parser_ctxt is NULL: - if c_doc is not NULL: - tree.xmlFreeDoc(c_doc) - raise SchematronParseError, "Document is not parsable as Schematron" + python.PyErr_NoMemory() + self._c_schema = schematron.xmlSchematronParse(parser_ctxt) schematron.xmlSchematronFreeParserCtxt(parser_ctxt) From ianb at codespeak.net Thu Oct 25 18:25:51 2007 From: ianb at codespeak.net (ianb at codespeak.net) Date: Thu, 25 Oct 2007 18:25:51 +0200 (CEST) Subject: [Lxml-checkins] r47957 - in lxml/trunk: . src/lxml/html Message-ID: <20071025162551.B722E813B@code0.codespeak.net> Author: ianb Date: Thu Oct 25 18:25:51 2007 New Revision: 47957 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/html/__init__.py Log: Added link parsing for the tag, which has some special rules, and the archive attribute Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Oct 25 18:25:51 2007 @@ -30,6 +30,10 @@ output, it will then be namespace-neutral (before the ellipsis was treated as a real namespace). +* In the ``lxml.html`` ``iter_links`` method, links in ```` + tags weren't recognized. 
(Note: plugin-specific link parameters + still aren't recognized.) + Other changes ------------- Modified: lxml/trunk/src/lxml/html/__init__.py ============================================================================== --- lxml/trunk/src/lxml/html/__init__.py (original) +++ lxml/trunk/src/lxml/html/__init__.py Thu Oct 25 18:25:51 2007 @@ -27,6 +27,7 @@ _css_url_re = re.compile(r'url\((.*?)\)', re.I) _css_import_re = re.compile(r'@import "(.*?)"') _label_xpath = etree.XPath("//label[@for=$id]") +_archive_re = re.compile(r'[^ ]+') class HtmlMixin(object): @@ -245,9 +246,39 @@ link_attrs = defs.link_attrs for el in self.getiterator(): attribs = el.attrib - for attrib in link_attrs: - if attrib in attribs: - yield (el, attrib, attribs[attrib], 0) + if el.tag != 'object': + for attrib in link_attrs: + if attrib in attribs: + yield (el, attrib, attribs[attrib], 0) + elif el.tag == 'object': + codebase = None + ## tags have attributes that are relative to + ## codebase + if 'codebase' in attribs: + codebase = el.get('codebase') + yield (el, 'codebase', codebase, 0) + for attrib in 'classid', 'data': + if attrib in attribs: + value = el.get(attrib) + if codebase is not None: + value = urlparse.urljoin(codebase, value) + yield (el, attrib, value, 0) + if 'archive' in attribs: + for match in _archive_re.finditer(el.get('archive')): + value = match.group(0) + if codebase is not None: + value = urlparse.urljoin(codebase, value) + yield (el, 'archive', value, match.start()) + if el.tag == 'param': + valuetype = el.get('valuetype') or '' + if valuetype.lower() == 'ref': + ## FIXME: while it's fine we *find* this link, + ## according to the spec we aren't supposed to + ## actually change the value, including resolving + ## it. 
It can also still be a link, even if it + ## doesn't have a valuetype="ref" (which seems to be the norm) + ## http://www.w3.org/TR/html401/struct/objects.html#adef-valuetype + yield (el, 'value', el.get('value'), 0) if el.tag == 'style' and el.text: for match in _css_url_re.finditer(el.text): yield (el, None, match.group(1), match.start(1)) From ianb at codespeak.net Thu Oct 25 18:26:49 2007 From: ianb at codespeak.net (ianb at codespeak.net) Date: Thu, 25 Oct 2007 18:26:49 +0200 (CEST) Subject: [Lxml-checkins] r47958 - in lxml/trunk: . src/lxml/html Message-ID: <20071025162649.4D2AB813B@code0.codespeak.net> Author: ianb Date: Thu Oct 25 18:26:48 2007 New Revision: 47958 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/html/defs.py Log: Include in tags Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Oct 25 18:26:48 2007 @@ -32,7 +32,9 @@ * In the ``lxml.html`` ``iter_links`` method, links in ```` tags weren't recognized. (Note: plugin-specific link parameters - still aren't recognized.) + still aren't recognized.) Also, the ```` tag, though not + standard, is now included in + ``lxml.html.defs.special_inline_tags``. Other changes ------------- Modified: lxml/trunk/src/lxml/html/defs.py ============================================================================== --- lxml/trunk/src/lxml/html/defs.py (original) +++ lxml/trunk/src/lxml/html/defs.py Thu Oct 25 18:26:48 2007 @@ -94,7 +94,7 @@ ] special_inline_tags = [ - 'a', 'applet', 'basefont', 'bdo', 'br', 'font', 'iframe', + 'a', 'applet', 'basefont', 'bdo', 'br', 'embed', 'font', 'iframe', 'img', 'map', 'area', 'object', 'param', 'q', 'script', 'span', 'sub', 'sup', ] From ianb at codespeak.net Thu Oct 25 18:31:49 2007 From: ianb at codespeak.net (ianb at codespeak.net) Date: Thu, 25 Oct 2007 18:31:49 +0200 (CEST) Subject: [Lxml-checkins] r47960 - in lxml/trunk: . 
doc src/lxml/html src/lxml/html/tests Message-ID: <20071025163149.5FCBE813B@code0.codespeak.net> Author: ianb Date: Thu Oct 25 18:31:48 2007 New Revision: 47960 Modified: lxml/trunk/CHANGES.txt lxml/trunk/doc/lxmlhtml.txt lxml/trunk/src/lxml/html/clean.py lxml/trunk/src/lxml/html/tests/test_clean.py lxml/trunk/src/lxml/html/tests/test_clean.txt Log: Added a host_whitelist option and some other opt-in options to lxml.html.clean Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Oct 25 18:31:48 2007 @@ -17,6 +17,10 @@ ``NOPARSE_MARKUP`` (like ``# doctest: +NOPARSE_MARKUP``) to suppress the special checking for one test. +* ``lxml.html.clean.Cleaner`` now allows for a ``host_whitelist``, and + two overridable methods: ``allow_embedded_url(el, url)`` and the + more general ``allow_element(el)``. + Bugs fixed ---------- Modified: lxml/trunk/doc/lxmlhtml.txt ============================================================================== --- lxml/trunk/doc/lxmlhtml.txt (original) +++ lxml/trunk/doc/lxmlhtml.txt Thu Oct 25 18:31:48 2007 @@ -531,6 +531,11 @@ +You can also whitelist some otherwise dangerous content with +``Cleaner(host_whitelist=['www.youtube.com'])``, which would allow +embedded media from YouTube, while still filtering out embedded media +from other sites. + See the docstring of ``Cleaner`` for the details of what can be cleaned. Modified: lxml/trunk/src/lxml/html/clean.py ============================================================================== --- lxml/trunk/src/lxml/html/clean.py (original) +++ lxml/trunk/src/lxml/html/clean.py Thu Oct 25 18:31:48 2007 @@ -1,4 +1,5 @@ import re +import urlparse from lxml import etree from lxml.html import defs from lxml.html import fromstring, tostring @@ -124,6 +125,25 @@ ``add_nofollow``: If true, then any tags will have ``rel="nofollow"`` added to them. 
+ ``host_whitelist``: + A list or set of hosts that you can use for embedded content + (for content like ````, ````, etc). + You can also implement/override the method + ``allow_embedded_url(el, url)`` or ``allow_element(el)`` to + implement more complex rules for what can be embedded. + Anything that passes this test will be shown, regardless of + the value of (for instance) ``embedded``. + + Note that this parameter might not work as intended if you do not + make the links absolute before doing the cleaning. + + ``whitelist_tags``: + A set of tags that can be included with ``host_whitelist``. + The default is ``iframe`` and ``embed``; you may wish to + include other tags like ``script``, or you may want to + implement ``allow_embedded_url`` for more control. Set to None to + include all tags. + This modifies the document *in place*. """ @@ -144,6 +164,8 @@ remove_unknown_tags = True safe_attrs_only = True add_nofollow = False + host_whitelist = () + whitelist_tags = set(['iframe', 'embed']) def __init__(self, **kw): for name, value in kw.items(): @@ -152,12 +174,34 @@ "Unknown parameter: %s=%r" % (name, value)) setattr(self, name, value) + # Used to lookup the primary URL for a given tag that is up for + # removal: + _tag_link_attrs = dict( + script='src', + link='href', + # From: http://java.sun.com/j2se/1.4.2/docs/guide/misc/applet.html + # From what I can tell, both attributes can contain a link: + applet=['code', 'object'], + iframe='src', + embed='src', + layer='src', + # FIXME: there doesn't really seem like a general way to figure out what + # links an tag uses; links often go in tags with values + # that we don't really know. You'd have to have knowledge about specific + # kinds of plugins (probably keyed off classid), and match against those. + ##object=?, + # FIXME: not looking at the action currently, because it is more complex + # than than -- if you keep the form, you should keep the form controls. 
+ ##form='action', + a='href', + ) + def __call__(self, doc): """ Cleans the document. """ if hasattr(doc, 'getroot'): - # ElementTree + # ElementTree instance, instead of an element doc = doc.getroot() # Normalize a case that IE treats like , and that # can confuse either this step or later steps. @@ -243,12 +287,22 @@ remove_tags.update(('head', 'html', 'title')) if self.embedded: # FIXME: is really embedded? - kill_tags.update(('applet', 'param')) + # We should get rid of any tags not inside ; + # These are not really valid anyway. + for el in list(doc.getiterator('param')): + found_parent = False + parent = el.getparent() + while parent is not None and parent.tag not in ('applet', 'object'): + parent = parent.getparent() + if parent is None: + el.drop_tree() + kill_tags.update(('applet',)) # The alternate contents that are in an iframe are a good fallback: - # FIXME: somehow embed seems to be getting data, but from what I - # can tell the embed tag is supposed to always be empty - remove_tags.update(('iframe', 'object', 'embed', 'layer')) + remove_tags.update(('iframe', 'embed', 'layer', 'object', 'param')) if self.frames: + # FIXME: ideally we should look at the frame links, but + # generally frames don't mix properly with an HTML + # fragment anyway. kill_tags.update(defs.frame_tags) if self.forms: remove_tags.add('form') @@ -260,8 +314,12 @@ _kill = [] for el in doc.getiterator(): if el.tag in kill_tags: + if self.allow_element(el): + continue _kill.append(el) elif el.tag in remove_tags: + if self.allow_element(el): + continue _remove.append(el) if _remove and _remove[0] == doc: @@ -298,7 +356,34 @@ el.drop_tag() if self.add_nofollow: for el in _find_external_links(doc): - el.set('rel', 'nofollow') + if not self.allow_follow(el): + el.set('rel', 'nofollow') + + def allow_follow(self, anchor): + """ + Override to suppress rel="nofollow" on some anchors. 
+ """ + return False + + def allow_element(self, el): + if el.tag not in self._tag_link_attrs: + return False + url = el.get(self._tag_link_attrs[el.tag]) + if not url: + return False + return self.allow_embedded_url(el, url) + + def allow_embedded_url(self, el, url): + if (self.whitelist_tags is not None + and el.tag not in self.whitelist_tags): + return False + scheme, netloc, path, query, fragment = urlparse.urlsplit(url) + netloc = netloc.lower().split(':', 1)[0] + if scheme not in ('http', 'https'): + return False + if netloc in self.host_whitelist: + return True + return False def kill_conditional_comments(self, doc): """ Modified: lxml/trunk/src/lxml/html/tests/test_clean.py ============================================================================== --- lxml/trunk/src/lxml/html/tests/test_clean.py (original) +++ lxml/trunk/src/lxml/html/tests/test_clean.py Thu Oct 25 18:31:48 2007 @@ -5,6 +5,3 @@ suite = unittest.TestSuite() suite.addTests([doctest.DocFileSuite('test_clean.txt')]) return suite - -if __name__ == '__main__': - unittest.main() Modified: lxml/trunk/src/lxml/html/tests/test_clean.txt ============================================================================== --- lxml/trunk/src/lxml/html/tests/test_clean.txt (original) +++ lxml/trunk/src/lxml/html/tests/test_clean.txt Thu Oct 25 18:31:48 2007 @@ -117,3 +117,29 @@ + +>>> doc_embed = '''
+... +... +... +... +...
''' +>>> print tostring(fromstring(doc_embed)) +
+ + + + +
+>>> print Cleaner().clean_html(doc_embed) +
+
+>>> print Cleaner(host_whitelist=['www.youtube.com']).clean_html(doc_embed) +
+ +
+>>> print Cleaner(host_whitelist=['www.youtube.com'], whitelist_tags=None).clean_html(doc_embed) +
+ + +
From scoder at codespeak.net Thu Oct 25 19:27:03 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 25 Oct 2007 19:27:03 +0200 (CEST) Subject: [Lxml-checkins] r47968 - in lxml/trunk: . src/lxml Message-ID: <20071025172703.D76A78137@code0.codespeak.net> Author: scoder Date: Thu Oct 25 19:27:03 2007 New Revision: 47968 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/lxml.objectify.pyx Log: extended slicing in objectify Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Oct 25 19:27:03 2007 @@ -8,7 +8,8 @@ Features added -------------- -* Extended slicing of Elements as in ``element[1:-1:2]`` +* Extended slicing of Elements as in ``element[1:-1:2]``, both in + etree and in objectify * Resolvers can now provide a ``base_url`` keyword argument when resolving a document as string data. Modified: lxml/trunk/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.objectify.pyx (original) +++ lxml/trunk/src/lxml/lxml.objectify.pyx Thu Oct 25 19:27:03 2007 @@ -150,28 +150,7 @@ def __len__(self): """Count self and siblings with the same tag. 
""" - cdef tree.xmlNode* c_self_node - cdef tree.xmlNode* c_node - cdef char* c_href - cdef char* c_tag - cdef Py_ssize_t count - c_self_node = self._c_node - c_tag = c_self_node.name - c_href = tree._getNs(c_self_node) - count = 1 - c_node = c_self_node.next - while c_node is not NULL: - if c_node.type == tree.XML_ELEMENT_NODE and \ - cetree.tagMatches(c_node, c_href, c_tag): - count = count + 1 - c_node = c_node.next - c_node = c_self_node.prev - while c_node is not NULL: - if c_node.type == tree.XML_ELEMENT_NODE and \ - cetree.tagMatches(c_node, c_href, c_tag): - count = count + 1 - c_node = c_node.prev - return count + return _countSiblings(self._c_node) def countchildren(self): """Return the number of children of this element, regardless of their @@ -253,12 +232,24 @@ * If argument is a string, does the same as getattr(). This can be used to provide namespaces for element lookup, or to look up children with special names (``text`` etc.). + + * If argument is a slice object, returns the matching slice. """ cdef tree.xmlNode* c_self_node cdef tree.xmlNode* c_parent cdef tree.xmlNode* c_node + cdef Py_ssize_t start, stop, step, slicelength if python._isString(key): return _lookupChildOrRaise(self, key) + elif python.PySlice_Check(key): + python.PySlice_GetIndicesEx( + key, _countSiblings(self._c_node), + &start, &stop, &step, &slicelength) + if step < 0: + return list(self)[start:stop:step] + else: + return list(islice(self, start, stop, step)) + # normal item access c_self_node = self._c_node c_parent = c_self_node.parent if c_parent is NULL: @@ -289,10 +280,12 @@ items to the siblings. 
""" cdef _Element element + cdef _Element parent cdef _Element new_element cdef tree.xmlNode* c_self_node cdef tree.xmlNode* c_parent cdef tree.xmlNode* c_node + cdef Py_ssize_t start, stop, step, slicelength if python._isString(key): key = _buildChildTag(self, key) element = _lookupChild(self, key) @@ -306,59 +299,73 @@ c_parent = c_self_node.parent if c_parent is NULL: # the 'root[i] = ...' case - raise TypeError, "index assignment to root element is invalid" - if key < 0: - c_node = c_parent.last - else: - c_node = c_parent.children - c_node = _findFollowingSibling( - c_node, tree._getNs(c_self_node), c_self_node.name, key) - if c_node is NULL: - raise IndexError, key - element = elementFactory(self._doc, c_node) - _replaceElement(element, value) + raise TypeError, "assignment to root element is invalid" - def __getslice__(self, Py_ssize_t start, Py_ssize_t end): - return list(islice(self, start, end)) + if python.PySlice_Check(key): + # slice assignment + python.PySlice_GetIndicesEx( + key, _countSiblings(self._c_node), + &start, &stop, &step, &slicelength) + # replace existing items + new_items = iter(value) + if step < 0: + del_items = list(self)[start:stop:step] + else: + del_items = list(islice(self, start, stop, step)) + del_items = iter(del_items) + parent = self.getparent() + try: + for el in del_items: + item = new_items.next() + _replaceElement(el, item) + except StopIteration: + remove = parent.remove + remove(el) + for el in del_items: + remove(el) + return + else: + # append remaining new items + tag = self.tag + for item in new_items: + _appendValue(parent, tag, item) + else: + # normal index assignment + if key < 0: + c_node = c_parent.last + else: + c_node = c_parent.children + c_node = _findFollowingSibling( + c_node, tree._getNs(c_self_node), c_self_node.name, key) + if c_node is NULL: + raise IndexError, key + element = elementFactory(self._doc, c_node) + _replaceElement(element, value) - def __setslice__(self, Py_ssize_t start, Py_ssize_t 
end, values): - cdef _Element el - parent = self.getparent() - if parent is None: - raise TypeError, "deleting slices of root element not supported" - # replace existing items - new_items = iter(values) - del_items = iter(list(islice(self, start, end))) - try: - for el in del_items: - item = new_items.next() - _replaceElement(el, item) - except StopIteration: + def __delitem__(self, key): + cdef Py_ssize_t start, stop, step, slicelength + if python.PySlice_Check(key): + # slice deletion + python.PySlice_GetIndicesEx( + key, _countSiblings(self._c_node), + &start, &stop, &step, &slicelength) + parent = self.getparent() + if parent is None: + raise TypeError, "deleting slices of root element not supported" + if step < 0: + del_items = list(self)[start:stop:step] + else: + del_items = list(islice(self, start, stop, step)) remove = parent.remove - remove(el) for el in del_items: remove(el) - return - - # append remaining new items - tag = self.tag - for item in new_items: - _appendValue(parent, tag, item) - - def __delslice__(self, Py_ssize_t start, Py_ssize_t end): - parent = self.getparent() - if parent is None: - raise TypeError, "deleting slices of root element not supported" - remove = parent.remove - for el in list(islice(self, start, end)): - remove(el) - - def __delitem__(self, key): - parent = self.getparent() - if parent is None: - raise TypeError, "deleting items not supported by root element" - sibling = self.__getitem__(key) - parent.remove(sibling) + else: + # normal index deletion + parent = self.getparent() + if parent is None: + raise TypeError, "deleting items not supported by root element" + sibling = self.__getitem__(key) + parent.remove(sibling) def iterfind(self, path): # Reimplementation of Element.iterfind() to make it work without child @@ -399,6 +406,28 @@ prefix = '.'.join(prefix) return _buildDescendantPaths(self._c_node, prefix) +cdef Py_ssize_t _countSiblings(tree.xmlNode* c_start_node): + cdef tree.xmlNode* c_node + cdef char* c_href + 
cdef char* c_tag + cdef Py_ssize_t count + c_tag = c_start_node.name + c_href = tree._getNs(c_start_node) + count = 1 + c_node = c_start_node.next + while c_node is not NULL: + if c_node.type == tree.XML_ELEMENT_NODE and \ + cetree.tagMatches(c_node, c_href, c_tag): + count = count + 1 + c_node = c_node.next + c_node = c_start_node.prev + while c_node is not NULL: + if c_node.type == tree.XML_ELEMENT_NODE and \ + cetree.tagMatches(c_node, c_href, c_tag): + count = count + 1 + c_node = c_node.prev + return count + cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node, char* href, char* name, Py_ssize_t index): @@ -460,7 +489,7 @@ element._doc, (<_Element>value)._c_node) new_element.tag = element.tag elif python.PyList_Check(value) or python.PyTuple_Check(value): - element.__setslice__(0, python.PY_SSIZE_T_MAX, value) + element[:] = value return else: new_element = element.makeelement(element.tag) From scoder at codespeak.net Thu Oct 25 19:32:50 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 25 Oct 2007 19:32:50 +0200 (CEST) Subject: [Lxml-checkins] r47969 - lxml/trunk Message-ID: <20071025173250.641608137@code0.codespeak.net> Author: scoder Date: Thu Oct 25 19:32:50 2007 New Revision: 47969 Modified: lxml/trunk/CHANGES.txt Log: changelog cleanup Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Oct 25 19:32:50 2007 @@ -8,6 +8,10 @@ Features added -------------- +* ``lxml.html.clean.Cleaner`` now allows for a ``host_whitelist``, and + two overridable methods: ``allow_embedded_url(el, url)`` and the + more general ``allow_element(el)``. + * Extended slicing of Elements as in ``element[1:-1:2]``, both in etree and in objectify @@ -18,29 +22,22 @@ ``NOPARSE_MARKUP`` (like ``# doctest: +NOPARSE_MARKUP``) to suppress the special checking for one test. 
-* ``lxml.html.clean.Cleaner`` now allows for a ``host_whitelist``, and - two overridable methods: ``allow_embedded_url(el, url)`` and the - more general ``allow_element(el)``. - Bugs fixed ---------- +* In the ``lxml.html`` ``iter_links`` method, links in ``<object>`` + tags weren't recognized. (Note: plugin-specific link parameters + still aren't recognized.) Also, the ``<embed>`` tag, though not + standard, is now included in + ``lxml.html.defs.special_inline_tags``. + * Using custom resolvers on XSLT stylesheets parsed from a string could request ill-formed URLs. -* lxml.etree could crash when adding more than 10000 namespaces to a - document - * With ``lxml.doctestcompare`` if you do ```` in your output, it will then be namespace-neutral (before the ellipsis was treated as a real namespace). -* In the ``lxml.html`` ``iter_links`` method, links in ``<object>`` - tags weren't recognized. (Note: plugin-specific link parameters - still aren't recognized.) Also, the ``<embed>`` tag, though not - standard, is now included in - ``lxml.html.defs.special_inline_tags``. - Other changes ------------- @@ -51,6 +48,22 @@ the package name of their module.
+1.3.5 (2007-10-22) +================== + +Features added +-------------- + +Bugs fixed +---------- + +* lxml.etree could crash when adding more than 10000 namespaces to a + document + +* lxml failed to serialise namespace declarations of elements other + than the root node of a tree + + 2.0alpha4 (2007-10-07) ====================== From scoder at codespeak.net Thu Oct 25 19:44:39 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 25 Oct 2007 19:44:39 +0200 (CEST) Subject: [Lxml-checkins] r47971 - lxml/trunk Message-ID: <20071025174439.16313813A@code0.codespeak.net> Author: scoder Date: Thu Oct 25 19:44:39 2007 New Revision: 47971 Modified: lxml/trunk/CHANGES.txt Log: changelog cleanup Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Oct 25 19:44:39 2007 @@ -28,8 +28,7 @@ * In the ``lxml.html`` ``iter_links`` method, links in ``<object>`` tags weren't recognized. (Note: plugin-specific link parameters still aren't recognized.) Also, the ``<embed>`` tag, though not - standard, is now included in - ``lxml.html.defs.special_inline_tags``. + standard, is now included in ``lxml.html.defs.special_inline_tags``. * Using custom resolvers on XSLT stylesheets parsed from a string could request ill-formed URLs. 
From scoder at codespeak.net Thu Oct 25 20:51:18 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 25 Oct 2007 20:51:18 +0200 (CEST) Subject: [Lxml-checkins] r47984 - lxml/trunk/doc Message-ID: <20071025185118.E6A9280D0@code0.codespeak.net> Author: scoder Date: Thu Oct 25 20:51:18 2007 New Revision: 47984 Modified: lxml/trunk/doc/build.txt Log: require 0.9.6.7 Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Thu Oct 25 20:51:18 2007 @@ -33,11 +33,11 @@ be an lxml developer, you do need a working Cython installation. You can use EasyInstall_ to install it:: - easy_install Cython==0.9.6.6 + easy_install Cython==0.9.6.7 .. _EasyInstall: http://peak.telecommunity.com/DevCenter/EasyInstall -lxml currently requires Cython 0.9.6.6, but it should work with later +lxml currently requires Cython 0.9.6.7, but it should work with later versions. 
From scoder at codespeak.net Thu Oct 25 20:52:00 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 25 Oct 2007 20:52:00 +0200 (CEST) Subject: [Lxml-checkins] r47985 - lxml/trunk Message-ID: <20071025185200.59C8B811E@code0.codespeak.net> Author: scoder Date: Thu Oct 25 20:52:00 2007 New Revision: 47985 Modified: lxml/trunk/version.txt Log: version Modified: lxml/trunk/version.txt ============================================================================== --- lxml/trunk/version.txt (original) +++ lxml/trunk/version.txt Thu Oct 25 20:52:00 2007 @@ -1 +1 @@ -2.0alpha4 +2.0alpha5 From scoder at codespeak.net Fri Oct 26 11:35:34 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 26 Oct 2007 11:35:34 +0200 (CEST) Subject: [Lxml-checkins] r48023 - lxml/branch/lxml-1.3/doc Message-ID: <20071026093534.ABE4280DE@code0.codespeak.net> Author: scoder Date: Fri Oct 26 11:35:34 2007 New Revision: 48023 Modified: lxml/branch/lxml-1.3/doc/FAQ.txt Log: stand-alone FAQ page title Modified: lxml/branch/lxml-1.3/doc/FAQ.txt ============================================================================== --- lxml/branch/lxml-1.3/doc/FAQ.txt (original) +++ lxml/branch/lxml-1.3/doc/FAQ.txt Fri Oct 26 11:35:34 2007 @@ -1,6 +1,6 @@ -================================ -Frequently Asked Questions (FAQ) -================================ +======================================= +lxml - Frequently Asked Questions (FAQ) +======================================= .. 
meta:: :description: Frequently Asked Questions about lxml (FAQ) From scoder at codespeak.net Fri Oct 26 11:35:42 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 26 Oct 2007 11:35:42 +0200 (CEST) Subject: [Lxml-checkins] r48024 - lxml/trunk/doc Message-ID: <20071026093542.3840580DE@code0.codespeak.net> Author: scoder Date: Fri Oct 26 11:35:41 2007 New Revision: 48024 Modified: lxml/trunk/doc/FAQ.txt Log: stand-alone FAQ page title Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Fri Oct 26 11:35:41 2007 @@ -1,6 +1,6 @@ -================================ -Frequently Asked Questions (FAQ) -================================ +======================================= +lxml - Frequently Asked Questions (FAQ) +======================================= .. meta:: :description: Frequently Asked Questions about lxml (FAQ) From scoder at codespeak.net Fri Oct 26 11:36:08 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 26 Oct 2007 11:36:08 +0200 (CEST) Subject: [Lxml-checkins] r48025 - lxml/trunk/doc Message-ID: <20071026093608.68DFA813B@code0.codespeak.net> Author: scoder Date: Fri Oct 26 11:36:08 2007 New Revision: 48025 Modified: lxml/trunk/doc/lxmlhtml.txt Log: mention ElementSoup in lxmlhtml.txt Modified: lxml/trunk/doc/lxmlhtml.txt ============================================================================== --- lxml/trunk/doc/lxmlhtml.txt (original) +++ lxml/trunk/doc/lxmlhtml.txt Fri Oct 26 11:36:08 2007 @@ -8,13 +8,24 @@ .. contents:: .. 
- 1 Running HTML doctests - 2 Parsing HTML - 2.1 Parsing HTML fragments - 3 Creating HTML with the E-factory - 4 Working with links - 5 Cleaning up HTML - + 1 Parsing HTML + 1.1 Parsing HTML fragments + 1.2 Really broken pages + 2 HTML Element Methods + 3 Running HTML doctests + 4 Creating HTML with the E-factory + 4.1 Viewing your HTML + 5 Working with links + 5.1 Functions + 6 Forms + 6.1 Form Filling Example + 6.2 Form Submission + 7 Cleaning up HTML + 7.1 autolink + 7.2 wordwrap + 8 HTML Diff + 9 Examples + 9.1 Microformat Example The main API is based on the `lxml.etree`_ API, and thus, on the ElementTree_ API. @@ -59,6 +70,19 @@ on whether the string looks like a full document, or just a fragment. +Really broken pages +------------------- + +The normal HTML parser is capable of handling broken HTML, but for +pages that are far enough from HTML to call them 'tag soup', it may +still fail to parse the page. A way to deal with this is +ElementSoup_, which deploys the well-known BeautifulSoup_ parser to +build an lxml HTML tree. + +.. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/ +.. 
_ElementSoup: elementsoup.html + + HTML Element Methods ==================== From scoder at codespeak.net Sat Oct 27 09:48:56 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 27 Oct 2007 09:48:56 +0200 (CEST) Subject: [Lxml-checkins] r48098 - lxml/trunk/src/lxml Message-ID: <20071027074856.C343D8150@code0.codespeak.net> Author: scoder Date: Sat Oct 27 09:48:55 2007 New Revision: 48098 Modified: lxml/trunk/src/lxml/apihelpers.pxi Log: forgotten assertions Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Sat Oct 27 09:48:55 2007 @@ -815,7 +815,6 @@ if c_node is not NULL: for element in elements: assert element is not None, "Node must not be None" - # move element and tail over c_next = element._c_node.next tree.xmlAddPrevSibling(c_node, element._c_node) @@ -838,9 +837,11 @@ # append the remaining elements at the respective end if left_to_right: for element in elements: + assert element is not None, "Node must not be None" _appendChild(parent, element) else: for element in elements: + assert element is not None, "Node must not be None" _prependChild(parent, element) return 0 From scoder at codespeak.net Sat Oct 27 10:36:25 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 27 Oct 2007 10:36:25 +0200 (CEST) Subject: [Lxml-checkins] r48099 - lxml/trunk/src/lxml/tests Message-ID: <20071027083625.A6194816B@code0.codespeak.net> Author: scoder Date: Sat Oct 27 10:36:24 2007 New Revision: 48099 Modified: lxml/trunk/src/lxml/tests/test_classlookup.py Log: doc fixes Modified: lxml/trunk/src/lxml/tests/test_classlookup.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_classlookup.py (original) +++ lxml/trunk/src/lxml/tests/test_classlookup.py Sat Oct 27 10:36:24 2007 @@ -1,10 +1,7 @@ # -*- coding: utf-8 -*- 
""" -Tests specific to the extended etree API - -Tests that apply to the general ElementTree API should go into -test_elementtree +Tests for different Element class lookup mechanisms. """ @@ -23,7 +20,7 @@ ''' class ClassLookupTestCase(HelperTestCase): - """Test cases for lxml.elementlib.classlookup + """Test cases for different Element class lookup mechanisms. """ etree = etree From scoder at codespeak.net Mon Oct 29 11:09:26 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 29 Oct 2007 11:09:26 +0100 (CET) Subject: [Lxml-checkins] r48142 - in lxml/trunk: . src/lxml Message-ID: <20071029100926.0803E810F@code0.codespeak.net> Author: scoder Date: Mon Oct 29 11:09:25 2007 New Revision: 48142 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/objectpath.pxi Log: fixed crash in ObjectPath Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Oct 29 11:09:25 2007 @@ -25,6 +25,8 @@ Bugs fixed ---------- +* Well hidden free-while-in-use crash bug in ObjectPath + * In the ``lxml.html`` ``iter_links`` method, links in ```` tags weren't recognized. (Note: plugin-specific link parameters still aren't recognized.) 
Also, the ```` tag, though not Modified: lxml/trunk/src/lxml/objectpath.pxi ============================================================================== --- lxml/trunk/src/lxml/objectpath.pxi (original) +++ lxml/trunk/src/lxml/objectpath.pxi Mon Oct 29 11:09:25 2007 @@ -140,7 +140,7 @@ if index_pos is NULL: index = 0 else: - name = python.PyString_FromStringAndSize( + new_name = python.PyString_FromStringAndSize( c_name, (index_pos - c_name)) index_pos = index_pos + 1 index_end = cstd.strchr(index_pos, c']') @@ -151,6 +151,7 @@ index_pos, (index_end - index_pos))) if python.PyList_GET_SIZE(new_path) == 0 and index != 0: raise ValueError, "index not allowed on root node" + name = new_name python.PyList_Append(new_path, (ns, name, index)) if python.PyList_GET_SIZE(new_path) == 0: raise ValueError, "invalid path" From scoder at codespeak.net Mon Oct 29 11:09:50 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 29 Oct 2007 11:09:50 +0100 (CET) Subject: [Lxml-checkins] r48143 - in lxml/branch/lxml-1.3: . 
src/lxml Message-ID: <20071029100950.DAFA5810F@code0.codespeak.net> Author: scoder Date: Mon Oct 29 11:09:50 2007 New Revision: 48143 Modified: lxml/branch/lxml-1.3/CHANGES.txt lxml/branch/lxml-1.3/src/lxml/objectify.pyx Log: fixed crash in ObjectPath Modified: lxml/branch/lxml-1.3/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.3/CHANGES.txt (original) +++ lxml/branch/lxml-1.3/CHANGES.txt Mon Oct 29 11:09:50 2007 @@ -2,6 +2,18 @@ lxml changelog ============== +Under development +================= + +Features added +-------------- + +Bugs fixed +---------- + +* Well hidden free-while-in-use crash bug in ObjectPath + + 1.3.5 (2007-10-22) ================== Modified: lxml/branch/lxml-1.3/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/objectify.pyx (original) +++ lxml/branch/lxml-1.3/src/lxml/objectify.pyx Mon Oct 29 11:09:50 2007 @@ -1254,7 +1254,7 @@ if index_pos is NULL: index = 0 else: - name = python.PyString_FromStringAndSize( + new_name = python.PyString_FromStringAndSize( c_name, (index_pos - c_name)) index_pos = index_pos + 1 index_end = cstd.strchr(index_pos, c']') @@ -1265,6 +1265,7 @@ index_pos, (index_end - index_pos))) if python.PyList_GET_SIZE(new_path) == 0 and index != 0: raise ValueError, "index not allowed on root node" + name = new_name python.PyList_Append(new_path, (ns, name, index)) if python.PyList_GET_SIZE(new_path) == 0: raise ValueError, "invalid path" From scoder at codespeak.net Mon Oct 29 12:22:00 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 29 Oct 2007 12:22:00 +0100 (CET) Subject: [Lxml-checkins] r48148 - lxml/trunk Message-ID: <20071029112200.CA3388124@code0.codespeak.net> Author: scoder Date: Mon Oct 29 12:22:00 2007 New Revision: 48148 Modified: lxml/trunk/test.py Log: run garbage collection after each test case Modified: lxml/trunk/test.py 
============================================================================== --- lxml/trunk/test.py (original) +++ lxml/trunk/test.py Mon Oct 29 12:22:00 2007 @@ -63,6 +63,8 @@ # and cleaner though, at the expense of more limited functionality. # +import gc + import re import os import sys @@ -416,6 +418,7 @@ test(result) stopTime = time.time() timeTaken = float(stopTime - startTime) + gc.collect() result.printErrors() run = result.testsRun if not self.cfg.quiet: From scoder at codespeak.net Mon Oct 29 16:57:23 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 29 Oct 2007 16:57:23 +0100 (CET) Subject: [Lxml-checkins] r48163 - lxml/trunk/src/lxml Message-ID: <20071029155723.C6A468124@code0.codespeak.net> Author: scoder Date: Mon Oct 29 16:57:23 2007 New Revision: 48163 Modified: lxml/trunk/src/lxml/objectpath.pxi Log: cleanup Modified: lxml/trunk/src/lxml/objectpath.pxi ============================================================================== --- lxml/trunk/src/lxml/objectpath.pxi (original) +++ lxml/trunk/src/lxml/objectpath.pxi Mon Oct 29 16:57:23 2007 @@ -83,8 +83,7 @@ _RELATIVE_PATH_SEGMENT = (None, None, 0) cdef _parseObjectPathString(path): - """Parse object path string into a 'hrefOnameOhrefOnameOOO' string and an - index list. The index list is None if no index was used in the path. + """Parse object path string into a (ns, name, index) list. """ cdef bint has_dot new_path = [] @@ -120,8 +119,7 @@ return new_path cdef _parseObjectPathList(path): - """Parse object path sequence into a 'hrefOnameOhrefOnameOOO' string and - an index list. The index list is None if no index was used in the path. + """Parse object path sequence into a (ns, name, index) list. 
""" cdef char* index_pos cdef char* index_end @@ -140,18 +138,16 @@ if index_pos is NULL: index = 0 else: - new_name = python.PyString_FromStringAndSize( - c_name, (index_pos - c_name)) - index_pos = index_pos + 1 - index_end = cstd.strchr(index_pos, c']') + index_end = cstd.strchr(index_pos + 1, c']') if index_end is NULL: raise ValueError, "index must be enclosed in []" index = python.PyNumber_Int( python.PyString_FromStringAndSize( - index_pos, (index_end - index_pos))) + index_pos + 1, (index_end - index_pos - 1))) if python.PyList_GET_SIZE(new_path) == 0 and index != 0: raise ValueError, "index not allowed on root node" - name = new_name + name = python.PyString_FromStringAndSize( + c_name, (index_pos - c_name)) python.PyList_Append(new_path, (ns, name, index)) if python.PyList_GET_SIZE(new_path) == 0: raise ValueError, "invalid path" @@ -160,13 +156,10 @@ cdef _ObjectPath* _buildObjectPathSegments(path_list) except NULL: cdef _ObjectPath* c_path cdef _ObjectPath* c_path_segments - cdef Py_ssize_t c_len - c_len = python.PyList_GET_SIZE(path_list) - c_path_segments = <_ObjectPath*>python.PyMem_Malloc(sizeof(_ObjectPath) * - c_len) + c_path_segments = <_ObjectPath*>python.PyMem_Malloc( + sizeof(_ObjectPath) * python.PyList_GET_SIZE(path_list)) if c_path_segments is NULL: python.PyErr_NoMemory() - return NULL c_path = c_path_segments for href, name, index in path_list: if href is None: @@ -201,7 +194,7 @@ while c_node is not NULL: c_path_len = c_path_len - 1 if c_path_len <= 0: - return cetree.elementFactory(root._doc, c_node) + break c_path = c_path + 1 if c_path[0].href is not NULL: @@ -215,7 +208,9 @@ c_node = c_node.children c_node = _findFollowingSibling(c_node, c_href, c_name, c_index) - if use_default: + if c_node is not NULL: + return cetree.elementFactory(root._doc, c_node) + elif use_default: return default_value else: tag = cetree.namespacedNameFromNsName(c_href, c_name) From scoder at codespeak.net Mon Oct 29 17:09:24 2007 From: scoder at 
codespeak.net (scoder at codespeak.net) Date: Mon, 29 Oct 2007 17:09:24 +0100 (CET) Subject: [Lxml-checkins] r48165 - lxml/branch/lxml-1.3/src/lxml Message-ID: <20071029160924.820278192@code0.codespeak.net> Author: scoder Date: Mon Oct 29 17:09:24 2007 New Revision: 48165 Modified: lxml/branch/lxml-1.3/src/lxml/objectify.pyx Log: ObjectPath cleanup Modified: lxml/branch/lxml-1.3/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/objectify.pyx (original) +++ lxml/branch/lxml-1.3/src/lxml/objectify.pyx Mon Oct 29 17:09:24 2007 @@ -1197,8 +1197,7 @@ _RELATIVE_PATH_SEGMENT = (None, None, 0) cdef _parseObjectPathString(path): - """Parse object path string into a 'hrefOnameOhrefOnameOOO' string and an - index list. The index list is None if no index was used in the path. + """Parse object path string into a (ns, name, index) list. """ cdef int has_dot new_path = [] @@ -1234,8 +1233,7 @@ return new_path cdef _parseObjectPathList(path): - """Parse object path sequence into a 'hrefOnameOhrefOnameOOO' string and - an index list. The index list is None if no index was used in the path. + """Parse object path sequence into a (ns, name, index) list. 
""" cdef char* index_pos cdef char* index_end @@ -1254,18 +1252,16 @@ if index_pos is NULL: index = 0 else: - new_name = python.PyString_FromStringAndSize( - c_name, (index_pos - c_name)) - index_pos = index_pos + 1 - index_end = cstd.strchr(index_pos, c']') + index_end = cstd.strchr(index_pos + 1, c']') if index_end is NULL: raise ValueError, "index must be enclosed in []" index = python.PyNumber_Int( python.PyString_FromStringAndSize( - index_pos, (index_end - index_pos))) + index_pos + 1, (index_end - index_pos - 1))) if python.PyList_GET_SIZE(new_path) == 0 and index != 0: raise ValueError, "index not allowed on root node" - name = new_name + name = python.PyString_FromStringAndSize( + c_name, (index_pos - c_name)) python.PyList_Append(new_path, (ns, name, index)) if python.PyList_GET_SIZE(new_path) == 0: raise ValueError, "invalid path" @@ -1274,13 +1270,10 @@ cdef _ObjectPath* _buildObjectPathSegments(path_list) except NULL: cdef _ObjectPath* c_path cdef _ObjectPath* c_path_segments - cdef Py_ssize_t c_len - c_len = python.PyList_GET_SIZE(path_list) - c_path_segments = <_ObjectPath*>python.PyMem_Malloc(sizeof(_ObjectPath) * - c_len) + c_path_segments = <_ObjectPath*>python.PyMem_Malloc( + sizeof(_ObjectPath) * python.PyList_GET_SIZE(path_list)) if c_path_segments is NULL: PyErr_NoMemory() - return NULL c_path = c_path_segments for href, name, index in path_list: if href is None: @@ -1315,7 +1308,7 @@ while c_node is not NULL: c_path_len = c_path_len - 1 if c_path_len <= 0: - return cetree.elementFactory(root._doc, c_node) + break c_path = c_path + 1 if c_path[0].href is not NULL: @@ -1329,7 +1322,9 @@ c_node = c_node.children c_node = _findFollowingSibling(c_node, c_href, c_name, c_index) - if use_default: + if c_node is not NULL: + return cetree.elementFactory(root._doc, c_node) + elif use_default: return default_value else: tag = cetree.namespacedNameFromNsName(c_href, c_name) From scoder at codespeak.net Mon Oct 29 20:01:56 2007 From: scoder at 
codespeak.net (scoder at codespeak.net) Date: Mon, 29 Oct 2007 20:01:56 +0100 (CET) Subject: [Lxml-checkins] r48170 - in lxml/trunk: . src/lxml/tests Message-ID: <20071029190156.84ABE8181@code0.codespeak.net> Author: scoder Date: Mon Oct 29 20:01:50 2007 New Revision: 48170 Modified: lxml/trunk/src/lxml/tests/common_imports.py lxml/trunk/src/lxml/tests/test_classlookup.py lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_htmlparser.py lxml/trunk/src/lxml/tests/test_io.py lxml/trunk/src/lxml/tests/test_nsclasses.py lxml/trunk/src/lxml/tests/test_objectify.py lxml/trunk/src/lxml/tests/test_pyclasslookup.py lxml/trunk/test.py Log: move gc.collect() into tearDown() methods in tests to run it after each test Modified: lxml/trunk/src/lxml/tests/common_imports.py ============================================================================== --- lxml/trunk/src/lxml/tests/common_imports.py (original) +++ lxml/trunk/src/lxml/tests/common_imports.py Mon Oct 29 20:01:50 2007 @@ -1,7 +1,7 @@ import unittest import os.path from StringIO import StringIO -import re +import re, gc from lxml import etree @@ -40,6 +40,12 @@ return seq class HelperTestCase(unittest.TestCase): + def setUp(self): + gc.collect() + + def tearDown(self): + gc.collect() + def parse(self, text): f = StringIO(text) return etree.parse(f) Modified: lxml/trunk/src/lxml/tests/test_classlookup.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_classlookup.py (original) +++ lxml/trunk/src/lxml/tests/test_classlookup.py Mon Oct 29 20:01:50 2007 @@ -26,6 +26,7 @@ def tearDown(self): etree.setElementClassLookup() + super(ClassLookupTestCase, self).tearDown() def test_namespace_lookup(self): class TestElement(etree.ElementBase): Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ 
lxml/trunk/src/lxml/tests/test_elementtree.py Mon Oct 29 20:01:50 2007 @@ -9,7 +9,7 @@ """ import unittest, doctest -import os, re, shutil, tempfile, copy, operator +import os, re, shutil, tempfile, copy, operator, gc from common_imports import StringIO, etree, ElementTree, cElementTree from common_imports import fileInTestDir, canonicalize @@ -24,8 +24,10 @@ def setUp(self): self._temp_dir = tempfile.mkdtemp() + gc.collect() def tearDown(self): + gc.collect() shutil.rmtree(self._temp_dir) def getTestFilePath(self, name): Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_htmlparser.py (original) +++ lxml/trunk/src/lxml/tests/test_htmlparser.py Mon Oct 29 20:01:50 2007 @@ -10,7 +10,7 @@ from common_imports import StringIO, etree, fileInTestDir from common_imports import SillyFileLike, HelperTestCase -class HtmlParserTestCaseBase(HelperTestCase): +class HtmlParserTestCase(HelperTestCase): """HTML parser test cases """ etree = etree @@ -25,6 +25,7 @@ uhtml_str = u"test ??\uF8D2

page ??\uF8D2 title

" def tearDown(self): + super(HtmlParserTestCase, self).tearDown() self.etree.setDefaultParser() def test_module_HTML(self): @@ -276,7 +277,7 @@ def test_suite(): suite = unittest.TestSuite() - suite.addTests([unittest.makeSuite(HtmlParserTestCaseBase)]) + suite.addTests([unittest.makeSuite(HtmlParserTestCase)]) return suite if __name__ == '__main__': Modified: lxml/trunk/src/lxml/tests/test_io.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_io.py (original) +++ lxml/trunk/src/lxml/tests/test_io.py Mon Oct 29 20:01:50 2007 @@ -5,7 +5,7 @@ """ import unittest -import tempfile, gzip, os +import tempfile, gzip, os, gc from common_imports import etree, ElementTree, fileInTestDir from common_imports import SillyFileLike, LargeFileLike @@ -18,10 +18,14 @@ def setUp(self): """Setting up a minimal tree """ + gc.collect() self.root = self.etree.Element('a') self.root_str = self.etree.tostring(self.root) self.tree = self.etree.ElementTree(self.root) + def tearDown(self): + gc.collect() + def test_write_filename(self): # (c)ElementTree supports filename strings as write argument Modified: lxml/trunk/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_nsclasses.py (original) +++ lxml/trunk/src/lxml/tests/test_nsclasses.py Mon Oct 29 20:01:50 2007 @@ -21,6 +21,7 @@ return u'bluff' def setUp(self): + super(ETreeNamespaceClassesTestCase, self).setUp() lookup = etree.ElementNamespaceClassLookup() self.Namespace = lookup.get_namespace parser = etree.XMLParser() @@ -30,6 +31,7 @@ def tearDown(self): etree.setDefaultParser() del self.Namespace + super(ETreeNamespaceClassesTestCase, self).tearDown() def test_registry(self): ns = self.Namespace(u'ns01') Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- 
lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Mon Oct 29 20:01:50 2007 @@ -72,6 +72,7 @@ return self.etree.XML(xml, self.parser) def setUp(self): + super(ObjectifyTestCase, self).setUp() self.parser = self.etree.XMLParser(remove_blank_text=True) self.lookup = etree.ElementNamespaceClassLookup( objectify.ObjectifyElementClassLookup() ) @@ -87,6 +88,7 @@ objectify.setPytypeAttributeTag() del self.lookup del self.parser + super(ObjectifyTestCase, self).tearDown() def test_element_nsmap_default(self): elt = objectify.Element("test") Modified: lxml/trunk/src/lxml/tests/test_pyclasslookup.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_pyclasslookup.py (original) +++ lxml/trunk/src/lxml/tests/test_pyclasslookup.py Mon Oct 29 20:01:50 2007 @@ -33,6 +33,7 @@ def tearDown(self): self.parser.setElementClassLookup(None) + super(PyClassLookupTestCase, self).tearDown() def _setClassLookup(self, lookup_function): class Lookup(PythonElementClassLookup): Modified: lxml/trunk/test.py ============================================================================== --- lxml/trunk/test.py (original) +++ lxml/trunk/test.py Mon Oct 29 20:01:50 2007 @@ -63,8 +63,6 @@ # and cleaner though, at the expense of more limited functionality. 
# -import gc - import re import os import sys @@ -418,7 +416,6 @@ test(result) stopTime = time.time() timeTaken = float(stopTime - startTime) - gc.collect() result.printErrors() run = result.testsRun if not self.cfg.quiet: From scoder at codespeak.net Mon Oct 29 20:02:26 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 29 Oct 2007 20:02:26 +0100 (CET) Subject: [Lxml-checkins] r48171 - lxml/trunk/src/lxml/tests Message-ID: <20071029190226.E96638181@code0.codespeak.net> Author: scoder Date: Mon Oct 29 20:02:25 2007 New Revision: 48171 Modified: lxml/trunk/src/lxml/tests/test_xslt.py Log: API usage fix Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Mon Oct 29 20:02:25 2007 @@ -143,7 +143,7 @@ \uF8D2""" f = StringIO() - res.write(f, 'UTF-16') + res.write(f, encoding='UTF-16') result = unicode(f.getvalue(), 'UTF-16') self.assertEquals(expected, result) From scoder at codespeak.net Mon Oct 29 20:06:59 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 29 Oct 2007 20:06:59 +0100 (CET) Subject: [Lxml-checkins] r48172 - lxml/trunk/src/lxml/tests Message-ID: <20071029190659.E71598187@code0.codespeak.net> Author: scoder Date: Mon Oct 29 20:06:59 2007 New Revision: 48172 Modified: lxml/trunk/src/lxml/tests/test_etree.py Log: API usage fix Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Mon Oct 29 20:06:59 2007 @@ -1992,7 +1992,7 @@ ElementTree = self.etree.ElementTree f = StringIO() tree = ElementTree(element=element) - tree.write(f, encoding) + tree.write(f, encoding=encoding) data = f.getvalue() return canonicalize(data) From scoder at codespeak.net Mon Oct 29 20:07:51 2007 From: 
scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 29 Oct 2007 20:07:51 +0100 (CET) Subject: [Lxml-checkins] r48173 - lxml/trunk/src/lxml/tests Message-ID: <20071029190751.935F7818A@code0.codespeak.net> Author: scoder Date: Mon Oct 29 20:07:51 2007 New Revision: 48173 Modified: lxml/trunk/src/lxml/tests/test_etree.py Log: Pyrex 0.9.6 C-API test fix Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Mon Oct 29 20:07:51 2007 @@ -48,7 +48,13 @@ str(etree.LXML_VERSION[0]))) def test_c_api(self): - self.assert_(hasattr(self.etree, '_import_c_api')) + if hasattr(self.etree, '__pyx_capi__'): + # newer Pyrex compatible C-API + self.assert_(isinstance(self.etree.__pyx_capi__, dict)) + self.assert_(len(self.etree.__pyx_capi__) > 0) + else: + # older C-API mechanism + self.assert_(hasattr(self.etree, '_import_c_api')) def test_element_names(self): Element = self.etree.Element From scoder at codespeak.net Mon Oct 29 20:25:03 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 29 Oct 2007 20:25:03 +0100 (CET) Subject: [Lxml-checkins] r48174 - lxml/branch/lxml-1.3 Message-ID: <20071029192503.EED108170@code0.codespeak.net> Author: scoder Date: Mon Oct 29 20:25:03 2007 New Revision: 48174 Modified: lxml/branch/lxml-1.3/Makefile Log: longer stack trace in valgrind Modified: lxml/branch/lxml-1.3/Makefile ============================================================================== --- lxml/branch/lxml-1.3/Makefile (original) +++ lxml/branch/lxml-1.3/Makefile Mon Oct 29 20:25:03 2007 @@ -21,7 +21,7 @@ PYTHONPATH=src $(PYTHON) selftest2.py valgrind_test_inplace: inplace - valgrind --tool=memcheck --leak-check=full --suppressions=valgrind-python.supp \ + valgrind --tool=memcheck --leak-check=full --num-callers=30 --suppressions=valgrind-python.supp \ $(PYTHON) test.py 
bench_inplace: inplace From scoder at codespeak.net Mon Oct 29 20:36:17 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 29 Oct 2007 20:36:17 +0100 (CET) Subject: [Lxml-checkins] r48175 - in lxml/branch/lxml-1.3: . src/lxml Message-ID: <20071029193617.EFAB7819A@code0.codespeak.net> Author: scoder Date: Mon Oct 29 20:36:17 2007 New Revision: 48175 Modified: lxml/branch/lxml-1.3/CHANGES.txt lxml/branch/lxml-1.3/src/lxml/etree.pyx lxml/branch/lxml-1.3/src/lxml/proxy.pxi lxml/branch/lxml-1.3/src/lxml/python.pxd Log: backported GC crash fix from trunk Modified: lxml/branch/lxml-1.3/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.3/CHANGES.txt (original) +++ lxml/branch/lxml-1.3/CHANGES.txt Mon Oct 29 20:36:17 2007 @@ -11,6 +11,8 @@ Bugs fixed ---------- +* Backported decref crash fix from 2.0 + * Well hidden free-while-in-use crash bug in ObjectPath Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-1.3/src/lxml/etree.pyx Mon Oct 29 20:36:17 2007 @@ -484,8 +484,9 @@ #print "trying to free node:", self._c_node #displayNode(self._c_node, 0) if self._c_node is not NULL: - unregisterProxy(self) + _unregisterProxy(self) attemptDeallocation(self._c_node) + _releaseProxy(self) # MANIPULATORS @@ -1127,7 +1128,7 @@ result = element_class() result._doc = doc result._c_node = c_node - registerProxy(result) + _registerProxy(result) if config.ENABLE_THREADING: python.PyThread_release_lock(ELEMENT_CREATION_LOCK) Modified: lxml/branch/lxml-1.3/src/lxml/proxy.pxi ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/proxy.pxi (original) +++ lxml/branch/lxml-1.3/src/lxml/proxy.pxi Mon Oct 29 20:36:17 2007 @@ -16,14 +16,14 @@ cdef int hasProxy(xmlNode* c_node): return c_node._private is not 
NULL -cdef registerProxy(_Element proxy): +cdef int _registerProxy(_Element proxy) except -1: """Register a proxy and type for the node it's proxying for. """ cdef xmlNode* c_node # cannot register for NULL c_node = proxy._c_node if c_node is NULL: - return + return 0 #print "registering for:", proxy._c_node assert c_node._private is NULL, "double registering proxy!" c_node._private = proxy @@ -31,14 +31,20 @@ proxy._gc_doc = proxy._doc python.Py_INCREF(proxy._doc) -cdef unregisterProxy(_Element proxy): +cdef int _unregisterProxy(_Element proxy) except -1: """Unregister a proxy for the node it's proxying for. """ cdef xmlNode* c_node c_node = proxy._c_node assert c_node._private is proxy, "Tried to unregister unknown proxy" c_node._private = NULL - python.Py_DECREF(proxy._gc_doc) + return 0 + +cdef _releaseProxy(_Element proxy): + """An additional DECREF for the document. + """ + python.Py_XDECREF(proxy._gc_doc) + proxy._gc_doc = NULL ################################################################################ # temporarily make a node the root node of its document Modified: lxml/branch/lxml-1.3/src/lxml/python.pxd ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/python.pxd (original) +++ lxml/branch/lxml-1.3/src/lxml/python.pxd Mon Oct 29 20:36:17 2007 @@ -10,6 +10,7 @@ cdef void Py_INCREF(object o) cdef void Py_DECREF(object o) + cdef void Py_XDECREF(PyObject* o) cdef FILE* PyFile_AsFile(object p) cdef int PyFile_Check(object p) From scoder at codespeak.net Mon Oct 29 20:46:36 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 29 Oct 2007 20:46:36 +0100 (CET) Subject: [Lxml-checkins] r48176 - lxml/branch/lxml-1.3/src/lxml Message-ID: <20071029194636.7B4AC818F@code0.codespeak.net> Author: scoder Date: Mon Oct 29 20:46:36 2007 New Revision: 48176 Modified: lxml/branch/lxml-1.3/src/lxml/proxy.pxi Log: cleanup Modified: lxml/branch/lxml-1.3/src/lxml/proxy.pxi 
============================================================================== --- lxml/branch/lxml-1.3/src/lxml/proxy.pxi (original) +++ lxml/branch/lxml-1.3/src/lxml/proxy.pxi Mon Oct 29 20:46:36 2007 @@ -40,7 +40,7 @@ c_node._private = NULL return 0 -cdef _releaseProxy(_Element proxy): +cdef void _releaseProxy(_Element proxy): """An additional DECREF for the document. """ python.Py_XDECREF(proxy._gc_doc) From scoder at codespeak.net Mon Oct 29 20:50:16 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 29 Oct 2007 20:50:16 +0100 (CET) Subject: [Lxml-checkins] r48177 - in lxml/branch/lxml-1.3: . src/lxml/tests Message-ID: <20071029195016.32525816C@code0.codespeak.net> Author: scoder Date: Mon Oct 29 20:50:15 2007 New Revision: 48177 Modified: lxml/branch/lxml-1.3/CHANGES.txt lxml/branch/lxml-1.3/src/lxml/tests/common_imports.py lxml/branch/lxml-1.3/src/lxml/tests/test_classlookup.py lxml/branch/lxml-1.3/src/lxml/tests/test_elementtree.py lxml/branch/lxml-1.3/src/lxml/tests/test_htmlparser.py lxml/branch/lxml-1.3/src/lxml/tests/test_io.py lxml/branch/lxml-1.3/src/lxml/tests/test_nsclasses.py lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py lxml/branch/lxml-1.3/src/lxml/tests/test_pyclasslookup.py Log: run gc.collect() after each test to make sure we catch GC bugs Modified: lxml/branch/lxml-1.3/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.3/CHANGES.txt (original) +++ lxml/branch/lxml-1.3/CHANGES.txt Mon Oct 29 20:50:15 2007 @@ -15,6 +15,14 @@ * Well hidden free-while-in-use crash bug in ObjectPath +Other changes +------------- + +* The test suites now run ``gc.collect()`` in the ``tearDown()`` + methods. While this makes them take a lot longer to run, it also + makes it easier to link a specific test to garbage collection + problems that would otherwise appear in later tests. 
+ 1.3.5 (2007-10-22) ================== Modified: lxml/branch/lxml-1.3/src/lxml/tests/common_imports.py ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/tests/common_imports.py (original) +++ lxml/branch/lxml-1.3/src/lxml/tests/common_imports.py Mon Oct 29 20:50:15 2007 @@ -1,7 +1,7 @@ import unittest import os.path from StringIO import StringIO -import re +import re, gc from lxml import etree @@ -30,6 +30,9 @@ return lambda obj: obj[item] class HelperTestCase(unittest.TestCase): + def tearDown(self): + gc.collect() + def parse(self, text): f = StringIO(text) return etree.parse(f) Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_classlookup.py ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/tests/test_classlookup.py (original) +++ lxml/branch/lxml-1.3/src/lxml/tests/test_classlookup.py Mon Oct 29 20:50:15 2007 @@ -31,6 +31,7 @@ etree.setElementClassLookup() etree.Namespace("myNS").clear() etree.Namespace("otherNS").clear() + super(ClassLookupTestCase, self).tearDown() def test_namespace_lookup(self): class TestElement(etree.ElementBase): Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/lxml-1.3/src/lxml/tests/test_elementtree.py Mon Oct 29 20:50:15 2007 @@ -18,10 +18,12 @@ etree = None def setUp(self): + super(ETreeTestCaseBase, self).setUp() self._temp_dir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self._temp_dir) + super(ETreeTestCaseBase, self).tearDown() def getTestFilePath(self, name): return os.path.join(self._temp_dir, name) Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/tests/test_htmlparser.py (original) +++ 
lxml/branch/lxml-1.3/src/lxml/tests/test_htmlparser.py Mon Oct 29 20:50:15 2007 @@ -10,7 +10,7 @@ from common_imports import StringIO, etree, fileInTestDir from common_imports import SillyFileLike, HelperTestCase -class HtmlParserTestCaseBase(HelperTestCase): +class HtmlParserTestCase(HelperTestCase): """HTML parser test cases """ etree = etree @@ -21,6 +21,7 @@ def tearDown(self): self.etree.setDefaultParser() + super(HtmlParserTestCase, self).tearDown() def test_module_HTML(self): element = self.etree.HTML(self.html_str) @@ -111,7 +112,7 @@ def test_suite(): suite = unittest.TestSuite() - suite.addTests([unittest.makeSuite(HtmlParserTestCaseBase)]) + suite.addTests([unittest.makeSuite(HtmlParserTestCase)]) return suite if __name__ == '__main__': Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_io.py ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/tests/test_io.py (original) +++ lxml/branch/lxml-1.3/src/lxml/tests/test_io.py Mon Oct 29 20:50:15 2007 @@ -5,7 +5,7 @@ """ import unittest -import tempfile, gzip, os +import tempfile, gzip, os, gc from common_imports import etree, ElementTree, fileInTestDir from common_imports import SillyFileLike, LargeFileLike @@ -22,6 +22,9 @@ self.root_str = self.etree.tostring(self.root) self.tree = self.etree.ElementTree(self.root) + def tearDown(self): + gc.collect() + def test_write_filename(self): # (c)ElementTree supports filename strings as write argument Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_nsclasses.py ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/tests/test_nsclasses.py (original) +++ lxml/branch/lxml-1.3/src/lxml/tests/test_nsclasses.py Mon Oct 29 20:50:15 2007 @@ -21,6 +21,7 @@ return u'bluff' def setUp(self): + super(ETreeNamespaceClassesTestCase, self).setUp() parser = etree.XMLParser() parser.setElementClassLookup( etree.ElementNamespaceClassLookup() ) @@ -28,6 +29,7 @@ 
def tearDown(self): etree.setDefaultParser() + super(ETreeNamespaceClassesTestCase, self).tearDown() def test_registry(self): ns = etree.Namespace(u'ns01') Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/lxml-1.3/src/lxml/tests/test_objectify.py Mon Oct 29 20:50:15 2007 @@ -60,6 +60,7 @@ return self.etree.XML(xml, self.parser) def setUp(self): + super(ObjectifyTestCase, self).setUp() self.parser = self.etree.XMLParser(remove_blank_text=True) lookup = etree.ElementNamespaceClassLookup( objectify.ObjectifyElementClassLookup() ) @@ -73,6 +74,7 @@ def tearDown(self): self.etree.Namespace("otherNS").clear() objectify.setPytypeAttributeTag() + super(ObjectifyTestCase, self).tearDown() def test_element_nsmap_default(self): elt = objectify.Element("test") Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_pyclasslookup.py ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/tests/test_pyclasslookup.py (original) +++ lxml/branch/lxml-1.3/src/lxml/tests/test_pyclasslookup.py Mon Oct 29 20:50:15 2007 @@ -34,6 +34,7 @@ def tearDown(self): self.parser.setElementClassLookup(None) + super(PyClassLookupTestCase, self).tearDown() def _setClassLookup(self, lookup_function): class Lookup(PythonElementClassLookup): From scoder at codespeak.net Mon Oct 29 20:52:30 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 29 Oct 2007 20:52:30 +0100 (CET) Subject: [Lxml-checkins] r48178 - in lxml/branch/lxml-1.3: . 
doc Message-ID: <20071029195230.9FA2C818F@code0.codespeak.net> Author: scoder Date: Mon Oct 29 20:52:30 2007 New Revision: 48178 Modified: lxml/branch/lxml-1.3/CHANGES.txt lxml/branch/lxml-1.3/doc/main.txt lxml/branch/lxml-1.3/version.txt Log: prepare release of 1.3.6 Modified: lxml/branch/lxml-1.3/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.3/CHANGES.txt (original) +++ lxml/branch/lxml-1.3/CHANGES.txt Mon Oct 29 20:52:30 2007 @@ -2,11 +2,8 @@ lxml changelog ============== -Under development -================= - -Features added --------------- +1.3.6 (2007-10-29) +================== Bugs fixed ---------- @@ -27,9 +24,6 @@ 1.3.5 (2007-10-22) ================== -Features added --------------- - Bugs fixed ---------- Modified: lxml/branch/lxml-1.3/doc/main.txt ============================================================================== --- lxml/branch/lxml-1.3/doc/main.txt (original) +++ lxml/branch/lxml-1.3/doc/main.txt Mon Oct 29 20:52:30 2007 @@ -130,7 +130,7 @@ .. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/ .. _`this key`: pubkey.asc -The latest version is `lxml 1.3.5`_, released 2007-10-22 (`changes for 1.3.5`_). +The latest version is `lxml 1.3.6`_, released 2007-10-29 (`changes for 1.3.6`_). `Older versions`_ are listed below. .. _`Older versions`: #old-versions @@ -191,6 +191,8 @@ Old Versions ------------ +* `lxml 1.3.5`_, released 2007-10-22 (`changes for 1.3.5`_) + * `lxml 1.3.4`_, released 2007-08-30 (`changes for 1.3.4`_) * `lxml 1.3.3`_, released 2007-07-26 (`changes for 1.3.3`_) @@ -237,6 +239,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 1.3.6`: lxml-1.3.6.tgz .. _`lxml 1.3.5`: lxml-1.3.5.tgz .. _`lxml 1.3.4`: lxml-1.3.4.tgz .. _`lxml 1.3.3`: lxml-1.3.3.tgz @@ -261,6 +264,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 1.3.6`: changes-1.3.6.html .. _`changes for 1.3.5`: changes-1.3.5.html .. 
_`changes for 1.3.4`: changes-1.3.4.html .. _`changes for 1.3.3`: changes-1.3.3.html Modified: lxml/branch/lxml-1.3/version.txt ============================================================================== --- lxml/branch/lxml-1.3/version.txt (original) +++ lxml/branch/lxml-1.3/version.txt Mon Oct 29 20:52:30 2007 @@ -1 +1 @@ -1.3.5 +1.3.6 From scoder at codespeak.net Mon Oct 29 20:53:58 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 29 Oct 2007 20:53:58 +0100 (CET) Subject: [Lxml-checkins] r48179 - lxml/trunk/src/lxml Message-ID: <20071029195358.F3367818F@code0.codespeak.net> Author: scoder Date: Mon Oct 29 20:53:58 2007 New Revision: 48179 Modified: lxml/trunk/src/lxml/proxy.pxi Log: cleanup Modified: lxml/trunk/src/lxml/proxy.pxi ============================================================================== --- lxml/trunk/src/lxml/proxy.pxi (original) +++ lxml/trunk/src/lxml/proxy.pxi Mon Oct 29 20:53:58 2007 @@ -38,8 +38,9 @@ c_node = proxy._c_node assert c_node._private is proxy, "Tried to unregister unknown proxy" c_node._private = NULL + return 0 -cdef _releaseProxy(_Element proxy): +cdef void _releaseProxy(_Element proxy): """An additional DECREF for the document. 
""" python.Py_XDECREF(proxy._gc_doc) From scoder at codespeak.net Mon Oct 29 20:56:25 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 29 Oct 2007 20:56:25 +0100 (CET) Subject: [Lxml-checkins] r48180 - lxml/trunk/src/lxml/tests Message-ID: <20071029195625.D3622818F@code0.codespeak.net> Author: scoder Date: Mon Oct 29 20:56:25 2007 New Revision: 48180 Modified: lxml/trunk/src/lxml/tests/common_imports.py lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_io.py Log: cleanup Modified: lxml/trunk/src/lxml/tests/common_imports.py ============================================================================== --- lxml/trunk/src/lxml/tests/common_imports.py (original) +++ lxml/trunk/src/lxml/tests/common_imports.py Mon Oct 29 20:56:25 2007 @@ -40,9 +40,6 @@ return seq class HelperTestCase(unittest.TestCase): - def setUp(self): - gc.collect() - def tearDown(self): gc.collect() Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Mon Oct 29 20:56:25 2007 @@ -24,7 +24,6 @@ def setUp(self): self._temp_dir = tempfile.mkdtemp() - gc.collect() def tearDown(self): gc.collect() Modified: lxml/trunk/src/lxml/tests/test_io.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_io.py (original) +++ lxml/trunk/src/lxml/tests/test_io.py Mon Oct 29 20:56:25 2007 @@ -18,7 +18,6 @@ def setUp(self): """Setting up a minimal tree """ - gc.collect() self.root = self.etree.Element('a') self.root_str = self.etree.tostring(self.root) self.tree = self.etree.ElementTree(self.root) From scoder at codespeak.net Mon Oct 29 21:02:25 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 29 Oct 2007 21:02:25 +0100 (CET) Subject: [Lxml-checkins] r48181 - in lxml/trunk: . 
doc Message-ID: <20071029200225.ED56B818F@code0.codespeak.net> Author: scoder Date: Mon Oct 29 21:02:22 2007 New Revision: 48181 Modified: lxml/trunk/CHANGES.txt lxml/trunk/doc/main.txt Log: ChangeLog update after 1.3.6 release Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Oct 29 21:02:22 2007 @@ -49,6 +49,25 @@ the package name of their module. +1.3.6 (2007-10-29) +================== + +Bugs fixed +---------- + +* Backported decref crash fix from 2.0 + +* Well hidden free-while-in-use crash bug in ObjectPath + +Other changes +------------- + +* The test suites now run ``gc.collect()`` in the ``tearDown()`` + methods. While this makes them take a lot longer to run, it also + makes it easier to link a specific test to garbage collection + problems that would otherwise appear in later tests. + + 1.3.5 (2007-10-22) ================== Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Mon Oct 29 21:02:22 2007 @@ -205,6 +205,10 @@ * `lxml 2.0alpha1`_, released 2007-09-02 (`changes for 2.0alpha1`_) +* `lxml 1.3.6`_, released 2007-10-29 (`changes for 1.3.6`_) + +* `lxml 1.3.5`_, released 2007-10-22 (`changes for 1.3.5`_) + * `lxml 1.3.4`_, released 2007-08-30 (`changes for 1.3.4`_) * `lxml 1.3.3`_, released 2007-07-26 (`changes for 1.3.3`_) @@ -255,6 +259,8 @@ .. _`lxml 2.0alpha3`: lxml-2.0alpha3.tgz .. _`lxml 2.0alpha2`: lxml-2.0alpha2.tgz .. _`lxml 2.0alpha1`: lxml-2.0alpha1.tgz +.. _`lxml 1.3.6`: lxml-1.3.6.tgz +.. _`lxml 1.3.5`: lxml-1.3.5.tgz .. _`lxml 1.3.4`: lxml-1.3.4.tgz .. _`lxml 1.3.3`: lxml-1.3.3.tgz .. _`lxml 1.3.2`: lxml-1.3.2.tgz @@ -282,6 +288,8 @@ .. _`changes for 2.0alpha3`: changes-2.0alpha3.html .. _`changes for 2.0alpha2`: changes-2.0alpha2.html .. 
_`changes for 2.0alpha1`: changes-2.0alpha1.html +.. _`changes for 1.3.6`: changes-1.3.6.html +.. _`changes for 1.3.5`: changes-1.3.5.html .. _`changes for 1.3.4`: changes-1.3.4.html .. _`changes for 1.3.3`: changes-1.3.3.html .. _`changes for 1.3.2`: changes-1.3.2.html From scoder at codespeak.net Tue Oct 30 11:13:50 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 30 Oct 2007 11:13:50 +0100 (CET) Subject: [Lxml-checkins] r48187 - lxml/trunk/doc Message-ID: <20071030101350.8A544819B@code0.codespeak.net> Author: scoder Date: Tue Oct 30 11:13:48 2007 New Revision: 48187 Modified: lxml/trunk/doc/FAQ.txt Log: FAQ entry on using XSLT in different threads Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Tue Oct 30 11:13:48 2007 @@ -35,7 +35,8 @@ 5.1 Can I use threads to concurrently access the lxml API? 5.2 Does my program run faster if I use threads? 5.3 Would my single-threaded program run faster if I turned off threading? - 5.4 My program crashes when run with mod_python/Pyro/Zope/Plone/... + 5.4 Why can't I reuse XSLT stylesheets in other threads? + 5.5 My program crashes when run with mod_python/Pyro/Zope/Plone/... 6 Parsing and Serialisation 6.1 Why doesn't the ``pretty_print`` option reformat my XML output? 6.2 Why can't lxml parse my XML from unicode strings? @@ -263,7 +264,8 @@ contribute, don't bother with the details, a Python implementation of your contribution is better than none. And keep in mind that lxml's flexible API often favours an implementation of features in pure Python, without bothering -with C-code at all. +with C-code at all. For example, the ``lxml.html`` package is entirely written +in Python. Please contact the `mailing list`_ if you need any help. @@ -440,6 +442,36 @@ lxml from source. +Why can't I reuse XSLT stylesheets in other threads? 
+---------------------------------------------------- + +lxml currently has the restriction that an XSLT object can only be +used in a thread if it was created either in the thread itself or in +the main thread. This is due to some interfering optimisations in +libxslt and lxml.etree. To work around this, you can do a couple of +things: + +* create all XSLT objects in the main program and reuse them wherever + you want. + +* create them in the thread where you use them and maybe cache them in + thread local storage (see the threading module). + +If your stylesheets are diverse and status specific, you can still +prepare them in advance if you: + +* use XSLT parameters that you pass at call time to configure the + stylesheets + +* create the stylesheets (partially) programmatically in the main + program, e.g. by adding ``xsl:output`` tags, ``xsl:include`` tags or + Templates (be careful with the order here) to the XSL tree, and then + create the ``XSLT`` objects and store them in a read-only + dictionary. That way, you can access and use them in any thread. + Note that passing the same XSL tree into multiple ``XSLT()`` + instances will create independent stylesheets. + + My program crashes when run with mod_python/Pyro/Zope/Plone/... 
--------------------------------------------------------------- From scoder at codespeak.net Tue Oct 30 12:43:43 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 30 Oct 2007 12:43:43 +0100 (CET) Subject: [Lxml-checkins] r48188 - lxml/branch/lxml-1.3/doc Message-ID: <20071030114343.2FBA581A4@code0.codespeak.net> Author: scoder Date: Tue Oct 30 12:43:41 2007 New Revision: 48188 Modified: lxml/branch/lxml-1.3/doc/FAQ.txt Log: FAQ update from trunk Modified: lxml/branch/lxml-1.3/doc/FAQ.txt ============================================================================== --- lxml/branch/lxml-1.3/doc/FAQ.txt (original) +++ lxml/branch/lxml-1.3/doc/FAQ.txt Tue Oct 30 12:43:41 2007 @@ -21,6 +21,7 @@ 1.3 What standards does lxml implement? 1.4 What is the difference between lxml.etree and lxml.objectify? 1.5 How can I make my application run faster? + 1.6 What about that trailing text on serialised Elements? 2 Installation 2.1 Which version of libxml2 and libxslt should I use or require? 2.2 Where are the Windows binaries? @@ -35,6 +36,8 @@ 5.1 Can I use threads to concurrently access the lxml API? 5.2 Does my program run faster if I use threads? 5.3 Would my single-threaded program run faster if I turned off threading? + 5.4 Why can't I reuse XSLT stylesheets in other threads? + 5.5 My program crashes when run with mod_python/Pyro/Zope/Plone/... 6 Parsing and Serialisation 6.1 Why doesn't the ``pretty_print`` option reformat my XML output? 6.2 Why can't lxml parse my XML from unicode strings? @@ -262,7 +265,8 @@ contribute, don't bother with the details, a Python implementation of your contribution is better than none. And keep in mind that lxml's flexible API often favours an implementation of features in pure Python, without bothering -with C-code at all. +with C-code at all. For example, the ``lxml.html`` package is entirely written +in Python. Please contact the `mailing list`_ if you need any help. @@ -436,6 +440,94 @@ lxml from source. 
+Why can't I reuse XSLT stylesheets in other threads? +---------------------------------------------------- + +lxml currently has the restriction that an XSLT object can only be +used in a thread if it was created either in the thread itself or in +the main thread. This is due to some interfering optimisations in +libxslt and lxml.etree. To work around this, you can do a couple of +things: + +* create all XSLT objects in the main program and reuse them wherever + you want. + +* create them in the thread where you use them and maybe cache them in + thread local storage (see the threading module). + +If your stylesheets are diverse and status specific, you can still +prepare them in advance if you: + +* use XSLT parameters that you pass at call time to configure the + stylesheets + +* create the stylesheets (partially) programmatically in the main + program, e.g. by adding ``xsl:output`` tags, ``xsl:include`` tags or + Templates (be careful with the order here) to the XSL tree, and then + create the ``XSLT`` objects and store them in a read-only + dictionary. That way, you can access and use them in any thread. + Note that passing the same XSL tree into multiple ``XSLT()`` + instances will create independent stylesheets. + + +My program crashes when run with mod_python/Pyro/Zope/Plone/... +--------------------------------------------------------------- + +These environments can use threads in a way that may not make it obvious when +threads are created and what happens in which thread. This makes it hard to +ensure lxml's threading support is used in a reliable way. Sadly, if problems +arise, they are as diverse as the applications, so it is difficult to provide +any generally applicable solution. Also, these environments are so complex +that problems become hard to debug and even harder to reproduce in a +predictable way. 
If you encounter crashes in one of these systems, but your +code runs perfectly when started by hand, the following gives you a few hints +for possible approaches to solve your specific problem: + +* make sure you use recent versions of libxml2, libxslt and lxml. The libxml2 + developers keep fixing bugs in each release, and lxml also tries to become + more robust against possible pitfalls. So newer versions might already fix + your problem in a reliable way. + +* make sure the library versions you installed are really used. Do not rely + on what your operating system tells you! Print the version constants in + ``lxml.etree`` from within your runtime environment to make sure it is the + case. This is especially a problem under MacOS-X when newer library + versions were installed in addition to the outdated system libraries. + +* if you use ``mod_python``, try setting this option: + + PythonInterpreter main_interpreter + + There was a discussion on the mailing list about this problem: + + http://comments.gmane.org/gmane.comp.python.lxml.devel/2942 + +* compile lxml without threading support by running ``setup.py`` with the + ``--without-threading`` option. While this might be slower in certain + scenarios on multi-processor systems, it *might* also keep your application + from crashing, which should be worth more to you than peek performance. + Remember that lxml is fast anyway, so concurrency may not even be worth it. + +* avoid doing fancy XSLT stuff like foreign document access or passing in + subtrees trough XSLT variables. This might or might not work, depending on + your specific usage. + +* try copying trees at suspicious places and working with those instead of a + tree shared between threads. A good candidate might be the result of an + XSLT or the stylesheet itself. + +* try keeping thread-local copies of XSLT stylesheets, i.e. one per thread, + instead of sharing one. Also see the question above. 
+ +* you can try to serialise suspicious parts of your code with explicit thread + locks, thus disabling the concurrency of the runtime system. + +* report back on the mailing list to see if there are other ways to work + around your specific problems. Do not forget to report the version numbers + of lxml, libxml2 and libxslt you are using (see the question on reporting + a bug). + + Parsing and Serialisation ========================= From scoder at codespeak.net Tue Oct 30 12:44:32 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 30 Oct 2007 12:44:32 +0100 (CET) Subject: [Lxml-checkins] r48189 - lxml/trunk/doc Message-ID: <20071030114432.AFB7C81A4@code0.codespeak.net> Author: scoder Date: Tue Oct 30 12:44:32 2007 New Revision: 48189 Modified: lxml/trunk/doc/FAQ.txt Log: FAQ fixes Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Tue Oct 30 12:44:32 2007 @@ -481,9 +481,9 @@ arise, they are as diverse as the applications, so it is difficult to provide any generally applicable solution. Also, these environments are so complex that problems become hard to debug and even harder to reproduce in a -predictable way. If you encounter crashes in one these systems, but your code -runs perfectly when started by hand, the following gives you a few hints for -possible approaches to solve your specific problem: +predictable way. If you encounter crashes in one of these systems, but your +code runs perfectly when started by hand, the following gives you a few hints +for possible approaches to solve your specific problem: * make sure you use recent versions of libxml2, libxslt and lxml. The libxml2 developers keep fixing bugs in each release, and lxml also tries to become @@ -519,14 +519,15 @@ XSLT or the stylesheet itself. * try keeping thread-local copies of XSLT stylesheets, i.e. one per thread, - instead of sharing one. 
+ instead of sharing one. Also see the question above. * you can try to serialise suspicious parts of your code with explicit thread locks, thus disabling the concurrency of the runtime system. * report back on the mailing list to see if there are other ways to work around your specific problems. Do not forget to report the version numbers - of lxml, libxml2 and libxslt you are using. + of lxml, libxml2 and libxslt you are using (see the question on reporting + a bug). Parsing and Serialisation From scoder at codespeak.net Tue Oct 30 12:51:30 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 30 Oct 2007 12:51:30 +0100 (CET) Subject: [Lxml-checkins] r48190 - lxml/branch/lxml-1.3/doc Message-ID: <20071030115130.76D7081AA@code0.codespeak.net> Author: scoder Date: Tue Oct 30 12:51:30 2007 New Revision: 48190 Modified: lxml/branch/lxml-1.3/doc/FAQ.txt Log: FAQ clarification Modified: lxml/branch/lxml-1.3/doc/FAQ.txt ============================================================================== --- lxml/branch/lxml-1.3/doc/FAQ.txt (original) +++ lxml/branch/lxml-1.3/doc/FAQ.txt Tue Oct 30 12:51:30 2007 @@ -467,7 +467,9 @@ create the ``XSLT`` objects and store them in a read-only dictionary. That way, you can access and use them in any thread. Note that passing the same XSL tree into multiple ``XSLT()`` - instances will create independent stylesheets. + instances will create independent stylesheets, so later + modifications of the tree will not be reflected in the already + created stylesheets. My program crashes when run with mod_python/Pyro/Zope/Plone/... 
From scoder at codespeak.net Tue Oct 30 12:51:45 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 30 Oct 2007 12:51:45 +0100 (CET) Subject: [Lxml-checkins] r48191 - lxml/trunk/doc Message-ID: <20071030115145.A755981AA@code0.codespeak.net> Author: scoder Date: Tue Oct 30 12:51:45 2007 New Revision: 48191 Modified: lxml/trunk/doc/FAQ.txt Log: FAQ clarification Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Tue Oct 30 12:51:45 2007 @@ -469,7 +469,9 @@ create the ``XSLT`` objects and store them in a read-only dictionary. That way, you can access and use them in any thread. Note that passing the same XSL tree into multiple ``XSLT()`` - instances will create independent stylesheets. + instances will create independent stylesheets, so later + modifications of the tree will not be reflected in the already + created stylesheets. My program crashes when run with mod_python/Pyro/Zope/Plone/... From scoder at codespeak.net Tue Oct 30 12:54:28 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 30 Oct 2007 12:54:28 +0100 (CET) Subject: [Lxml-checkins] r48192 - lxml/branch/lxml-1.3/doc Message-ID: <20071030115428.4F47A811B@code0.codespeak.net> Author: scoder Date: Tue Oct 30 12:54:28 2007 New Revision: 48192 Modified: lxml/branch/lxml-1.3/doc/FAQ.txt Log: FAQ fix Modified: lxml/branch/lxml-1.3/doc/FAQ.txt ============================================================================== --- lxml/branch/lxml-1.3/doc/FAQ.txt (original) +++ lxml/branch/lxml-1.3/doc/FAQ.txt Tue Oct 30 12:54:28 2007 @@ -463,7 +463,7 @@ * create the stylesheets (partially) programmatically in the main program, e.g. 
by adding ``xsl:output`` tags, ``xsl:include`` tags or - Templates (be careful with the order here) to the XSL tree, and then + templates (be careful with the order here) to the XSL tree, and then create the ``XSLT`` objects and store them in a read-only dictionary. That way, you can access and use them in any thread. Note that passing the same XSL tree into multiple ``XSLT()`` From scoder at codespeak.net Tue Oct 30 12:54:52 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 30 Oct 2007 12:54:52 +0100 (CET) Subject: [Lxml-checkins] r48193 - lxml/trunk/doc Message-ID: <20071030115452.7DCB08120@code0.codespeak.net> Author: scoder Date: Tue Oct 30 12:54:52 2007 New Revision: 48193 Modified: lxml/trunk/doc/FAQ.txt Log: FAQ fix Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Tue Oct 30 12:54:52 2007 @@ -465,7 +465,7 @@ * create the stylesheets (partially) programmatically in the main program, e.g. by adding ``xsl:output`` tags, ``xsl:include`` tags or - Templates (be careful with the order here) to the XSL tree, and then + templates (be careful with the order here) to the XSL tree, and then create the ``XSLT`` objects and store them in a read-only dictionary. That way, you can access and use them in any thread. 
Note that passing the same XSL tree into multiple ``XSLT()`` From scoder at codespeak.net Tue Oct 30 22:14:06 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 30 Oct 2007 22:14:06 +0100 (CET) Subject: [Lxml-checkins] r48213 - lxml/trunk/doc Message-ID: <20071030211406.59FD9816D@code0.codespeak.net> Author: scoder Date: Tue Oct 30 22:14:03 2007 New Revision: 48213 Modified: lxml/trunk/doc/FAQ.txt Log: FAQ update Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Tue Oct 30 22:14:03 2007 @@ -468,10 +468,12 @@ templates (be careful with the order here) to the XSL tree, and then create the ``XSLT`` objects and store them in a read-only dictionary. That way, you can access and use them in any thread. - Note that passing the same XSL tree into multiple ``XSLT()`` - instances will create independent stylesheets, so later - modifications of the tree will not be reflected in the already - created stylesheets. + +Note that passing the same XSL tree into multiple ``XSLT()`` instances +will create independent stylesheets, so later modifications of the +tree will not be reflected in the already created stylesheets. Also, +since lxml 2.0, you can deep copy XSLT objects using the ``copy`` +module from the standard library. My program crashes when run with mod_python/Pyro/Zope/Plone/... 
From scoder at codespeak.net Tue Oct 30 22:15:20 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 30 Oct 2007 22:15:20 +0100 (CET) Subject: [Lxml-checkins] r48214 - lxml/trunk/src/lxml Message-ID: <20071030211520.08905816D@code0.codespeak.net> Author: scoder Date: Tue Oct 30 22:15:20 2007 New Revision: 48214 Modified: lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/lxml.objectify.pyx lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/proxy.pxi Log: raise memory errors in a couple of places Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Tue Oct 30 22:15:20 2007 @@ -164,11 +164,13 @@ def __init__(self): self._storage = [] - cdef void add(self, obj): + cdef int add(self, obj) except -1: python.PyList_Append(self._storage, obj) + return 0 - cdef void clear(self): + cdef int clear(self) except -1: del self._storage[:] + return 0 # class for temporarily storing exceptions raised in extensions cdef class _ExceptionContext: @@ -182,12 +184,12 @@ cdef void _store_exception(self, exception): self._exc_info = (exception, None, None) - cdef int _has_raised(self): + cdef bint _has_raised(self): return self._exc_info is not None - cdef _raise_if_stored(self): + cdef int _raise_if_stored(self) except -1: if self._exc_info is None: - return + return 0 type, value, traceback = self._exc_info self._exc_info = None if value is None and traceback is None: @@ -315,7 +317,7 @@ return ns cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, - char* c_href, char* c_prefix): + char* c_href, char* c_prefix) except NULL: """Get or create namespace structure for a node. Reuses the prefix if possible. 
""" @@ -336,9 +338,12 @@ if tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is NULL: break - return tree.xmlNewNs(c_node, c_href, c_prefix) + c_ns = tree.xmlNewNs(c_node, c_href, c_prefix) + if c_ns is NULL: + python.PyErr_NoMemory() + return c_ns - cdef void _setNodeNs(self, xmlNode* c_node, char* href): + cdef int _setNodeNs(self, xmlNode* c_node, char* href) except -1: "Lookup namespace structure and set it for the node." cdef xmlNs* c_ns c_ns = self._findOrBuildNodeNs(c_node, href, NULL) Modified: lxml/trunk/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.objectify.pyx (original) +++ lxml/trunk/src/lxml/lxml.objectify.pyx Tue Oct 30 22:15:20 2007 @@ -1224,7 +1224,7 @@ ################################################################################ # Pickle support -cdef void _setupPickle(reduceFunction): +cdef _setupPickle(reduceFunction): import copy_reg copy_reg.constructor(fromstring) copy_reg.pickle(ObjectifiedElement, reduceFunction, fromstring) Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Tue Oct 30 22:15:20 2007 @@ -1212,13 +1212,15 @@ parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() return (<_BaseParser>parser)._parseDocFromFilelike(source, filename) -cdef xmlDoc* _newDoc(): +cdef xmlDoc* _newDoc() except NULL: cdef xmlDoc* result result = tree.xmlNewDoc("1.0") + if result is NULL: + python.PyErr_NoMemory() __GLOBAL_PARSER_CONTEXT.initDocDict(result) return result -cdef xmlDoc* _copyDoc(xmlDoc* c_doc, int recursive): +cdef xmlDoc* _copyDoc(xmlDoc* c_doc, int recursive) except NULL: cdef python.PyThreadState* state cdef xmlDoc* result if recursive: @@ -1226,10 +1228,12 @@ result = tree.xmlCopyDoc(c_doc, recursive) if recursive: python.PyEval_RestoreThread(state) + if result is NULL: + python.PyErr_NoMemory() 
__GLOBAL_PARSER_CONTEXT.initDocDict(result) return result -cdef xmlDoc* _copyDocRoot(xmlDoc* c_doc, xmlNode* c_new_root): +cdef xmlDoc* _copyDocRoot(xmlDoc* c_doc, xmlNode* c_new_root) except NULL: "Recursively copy the document and make c_new_root the new root node." cdef python.PyThreadState* state cdef xmlDoc* result @@ -1238,15 +1242,19 @@ __GLOBAL_PARSER_CONTEXT.initDocDict(result) state = python.PyEval_SaveThread() c_node = tree.xmlDocCopyNode(c_new_root, result, 1) # recursive + python.PyEval_RestoreThread(state) + if c_node is NULL: + python.PyErr_NoMemory() tree.xmlDocSetRootElement(result, c_node) _copyTail(c_new_root.next, c_node) - python.PyEval_RestoreThread(state) return result -cdef xmlNode* _copyNodeToDoc(xmlNode* c_node, xmlDoc* c_doc): +cdef xmlNode* _copyNodeToDoc(xmlNode* c_node, xmlDoc* c_doc) except NULL: "Recursively copy the element into the document. c_doc is not modified." cdef xmlNode* c_root c_root = tree.xmlDocCopyNode(c_node, c_doc, 1) # recursive + if c_root is NULL: + python.PyErr_NoMemory() _copyTail(c_node.next, c_root) return c_root Modified: lxml/trunk/src/lxml/proxy.pxi ============================================================================== --- lxml/trunk/src/lxml/proxy.pxi (original) +++ lxml/trunk/src/lxml/proxy.pxi Tue Oct 30 22:15:20 2007 @@ -49,7 +49,7 @@ ################################################################################ # temporarily make a node the root node of its document -cdef xmlDoc* _fakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node): +cdef xmlDoc* _fakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node) except NULL: # build a temporary document that has the given node as root node # note that copy and original must not be modified during its lifetime!! # always call _destroyFakeDoc() after use! 
From scoder at codespeak.net Tue Oct 30 22:15:48 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 30 Oct 2007 22:15:48 +0100 (CET) Subject: [Lxml-checkins] r48215 - lxml/trunk/src/lxml Message-ID: <20071030211548.5B4AC816D@code0.codespeak.net> Author: scoder Date: Tue Oct 30 22:15:47 2007 New Revision: 48215 Modified: lxml/trunk/src/lxml/proxy.pxi Log: attribute namespace setup fix Modified: lxml/trunk/src/lxml/proxy.pxi ============================================================================== --- lxml/trunk/src/lxml/proxy.pxi (original) +++ lxml/trunk/src/lxml/proxy.pxi Tue Oct 30 22:15:47 2007 @@ -258,7 +258,7 @@ if c_ns is not NULL: # not in cache, must find a replacement from this document - c_new_ns = doc._findOrBuildNodeNs(c_node, + c_new_ns = doc._findOrBuildNodeNs(c_element, c_ns.href, c_ns.prefix) if c_cache_last >= c_cache_size: # must resize cache From scoder at codespeak.net Tue Oct 30 22:16:08 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 30 Oct 2007 22:16:08 +0100 (CET) Subject: [Lxml-checkins] r48216 - lxml/branch/lxml-1.3/src/lxml Message-ID: <20071030211608.9D724816D@code0.codespeak.net> Author: scoder Date: Tue Oct 30 22:16:08 2007 New Revision: 48216 Modified: lxml/branch/lxml-1.3/src/lxml/proxy.pxi Log: attribute namespace setup fix Modified: lxml/branch/lxml-1.3/src/lxml/proxy.pxi ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/proxy.pxi (original) +++ lxml/branch/lxml-1.3/src/lxml/proxy.pxi Tue Oct 30 22:16:08 2007 @@ -258,7 +258,7 @@ if c_ns is not NULL: # not in cache, must find a replacement from this document - c_new_ns = doc._findOrBuildNodeNs(c_node, + c_new_ns = doc._findOrBuildNodeNs(c_element, c_ns.href, c_ns.prefix) if c_cache_last >= c_cache_size: # must resize cache From scoder at codespeak.net Wed Oct 31 14:39:30 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 31 Oct 2007 14:39:30 +0100 
(CET) Subject: [Lxml-checkins] r48224 - lxml/trunk/src/lxml Message-ID: <20071031133930.85CF681B5@code0.codespeak.net> Author: scoder Date: Wed Oct 31 14:39:29 2007 New Revision: 48224 Modified: lxml/trunk/src/lxml/lxml.etree.pyx Log: added assertion Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Wed Oct 31 14:39:29 2007 @@ -323,6 +323,10 @@ """ cdef xmlNs* c_ns cdef xmlNs* c_doc_ns + if c_node.type != tree.XML_ELEMENT_NODE: + assert c_node.type == tree.XML_ELEMENT_NODE, \ + "invalid node type %d, expected %d" % ( + c_node.type, tree.XML_ELEMENT_NODE) # look for existing ns c_ns = tree.xmlSearchNsByHref(self._c_doc, c_node, c_href) if c_ns is not NULL: