From ianb at codespeak.net Tue Oct 2 00:24:09 2007 From: ianb at codespeak.net (ianb at codespeak.net) Date: Tue, 2 Oct 2007 00:24:09 +0200 (CEST) Subject: [Lxml-checkins] r47079 - lxml/trunk/src/lxml/html Message-ID: <20071001222409.DD3168111@code0.codespeak.net> Author: ianb Date: Tue Oct 2 00:24:07 2007 New Revision: 47079 Modified: lxml/trunk/src/lxml/html/setmixin.py Log: fix the in-place operators in SetMixin Modified: lxml/trunk/src/lxml/html/setmixin.py ============================================================================== --- lxml/trunk/src/lxml/html/setmixin.py (original) +++ lxml/trunk/src/lxml/html/setmixin.py Tue Oct 2 00:24:07 2007 @@ -71,21 +71,27 @@ for item in other: self.add(item) - __ior__ = update + def __ior__(self, other): + self.update(other) + return self def intersection_update(self, other): for item in self: if item not in other: self.remove(item) - __iand__ = intersection_update + def __iand__(self, other): + self.intersection_update(other) + return self def difference_update(self, other): for item in other: if item in self: self.remove(item) - __isub__ = difference_update + def __isub__(self, other): + self.difference_update(other) + return self def symmetric_difference_update(self, other): for item in other: @@ -94,7 +100,9 @@ else: self.add(item) - __ixor__ = symmetric_difference_update + def __ixor__(self, other): + self.symmetric_difference_update(other) + return self def discard(self, item): try: From lxml-checkins at codespeak.net Thu Oct 4 23:42:44 2007 From: lxml-checkins at codespeak.net (VIAGRA ® Official Site) Date: Thu, 4 Oct 2007 23:42:44 +0200 (CEST) Subject: [Lxml-checkins] October 75% OFF Message-ID: <20071004034331.7317.qmail@host75.201-252-1.telecom.net.ar> An HTML attachment was scrubbed... 
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20071004/e946f154/attachment.htm From lxml-checkins at codespeak.net Fri Oct 5 17:54:08 2007 From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net) Date: Fri, 5 Oct 2007 17:54:08 +0200 (CEST) Subject: [Lxml-checkins] Check out what's new Message-ID: <74047053679.3673357686918@delivery.net> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20071005/e44a8a2b/attachment.htm From scoder at codespeak.net Sun Oct 7 06:30:36 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 7 Oct 2007 06:30:36 +0200 (CEST) Subject: [Lxml-checkins] r47250 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20071007043036.BD7A380FB@code0.codespeak.net> Author: scoder Date: Sun Oct 7 06:30:34 2007 New Revision: 47250 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tests/test_htmlparser.py lxml/trunk/src/lxml/tests/test_unicode.py Log: let tag name validation distinguish HTML/XML tags based on the related parser, allow ':' in HTML tags Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Oct 7 06:30:34 2007 @@ -16,9 +16,11 @@ Other changes ------------- -* lxml.etree no longer validates unicode characters in tag names to - avoid rejecting HTML tags. Only special characters like ':' and '>' - are rejected. +* Tag name validation in lxml.etree (and lxml.html) now distinguishes + between HTML tags and XML tags based on the parser that was used to + parse or create them. HTML tags no longer reject any non-ASCII + characters in tag names but only spaces and the special characters + '<>&/'. 
2.0alpha3 (2007-09-26) Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Sun Oct 7 06:30:34 2007 @@ -99,7 +99,10 @@ """ cdef xmlNode* c_node ns_utf, name_utf = _getNsTag(tag) - _tagValidOrRaise(name_utf) + if parser is not None and parser._for_html: + _htmlTagValidOrRaise(name_utf) + else: + _tagValidOrRaise(name_utf) if doc is not None: c_doc = doc._c_doc elif c_doc is NULL: @@ -147,16 +150,22 @@ If 'c_doc' is also NULL, a new xmlDoc will be created. """ + cdef _BaseParser parser cdef _Document doc cdef xmlNode* c_node cdef xmlDoc* c_doc if parent is None or parent._doc is None: return None ns_utf, name_utf = _getNsTag(tag) - _tagValidOrRaise(name_utf) doc = parent._doc c_doc = doc._c_doc + parser = doc._parser + if parser is not None and parser._for_html: + _htmlTagValidOrRaise(name_utf) + else: + _tagValidOrRaise(name_utf) + c_node = _createElement(c_doc, name_utf) if c_node is NULL: python.PyErr_NoMemory() @@ -175,6 +184,7 @@ cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, extra): """Initialise the attributes of an element node. 
""" + cdef bint is_html cdef xmlNs* c_ns # 'extra' is not checked here (expected to be a keyword dict) if attrib is not None and not hasattr(attrib, 'items'): @@ -185,9 +195,11 @@ else: attrib.update(extra) if attrib: + is_html = doc._parser._for_html for name, value in attrib.items(): attr_ns_utf, attr_name_utf = _getNsTag(name) - _attributeValidOrRaise(attr_name_utf) + if not is_html: + _attributeValidOrRaise(attr_name_utf) value_utf = _utf8(value) if attr_ns_utf is None: tree.xmlNewProp(c_node, _cstr(attr_name_utf), _cstr(value_utf)) @@ -242,7 +254,8 @@ cdef char* c_value cdef char* c_tag ns, tag = _getNsTag(key) - _attributeValidOrRaise(tag) + if not element._doc._parser._for_html: + _attributeValidOrRaise(tag) c_tag = _cstr(tag) if isinstance(value, QName): value = _resolveQNameText(element, value) @@ -790,13 +803,17 @@ cdef int _pyXmlNameIsValid(name_utf8): return _xmlNameIsValid(_cstr(name_utf8)) +cdef int _pyHtmlNameIsValid(name_utf8): + return _htmlNameIsValid(_cstr(name_utf8)) + cdef int _xmlNameIsValid(char* c_name): - #return tree.xmlValidateNCName(c_name, 0) == 0 + return tree.xmlValidateNCName(c_name, 0) == 0 + +cdef int _htmlNameIsValid(char* c_name): if c_name is NULL or c_name[0] == c'\0': return 0 while c_name[0] != c'\0': - if c_name[0] == c':' or \ - c_name[0] == c'&' or \ + if c_name[0] == c'&' or \ c_name[0] == c'<' or \ c_name[0] == c'>' or \ c_name[0] == c'/' or \ @@ -815,6 +832,12 @@ python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', 'strict') return 0 +cdef int _htmlTagValidOrRaise(tag_utf) except -1: + if not _pyHtmlNameIsValid(tag_utf): + raise ValueError, "Invalid HTML tag name %r" % \ + python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', 'strict') + return 0 + cdef int _attributeValidOrRaise(name_utf) except -1: if not _pyXmlNameIsValid(name_utf): raise ValueError, "Invalid attribute name %r" % \ Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- 
lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Oct 7 06:30:34 2007 @@ -707,8 +707,13 @@ return self._tag def __set__(self, value): + cdef _BaseParser parser ns, name = _getNsTag(value) - _tagValidOrRaise(name) + parser = self._doc._parser + if parser is not None and parser._for_html: + _htmlTagValidOrRaise(name) + else: + _tagValidOrRaise(name) self._tag = value tree.xmlNodeSetName(self._c_node, _cstr(name)) if ns is None: Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_htmlparser.py (original) +++ lxml/trunk/src/lxml/tests/test_htmlparser.py Sun Oct 7 06:30:34 2007 @@ -39,6 +39,74 @@ self.assertRaises(self.etree.XMLSyntaxError, parse, f, parser) + def test_html_element_name_empty(self): + parser = self.etree.HTMLParser() + Element = parser.makeelement + + el = Element('name') + self.assertRaises(ValueError, Element, '{}') + self.assertRaises(ValueError, setattr, el, 'tag', '{}') + + self.assertRaises(ValueError, Element, '{test}') + self.assertRaises(ValueError, setattr, el, 'tag', '{test}') + + def test_html_element_name_colon(self): + parser = self.etree.HTMLParser() + Element = parser.makeelement + + pname = Element('p:name') + self.assertEquals(pname.tag, 'p:name') + + pname = Element('{test}p:name') + self.assertEquals(pname.tag, '{test}p:name') + + pname = Element('name') + pname.tag = 'p:name' + self.assertEquals(pname.tag, 'p:name') + + def test_html_element_name_space(self): + parser = self.etree.HTMLParser() + Element = parser.makeelement + + self.assertRaises(ValueError, Element, ' name ') + self.assertRaises(ValueError, Element, 'na me') + self.assertRaises(ValueError, Element, '{test} name') + + el = Element('name') + self.assertRaises(ValueError, setattr, el, 'tag', ' name ') + + def test_html_subelement_name_empty(self): + parser = self.etree.HTMLParser() + Element = parser.makeelement + + 
SubElement = self.etree.SubElement + + el = Element('name') + self.assertRaises(ValueError, SubElement, el, '{}') + self.assertRaises(ValueError, SubElement, el, '{test}') + + def test_html_subelement_name_colon(self): + parser = self.etree.HTMLParser() + Element = parser.makeelement + SubElement = self.etree.SubElement + + el = Element('name') + pname = SubElement(el, 'p:name') + self.assertEquals(pname.tag, 'p:name') + + pname = SubElement(el, '{test}p:name') + self.assertEquals(pname.tag, '{test}p:name') + + def test_html_subelement_name_space(self): + parser = self.etree.HTMLParser() + Element = parser.makeelement + SubElement = self.etree.SubElement + + el = Element('name') + self.assertRaises(ValueError, SubElement, el, ' name ') + self.assertRaises(ValueError, SubElement, el, 'na me') + self.assertRaises(ValueError, SubElement, el, '{test} name') + def test_module_parse_html_norecover(self): parser = self.etree.HTMLParser(recover=False) parse = self.etree.parse Modified: lxml/trunk/src/lxml/tests/test_unicode.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_unicode.py (original) +++ lxml/trunk/src/lxml/tests/test_unicode.py Sun Oct 7 06:30:34 2007 @@ -5,9 +5,9 @@ ascii_uni = u'a' -# klingon = u"\uF8D2" # not valid for XML names +klingon = u"\uF8D2" # not valid for XML names -invalid_tag = "\u0680:\u3120" +invalid_tag = "test" + klingon uni = u'?\u0680\u3120' # some non-ASCII characters From scoder at codespeak.net Sun Oct 7 06:32:49 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 7 Oct 2007 06:32:49 +0200 (CEST) Subject: [Lxml-checkins] r47251 - in lxml/trunk: . 
doc Message-ID: <20071007043249.5136B80FB@code0.codespeak.net> Author: scoder Date: Sun Oct 7 06:32:49 2007 New Revision: 47251 Modified: lxml/trunk/CHANGES.txt lxml/trunk/doc/main.txt lxml/trunk/version.txt Log: 2.0alpha4 Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Oct 7 06:32:49 2007 @@ -2,8 +2,9 @@ lxml changelog ============== -Under development -================= + +2.0alpha3 (2007-10-07) +====================== Features added -------------- Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Sun Oct 7 06:32:49 2007 @@ -138,8 +138,8 @@ .. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/ .. _`this key`: pubkey.asc -The latest version is `lxml 2.0alpha3`_, released 2007-09-26 -(`changes for 2.0alpha3`_). `Older versions`_ are listed below. +The latest version is `lxml 2.0alpha4`_, released 2007-10-07 +(`changes for 2.0alpha4`_). `Older versions`_ are listed below. .. 
_`Older versions`: #old-versions @@ -199,6 +199,8 @@ Old Versions ------------ +* `lxml 2.0alpha3`_, released 2007-09-26 (`changes for 2.0alpha3`_) + * `lxml 2.0alpha2`_, released 2007-09-15 (`changes for 2.0alpha2`_) * `lxml 2.0alpha1`_, released 2007-09-02 (`changes for 2.0alpha1`_) Modified: lxml/trunk/version.txt ============================================================================== --- lxml/trunk/version.txt (original) +++ lxml/trunk/version.txt Sun Oct 7 06:32:49 2007 @@ -1 +1 @@ -2.0alpha3 +2.0alpha4 From scoder at codespeak.net Sun Oct 7 06:34:18 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 7 Oct 2007 06:34:18 +0200 (CEST) Subject: [Lxml-checkins] r47252 - lxml/trunk/doc Message-ID: <20071007043418.0354580FB@code0.codespeak.net> Author: scoder Date: Sun Oct 7 06:34:18 2007 New Revision: 47252 Modified: lxml/trunk/doc/main.txt Log: 2.0alpha4 Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Sun Oct 7 06:34:18 2007 @@ -251,6 +251,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.0alpha4`: lxml-2.0alpha4.tgz .. _`lxml 2.0alpha3`: lxml-2.0alpha3.tgz .. _`lxml 2.0alpha2`: lxml-2.0alpha2.tgz .. _`lxml 2.0alpha1`: lxml-2.0alpha1.tgz @@ -277,6 +278,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.0alpha4`: changes-2.0alpha4.html .. _`changes for 2.0alpha3`: changes-2.0alpha3.html .. _`changes for 2.0alpha2`: changes-2.0alpha2.html .. 
_`changes for 2.0alpha1`: changes-2.0alpha1.html From scoder at codespeak.net Sun Oct 7 06:34:44 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 7 Oct 2007 06:34:44 +0200 (CEST) Subject: [Lxml-checkins] r47253 - lxml/trunk Message-ID: <20071007043444.51E6E80FB@code0.codespeak.net> Author: scoder Date: Sun Oct 7 06:34:44 2007 New Revision: 47253 Modified: lxml/trunk/CHANGES.txt Log: 2.0alpha4 Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Oct 7 06:34:44 2007 @@ -3,7 +3,7 @@ ============== -2.0alpha3 (2007-10-07) +2.0alpha4 (2007-10-07) ====================== Features added From scoder at codespeak.net Sun Oct 7 07:03:33 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 7 Oct 2007 07:03:33 +0200 (CEST) Subject: [Lxml-checkins] r47254 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20071007050333.809498113@code0.codespeak.net> Author: scoder Date: Sun Oct 7 07:03:32 2007 New Revision: 47254 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/tests/test_etree.py lxml/trunk/src/lxml/tests/test_htmlparser.py Log: added " and ' to the list of invalid HTML tag characters Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Oct 7 07:03:32 2007 @@ -21,7 +21,7 @@ between HTML tags and XML tags based on the parser that was used to parse or create them. HTML tags no longer reject any non-ASCII characters in tag names but only spaces and the special characters - '<>&/'. + ``<>&/"'``. 
2.0alpha3 (2007-09-26) Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Sun Oct 7 07:03:32 2007 @@ -817,6 +817,8 @@ c_name[0] == c'<' or \ c_name[0] == c'>' or \ c_name[0] == c'/' or \ + c_name[0] == c'"' or \ + c_name[0] == c"'" or \ c_name[0] == c'\x09' or \ c_name[0] == c'\x0A' or \ c_name[0] == c'\x0B' or \ Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Sun Oct 7 07:03:32 2007 @@ -74,6 +74,18 @@ el = Element('name') self.assertRaises(ValueError, setattr, el, 'tag', 'p:name') + def test_element_name_quote(self): + Element = self.etree.Element + self.assertRaises(ValueError, Element, "p'name") + self.assertRaises(ValueError, Element, 'p"name') + + self.assertRaises(ValueError, Element, "{test}p'name") + self.assertRaises(ValueError, Element, '{test}p"name') + + el = Element('name') + self.assertRaises(ValueError, setattr, el, 'tag', "p'name") + self.assertRaises(ValueError, setattr, el, 'tag', 'p"name') + def test_element_name_space(self): Element = self.etree.Element self.assertRaises(ValueError, Element, ' name ') @@ -99,6 +111,17 @@ self.assertRaises(ValueError, SubElement, el, 'p:name') self.assertRaises(ValueError, SubElement, el, '{test}p:name') + def test_subelement_name_quote(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + + el = Element('name') + self.assertRaises(ValueError, SubElement, el, "p'name") + self.assertRaises(ValueError, SubElement, el, "{test}p'name") + + self.assertRaises(ValueError, SubElement, el, 'p"name') + self.assertRaises(ValueError, SubElement, el, '{test}p"name') + def test_subelement_name_space(self): Element = self.etree.Element SubElement = self.etree.SubElement 
Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_htmlparser.py (original) +++ lxml/trunk/src/lxml/tests/test_htmlparser.py Sun Oct 7 07:03:32 2007 @@ -64,6 +64,20 @@ pname.tag = 'p:name' self.assertEquals(pname.tag, 'p:name') + def test_html_element_name_quote(self): + parser = self.etree.HTMLParser() + Element = parser.makeelement + + self.assertRaises(ValueError, Element, 'p"name') + self.assertRaises(ValueError, Element, "na'me") + self.assertRaises(ValueError, Element, '{test}"name') + self.assertRaises(ValueError, Element, "{test}name'") + + el = Element('name') + self.assertRaises(ValueError, setattr, el, 'tag', "pname'") + self.assertRaises(ValueError, setattr, el, 'tag', '"pname') + self.assertEquals(el.tag, "name") + def test_html_element_name_space(self): parser = self.etree.HTMLParser() Element = parser.makeelement @@ -74,6 +88,7 @@ el = Element('name') self.assertRaises(ValueError, setattr, el, 'tag', ' name ') + self.assertEquals(el.tag, "name") def test_html_subelement_name_empty(self): parser = self.etree.HTMLParser() @@ -97,6 +112,17 @@ pname = SubElement(el, '{test}p:name') self.assertEquals(pname.tag, '{test}p:name') + def test_html_subelement_name_quote(self): + parser = self.etree.HTMLParser() + Element = parser.makeelement + SubElement = self.etree.SubElement + + el = Element('name') + self.assertRaises(ValueError, SubElement, el, "name'") + self.assertRaises(ValueError, SubElement, el, 'na"me') + self.assertRaises(ValueError, SubElement, el, "{test}na'me") + self.assertRaises(ValueError, SubElement, el, '{test}"name') + def test_html_subelement_name_space(self): parser = self.etree.HTMLParser() Element = parser.makeelement From scoder at codespeak.net Sun Oct 7 22:12:25 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 7 Oct 2007 22:12:25 +0200 (CEST) Subject: [Lxml-checkins] r47270 - lxml/trunk/doc 
Message-ID: <20071007201225.4C1318188@code0.codespeak.net> Author: scoder Date: Sun Oct 7 22:12:23 2007 New Revision: 47270 Modified: lxml/trunk/doc/lxml2.txt Log: docs Modified: lxml/trunk/doc/lxml2.txt ============================================================================== --- lxml/trunk/doc/lxml2.txt (original) +++ lxml/trunk/doc/lxml2.txt Sun Oct 7 22:12:23 2007 @@ -78,7 +78,7 @@ type annotation on serialisation, you can use the ``deannotate()`` function. * The C-API function ``findOrBuildNodeNs()`` was replaced by the more generic - ``findOrBuildNodeNsPrefix()`` + ``findOrBuildNodeNsPrefix()`` that accepts an additional default prefix. Enhancements From scoder at codespeak.net Mon Oct 8 21:57:08 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 8 Oct 2007 21:57:08 +0200 (CEST) Subject: [Lxml-checkins] r47317 - lxml/trunk/src/lxml Message-ID: <20071008195708.A49C3815A@code0.codespeak.net> Author: scoder Date: Mon Oct 8 21:57:08 2007 New Revision: 47317 Modified: lxml/trunk/src/lxml/apihelpers.pxi Log: handle exceptions in subelement creation Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Mon Oct 8 21:57:08 2007 @@ -171,15 +171,24 @@ python.PyErr_NoMemory() tree.xmlAddChild(parent._c_node, c_node) - if text is not None: - _setNodeText(c_node, text) - if tail is not None: - _setTailText(c_node, tail) - - # add namespaces to node if necessary - doc._setNodeNamespaces(c_node, ns_utf, nsmap) - _initNodeAttributes(c_node, doc, attrib, extra_attrs) - return _elementFactory(doc, c_node) + try: + if text is not None: + _setNodeText(c_node, text) + if tail is not None: + _setTailText(c_node, tail) + + # add namespaces to node if necessary + doc._setNodeNamespaces(c_node, ns_utf, nsmap) + _initNodeAttributes(c_node, doc, attrib, extra_attrs) + return _elementFactory(doc, c_node) + 
except: + # free allocated c_node/c_doc unless Python does it for us + if c_node.doc is not c_doc: + # node not yet in document => will not be freed by document + if tail is not None: + _removeText(c_node.next) # tail + tree.xmlFreeNode(c_node) + raise cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, extra): """Initialise the attributes of an element node. From scoder at codespeak.net Mon Oct 8 22:00:20 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 8 Oct 2007 22:00:20 +0200 (CEST) Subject: [Lxml-checkins] r47318 - in lxml/trunk: . src/lxml Message-ID: <20071008200020.C380F80C9@code0.codespeak.net> Author: scoder Date: Mon Oct 8 22:00:20 2007 New Revision: 47318 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/etree.pyx Log: make namespace prefix counter a Python long to avoid crashes by counter overflow Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Oct 8 22:00:20 2007 @@ -2,6 +2,21 @@ lxml changelog ============== +Under development +================= + +Features added +-------------- + +Bugs fixed +---------- + +* lxml.etree could crash when adding more than 10000 namespaces to a + document + +Other changes +------------- + 2.0alpha4 (2007-10-07) ====================== Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Mon Oct 8 22:00:20 2007 @@ -229,7 +229,7 @@ When instances of this class are garbage collected, the libxml document is cleaned up. 
""" - cdef int _ns_counter + cdef object _ns_counter cdef xmlDoc* _c_doc cdef _BaseParser _parser @@ -295,7 +295,7 @@ return self._c_doc.URL cdef buildNewPrefix(self): - ns = python.PyString_FromFormat("ns%d", self._ns_counter) + ns = "ns%d" % self._ns_counter self._ns_counter = self._ns_counter + 1 return ns @@ -304,7 +304,6 @@ """Get or create namespace structure for a node. Reuses the prefix if possible. """ - cdef int i cdef xmlNs* c_ns cdef xmlNs* c_doc_ns # look for existing ns @@ -315,15 +314,12 @@ if c_prefix is NULL or \ tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is not NULL: # try to simulate ElementTree's namespace prefix creation - for i from 0 <= i < 10000: + while 1: prefix = self.buildNewPrefix() c_prefix = _cstr(prefix) # make sure it's not used already if tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is NULL: break - if i >= 10000: - # XXX too many prefixes in use - this is pretty bad! - return NULL return tree.xmlNewNs(c_node, c_href, c_prefix) @@ -333,8 +329,8 @@ c_ns = self._findOrBuildNodeNs(c_node, href, NULL) tree.xmlSetNs(c_node, c_ns) - cdef void _setNodeNamespaces(self, xmlNode* c_node, - object node_ns_utf, object nsmap): + cdef int _setNodeNamespaces(self, xmlNode* c_node, + object node_ns_utf, object nsmap) except -1: """Lookup current namespace prefixes, then set namespace structure for node and register new ns-prefix mappings. 
@@ -347,7 +343,7 @@ if not nsmap: if node_ns_utf is not None: self._setNodeNs(c_node, _cstr(node_ns_utf)) - return + return 0 c_doc = self._c_doc for prefix, href in nsmap.items(): @@ -368,6 +364,7 @@ if node_ns_utf is not None: self._setNodeNs(c_node, _cstr(node_ns_utf)) + return 0 cdef extern from "etree_defs.h": # macro call to 't->tp_new()' for fast instantiation @@ -377,7 +374,7 @@ cdef _Document result result = NEW_DOCUMENT(_Document) result._c_doc = c_doc - result._ns_counter = 0 + result._ns_counter = 0L if parser is None: parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() result._parser = parser From scoder at codespeak.net Mon Oct 8 22:59:19 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 8 Oct 2007 22:59:19 +0200 (CEST) Subject: [Lxml-checkins] r47319 - in lxml/branch/lxml-1.3: . src/lxml Message-ID: <20071008205919.961DE812D@code0.codespeak.net> Author: scoder Date: Mon Oct 8 22:59:18 2007 New Revision: 47319 Modified: lxml/branch/lxml-1.3/CHANGES.txt lxml/branch/lxml-1.3/src/lxml/etree.pyx Log: make namespace prefix counter a Python long to avoid crashes by counter overflow Modified: lxml/branch/lxml-1.3/CHANGES.txt ============================================================================== --- lxml/branch/lxml-1.3/CHANGES.txt (original) +++ lxml/branch/lxml-1.3/CHANGES.txt Mon Oct 8 22:59:18 2007 @@ -11,6 +11,9 @@ Bugs fixed ---------- +* lxml.etree could crash when adding more than 10000 namespaces to a + document + * lxml failed to serialise namespace declarations of elements other than the root node of a tree Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-1.3/src/lxml/etree.pyx Mon Oct 8 22:59:18 2007 @@ -231,7 +231,7 @@ When instances of this class are garbage collected, the libxml document is cleaned up. 
""" - cdef int _ns_counter + cdef object _ns_counter cdef xmlDoc* _c_doc cdef _BaseParser _parser @@ -297,7 +297,7 @@ return self._c_doc.URL cdef buildNewPrefix(self): - ns = python.PyString_FromFormat("ns%d", self._ns_counter) + ns = "ns%d" % self._ns_counter self._ns_counter = self._ns_counter + 1 return ns @@ -306,7 +306,6 @@ """Get or create namespace structure for a node. Reuses the prefix if possible. """ - cdef int i cdef xmlNs* c_ns cdef xmlNs* c_doc_ns # look for existing ns @@ -317,15 +316,12 @@ if c_prefix is NULL or \ tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is not NULL: # try to simulate ElementTree's namespace prefix creation - for i from 0 <= i < 10000: + while 1: prefix = self.buildNewPrefix() c_prefix = _cstr(prefix) # make sure it's not used already if tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is NULL: break - if i >= 10000: - # XXX too many prefixes in use - this is pretty bad! - return NULL return tree.xmlNewNs(c_node, c_href, c_prefix) @@ -335,8 +331,8 @@ c_ns = self._findOrBuildNodeNs(c_node, href, NULL) tree.xmlSetNs(c_node, c_ns) - cdef void _setNodeNamespaces(self, xmlNode* c_node, - object node_ns_utf, object nsmap): + cdef int _setNodeNamespaces(self, xmlNode* c_node, + object node_ns_utf, object nsmap) except -1: """Lookup current namespace prefixes, then set namespace structure for node and register new ns-prefix mappings. 
@@ -349,7 +345,7 @@ if not nsmap: if node_ns_utf is not None: self._setNodeNs(c_node, _cstr(node_ns_utf)) - return + return 0 c_doc = self._c_doc for prefix, href in nsmap.items(): @@ -370,12 +366,13 @@ if node_ns_utf is not None: self._setNodeNs(c_node, _cstr(node_ns_utf)) + return 0 cdef _Document _documentFactory(xmlDoc* c_doc, _BaseParser parser): cdef _Document result result = _Document() result._c_doc = c_doc - result._ns_counter = 0 + result._ns_counter = 0L if parser is None: parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() result._parser = parser From scoder at codespeak.net Tue Oct 9 11:20:55 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 9 Oct 2007 11:20:55 +0200 (CEST) Subject: [Lxml-checkins] r47326 - lxml/trunk/src/lxml Message-ID: <20071009092055.7A8F080DA@code0.codespeak.net> Author: scoder Date: Tue Oct 9 11:20:53 2007 New Revision: 47326 Modified: lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/config.pxd lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/etreepublic.pxd lxml/trunk/src/lxml/iterparse.pxi lxml/trunk/src/lxml/objectify.pyx lxml/trunk/src/lxml/objectpath.pxi lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/public-api.pxi lxml/trunk/src/lxml/serializer.pxi lxml/trunk/src/lxml/tree.pxd lxml/trunk/src/lxml/xmlerror.pxi Log: use 'bint' instead of 'int' Pyrex type where appropriate Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Tue Oct 9 11:20:53 2007 @@ -448,7 +448,7 @@ element._c_node, _cstr(ns), NULL) return '%s:%s' % (c_ns.prefix, tag) -cdef int _hasChild(xmlNode* c_node): +cdef bint _hasChild(xmlNode* c_node): return c_node is not NULL and _findChildForwards(c_node, 0) is not NULL cdef xmlNode* _findChild(xmlNode* c_node, Py_ssize_t index): @@ -534,7 +534,7 @@ return NULL return c_node -cdef int _tagMatches(xmlNode* c_node, char* 
c_href, char* c_name): +cdef bint _tagMatches(xmlNode* c_node, char* c_href, char* c_name): """Tests if the node matches namespace URI and tag name. A node matches if it matches both c_href and c_name. @@ -697,7 +697,7 @@ cdef char* s cdef char* c_end cdef char c - cdef int is_non_ascii + cdef bint is_non_ascii s = _cstr(pystring) c_end = s + python.PyString_GET_SIZE(pystring) is_non_ascii = 0 Modified: lxml/trunk/src/lxml/config.pxd ============================================================================== --- lxml/trunk/src/lxml/config.pxd (original) +++ lxml/trunk/src/lxml/config.pxd Tue Oct 9 11:20:53 2007 @@ -1,3 +1,3 @@ cdef extern from "etree_defs.h": - cdef int ENABLE_THREADING - cdef int ENABLE_SCHEMATRON + cdef bint ENABLE_THREADING + cdef bint ENABLE_SCHEMATRON Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Tue Oct 9 11:20:53 2007 @@ -853,7 +853,6 @@ return c def __nonzero__(self): - cdef xmlNode* c_node import warnings warnings.warn( "The behavior of this method will change in future versions. " @@ -861,7 +860,7 @@ FutureWarning ) # emulate old behaviour - return bool(_hasChild(self._c_node)) + return _hasChild(self._c_node) def __contains__(self, element): cdef xmlNode* c_node @@ -1384,22 +1383,22 @@ The keyword argument 'method' selects the output method: 'xml' or 'html'. """ - cdef int c_write_declaration + cdef bint write_declaration self._assertHasRoot() # suppress decl. 
in default case (purely for ElementTree compatibility) if xml_declaration is not None: - c_write_declaration = bool(xml_declaration) + write_declaration = xml_declaration if encoding is None: encoding = 'ASCII' elif encoding is None: encoding = 'ASCII' - c_write_declaration = 0 + write_declaration = 0 else: encoding = encoding.upper() - c_write_declaration = encoding not in \ + write_declaration = encoding not in \ ('US-ASCII', 'ASCII', 'UTF8', 'UTF-8') _tofilelike(file, self._context_node, encoding, method, - c_write_declaration, 1, bool(pretty_print)) + write_declaration, 1, pretty_print) def getpath(self, _Element element not None): """Returns a structural, absolute XPath expression to find that element. @@ -2164,7 +2163,7 @@ """Writes an element tree or element structure to sys.stdout. This function should be used for debugging only. """ - _dumpToFile(sys.stdout, elem._c_node, bool(pretty_print)) + _dumpToFile(sys.stdout, elem._c_node, pretty_print) def tostring(element_or_tree, encoding=None, method="xml", xml_declaration=None, pretty_print=False): @@ -2178,26 +2177,25 @@ The keyword argument 'pretty_print' (bool) enables formatted XML. - The keyword argument 'method' selects the output method: 'xml' or 'html'. + The keyword argument 'method' selects the output method: 'xml', + 'html' or plain 'text'. 
""" - cdef int write_declaration - cdef int c_pretty_print - c_pretty_print = bool(pretty_print) + cdef bint write_declaration if xml_declaration is None: # by default, write an XML declaration only for non-standard encodings write_declaration = encoding is not None and encoding.upper() not in \ ('ASCII', 'UTF-8', 'UTF8', 'US-ASCII') else: - write_declaration = bool(xml_declaration) + write_declaration = xml_declaration if encoding is None: encoding = 'ASCII' if isinstance(element_or_tree, _Element): return _tostring(<_Element>element_or_tree, encoding, method, - write_declaration, 0, c_pretty_print) + write_declaration, 0, pretty_print) elif isinstance(element_or_tree, _ElementTree): return _tostring((<_ElementTree>element_or_tree)._context_node, - encoding, method, write_declaration, 1, c_pretty_print) + encoding, method, write_declaration, 1, pretty_print) else: raise TypeError, "Type '%s' cannot be serialized." % type(element_or_tree) @@ -2218,17 +2216,16 @@ therefore not necessarily suited for serialization to byte streams without further treatment. - The keyword argument 'pretty_print' (bool) enables formatted XML. + The boolean keyword argument 'pretty_print' enables formatted XML. - The keyword argument 'method' selects the output method: 'xml' or 'html'. + The keyword argument 'method' selects the output method: 'xml', + 'html' or plain 'text'. """ - cdef int c_pretty_print - c_pretty_print = bool(pretty_print) if isinstance(element_or_tree, _Element): - return _tounicode(<_Element>element_or_tree, method, 0, c_pretty_print) + return _tounicode(<_Element>element_or_tree, method, 0, pretty_print) elif isinstance(element_or_tree, _ElementTree): return _tounicode((<_ElementTree>element_or_tree)._context_node, - method, 1, c_pretty_print) + method, 1, pretty_print) else: raise TypeError, "Type '%s' cannot be serialized." 
% type(element_or_tree) Modified: lxml/trunk/src/lxml/etreepublic.pxd ============================================================================== --- lxml/trunk/src/lxml/etreepublic.pxd (original) +++ lxml/trunk/src/lxml/etreepublic.pxd Tue Oct 9 11:20:53 2007 @@ -5,7 +5,7 @@ cdef extern from "etree_defs.h": # test if c_node is considered an Element (i.e. Element, Comment, etc.) - cdef int _isElement(tree.xmlNode* c_node) + cdef bint _isElement(tree.xmlNode* c_node) # return the namespace URI of the node or NULL cdef char* _getNs(tree.xmlNode* node) @@ -129,7 +129,7 @@ # XML node helper functions # check if the element has at least one child - cdef int hasChild(tree.xmlNode* c_node) + cdef bint hasChild(tree.xmlNode* c_node) # find child element number 'index' (supports negative indexes) cdef tree.xmlNode* findChild(tree.xmlNode* c_node, @@ -191,10 +191,10 @@ cdef object namespacedNameFromNsName(char* c_ns, char* c_tag) # check if the node has a text value (which may be '') - cdef int hasText(tree.xmlNode* c_node) + cdef bint hasText(tree.xmlNode* c_node) # check if the node has a tail value (which may be '') - cdef int hasTail(tree.xmlNode* c_node) + cdef bint hasTail(tree.xmlNode* c_node) # get the text content of an element (or None) cdef object textOf(tree.xmlNode* c_node) Modified: lxml/trunk/src/lxml/iterparse.pxi ============================================================================== --- lxml/trunk/src/lxml/iterparse.pxi (original) +++ lxml/trunk/src/lxml/iterparse.pxi Tue Oct 9 11:20:53 2007 @@ -292,7 +292,6 @@ filename = _encodeFilename(filename) self._source = source - html = bool(html) if html: # make sure we're not looking for namespaces if 'start' in events: Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Tue Oct 9 11:20:53 2007 @@ -1045,7 +1045,7 @@ cdef object _makeelement cdef 
object _namespace cdef object _nsmap - cdef int _annotate + cdef bint _annotate def __init__(self, namespace=None, nsmap=None, annotate=True, makeelement=None): if nsmap is None: @@ -1055,7 +1055,7 @@ self._namespace = None else: self._namespace = "{%s}" % namespace - self._annotate = bool(annotate) + self._annotate = annotate if makeelement is not None: assert callable(makeelement) self._makeelement = makeelement @@ -1077,15 +1077,15 @@ cdef object _tag cdef object _nsmap cdef object _element_factory - cdef int _annotate + cdef bint _annotate def __call__(self, *children, **attrib): cdef _ObjectifyElementMakerCaller elementMaker cdef python.PyObject* pytype cdef _Element element cdef _Element childElement - cdef int has_children - cdef int has_string_value + cdef bint has_children + cdef bint has_string_value if self._element_factory is None: element = _makeElement(self._tag, None, attrib, self._nsmap) else: @@ -1153,7 +1153,7 @@ ################################################################################ # Recursive element dumping -cdef int __RECURSIVE_STR +cdef bint __RECURSIVE_STR __RECURSIVE_STR = 0 # default: off def enableRecursiveStr(on=True): @@ -1161,7 +1161,7 @@ based on objectify.dump(element). """ global __RECURSIVE_STR - __RECURSIVE_STR = bool(on) + __RECURSIVE_STR = on def dump(_Element element not None): """Return a recursively generated string representation of an element. 
@@ -1323,8 +1323,7 @@ """ cdef _Element element element = cetree.rootNodeOrRaise(element_or_tree) - _annotate(element, 0, 1, bool(ignore_xsi), bool(ignore_old), - None, empty_pytype) + _annotate(element, 0, 1, ignore_xsi, ignore_old, None, empty_pytype) def xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, empty_type=None): @@ -1350,8 +1349,7 @@ """ cdef _Element element element = cetree.rootNodeOrRaise(element_or_tree) - _annotate(element, 1, 0, bool(ignore_old), bool(ignore_pytype), - empty_type, None) + _annotate(element, 1, 0, ignore_old, ignore_pytype, empty_type, None) def annotate(element_or_tree, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, @@ -1386,12 +1384,12 @@ """ cdef _Element element element = cetree.rootNodeOrRaise(element_or_tree) - _annotate(element, annotate_xsi, annotate_pytype, bool(ignore_xsi), - bool(ignore_old), empty_type, empty_pytype) + _annotate(element, annotate_xsi, annotate_pytype, ignore_xsi, + ignore_old, empty_type, empty_pytype) -cdef _annotate(_Element element, int annotate_xsi, int annotate_pytype, - int ignore_xsi, int ignore_pytype, +cdef _annotate(_Element element, bint annotate_xsi, bint annotate_pytype, + bint ignore_xsi, bint ignore_pytype, empty_type_name, empty_pytype_name): cdef _Document doc cdef tree.xmlNode* c_node Modified: lxml/trunk/src/lxml/objectpath.pxi ============================================================================== --- lxml/trunk/src/lxml/objectpath.pxi (original) +++ lxml/trunk/src/lxml/objectpath.pxi Tue Oct 9 11:20:53 2007 @@ -86,7 +86,7 @@ """Parse object path string into a 'hrefOnameOhrefOnameOOO' string and an index list. The index list is None if no index was used in the path. 
""" - cdef int has_dot + cdef bint has_dot new_path = [] path = cetree.utf8(path.strip()) if path == '.': Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Tue Oct 9 11:20:53 2007 @@ -431,7 +431,7 @@ cdef object _handleParseResult(self, _BaseParser parser, xmlDoc* result, filename): cdef xmlDoc* c_doc - cdef int recover + cdef bint recover recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER c_doc = _handleParseResult(self, self._c_ctxt, result, filename, recover) @@ -439,7 +439,7 @@ cdef xmlDoc* _handleParseResultDoc(self, _BaseParser parser, xmlDoc* result, filename) except NULL: - cdef int recover + cdef bint recover recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER return _handleParseResult(self, self._c_ctxt, result, filename, recover) @@ -481,8 +481,8 @@ cdef xmlDoc* _handleParseResult(_ParserContext context, xmlparser.xmlParserCtxt* c_ctxt, xmlDoc* result, filename, - int recover) except NULL: - cdef int well_formed + bint recover) except NULL: + cdef bint well_formed if c_ctxt.myDoc is not NULL: if c_ctxt.myDoc != result: tree.xmlFreeDoc(c_ctxt.myDoc) @@ -556,8 +556,8 @@ self._filename = filename self._target = target self._for_html = for_html - self._remove_comments = bool(remove_comments) - self._remove_pis = bool(remove_pis) + self._remove_comments = remove_comments + self._remove_pis = remove_pis self._resolvers = _ResolverRegistry() @@ -711,7 +711,6 @@ cdef python.PyThreadState* state cdef xmlDoc* result cdef xmlparser.xmlParserCtxt* pctxt - cdef int recover cdef Py_ssize_t py_buffer_len cdef int buffer_len cdef char* c_text @@ -752,7 +751,6 @@ cdef python.PyThreadState* state cdef xmlDoc* result cdef xmlparser.xmlParserCtxt* pctxt - cdef int recover cdef char* c_encoding if c_len > python.INT_MAX: raise ParserError, "string is too long to parse it with libxml2" @@ -788,7 +786,6 @@ 
cdef python.PyThreadState* state cdef xmlDoc* result cdef xmlparser.xmlParserCtxt* pctxt - cdef int recover cdef int orig_options cdef char* c_encoding result = NULL @@ -825,7 +822,6 @@ cdef xmlDoc* result cdef xmlparser.xmlParserCtxt* pctxt cdef char* c_filename - cdef int recover if not filename: filename = None @@ -884,7 +880,6 @@ cdef char* c_encoding cdef int buffer_len cdef int error - cdef int recover if python.PyString_Check(data): c_encoding = NULL c_data = _cstr(data) Modified: lxml/trunk/src/lxml/public-api.pxi ============================================================================== --- lxml/trunk/src/lxml/public-api.pxi (original) +++ lxml/trunk/src/lxml/public-api.pxi Tue Oct 9 11:20:53 2007 @@ -54,10 +54,10 @@ cdef public _Element rootNodeOrRaise(object input): return _rootNodeOrRaise(input) -cdef public int hasText(xmlNode* c_node): +cdef public bint hasText(xmlNode* c_node): return _hasText(c_node) -cdef public int hasTail(xmlNode* c_node): +cdef public bint hasTail(xmlNode* c_node): return _hasTail(c_node) cdef public object textOf(xmlNode* c_node): @@ -106,7 +106,7 @@ char* c_href, char* c_name): return _delAttributeFromNsName(c_element, c_href, c_name) -cdef public int hasChild(xmlNode* c_node): +cdef public bint hasChild(xmlNode* c_node): return _hasChild(c_node) cdef public xmlNode* findChild(xmlNode* c_node, Py_ssize_t index): Modified: lxml/trunk/src/lxml/serializer.pxi ============================================================================== --- lxml/trunk/src/lxml/serializer.pxi (original) +++ lxml/trunk/src/lxml/serializer.pxi Tue Oct 9 11:20:53 2007 @@ -44,8 +44,8 @@ return python.PyUnicode_AsEncodedString(text, encoding, 'strict') cdef _tostring(_Element element, encoding, method, - int write_xml_declaration, int write_complete_document, - int pretty_print): + bint write_xml_declaration, bint write_complete_document, + bint pretty_print): """Serialize an element to an encoded string representation of its XML tree. 
""" @@ -96,7 +96,7 @@ return result cdef _tounicode(_Element element, method, - int write_complete_document, int pretty_print): + bint write_complete_document, bint pretty_print): """Serialize an element to the Python unicode representation of its XML tree. """ @@ -133,9 +133,9 @@ cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node, char* encoding, int c_method, - int write_xml_declaration, - int write_complete_document, - int pretty_print): + bint write_xml_declaration, + bint write_complete_document, + bint pretty_print): cdef xmlDoc* c_doc cdef xmlNode* c_nsdecl_node c_doc = c_node.doc @@ -222,7 +222,7 @@ tree.xmlOutputBufferWrite(c_buffer, 3, "]>\n") cdef void _writeTail(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node, - char* encoding, int pretty_print): + char* encoding, bint pretty_print): "Write the element tail." c_node = c_node.next while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE: @@ -231,7 +231,7 @@ c_node = c_node.next cdef void _writePrevSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node, - char* encoding, int pretty_print): + char* encoding, bint pretty_print): cdef xmlNode* c_sibling if c_node.parent is not NULL and _isElement(c_node.parent): return @@ -247,7 +247,7 @@ c_sibling = c_sibling.next cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node, - char* encoding, int pretty_print): + char* encoding, bint pretty_print): cdef xmlNode* c_sibling if c_node.parent is not NULL and _isElement(c_node.parent): return @@ -307,8 +307,8 @@ return (<_FilelikeWriter>ctxt).close() cdef _tofilelike(f, _Element element, encoding, method, - int write_xml_declaration, int write_doctype, - int pretty_print): + bint write_xml_declaration, bint write_doctype, + bint pretty_print): cdef python.PyThreadState* state cdef _FilelikeWriter writer cdef tree.xmlOutputBuffer* c_buffer @@ -400,7 +400,7 @@ # dump node to file (mainly for debug) -cdef _dumpToFile(f, xmlNode* c_node, int pretty_print): +cdef 
_dumpToFile(f, xmlNode* c_node, bint pretty_print): cdef tree.xmlOutputBuffer* c_buffer if not python.PyFile_Check(f): raise ValueError, "Not a file" Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Tue Oct 9 11:20:53 2007 @@ -300,9 +300,9 @@ cdef void* xmlMalloc(size_t size) cdef extern from "etree_defs.h": - cdef int _isElement(xmlNode* node) - cdef int _isElementOrXInclude(xmlNode* node) + cdef bint _isElement(xmlNode* node) + cdef bint _isElementOrXInclude(xmlNode* node) cdef char* _getNs(xmlNode* node) cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* tree_top, - xmlNode* start_node, int inclusive) + xmlNode* start_node, bint inclusive) cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* start_node) Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Tue Oct 9 11:20:53 2007 @@ -98,7 +98,7 @@ return '' cdef void _receive(self, xmlerror.xmlError* error): - cdef int is_error + cdef bint is_error cdef _LogEntry entry entry = _LogEntry() entry._setError(error) @@ -114,6 +114,7 @@ cdef void _receiveGeneric(self, int domain, int type, int level, int line, message, filename): + cdef bint is_error cdef _LogEntry entry entry = _LogEntry() entry._setGeneric(domain, type, level, line, message, filename) @@ -184,7 +185,9 @@ return False def __nonzero__(self): - return bool(self._entries) + cdef bint result + result = self._entries + return result def filter_domains(self, domains): """Filter the errors by the given domains and return a new error log From scoder at codespeak.net Tue Oct 9 12:02:21 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 9 Oct 2007 12:02:21 +0200 (CEST) Subject: [Lxml-checkins] r47330 - lxml/trunk/src/lxml Message-ID: 
<20071009100221.2483081A8@code0.codespeak.net> Author: scoder Date: Tue Oct 9 12:02:20 2007 New Revision: 47330 Modified: lxml/trunk/src/lxml/etree.pyx Log: keep prefix counter a C integer, change formatting string instead Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Tue Oct 9 12:02:20 2007 @@ -229,7 +229,8 @@ When instances of this class are garbage collected, the libxml document is cleaned up. """ - cdef object _ns_counter + cdef unsigned int _ns_counter + cdef object _prefix_format cdef xmlDoc* _c_doc cdef _BaseParser _parser @@ -295,8 +296,12 @@ return self._c_doc.URL cdef buildNewPrefix(self): - ns = "ns%d" % self._ns_counter + ns = python.PyString_FromFormat( + _cstr(self._prefix_format), self._ns_counter) self._ns_counter = self._ns_counter + 1 + if self._ns_counter == 0: + # overflow! + self._prefix_format = self._prefix_format + "A" return ns cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, @@ -374,7 +379,8 @@ cdef _Document result result = NEW_DOCUMENT(_Document) result._c_doc = c_doc - result._ns_counter = 0L + result._ns_counter = 0 + result._prefix_format = "ns%lu" if parser is None: parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() result._parser = parser From scoder at codespeak.net Tue Oct 9 12:02:54 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 9 Oct 2007 12:02:54 +0200 (CEST) Subject: [Lxml-checkins] r47331 - lxml/branch/lxml-1.3/src/lxml Message-ID: <20071009100254.4B17581A8@code0.codespeak.net> Author: scoder Date: Tue Oct 9 12:02:53 2007 New Revision: 47331 Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx Log: keep prefix counter a C integer, change formatting string instead Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/etree.pyx (original) +++ 
lxml/branch/lxml-1.3/src/lxml/etree.pyx Tue Oct 9 12:02:53 2007 @@ -231,7 +231,8 @@ When instances of this class are garbage collected, the libxml document is cleaned up. """ - cdef object _ns_counter + cdef unsigned int _ns_counter + cdef object _prefix_format cdef xmlDoc* _c_doc cdef _BaseParser _parser @@ -297,8 +298,12 @@ return self._c_doc.URL cdef buildNewPrefix(self): - ns = "ns%d" % self._ns_counter + ns = python.PyString_FromFormat( + _cstr(self._prefix_format), self._ns_counter) self._ns_counter = self._ns_counter + 1 + if self._ns_counter == 0: + # overflow! + self._prefix_format = self._prefix_format + "A" return ns cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, @@ -372,7 +377,8 @@ cdef _Document result result = _Document() result._c_doc = c_doc - result._ns_counter = 0L + result._ns_counter = 0 + result._prefix_format = "ns%lu" if parser is None: parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() result._parser = parser From scoder at codespeak.net Tue Oct 9 14:56:29 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 9 Oct 2007 14:56:29 +0200 (CEST) Subject: [Lxml-checkins] r47343 - lxml/trunk/doc Message-ID: <20071009125629.8F4DE81B1@code0.codespeak.net> Author: scoder Date: Tue Oct 9 14:56:29 2007 New Revision: 47343 Modified: lxml/trunk/doc/build.txt Log: typo Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Tue Oct 9 14:56:29 2007 @@ -223,7 +223,7 @@ version numbers:: STATIC_INCLUDE_DIRS = [ - "..\\libxml2-2.6.23.win32\\include ", + "..\\libxml2-2.6.23.win32\\include", "..\\libxslt-1.1.15.win32\\include", "..\\zlib-1.2.3.win32\\include", "..\\iconv-1.9.1.win32\\include" From scoder at codespeak.net Tue Oct 9 14:57:07 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 9 Oct 2007 14:57:07 +0200 (CEST) Subject: [Lxml-checkins] r47344 - lxml/branch/lxml-1.3/doc 
Message-ID: <20071009125707.98E2280C7@code0.codespeak.net> Author: scoder Date: Tue Oct 9 14:57:07 2007 New Revision: 47344 Modified: lxml/branch/lxml-1.3/doc/build.txt Log: typo Modified: lxml/branch/lxml-1.3/doc/build.txt ============================================================================== --- lxml/branch/lxml-1.3/doc/build.txt (original) +++ lxml/branch/lxml-1.3/doc/build.txt Tue Oct 9 14:57:07 2007 @@ -250,7 +250,7 @@ version numbers:: STATIC_INCLUDE_DIRS = [ - "..\\libxml2-2.6.23.win32\\include ", + "..\\libxml2-2.6.23.win32\\include", "..\\libxslt-1.1.15.win32\\include", "..\\zlib-1.2.3.win32\\include", "..\\iconv-1.9.1.win32\\include" From scoder at codespeak.net Tue Oct 9 15:03:40 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 9 Oct 2007 15:03:40 +0200 (CEST) Subject: [Lxml-checkins] r47346 - lxml/trunk/doc Message-ID: <20071009130340.0048481B1@code0.codespeak.net> Author: scoder Date: Tue Oct 9 15:03:40 2007 New Revision: 47346 Modified: lxml/trunk/doc/build.txt Log: removed doc leftover Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Tue Oct 9 15:03:40 2007 @@ -232,7 +232,6 @@ STATIC_LIBRARY_DIRS = [ "..\\libxml2-2.6.23.win32\\lib", "..\\libxslt-1.1.15.win32\\lib", - "..\\libxslt-1.1.15.win32\\lib", "..\\zlib-1.2.3.win32\\lib", "..\\iconv-1.9.1.win32\\lib" ] From scoder at codespeak.net Tue Oct 9 15:03:54 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 9 Oct 2007 15:03:54 +0200 (CEST) Subject: [Lxml-checkins] r47347 - lxml/branch/lxml-1.3/doc Message-ID: <20071009130354.9DF7381B1@code0.codespeak.net> Author: scoder Date: Tue Oct 9 15:03:54 2007 New Revision: 47347 Modified: lxml/branch/lxml-1.3/doc/build.txt Log: removed doc leftover Modified: lxml/branch/lxml-1.3/doc/build.txt ============================================================================== --- 
lxml/branch/lxml-1.3/doc/build.txt (original) +++ lxml/branch/lxml-1.3/doc/build.txt Tue Oct 9 15:03:54 2007 @@ -259,7 +259,6 @@ STATIC_LIBRARY_DIRS = [ "..\\libxml2-2.6.23.win32\\lib", "..\\libxslt-1.1.15.win32\\lib", - "..\\libxslt-1.1.15.win32\\lib", "..\\zlib-1.2.3.win32\\lib", "..\\iconv-1.9.1.win32\\lib" ] From scoder at codespeak.net Wed Oct 10 09:13:13 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 10 Oct 2007 09:13:13 +0200 (CEST) Subject: [Lxml-checkins] r47366 - lxml/trunk/doc Message-ID: <20071010071313.983FC81E0@code0.codespeak.net> Author: scoder Date: Wed Oct 10 09:13:10 2007 New Revision: 47366 Modified: lxml/trunk/doc/lxmlhtml.txt Log: doc fix Modified: lxml/trunk/doc/lxmlhtml.txt ============================================================================== --- lxml/trunk/doc/lxmlhtml.txt (original) +++ lxml/trunk/doc/lxmlhtml.txt Wed Oct 10 09:13:10 2007 @@ -2,9 +2,9 @@ lxml.html ========= -Since version 2.0, lxml provides a dedicated package for dealing with HTML: -``lxml.html``. It provides a special Element API for HTML elements, as well -as a number of utilities for common tasks. +Since version 2.0, lxml comes with a dedicated package for dealing +with HTML: ``lxml.html``. It provides a special Element API for HTML +elements, as well as a number of utilities for common tasks. .. contents:: .. 
From scoder at codespeak.net Wed Oct 10 09:19:28 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 10 Oct 2007 09:19:28 +0200 (CEST) Subject: [Lxml-checkins] r47367 - lxml/trunk/doc Message-ID: <20071010071928.8AA9181E5@code0.codespeak.net> Author: scoder Date: Wed Oct 10 09:19:28 2007 New Revision: 47367 Modified: lxml/trunk/doc/lxmlhtml.txt Log: doc fix Modified: lxml/trunk/doc/lxmlhtml.txt ============================================================================== --- lxml/trunk/doc/lxmlhtml.txt (original) +++ lxml/trunk/doc/lxmlhtml.txt Wed Oct 10 09:19:28 2007 @@ -37,7 +37,8 @@ If you give a URL, or if the object has a ``.geturl()`` method (as file-like objects from ``urllib.urlopen()`` have), then that URL - is used as the base URL. + is used as the base URL. You can also provide an explicit + ``base_url`` keyword argument. ``document_fromstring(string)``: Parses a document from the given string. This always creates a From scoder at codespeak.net Wed Oct 10 11:22:38 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 10 Oct 2007 11:22:38 +0200 (CEST) Subject: [Lxml-checkins] r47375 - lxml/trunk Message-ID: <20071010092238.D7EDD81E6@code0.codespeak.net> Author: scoder Date: Wed Oct 10 11:22:37 2007 New Revision: 47375 Modified: lxml/trunk/setup.py lxml/trunk/setupinfo.py lxml/trunk/versioninfo.py Log: Py3 syntax fixes in build scripts Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Wed Oct 10 11:22:37 2007 @@ -38,7 +38,7 @@ # create lxml-version.h file svn_version = versioninfo.svn_version() versioninfo.create_version_h(svn_version) -print "Building lxml version", svn_version +print("Building lxml version %s." 
% svn_version) branch_link = """ Modified: lxml/trunk/setupinfo.py ============================================================================== --- lxml/trunk/setupinfo.py (original) +++ lxml/trunk/setupinfo.py Wed Oct 10 11:22:37 2007 @@ -3,7 +3,7 @@ try: from Cython.Distutils import build_ext as build_pyx - print "Building with Cython." + print("Building with Cython.") CYTHON_INSTALLED = True except ImportError: CYTHON_INSTALLED = False @@ -131,13 +131,21 @@ return macros def flags(cmd): - wf, rf, ef = os.popen3(cmd) + try: + import subprocess + except ImportError: + # Python 2.3 + _, rf, ef = os.popen3(cmd) + else: + # Python 2.4+ + p = subprocess.Popen(cmd, shell=True, close_fds=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + rf, ef = p.stdout, p.stderr errors = ef.read() if errors: - print "ERROR:", errors - print "** make sure the development packages of libxml2 and libxslt are installed **" - print - return rf.read().split() + print("ERROR: %s" % errors) + print("** make sure the development packages of libxml2 and libxslt are installed **\n") + return str(rf.read()).split() def has_option(name): try: Modified: lxml/trunk/versioninfo.py ============================================================================== --- lxml/trunk/versioninfo.py (original) +++ lxml/trunk/versioninfo.py Wed Oct 10 11:22:37 2007 @@ -35,7 +35,7 @@ if data.startswith('8'): # SVN >= 1.4 - data = map(str.splitlines, data.split('\n\x0c\n')) + data = [ d.splitlines() for d in data.split('\n\x0c\n') ] del data[0][0] # get rid of the '8' dirurl = data[0][3] try: From ianb at codespeak.net Sun Oct 14 02:12:32 2007 From: ianb at codespeak.net (ianb at codespeak.net) Date: Sun, 14 Oct 2007 02:12:32 +0200 (CEST) Subject: [Lxml-checkins] r47440 - in lxml/trunk: . 
src/lxml Message-ID: <20071014001232.1B0708165@code0.codespeak.net> Author: ianb Date: Sun Oct 14 02:12:31 2007 New Revision: 47440 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/doctestcompare.py Log: Improve doctestcompare a little: NOPARSE_MARKUP option to suppress its behavior, and xmlns=... now works Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Oct 14 02:12:31 2007 @@ -8,12 +8,20 @@ Features added -------------- +* When using ``lxml.doctestcompare`` you can give the doctest option + ``NOPARSE_MARKUP`` (like ``# doctest: +NOPARSE_MARKUP``) to suppress + the special checking for one test. + Bugs fixed ---------- * lxml.etree could crash when adding more than 10000 namespaces to a document +* With ``lxml.doctestcompare`` if you do ``<tag xmlns="...">`` in your + output, it will then be namespace-neutral (before the ellipsis was + treated as a real namespace). + Other changes ------------- Modified: lxml/trunk/src/lxml/doctestcompare.py ============================================================================== --- lxml/trunk/src/lxml/doctestcompare.py (original) +++ lxml/trunk/src/lxml/doctestcompare.py Sun Oct 14 02:12:31 2007 @@ -23,6 +23,8 @@ displayed (indented), and a rough diff-like output is given. Anything marked with ``-`` is in the output but wasn't supposed to be, and similarly ``+`` means its in the example but wasn't in the output.
+ +You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP`` """ from lxml import etree @@ -36,6 +38,7 @@ PARSE_HTML = doctest.register_optionflag('PARSE_HTML') PARSE_XML = doctest.register_optionflag('PARSE_XML') +NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP') OutputChecker = doctest.OutputChecker @@ -84,6 +87,8 @@ def get_parser(self, want, got, optionflags): parser = None + if NOPARSE_MARKUP & optionflags: + return None if PARSE_HTML & optionflags: parser = document_fromstring elif PARSE_XML & optionflags: @@ -102,7 +107,7 @@ and not _repr_re.search(s)) def compare_docs(self, want, got): - if want.tag != got.tag and want.tag != 'any': + if not self.tag_compare(want.tag, got.tag): return False if not self.text_compare(want.text, got.text, True): return False @@ -143,6 +148,17 @@ else: return False + def tag_compare(self, want, got): + if want == 'any': + return True + want = want or '' + got = got or '' + if want.startswith('{...}'): + # Ellipsis on the namespace + return want.split('}')[-1] == got.split('}')[-1] + else: + return want == got + def output_difference(self, example, got, optionflags): want = example.want parser = self.get_parser(want, got, optionflags) @@ -282,7 +298,7 @@ return ''.join(parts) def collect_diff_tag(self, want, got): - if want.tag != got.tag and want.tag != 'any': + if not self.tag_compare(want.tag, got.tag): tag = '%s (got: %s)' % (want.tag, got.tag) else: tag = got.tag @@ -431,3 +447,21 @@ raise LookupError( "Could not find doctest (only use this function *inside* a doctest)") +__test__ = { + 'basic': ''' + >>> temp_install() + >>> print """stuff""" + ... 
+ >>> print """""" + + + + >>> print """blahblahblah""" # doctest: +NOPARSE_MARKUP, +ELLIPSIS + ...foo /> + '''} + +if __name__ == '__main__': + import doctest + doctest.testmod() + + From scoder at codespeak.net Mon Oct 15 18:28:00 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 15 Oct 2007 18:28:00 +0200 (CEST) Subject: [Lxml-checkins] r47468 - lxml/trunk/src/lxml Message-ID: <20071015162800.944FE8102@code0.codespeak.net> Author: scoder Date: Mon Oct 15 18:27:59 2007 New Revision: 47468 Modified: lxml/trunk/src/lxml/apihelpers.pxi Log: faster attribute collecting, _countElements() helper function Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Mon Oct 15 18:27:59 2007 @@ -307,9 +307,21 @@ """Collect all attributes of a node in a list. Depending on collecttype, it collects either the name (1), the value (2) or the name-value tuples. 
""" + cdef Py_ssize_t count cdef xmlAttr* c_attr c_attr = c_node.properties - attributes = [] + count = 0 + while c_attr is not NULL: + if c_attr.type == tree.XML_ATTRIBUTE_NODE: + count = count + 1 + c_attr = c_attr.next + + if count == 0: + return [] + + attributes = python.PyList_New(count) + c_attr = c_node.properties + count = 0 while c_attr is not NULL: if c_attr.type == tree.XML_ATTRIBUTE_NODE: if collecttype == 1: @@ -320,9 +332,9 @@ item = (_namespacedName(c_attr), _attributeValue(c_node, c_attr)) - ret = python.PyList_Append(attributes, item) - if ret: - raise + python.Py_INCREF(item) + python.PyList_SET_ITEM(attributes, count, item) + count = count + 1 c_attr = c_attr.next return attributes @@ -451,6 +463,16 @@ cdef bint _hasChild(xmlNode* c_node): return c_node is not NULL and _findChildForwards(c_node, 0) is not NULL +cdef Py_ssize_t _countElements(xmlNode* c_node): + "Counts the elements within the following siblings and the node itself." + cdef Py_ssize_t count + count = 0 + while c_node is not NULL: + if _isElement(c_node): + count = count + 1 + c_node = c_node.next + return count + cdef xmlNode* _findChild(xmlNode* c_node, Py_ssize_t index): if index < 0: return _findChildBackwards(c_node, -index - 1) From scoder at codespeak.net Mon Oct 15 18:28:26 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 15 Oct 2007 18:28:26 +0200 (CEST) Subject: [Lxml-checkins] r47469 - lxml/trunk/src/lxml Message-ID: <20071015162826.D27038102@code0.codespeak.net> Author: scoder Date: Mon Oct 15 18:28:26 2007 New Revision: 47469 Modified: lxml/trunk/src/lxml/python.pxd Log: forgotten PyAPI declarations Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Mon Oct 15 18:28:26 2007 @@ -41,8 +41,10 @@ cdef Py_ssize_t PyTuple_GET_SIZE(object t) cdef object PyTuple_GET_ITEM(object o, Py_ssize_t pos) + cdef 
object PyList_New(Py_ssize_t index) cdef Py_ssize_t PyList_GET_SIZE(object l) cdef object PyList_GET_ITEM(object l, Py_ssize_t index) + cdef void PyList_SET_ITEM(object l, Py_ssize_t index, object value) cdef int PyList_Append(object l, object obj) except -1 cdef int PyList_Reverse(object l) except -1 cdef int PyList_Insert(object l, Py_ssize_t index, object o) except -1 @@ -61,14 +63,15 @@ cdef object PySequence_List(object o) cdef object PySequence_Tuple(object o) - cdef int PyDict_Check(object instance) - cdef int PyList_Check(object instance) - cdef int PyTuple_Check(object instance) - cdef int PyNumber_Check(object instance) - cdef int PyBool_Check(object instance) - cdef int PySequence_Check(object instance) - cdef int PyType_Check(object instance) - cdef int PyTuple_CheckExact(object instance) + cdef bint PyDict_Check(object instance) + cdef bint PyList_Check(object instance) + cdef bint PyTuple_Check(object instance) + cdef bint PyNumber_Check(object instance) + cdef bint PyBool_Check(object instance) + cdef bint PySequence_Check(object instance) + cdef bint PyType_Check(object instance) + cdef bint PyTuple_CheckExact(object instance) + cdef bint PySlice_Check(object instance) cdef int PyObject_SetAttr(object o, object name, object value) cdef object PyObject_RichCompare(object o1, object o2, int op) From ianb at codespeak.net Tue Oct 16 05:35:34 2007 From: ianb at codespeak.net (ianb at codespeak.net) Date: Tue, 16 Oct 2007 05:35:34 +0200 (CEST) Subject: [Lxml-checkins] r47483 - lxml/trunk/src/lxml Message-ID: <20071016033534.43F7D80C2@code0.codespeak.net> Author: ianb Date: Tue Oct 16 05:35:33 2007 New Revision: 47483 Modified: lxml/trunk/src/lxml/doctestcompare.py Log: Oops, sometimes the tag we're comparing is a comment Modified: lxml/trunk/src/lxml/doctestcompare.py ============================================================================== --- lxml/trunk/src/lxml/doctestcompare.py (original) +++ lxml/trunk/src/lxml/doctestcompare.py Tue Oct 16 
05:35:33 2007 @@ -151,6 +151,9 @@ def tag_compare(self, want, got): if want == 'any': return True + if (not isinstance(want, basestring) + or not isinstance(got, basestring)): + return want == got want = want or '' got = got or '' if want.startswith('{...}'): From scoder at codespeak.net Fri Oct 19 11:45:23 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 19 Oct 2007 11:45:23 +0200 (CEST) Subject: [Lxml-checkins] r47573 - lxml/tag/lxml-1.3.4 Message-ID: <20071019094523.8E94B8121@code0.codespeak.net> Author: scoder Date: Fri Oct 19 11:45:22 2007 New Revision: 47573 Added: lxml/tag/lxml-1.3.4/ - copied from r46182, lxml/branch/lxml-1.3/ Log: lxml 1.3.4 tag From scoder at codespeak.net Fri Oct 19 12:54:43 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 19 Oct 2007 12:54:43 +0200 (CEST) Subject: [Lxml-checkins] r47574 - lxml/trunk Message-ID: <20071019105443.BB72D811F@code0.codespeak.net> Author: scoder Date: Fri Oct 19 12:54:42 2007 New Revision: 47574 Modified: lxml/trunk/CREDITS.txt Log: Holger Modified: lxml/trunk/CREDITS.txt ============================================================================== --- lxml/trunk/CREDITS.txt (original) +++ lxml/trunk/CREDITS.txt Fri Oct 19 12:54:42 2007 @@ -7,6 +7,8 @@ Ian Bicking - lxml.html +Holger Joukl - bug reports, feedback and development on lxml.objectify + Marc-Antoine Parent - XPath extension function help and patches Olivier Grisel - improved (c)ElementTree compatibility patches, From scoder at codespeak.net Fri Oct 19 16:42:20 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 19 Oct 2007 16:42:20 +0200 (CEST) Subject: [Lxml-checkins] r47591 - lxml/trunk/src/lxml Message-ID: <20071019144220.DE82D8145@code0.codespeak.net> Author: scoder Date: Fri Oct 19 16:42:18 2007 New Revision: 47591 Modified: lxml/trunk/src/lxml/etree.pyx Log: fix prefix name creation also for Python <= 2.4 Modified: lxml/trunk/src/lxml/etree.pyx 
============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Fri Oct 19 16:42:18 2007 @@ -229,8 +229,8 @@ When instances of this class are garbage collected, the libxml document is cleaned up. """ - cdef unsigned int _ns_counter - cdef object _prefix_format + cdef int _ns_counter + cdef object _prefix_tail cdef xmlDoc* _c_doc cdef _BaseParser _parser @@ -296,12 +296,17 @@ return self._c_doc.URL cdef buildNewPrefix(self): - ns = python.PyString_FromFormat( - _cstr(self._prefix_format), self._ns_counter) + ns = python.PyString_FromFormat("ns%d", self._ns_counter) + if self._prefix_tail is not None: + ns = ns + self._prefix_tail self._ns_counter = self._ns_counter + 1 - if self._ns_counter == 0: + if self._ns_counter < 0: # overflow! - self._prefix_format = self._prefix_format + "A" + self._ns_counter = 0 + if self._prefix_tail is None: + self._prefix_tail = "A" + else: + self._prefix_tail = self._prefix_tail + "A" return ns cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, @@ -380,7 +385,7 @@ result = NEW_DOCUMENT(_Document) result._c_doc = c_doc result._ns_counter = 0 - result._prefix_format = "ns%lu" + result._prefix_tail = None if parser is None: parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() result._parser = parser From scoder at codespeak.net Fri Oct 19 16:45:14 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 19 Oct 2007 16:45:14 +0200 (CEST) Subject: [Lxml-checkins] r47592 - lxml/branch/lxml-1.3/src/lxml Message-ID: <20071019144514.4626E813C@code0.codespeak.net> Author: scoder Date: Fri Oct 19 16:45:13 2007 New Revision: 47592 Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx Log: fix prefix name creation also for Python <= 2.4 Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/etree.pyx (original) +++ 
lxml/branch/lxml-1.3/src/lxml/etree.pyx Fri Oct 19 16:45:13 2007 @@ -231,8 +231,8 @@ When instances of this class are garbage collected, the libxml document is cleaned up. """ - cdef unsigned int _ns_counter - cdef object _prefix_format + cdef int _ns_counter + cdef object _prefix_tail cdef xmlDoc* _c_doc cdef _BaseParser _parser @@ -298,12 +298,17 @@ return self._c_doc.URL cdef buildNewPrefix(self): - ns = python.PyString_FromFormat( - _cstr(self._prefix_format), self._ns_counter) + ns = python.PyString_FromFormat("ns%d", self._ns_counter) + if self._prefix_tail is not None: + ns = ns + self._prefix_tail self._ns_counter = self._ns_counter + 1 - if self._ns_counter == 0: + if self._ns_counter < 0: # overflow! - self._prefix_format = self._prefix_format + "A" + self._ns_counter = 0 + if self._prefix_tail is None: + self._prefix_tail = "A" + else: + self._prefix_tail = self._prefix_tail + "A" return ns cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, @@ -378,7 +383,7 @@ result = _Document() result._c_doc = c_doc result._ns_counter = 0 - result._prefix_format = "ns%lu" + result._prefix_tail = None if parser is None: parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() result._parser = parser From scoder at codespeak.net Sat Oct 20 14:44:51 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 20 Oct 2007 14:44:51 +0200 (CEST) Subject: [Lxml-checkins] r47636 - lxml/trunk Message-ID: <20071020124451.A9E0B8143@code0.codespeak.net> Author: scoder Date: Sat Oct 20 14:44:50 2007 New Revision: 47636 Modified: lxml/trunk/setupinfo.py Log: print Cython version in setup.py Modified: lxml/trunk/setupinfo.py ============================================================================== --- lxml/trunk/setupinfo.py (original) +++ lxml/trunk/setupinfo.py Sat Oct 20 14:44:50 2007 @@ -3,7 +3,8 @@ try: from Cython.Distutils import build_ext as build_pyx - print("Building with Cython.") + import Cython.Compiler.Version + print("Building with Cython %s." 
% Cython.Compiler.Version.version) CYTHON_INSTALLED = True except ImportError: CYTHON_INSTALLED = False From scoder at codespeak.net Sat Oct 20 14:46:30 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 20 Oct 2007 14:46:30 +0200 (CEST) Subject: [Lxml-checkins] r47637 - in lxml/trunk: . src/lxml Message-ID: <20071020124630.E3081814B@code0.codespeak.net> Author: scoder Date: Sat Oct 20 14:46:30 2007 New Revision: 47637 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/docloader.pxi lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/xslt.pxi Log: improved interaction of custom resolvers and XSLT Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sat Oct 20 14:46:30 2007 @@ -8,6 +8,9 @@ Features added -------------- +* Resolvers can now provide a ``base_url`` keyword argument when + resolving a document as string data. + * When using ``lxml.doctestcompare`` you can give the doctest option ``NOPARSE_MARKUP`` (like ``# doctest: +NOPARSE_MARKUP``) to suppress the special checking for one test. @@ -15,6 +18,9 @@ Bugs fixed ---------- +* Using custom resolvers on XSLT stylesheets parsed from a string + could request ill-formed URLs. + * lxml.etree could crash when adding more than 10000 namespaces to a document Modified: lxml/trunk/src/lxml/docloader.pxi ============================================================================== --- lxml/trunk/src/lxml/docloader.pxi (original) +++ lxml/trunk/src/lxml/docloader.pxi Sat Oct 20 14:46:30 2007 @@ -9,43 +9,69 @@ cdef class _InputDocument: cdef _InputDocumentDataType _type cdef object _data_bytes + cdef object _filename cdef object _file cdef class Resolver: "This is the base class of all resolvers." def resolve(self, system_url, public_id, context): + """Override this method to resolve an external source by + ``system_url`` and ``public_id``. 
The third argument is an + opaque context object. + + Return the result of one of the ``resolve_*()`` methods. + """ return None def resolve_empty(self, context): - "Return an empty input document." + """Return an empty input document. + + Pass context as parameter. + """ cdef _InputDocument doc_ref doc_ref = _InputDocument() doc_ref._type = PARSER_DATA_EMPTY return doc_ref - def resolve_string(self, string, context): - "Return a parsable string as input document." + def resolve_string(self, string, context, base_url=None): + """Return a parsable string as input document. + + Pass data string and context as parameters. + + You can pass the source URL as 'base_url' keyword. + """ cdef _InputDocument doc_ref doc_ref = _InputDocument() doc_ref._type = PARSER_DATA_STRING doc_ref._data_bytes = _utf8(string) + if base_url is not None: + doc_ref._filename = _encodeFilename(base_url) return doc_ref def resolve_filename(self, filename, context): - "Return the name of a parsable file as input document." + """Return the name of a parsable file as input document. + + Pass filename and context as parameters. + """ cdef _InputDocument doc_ref doc_ref = _InputDocument() doc_ref._type = PARSER_DATA_FILENAME - doc_ref._data_bytes = _encodeFilename(filename) + doc_ref._filename = _encodeFilename(filename) return doc_ref def resolve_file(self, f, context): - "Return an open file-like object as input document." + """Return an open file-like object as input document. + + Pass open file and context as parameters. 
+ """ cdef _InputDocument doc_ref - if not hasattr(f, 'read'): + try: + f.read + except AttributeError: raise TypeError, "Argument is not a file-like object" doc_ref = _InputDocument() doc_ref._type = PARSER_DATA_FILE + doc_ref._filename = _getFilenameForFile(f) doc_ref._file = f return doc_ref Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sat Oct 20 14:46:30 2007 @@ -16,6 +16,9 @@ del __builtin__ +cdef object os_path_join +from os.path import join as os_path_join + cdef object _elementpath import _elementpath Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Sat Oct 20 14:46:30 2007 @@ -84,25 +84,26 @@ context = <_XSLTResolverContext>c_context try: resolvers = context._resolvers - uri = funicode(c_uri) + if cstd.strncmp('string://', c_uri, 9) == 0: + uri = funicode(c_uri + 9) + if cstd.strncmp('string://', context._c_style_doc.URL, 9) != 0 and \ + cstd.strcmp('', context._c_style_doc.URL) != 0: + # stylesheet URL known => make the target URL absolute + uri = os_path_join(context._c_style_doc.URL, uri) + else: + uri = funicode(c_uri) doc_ref = resolvers.resolve(uri, None, context) c_doc = NULL if doc_ref is not None: if doc_ref._type == PARSER_DATA_STRING: c_doc = _parseDoc( - doc_ref._data_bytes, None, context._parser) + doc_ref._data_bytes, doc_ref._filename, context._parser) elif doc_ref._type == PARSER_DATA_FILENAME: - if python.PyUnicode_Check(doc_ref._data_bytes): - filename = _utf8(doc_ref._data_bytes) - else: - filename = doc_ref._data_bytes - c_doc = _parseDocFromFile(filename, context._parser) + c_doc = _parseDocFromFile(doc_ref._filename, context._parser) elif doc_ref._type == PARSER_DATA_FILE: - filename = _getFilenameForFile(doc_ref._file) - data = 
doc_ref._file.read() - c_doc = _parseDoc( - data, filename, context._parser) + c_doc = _parseDocFromFilelike( + doc_ref._file, doc_ref._filename, context._parser) elif doc_ref._type == PARSER_DATA_EMPTY: c_doc = _newDoc() if c_doc is not NULL and c_doc.URL is NULL: @@ -115,7 +116,7 @@ cdef void _xslt_store_resolver_exception(char* c_uri, void* context, xslt.xsltLoadType c_type): - message = "Cannot resolve URI %s" % funicode(c_uri) + message = "Cannot resolve URI %s" % c_uri if c_type == xslt.XSLT_LOAD_DOCUMENT: exception = XSLTApplyError(message) else: @@ -299,7 +300,7 @@ # make sure we always have a stylesheet URL if c_doc.URL is NULL: - doc_url_utf = "XSLT:__STRING__XSLT__%s" % id(self) + doc_url_utf = "string://__STRING__XSLT__%s" % id(self) c_doc.URL = tree.xmlStrdup(_cstr(doc_url_utf)) self._error_log = _ErrorLog() From scoder at codespeak.net Sat Oct 20 14:50:20 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 20 Oct 2007 14:50:20 +0200 (CEST) Subject: [Lxml-checkins] r47638 - lxml/trunk/src/lxml Message-ID: <20071020125020.4842B80A4@code0.codespeak.net> Author: scoder Date: Sat Oct 20 14:50:19 2007 New Revision: 47638 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/objectify.pyx Log: fix error handling on PyList_Append() Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sat Oct 20 14:50:19 2007 @@ -845,10 +845,8 @@ result = [] while c_node is not NULL and c < stop: if _isElement(c_node): - ret = python.PyList_Append( + python.PyList_Append( result, _elementFactory(self._doc, c_node)) - if ret: - raise c = c + 1 c_node = c_node.next return result @@ -992,15 +990,12 @@ code should use ``list(element)`` or simply iterate over elements. 
""" cdef xmlNode* c_node - cdef int ret result = [] c_node = self._c_node.children while c_node is not NULL: if _isElement(c_node): - ret = python.PyList_Append( + python.PyList_Append( result, _elementFactory(self._doc, c_node)) - if ret: - raise c_node = c_node.next return result Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Sat Oct 20 14:50:19 2007 @@ -193,15 +193,12 @@ returned in document order. """ cdef tree.xmlNode* c_node - cdef int ret result = [] c_node = self._c_node.children while c_node is not NULL: if tree._isElement(c_node): - ret = python.PyList_Append( + python.PyList_Append( result, cetree.elementFactory(self._doc, c_node)) - if ret: - raise c_node = c_node.next return result From scoder at codespeak.net Sat Oct 20 14:52:33 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 20 Oct 2007 14:52:33 +0200 (CEST) Subject: [Lxml-checkins] r47639 - lxml/branch/lxml-1.3/src/lxml Message-ID: <20071020125233.676AE814E@code0.codespeak.net> Author: scoder Date: Sat Oct 20 14:52:33 2007 New Revision: 47639 Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx Log: fix error handling on PyList_Append() Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/etree.pyx (original) +++ lxml/branch/lxml-1.3/src/lxml/etree.pyx Sat Oct 20 14:52:33 2007 @@ -829,10 +829,8 @@ result = [] while c_node is not NULL and c < stop: if _isElement(c_node): - ret = python.PyList_Append( + python.PyList_Append( result, _elementFactory(self._doc, c_node)) - if ret: - raise c = c + 1 c_node = c_node.next return result @@ -967,15 +965,12 @@ """Returns all subelements. The elements are returned in document order. 
""" cdef xmlNode* c_node - cdef int ret result = [] c_node = self._c_node.children while c_node is not NULL: if _isElement(c_node): - ret = python.PyList_Append( + python.PyList_Append( result, _elementFactory(self._doc, c_node)) - if ret: - raise c_node = c_node.next return result From scoder at codespeak.net Sat Oct 20 14:53:53 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 20 Oct 2007 14:53:53 +0200 (CEST) Subject: [Lxml-checkins] r47640 - lxml/trunk/src/lxml Message-ID: <20071020125353.547B28152@code0.codespeak.net> Author: scoder Date: Sat Oct 20 14:53:53 2007 New Revision: 47640 Modified: lxml/trunk/src/lxml/pyclasslookup.pyx Log: fix error handling on PyList_Append() Modified: lxml/trunk/src/lxml/pyclasslookup.pyx ============================================================================== --- lxml/trunk/src/lxml/pyclasslookup.pyx (original) +++ lxml/trunk/src/lxml/pyclasslookup.pyx Sat Oct 20 14:53:53 2007 @@ -103,10 +103,8 @@ result = [] while c_node is not NULL and c < stop: if tree._isElement(c_node): - ret = python.PyList_Append( + python.PyList_Append( result, _newProxy(self._source_proxy, c_node)) - if ret: - raise c = c + 1 c_node = c_node.next return result @@ -163,16 +161,13 @@ order. 
""" cdef tree.xmlNode* c_node - cdef int ret self._assertNode() result = [] c_node = self._c_node.children while c_node is not NULL: if tree._isElement(c_node): - ret = python.PyList_Append( + python.PyList_Append( result, _newProxy(self._source_proxy, c_node)) - if ret: - raise c_node = c_node.next return result From scoder at codespeak.net Sat Oct 20 14:54:25 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 20 Oct 2007 14:54:25 +0200 (CEST) Subject: [Lxml-checkins] r47641 - lxml/branch/lxml-1.3/src/lxml Message-ID: <20071020125425.2153C8154@code0.codespeak.net> Author: scoder Date: Sat Oct 20 14:54:24 2007 New Revision: 47641 Modified: lxml/branch/lxml-1.3/src/lxml/pyclasslookup.pyx Log: fix error handling on PyList_Append() Modified: lxml/branch/lxml-1.3/src/lxml/pyclasslookup.pyx ============================================================================== --- lxml/branch/lxml-1.3/src/lxml/pyclasslookup.pyx (original) +++ lxml/branch/lxml-1.3/src/lxml/pyclasslookup.pyx Sat Oct 20 14:54:24 2007 @@ -103,10 +103,8 @@ result = [] while c_node is not NULL and c < stop: if tree._isElement(c_node): - ret = python.PyList_Append( + python.PyList_Append( result, _newProxy(self._source_proxy, c_node)) - if ret: - raise c = c + 1 c_node = c_node.next return result @@ -163,16 +161,13 @@ order. 
""" cdef tree.xmlNode* c_node - cdef int ret self._assertNode() result = [] c_node = self._c_node.children while c_node is not NULL: if tree._isElement(c_node): - ret = python.PyList_Append( + python.PyList_Append( result, _newProxy(self._source_proxy, c_node)) - if ret: - raise c_node = c_node.next return result From scoder at codespeak.net Sun Oct 21 09:22:42 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 21 Oct 2007 09:22:42 +0200 (CEST) Subject: [Lxml-checkins] r47664 - lxml/trunk/doc Message-ID: <20071021072242.63FC88160@code0.codespeak.net> Author: scoder Date: Sun Oct 21 09:22:40 2007 New Revision: 47664 Modified: lxml/trunk/doc/tutorial.txt Log: tutorial section on serialisation Modified: lxml/trunk/doc/tutorial.txt ============================================================================== --- lxml/trunk/doc/tutorial.txt (original) +++ lxml/trunk/doc/tutorial.txt Sun Oct 21 09:22:40 2007 @@ -334,13 +334,74 @@ .. _`further iterators`: api.html#iteration +Serialisation +------------- + +Serialisation commonly uses with the ``tostring()`` function that +returns a string, or the ``ElementTree.write()`` method that writes to +a file or file-like object. Both accept the same keyword arguments +like ``pretty_print`` for formatted output or ``encoding`` to select a +specific output encoding other than plain ASCII:: + + >>> root = etree.XML('') + + >>> print etree.tostring(root) + + + >>> print etree.tostring(root, xml_declaration=True) + + + + >>> print etree.tostring(root, encoding='iso-8859-1') + + + + >>> print etree.tostring(root, pretty_print=True) + + + + + + + +Since lxml 2.0 (and ElementTree 1.3), the serialisation functions can +do more than XML serialisation and optional pretty printing. You can +serialise to HTML or extract the text content by passing the +``method`` keyword:: + + >>> root = etree.XML('

Hello
World

') + + >>> print etree.tostring(root) # default: method = 'xml' +

Hello
World

+ + >>> print etree.tostring(root, method='xml') # same as above +

Hello
World

+ + >>> print etree.tostring(root, method='html') +

Hello
World

+ + >>> print etree.tostring(root, method='html', pretty_print=True) + + +

Hello
World

+ + + >>> print etree.tostring(root, method='text') + HelloWorld + +For the plain text output, the ``tounicode()`` function might become handy:: + + >>> etree.tounicode(root, method='text') + u'HelloWorld' + + The ElementTree class ===================== An ``ElementTree`` is mainly a document wrapper around a tree with a root node. It provides a couple of methods for parsing, serialisation and general document handling. One of the bigger differences is that it serialises as a -complete document, as opposed to a single Element. This includes top-level +complete document, as opposed to a single ``Element``. This includes top-level processing instructions and comments, as well as a DOCTYPE and other DTD content in the document:: From scoder at codespeak.net Sun Oct 21 09:23:56 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 21 Oct 2007 09:23:56 +0200 (CEST) Subject: [Lxml-checkins] r47665 - in lxml/trunk: doc src/lxml src/lxml/tests Message-ID: <20071021072356.BF8D88160@code0.codespeak.net> Author: scoder Date: Sun Oct 21 09:23:56 2007 New Revision: 47665 Modified: lxml/trunk/doc/parsing.txt lxml/trunk/src/lxml/docloader.pxi lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/objectify.pyx lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_etree.py lxml/trunk/src/lxml/tests/test_htmlparser.py Log: use keyword-only arguments in API Modified: lxml/trunk/doc/parsing.txt ============================================================================== --- lxml/trunk/doc/parsing.txt (original) +++ lxml/trunk/doc/parsing.txt Sun Oct 21 09:23:56 2007 @@ -495,7 +495,7 @@ >>> etree.tostring(root) '  +  ' - >>> etree.tostring(root, 'UTF-8', xml_declaration=False) + >>> etree.tostring(root, encoding='UTF-8', xml_declaration=False) ' \xef\xa3\x91 + \xef\xa3\x92 ' As an extension, lxml.etree has a new ``tounicode()`` function that you can Modified: lxml/trunk/src/lxml/docloader.pxi 
============================================================================== --- lxml/trunk/src/lxml/docloader.pxi (original) +++ lxml/trunk/src/lxml/docloader.pxi Sun Oct 21 09:23:56 2007 @@ -33,7 +33,7 @@ doc_ref._type = PARSER_DATA_EMPTY return doc_ref - def resolve_string(self, string, context, base_url=None): + def resolve_string(self, string, context, *, base_url=None): """Return a parsable string as input document. Pass data string and context as parameters. Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sun Oct 21 09:23:56 2007 @@ -1383,7 +1383,7 @@ return self._doc._parser return None - def write(self, file, encoding=None, method="xml", + def write(self, file, *, encoding=None, method="xml", pretty_print=False, xml_declaration=None): """Write the tree to a file or file-like object. @@ -2061,7 +2061,7 @@ """ return _makeSubElement(_parent, _tag, None, None, attrib, nsmap, _extra) -def ElementTree(_Element element=None, file=None, _BaseParser parser=None): +def ElementTree(_Element element=None, *, file=None, _BaseParser parser=None): """ElementTree wrapper class. """ cdef xmlNode* c_next @@ -2084,7 +2084,7 @@ return _elementTreeFactory(doc, element) -def HTML(text, _BaseParser parser=None, base_url=None): +def HTML(text, _BaseParser parser=None, *, base_url=None): """Parses an HTML document from a string constant. This function can be used to embed "HTML literals" in Python code. @@ -2106,7 +2106,7 @@ except _TargetParserResult, result_container: return result_container.result -def XML(text, _BaseParser parser=None, base_url=None): +def XML(text, _BaseParser parser=None, *, base_url=None): """Parses an XML document from a string constant. 
This function can be used to embed "XML literals" in Python code, like in @@ -2130,7 +2130,7 @@ except _TargetParserResult, result_container: return result_container.result -def fromstring(text, _BaseParser parser=None, base_url=None): +def fromstring(text, _BaseParser parser=None, *, base_url=None): """Parses an XML document from a string. To override the default parser with a different parser you can pass it to @@ -2168,13 +2168,13 @@ """ return isinstance(element, _Element) -def dump(_Element elem not None, pretty_print=True): +def dump(_Element elem not None, *, pretty_print=True): """Writes an element tree or element structure to sys.stdout. This function should be used for debugging only. """ _dumpToFile(sys.stdout, elem._c_node, pretty_print) -def tostring(element_or_tree, encoding=None, method="xml", +def tostring(element_or_tree, *, encoding=None, method="xml", xml_declaration=None, pretty_print=False): """Serialize an element to an encoded string representation of its XML tree. @@ -2217,7 +2217,7 @@ """ return [tostring(element_or_tree, *args, **kwargs)] -def tounicode(element_or_tree, method="xml", pretty_print=False): +def tounicode(element_or_tree, *, method="xml", pretty_print=False): """Serialize an element to the Python unicode representation of its XML tree. 
Modified: lxml/trunk/src/lxml/objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/objectify.pyx (original) +++ lxml/trunk/src/lxml/objectify.pyx Sun Oct 21 09:23:56 2007 @@ -1043,7 +1043,7 @@ cdef object _namespace cdef object _nsmap cdef bint _annotate - def __init__(self, namespace=None, nsmap=None, annotate=True, + def __init__(self, *, namespace=None, nsmap=None, annotate=True, makeelement=None): if nsmap is None: nsmap = _DEFAULT_NSMAP @@ -1301,7 +1301,7 @@ pass return None -def pyannotate(element_or_tree, ignore_old=False, ignore_xsi=False, +def pyannotate(element_or_tree, *, ignore_old=False, ignore_xsi=False, empty_pytype=None): """Recursively annotates the elements of an XML tree with 'pytype' attributes. @@ -1322,7 +1322,7 @@ element = cetree.rootNodeOrRaise(element_or_tree) _annotate(element, 0, 1, ignore_xsi, ignore_old, None, empty_pytype) -def xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, +def xsiannotate(element_or_tree, *, ignore_old=False, ignore_pytype=False, empty_type=None): """Recursively annotates the elements of an XML tree with 'xsi:type' attributes. @@ -1348,7 +1348,7 @@ element = cetree.rootNodeOrRaise(element_or_tree) _annotate(element, 1, 0, ignore_old, ignore_pytype, empty_type, None) -def annotate(element_or_tree, ignore_old=True, ignore_xsi=False, +def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1): """Recursively annotates the elements of an XML tree with 'xsi:type' @@ -1536,7 +1536,7 @@ tree.xmlSetNsProp(c_node, c_ns, "nil", "true") tree.END_FOR_EACH_ELEMENT_FROM(c_node) -def deannotate(element_or_tree, pytype=True, xsi=True): +def deannotate(element_or_tree, *, pytype=True, xsi=True): """Recursively de-annotate the elements of an XML tree by removing 'pytype' and/or 'type' attributes. 
@@ -1640,7 +1640,7 @@ E = ElementMaker() -def Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes): +def Element(_tag, attrib=None, nsmap=None, *, _pytype=None, **_attributes): """Objectify specific version of the lxml.etree Element() factory that always creates a structural (tree) element. @@ -1657,7 +1657,7 @@ _attributes[PYTYPE_ATTRIBUTE] = _pytype return _makeElement(_tag, None, _attributes, nsmap) -def DataElement(_value, attrib=None, nsmap=None, _pytype=None, _xsi=None, +def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None, **_attributes): """Create a new element from a Python value and XML attributes taken from keyword arguments or a dictionary passed as second argument. Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Sun Oct 21 09:23:56 2007 @@ -1037,7 +1037,7 @@ not harmful, it is more efficient to use separate parsers. This does not apply to the default parser. """ - def __init__(self, attribute_defaults=False, dtd_validation=False, + def __init__(self, *, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, remove_blank_text=False, compact=True, resolve_entities=True, remove_comments=False, @@ -1076,7 +1076,7 @@ This parser has ``remove_comments`` and ``remove_pis`` enabled by default and thus ignores comments and processing instructions. 
""" - def __init__(self, attribute_defaults=False, dtd_validation=False, + def __init__(self, *, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, remove_blank_text=False, compact=True, resolve_entities=True, remove_comments=True, Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Sun Oct 21 09:23:56 2007 @@ -43,13 +43,13 @@ tree = ElementTree(element) self.buildNodes(element, 10, 3) f = open(self.getTestFilePath('testdump.xml'), 'w') - tree.write(f, 'UTF-8') + tree.write(f, encoding='UTF-8') f.close() f = open(self.getTestFilePath('testdump.xml'), 'r') tree = ElementTree(file=f) f.close() f = open(self.getTestFilePath('testdump2.xml'), 'w') - tree.write(f, 'UTF-8') + tree.write(f, encoding='UTF-8') f.close() f = open(self.getTestFilePath('testdump.xml'), 'r') data1 = f.read() @@ -2358,7 +2358,7 @@ f = StringIO() tree = ElementTree(element=a) - tree.write(f, 'utf-8') + tree.write(f, encoding='utf-8') self.assertEquals(u'S?k p? nettet'.encode('UTF-8'), f.getvalue()) @@ -2389,7 +2389,7 @@ f = StringIO() tree = ElementTree(element=a) - tree.write(f, 'iso-8859-1') + tree.write(f, encoding='iso-8859-1') result = f.getvalue() declaration = "" self.assertEncodingDeclaration(result,'iso-8859-1') @@ -2460,7 +2460,7 @@ a = Element('a') a.text = u'S?k p? nettet' self.assertEquals(u'S?k p? nettet'.encode('UTF-8'), - tostring(a, 'utf-8')) + tostring(a, encoding='utf-8')) def test_encoding_tostring_unknown(self): Element = self.etree.Element @@ -2468,7 +2468,8 @@ a = Element('a') a.text = u'S?k p? 
nettet' - self.assertRaises(LookupError, tostring, a, 'Invalid Encoding') + self.assertRaises(LookupError, tostring, a, + encoding='Invalid Encoding') def test_encoding_tostring_sub(self): Element = self.etree.Element @@ -2479,7 +2480,7 @@ b = SubElement(a, 'b') b.text = u'S?k p? nettet' self.assertEquals(u'S?k p? nettet'.encode('UTF-8'), - tostring(b, 'utf-8')) + tostring(b, encoding='utf-8')) def test_encoding_tostring_sub_tail(self): Element = self.etree.Element @@ -2491,7 +2492,7 @@ b.text = u'S?k p? nettet' b.tail = u'S?k' self.assertEquals(u'S?k p? nettetS?k'.encode('UTF-8'), - tostring(b, 'utf-8')) + tostring(b, encoding='utf-8')) def test_encoding_tostring_default_encoding(self): Element = self.etree.Element @@ -2919,7 +2920,7 @@ try: f = open(filename, 'wb') tree = ElementTree(element=element) - tree.write(f, encoding) + tree.write(f, encoding=encoding) f.close() f = open(filename, 'rb') data = f.read() Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Sun Oct 21 09:23:56 2007 @@ -1777,7 +1777,7 @@ b = SubElement(a, 'b') c = SubElement(a, 'c') - result = unicode(tostring(a, 'UTF-16'), 'UTF-16') + result = unicode(tostring(a, encoding='UTF-16'), 'UTF-16') self.assertEquals('', canonicalize(result)) Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_htmlparser.py (original) +++ lxml/trunk/src/lxml/tests/test_htmlparser.py Sun Oct 21 09:23:56 2007 @@ -16,6 +16,11 @@ etree = etree html_str = "test

page title

" + html_str_pretty = """\ + +test +

page title

+""" broken_html_str = "test<body><h1>page title</h3></p></html>" uhtml_str = u"<html><head><title>test ??\uF8D2

page ??\uF8D2 title

" @@ -29,9 +34,14 @@ def test_module_HTML_unicode(self): element = self.etree.HTML(self.uhtml_str) - self.assertEqual(unicode(self.etree.tostring(element, 'UTF8'), 'UTF8'), + self.assertEqual(unicode(self.etree.tostring(element, encoding='UTF8'), 'UTF8'), unicode(self.uhtml_str.encode('UTF8'), 'UTF8')) + def test_module_HTML_pretty_print(self): + element = self.etree.HTML(self.html_str) + self.assertEqual(self.etree.tostring(element, method="html", pretty_print=True), + self.html_str_pretty) + def test_module_parse_html_error(self): parser = self.etree.HTMLParser(recover=False) parse = self.etree.parse @@ -202,14 +212,14 @@ parser = self.etree.HTMLParser() f = SillyFileLike(self.html_str) tree = self.etree.parse(f, parser) - html = self.etree.tostring(tree.getroot(), 'UTF-8') + html = self.etree.tostring(tree.getroot(), encoding='UTF-8') self.assertEqual(html, self.html_str) ## def test_module_parse_html_filelike_unicode(self): ## parser = self.etree.HTMLParser() ## f = SillyFileLike(self.uhtml_str) ## tree = self.etree.parse(f, parser) -## html = self.etree.tostring(tree.getroot(), 'UTF-8') +## html = self.etree.tostring(tree.getroot(), encoding='UTF-8') ## self.assertEqual(unicode(html, 'UTF-8'), self.uhtml_str) def test_html_file_error(self): From scoder at codespeak.net Sun Oct 21 14:25:47 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 21 Oct 2007 14:25:47 +0200 (CEST) Subject: [Lxml-checkins] r47669 - lxml/branch/pyrex-0.9.6.2-fixes Message-ID: <20071021122547.DB301814E@code0.codespeak.net> Author: scoder Date: Sun Oct 21 14:25:46 2007 New Revision: 47669 Added: lxml/branch/pyrex-0.9.6.2-fixes/ - copied from r47668, lxml/trunk/ Log: new branch for fixes to build with Pyrex 0.9.6 From scoder at codespeak.net Sun Oct 21 14:27:46 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 21 Oct 2007 14:27:46 +0200 (CEST) Subject: [Lxml-checkins] r47670 - lxml/branch/pyrex-0.9.6.2-fixes/src/lxml Message-ID: 
<20071021122746.1C6D08159@code0.codespeak.net> Author: scoder Date: Sun Oct 21 14:27:45 2007 New Revision: 47670 Modified: lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/classlookup.pxi lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/etree.pyx lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/etreepublic.pxd lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/extensions.pxi lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/objectify.pyx lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/parser.pxi lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/parsertarget.pxi lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/public-api.pxi lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/pyclasslookup.pyx Log: lower case 'gil', public API and import changes Modified: lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/classlookup.pxi ============================================================================== --- lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/classlookup.pxi (original) +++ lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/classlookup.pxi Sun Oct 21 14:27:45 2007 @@ -48,7 +48,7 @@ ################################################################################ # Element class lookup -ctypedef object (*_element_class_lookup_function)(object, _Document, xmlNode*) +ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*) # class to store element class lookup functions cdef public class ElementClassLookup [ type LxmlElementClassLookupType, Modified: lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/etree.pyx ============================================================================== --- lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/etree.pyx (original) +++ lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/etree.pyx Sun Oct 21 14:27:45 2007 @@ -1769,7 +1769,7 @@ return attribs -ctypedef xmlNode* (*_node_to_node_function)(xmlNode*) +ctypedef public xmlNode* (*_node_to_node_function)(xmlNode*) cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, type LxmlElementTagMatcherType ]: Modified: lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/etreepublic.pxd 
============================================================================== --- lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/etreepublic.pxd (original) +++ lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/etreepublic.pxd Sun Oct 21 14:27:45 2007 @@ -16,10 +16,10 @@ int start_node_inclusive) cdef void END_FOR_EACH_ELEMENT_FROM(tree.xmlNode* start_node) -cdef extern from "etree.h": +cdef extern from "etree_api.h": # first function to call! - cdef int import_etree(etree_module) except -1 + cdef int import_etree() except -1 ########################################################################## # public ElementTree API classes Modified: lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/extensions.pxi ============================================================================== --- lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/extensions.pxi (original) +++ lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/extensions.pxi Sun Oct 21 14:27:45 2007 @@ -583,7 +583,7 @@ # lookup the function by name and call it cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt, - int nargs) with GIL: + int nargs) with gil: cdef xpath.xmlXPathContext* rctxt cdef _BaseContext context rctxt = ctxt.context Modified: lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/objectify.pyx ============================================================================== --- lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/objectify.pyx (original) +++ lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/objectify.pyx Sun Oct 21 14:27:45 2007 @@ -10,7 +10,7 @@ cdef object etree from lxml import etree # initialize C-API of lxml.etree -import_etree(etree) +import_etree() __version__ = etree.__version__ Modified: lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/parser.pxi ============================================================================== --- lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/parser.pxi (original) +++ lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/parser.pxi Sun Oct 21 14:27:45 2007 @@ -288,7 +288,7 @@ self._exc_context._store_raised() return -1 -cdef 
int _readFilelikeParser(void* ctxt, char* c_buffer, int c_size) with GIL: +cdef int _readFilelikeParser(void* ctxt, char* c_buffer, int c_size) with gil: return (<_FileReaderContext>ctxt).copyToBuffer(c_buffer, c_size) ############################################################ @@ -297,7 +297,7 @@ cdef xmlparser.xmlParserInput* _parser_resolve_from_python( char* c_url, char* c_pubid, xmlparser.xmlParserCtxt* c_context, - int* error) with GIL: + int* error) with gil: # call the Python document loaders cdef xmlparser.xmlParserInput* c_input cdef _ResolverContext context Modified: lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/parsertarget.pxi ============================================================================== --- lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/parsertarget.pxi (original) +++ lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/parsertarget.pxi Sun Oct 21 14:27:45 2007 @@ -87,7 +87,7 @@ char* c_namespace, int c_nb_namespaces, char** c_namespaces, int c_nb_attributes, int c_nb_defaulted, - char** c_attributes) with GIL: + char** c_attributes) with gil: cdef _TargetParserContext context cdef xmlparser.xmlParserCtxt* c_ctxt cdef int i @@ -118,7 +118,7 @@ _handleSaxTargetException(context, c_ctxt) cdef void _targetSaxEnd(void* ctxt, char* c_localname, char* c_prefix, - char* c_namespace) with GIL: + char* c_namespace) with gil: cdef _TargetParserContext context cdef xmlparser.xmlParserCtxt* c_ctxt c_ctxt = ctxt @@ -131,7 +131,7 @@ except: _handleSaxTargetException(context, c_ctxt) -cdef void _targetSaxData(void* ctxt, char* c_data, int data_len) with GIL: +cdef void _targetSaxData(void* ctxt, char* c_data, int data_len) with gil: cdef _TargetParserContext context cdef xmlparser.xmlParserCtxt* c_ctxt c_ctxt = ctxt @@ -145,7 +145,7 @@ _handleSaxTargetException(context, c_ctxt) cdef void _targetSaxDoctype(void* ctxt, char* c_name, char* c_public, - char* c_system) with GIL: + char* c_system) with gil: cdef _TargetParserContext context cdef xmlparser.xmlParserCtxt* c_ctxt 
c_ctxt = ctxt @@ -162,7 +162,7 @@ except: _handleSaxTargetException(context, c_ctxt) -cdef void _targetSaxPI(void* ctxt, char* c_target, char* c_data) with GIL: +cdef void _targetSaxPI(void* ctxt, char* c_target, char* c_data) with gil: cdef _TargetParserContext context cdef xmlparser.xmlParserCtxt* c_ctxt c_ctxt = ctxt @@ -176,7 +176,7 @@ except: _handleSaxTargetException(context, c_ctxt) -cdef void _targetSaxComment(void* ctxt, char* c_data, int data_len) with GIL: +cdef void _targetSaxComment(void* ctxt, char* c_data, int data_len) with gil: cdef _TargetParserContext context cdef xmlparser.xmlParserCtxt* c_ctxt c_ctxt = ctxt Modified: lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/public-api.pxi ============================================================================== --- lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/public-api.pxi (original) +++ lxml/branch/pyrex-0.9.6.2-fixes/src/lxml/public-api.pxi Sun Oct 21 14:27:45 2007 @@ -1,156 +1,156 @@ # Public C API for lxml.etree -cdef public _Element deepcopyNodeToDocument(_Document doc, xmlNode* c_root): +cdef api _Element deepcopyNodeToDocument(_Document doc, xmlNode* c_root): "Recursively copy the element into the document. doc is not modified." 
cdef xmlNode* c_node c_node = _copyNodeToDoc(c_root, doc._c_doc) return _elementFactory(doc, c_node) -cdef public _ElementTree elementTreeFactory(_Element context_node): +cdef api _ElementTree elementTreeFactory(_Element context_node): return newElementTree(context_node, _ElementTree) -cdef public _ElementTree newElementTree(_Element context_node, +cdef api _ElementTree newElementTree(_Element context_node, object subclass): if context_node is NULL or context_node is None: raise TypeError return _newElementTree(context_node._doc, context_node, subclass) -cdef public _Element elementFactory(_Document doc, xmlNode* c_node): +cdef api _Element elementFactory(_Document doc, xmlNode* c_node): if c_node is NULL or doc is None: raise TypeError return _elementFactory(doc, c_node) -cdef public _Element makeElement(tag, _Document doc, parser, +cdef api _Element makeElement(tag, _Document doc, parser, text, tail, attrib, nsmap): return _makeElement(tag, NULL, doc, parser, text, tail, attrib, nsmap, None) -cdef public _Element makeSubElement(_Element parent, tag, text, tail, +cdef api _Element makeSubElement(_Element parent, tag, text, tail, attrib, nsmap): return _makeSubElement(parent, tag, text, tail, attrib, nsmap, None) -cdef public void setElementClassLookupFunction( +cdef api void setElementClassLookupFunction( _element_class_lookup_function function, state): _setElementClassLookupFunction(function, state) -cdef public object lookupDefaultElementClass(state, doc, xmlNode* c_node): +cdef api object lookupDefaultElementClass(state, doc, xmlNode* c_node): return _lookupDefaultElementClass(state, doc, c_node) -cdef public object lookupNamespaceElementClass(state, doc, xmlNode* c_node): +cdef api object lookupNamespaceElementClass(state, doc, xmlNode* c_node): return _find_nselement_class(state, doc, c_node) -cdef public object callLookupFallback(FallbackElementClassLookup lookup, +cdef api object callLookupFallback(FallbackElementClassLookup lookup, _Document doc, xmlNode* 
c_node): return lookup._callFallback(doc, c_node) -cdef public int tagMatches(xmlNode* c_node, char* c_href, char* c_name): +cdef api int tagMatches(xmlNode* c_node, char* c_href, char* c_name): if c_node is NULL: return -1 return _tagMatches(c_node, c_href, c_name) -cdef public _Document documentOrRaise(object input): +cdef api _Document documentOrRaise(object input): return _documentOrRaise(input) -cdef public _Element rootNodeOrRaise(object input): +cdef api _Element rootNodeOrRaise(object input): return _rootNodeOrRaise(input) -cdef public bint hasText(xmlNode* c_node): +cdef api bint hasText(xmlNode* c_node): return _hasText(c_node) -cdef public bint hasTail(xmlNode* c_node): +cdef api bint hasTail(xmlNode* c_node): return _hasTail(c_node) -cdef public object textOf(xmlNode* c_node): +cdef api object textOf(xmlNode* c_node): if c_node is NULL: return None return _collectText(c_node.children) -cdef public object tailOf(xmlNode* c_node): +cdef api object tailOf(xmlNode* c_node): if c_node is NULL: return None return _collectText(c_node.next) -cdef public int setNodeText(xmlNode* c_node, text) except -1: +cdef api int setNodeText(xmlNode* c_node, text) except -1: if c_node is NULL: raise ValueError return _setNodeText(c_node, text) -cdef public int setTailText(xmlNode* c_node, text) except -1: +cdef api int setTailText(xmlNode* c_node, text) except -1: if c_node is NULL: raise ValueError return _setTailText(c_node, text) -cdef public object attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node): +cdef api object attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node): return _attributeValue(c_element, c_attrib_node) -cdef public object attributeValueFromNsName(xmlNode* c_element, +cdef api object attributeValueFromNsName(xmlNode* c_element, char* ns, char* name): return _attributeValueFromNsName(c_element, ns, name) -cdef public object getAttributeValue(_Element element, key, default): +cdef api object getAttributeValue(_Element element, key, default): 
return _getAttributeValue(element, key, default) -cdef public object iterattributes(_Element element, int keysvalues): +cdef api object iterattributes(_Element element, int keysvalues): return _attributeIteratorFactory(element, keysvalues) -cdef public object collectAttributes(xmlNode* c_element, int keysvalues): +cdef api object collectAttributes(xmlNode* c_element, int keysvalues): return _collectAttributes(c_element, keysvalues) -cdef public int setAttributeValue(_Element element, key, value) except -1: +cdef api int setAttributeValue(_Element element, key, value) except -1: return _setAttributeValue(element, key, value) -cdef public int delAttribute(_Element element, key) except -1: +cdef api int delAttribute(_Element element, key) except -1: return _delAttribute(element, key) -cdef public int delAttributeFromNsName(tree.xmlNode* c_element, +cdef api int delAttributeFromNsName(tree.xmlNode* c_element, char* c_href, char* c_name): return _delAttributeFromNsName(c_element, c_href, c_name) -cdef public bint hasChild(xmlNode* c_node): +cdef api bint hasChild(xmlNode* c_node): return _hasChild(c_node) -cdef public xmlNode* findChild(xmlNode* c_node, Py_ssize_t index): +cdef api xmlNode* findChild(xmlNode* c_node, Py_ssize_t index): return _findChild(c_node, index) -cdef public xmlNode* findChildForwards(xmlNode* c_node, Py_ssize_t index): +cdef api xmlNode* findChildForwards(xmlNode* c_node, Py_ssize_t index): return _findChildForwards(c_node, index) -cdef public xmlNode* findChildBackwards(xmlNode* c_node, Py_ssize_t index): +cdef api xmlNode* findChildBackwards(xmlNode* c_node, Py_ssize_t index): return _findChildBackwards(c_node, index) -cdef public xmlNode* nextElement(xmlNode* c_node): +cdef api xmlNode* nextElement(xmlNode* c_node): return _nextElement(c_node) -cdef public xmlNode* previousElement(xmlNode* c_node): +cdef api xmlNode* previousElement(xmlNode* c_node): return _previousElement(c_node) -cdef public void appendChild(_Element parent, _Element 
child): +cdef api void appendChild(_Element parent, _Element child): _appendChild(parent, child) -cdef