From ianb at codespeak.net Tue Oct 2 00:24:09 2007
From: ianb at codespeak.net (ianb at codespeak.net)
Date: Tue, 2 Oct 2007 00:24:09 +0200 (CEST)
Subject: [Lxml-checkins] r47079 - lxml/trunk/src/lxml/html
Message-ID: <20071001222409.DD3168111@code0.codespeak.net>
Author: ianb
Date: Tue Oct 2 00:24:07 2007
New Revision: 47079
Modified:
lxml/trunk/src/lxml/html/setmixin.py
Log:
fix the in-place operators in SetMixin
Modified: lxml/trunk/src/lxml/html/setmixin.py
==============================================================================
--- lxml/trunk/src/lxml/html/setmixin.py (original)
+++ lxml/trunk/src/lxml/html/setmixin.py Tue Oct 2 00:24:07 2007
@@ -71,21 +71,27 @@
for item in other:
self.add(item)
- __ior__ = update
+ def __ior__(self, other):
+ self.update(other)
+ return self
def intersection_update(self, other):
for item in self:
if item not in other:
self.remove(item)
- __iand__ = intersection_update
+ def __iand__(self, other):
+ self.intersection_update(other)
+ return self
def difference_update(self, other):
for item in other:
if item in self:
self.remove(item)
- __isub__ = difference_update
+ def __isub__(self, other):
+ self.difference_update(other)
+ return self
def symmetric_difference_update(self, other):
for item in other:
@@ -94,7 +100,9 @@
else:
self.add(item)
- __ixor__ = symmetric_difference_update
+ def __ixor__(self, other):
+ self.symmetric_difference_update(other)
+ return self
def discard(self, item):
try:
From lxml-checkins at codespeak.net Thu Oct 4 23:42:44 2007
From: lxml-checkins at codespeak.net (VIAGRA ® Official Site)
Date: Thu, 4 Oct 2007 23:42:44 +0200 (CEST)
Subject: [Lxml-checkins] October 75% OFF
Message-ID: <20071004034331.7317.qmail@host75.201-252-1.telecom.net.ar>
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20071004/e946f154/attachment.htm
From lxml-checkins at codespeak.net Fri Oct 5 17:54:08 2007
From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net)
Date: Fri, 5 Oct 2007 17:54:08 +0200 (CEST)
Subject: [Lxml-checkins] Check out what's new
Message-ID: <74047053679.3673357686918@delivery.net>
An HTML attachment was scrubbed...
URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20071005/e44a8a2b/attachment.htm
From scoder at codespeak.net Sun Oct 7 06:30:36 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 7 Oct 2007 06:30:36 +0200 (CEST)
Subject: [Lxml-checkins] r47250 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20071007043036.BD7A380FB@code0.codespeak.net>
Author: scoder
Date: Sun Oct 7 06:30:34 2007
New Revision: 47250
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tests/test_htmlparser.py
lxml/trunk/src/lxml/tests/test_unicode.py
Log:
let tag name validation distinguish HTML/XML tags based on the related parser, allow ':' in HTML tags
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sun Oct 7 06:30:34 2007
@@ -16,9 +16,11 @@
Other changes
-------------
-* lxml.etree no longer validates unicode characters in tag names to
- avoid rejecting HTML tags. Only special characters like ':' and '>'
- are rejected.
+* Tag name validation in lxml.etree (and lxml.html) now distinguishes
+ between HTML tags and XML tags based on the parser that was used to
+ parse or create them. HTML tags no longer reject any non-ASCII
+ characters in tag names but only spaces and the special characters
+ '<>&/'.
2.0alpha3 (2007-09-26)
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Sun Oct 7 06:30:34 2007
@@ -99,7 +99,10 @@
"""
cdef xmlNode* c_node
ns_utf, name_utf = _getNsTag(tag)
- _tagValidOrRaise(name_utf)
+ if parser is not None and parser._for_html:
+ _htmlTagValidOrRaise(name_utf)
+ else:
+ _tagValidOrRaise(name_utf)
if doc is not None:
c_doc = doc._c_doc
elif c_doc is NULL:
@@ -147,16 +150,22 @@
If 'c_doc' is also NULL, a new xmlDoc will be created.
"""
+ cdef _BaseParser parser
cdef _Document doc
cdef xmlNode* c_node
cdef xmlDoc* c_doc
if parent is None or parent._doc is None:
return None
ns_utf, name_utf = _getNsTag(tag)
- _tagValidOrRaise(name_utf)
doc = parent._doc
c_doc = doc._c_doc
+ parser = doc._parser
+ if parser is not None and parser._for_html:
+ _htmlTagValidOrRaise(name_utf)
+ else:
+ _tagValidOrRaise(name_utf)
+
c_node = _createElement(c_doc, name_utf)
if c_node is NULL:
python.PyErr_NoMemory()
@@ -175,6 +184,7 @@
cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, extra):
"""Initialise the attributes of an element node.
"""
+ cdef bint is_html
cdef xmlNs* c_ns
# 'extra' is not checked here (expected to be a keyword dict)
if attrib is not None and not hasattr(attrib, 'items'):
@@ -185,9 +195,11 @@
else:
attrib.update(extra)
if attrib:
+ is_html = doc._parser._for_html
for name, value in attrib.items():
attr_ns_utf, attr_name_utf = _getNsTag(name)
- _attributeValidOrRaise(attr_name_utf)
+ if not is_html:
+ _attributeValidOrRaise(attr_name_utf)
value_utf = _utf8(value)
if attr_ns_utf is None:
tree.xmlNewProp(c_node, _cstr(attr_name_utf), _cstr(value_utf))
@@ -242,7 +254,8 @@
cdef char* c_value
cdef char* c_tag
ns, tag = _getNsTag(key)
- _attributeValidOrRaise(tag)
+ if not element._doc._parser._for_html:
+ _attributeValidOrRaise(tag)
c_tag = _cstr(tag)
if isinstance(value, QName):
value = _resolveQNameText(element, value)
@@ -790,13 +803,17 @@
cdef int _pyXmlNameIsValid(name_utf8):
return _xmlNameIsValid(_cstr(name_utf8))
+cdef int _pyHtmlNameIsValid(name_utf8):
+ return _htmlNameIsValid(_cstr(name_utf8))
+
cdef int _xmlNameIsValid(char* c_name):
- #return tree.xmlValidateNCName(c_name, 0) == 0
+ return tree.xmlValidateNCName(c_name, 0) == 0
+
+cdef int _htmlNameIsValid(char* c_name):
if c_name is NULL or c_name[0] == c'\0':
return 0
while c_name[0] != c'\0':
- if c_name[0] == c':' or \
- c_name[0] == c'&' or \
+ if c_name[0] == c'&' or \
c_name[0] == c'<' or \
c_name[0] == c'>' or \
c_name[0] == c'/' or \
@@ -815,6 +832,12 @@
python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', 'strict')
return 0
+cdef int _htmlTagValidOrRaise(tag_utf) except -1:
+ if not _pyHtmlNameIsValid(tag_utf):
+ raise ValueError, "Invalid HTML tag name %r" % \
+ python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', 'strict')
+ return 0
+
cdef int _attributeValidOrRaise(name_utf) except -1:
if not _pyXmlNameIsValid(name_utf):
raise ValueError, "Invalid attribute name %r" % \
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Sun Oct 7 06:30:34 2007
@@ -707,8 +707,13 @@
return self._tag
def __set__(self, value):
+ cdef _BaseParser parser
ns, name = _getNsTag(value)
- _tagValidOrRaise(name)
+ parser = self._doc._parser
+ if parser is not None and parser._for_html:
+ _htmlTagValidOrRaise(name)
+ else:
+ _tagValidOrRaise(name)
self._tag = value
tree.xmlNodeSetName(self._c_node, _cstr(name))
if ns is None:
Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_htmlparser.py (original)
+++ lxml/trunk/src/lxml/tests/test_htmlparser.py Sun Oct 7 06:30:34 2007
@@ -39,6 +39,74 @@
self.assertRaises(self.etree.XMLSyntaxError,
parse, f, parser)
+ def test_html_element_name_empty(self):
+ parser = self.etree.HTMLParser()
+ Element = parser.makeelement
+
+ el = Element('name')
+ self.assertRaises(ValueError, Element, '{}')
+ self.assertRaises(ValueError, setattr, el, 'tag', '{}')
+
+ self.assertRaises(ValueError, Element, '{test}')
+ self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
+
+ def test_html_element_name_colon(self):
+ parser = self.etree.HTMLParser()
+ Element = parser.makeelement
+
+ pname = Element('p:name')
+ self.assertEquals(pname.tag, 'p:name')
+
+ pname = Element('{test}p:name')
+ self.assertEquals(pname.tag, '{test}p:name')
+
+ pname = Element('name')
+ pname.tag = 'p:name'
+ self.assertEquals(pname.tag, 'p:name')
+
+ def test_html_element_name_space(self):
+ parser = self.etree.HTMLParser()
+ Element = parser.makeelement
+
+ self.assertRaises(ValueError, Element, ' name ')
+ self.assertRaises(ValueError, Element, 'na me')
+ self.assertRaises(ValueError, Element, '{test} name')
+
+ el = Element('name')
+ self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
+
+ def test_html_subelement_name_empty(self):
+ parser = self.etree.HTMLParser()
+ Element = parser.makeelement
+
+ SubElement = self.etree.SubElement
+
+ el = Element('name')
+ self.assertRaises(ValueError, SubElement, el, '{}')
+ self.assertRaises(ValueError, SubElement, el, '{test}')
+
+ def test_html_subelement_name_colon(self):
+ parser = self.etree.HTMLParser()
+ Element = parser.makeelement
+ SubElement = self.etree.SubElement
+
+ el = Element('name')
+ pname = SubElement(el, 'p:name')
+ self.assertEquals(pname.tag, 'p:name')
+
+ pname = SubElement(el, '{test}p:name')
+ self.assertEquals(pname.tag, '{test}p:name')
+
+ def test_html_subelement_name_space(self):
+ parser = self.etree.HTMLParser()
+ Element = parser.makeelement
+ SubElement = self.etree.SubElement
+
+ el = Element('name')
+ self.assertRaises(ValueError, SubElement, el, ' name ')
+ self.assertRaises(ValueError, SubElement, el, 'na me')
+ self.assertRaises(ValueError, SubElement, el, '{test} name')
+
def test_module_parse_html_norecover(self):
parser = self.etree.HTMLParser(recover=False)
parse = self.etree.parse
Modified: lxml/trunk/src/lxml/tests/test_unicode.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_unicode.py (original)
+++ lxml/trunk/src/lxml/tests/test_unicode.py Sun Oct 7 06:30:34 2007
@@ -5,9 +5,9 @@
ascii_uni = u'a'
-# klingon = u"\uF8D2" # not valid for XML names
+klingon = u"\uF8D2" # not valid for XML names
-invalid_tag = "\u0680:\u3120"
+invalid_tag = "test" + klingon
uni = u'\xc3\u0680\u3120' # some non-ASCII characters
From scoder at codespeak.net Sun Oct 7 06:32:49 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 7 Oct 2007 06:32:49 +0200 (CEST)
Subject: [Lxml-checkins] r47251 - in lxml/trunk: . doc
Message-ID: <20071007043249.5136B80FB@code0.codespeak.net>
Author: scoder
Date: Sun Oct 7 06:32:49 2007
New Revision: 47251
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/main.txt
lxml/trunk/version.txt
Log:
2.0alpha4
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sun Oct 7 06:32:49 2007
@@ -2,8 +2,9 @@
lxml changelog
==============
-Under development
-=================
+
+2.0alpha3 (2007-10-07)
+======================
Features added
--------------
Modified: lxml/trunk/doc/main.txt
==============================================================================
--- lxml/trunk/doc/main.txt (original)
+++ lxml/trunk/doc/main.txt Sun Oct 7 06:32:49 2007
@@ -138,8 +138,8 @@
.. _`lxml at the Python Package Index`: http://pypi.python.org/pypi/lxml/
.. _`this key`: pubkey.asc
-The latest version is `lxml 2.0alpha3`_, released 2007-09-26
-(`changes for 2.0alpha3`_). `Older versions`_ are listed below.
+The latest version is `lxml 2.0alpha4`_, released 2007-10-07
+(`changes for 2.0alpha4`_). `Older versions`_ are listed below.
.. _`Older versions`: #old-versions
@@ -199,6 +199,8 @@
Old Versions
------------
+* `lxml 2.0alpha3`_, released 2007-09-26 (`changes for 2.0alpha3`_)
+
* `lxml 2.0alpha2`_, released 2007-09-15 (`changes for 2.0alpha2`_)
* `lxml 2.0alpha1`_, released 2007-09-02 (`changes for 2.0alpha1`_)
Modified: lxml/trunk/version.txt
==============================================================================
--- lxml/trunk/version.txt (original)
+++ lxml/trunk/version.txt Sun Oct 7 06:32:49 2007
@@ -1 +1 @@
-2.0alpha3
+2.0alpha4
From scoder at codespeak.net Sun Oct 7 06:34:18 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 7 Oct 2007 06:34:18 +0200 (CEST)
Subject: [Lxml-checkins] r47252 - lxml/trunk/doc
Message-ID: <20071007043418.0354580FB@code0.codespeak.net>
Author: scoder
Date: Sun Oct 7 06:34:18 2007
New Revision: 47252
Modified:
lxml/trunk/doc/main.txt
Log:
2.0alpha4
Modified: lxml/trunk/doc/main.txt
==============================================================================
--- lxml/trunk/doc/main.txt (original)
+++ lxml/trunk/doc/main.txt Sun Oct 7 06:34:18 2007
@@ -251,6 +251,7 @@
* `lxml 0.5`_, released 2005-04-08
+.. _`lxml 2.0alpha4`: lxml-2.0alpha4.tgz
.. _`lxml 2.0alpha3`: lxml-2.0alpha3.tgz
.. _`lxml 2.0alpha2`: lxml-2.0alpha2.tgz
.. _`lxml 2.0alpha1`: lxml-2.0alpha1.tgz
@@ -277,6 +278,7 @@
.. _`lxml 0.5.1`: lxml-0.5.1.tgz
.. _`lxml 0.5`: lxml-0.5.tgz
+.. _`changes for 2.0alpha4`: changes-2.0alpha4.html
.. _`changes for 2.0alpha3`: changes-2.0alpha3.html
.. _`changes for 2.0alpha2`: changes-2.0alpha2.html
.. _`changes for 2.0alpha1`: changes-2.0alpha1.html
From scoder at codespeak.net Sun Oct 7 06:34:44 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 7 Oct 2007 06:34:44 +0200 (CEST)
Subject: [Lxml-checkins] r47253 - lxml/trunk
Message-ID: <20071007043444.51E6E80FB@code0.codespeak.net>
Author: scoder
Date: Sun Oct 7 06:34:44 2007
New Revision: 47253
Modified:
lxml/trunk/CHANGES.txt
Log:
2.0alpha4
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sun Oct 7 06:34:44 2007
@@ -3,7 +3,7 @@
==============
-2.0alpha3 (2007-10-07)
+2.0alpha4 (2007-10-07)
======================
Features added
From scoder at codespeak.net Sun Oct 7 07:03:33 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 7 Oct 2007 07:03:33 +0200 (CEST)
Subject: [Lxml-checkins] r47254 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20071007050333.809498113@code0.codespeak.net>
Author: scoder
Date: Sun Oct 7 07:03:32 2007
New Revision: 47254
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/tests/test_etree.py
lxml/trunk/src/lxml/tests/test_htmlparser.py
Log:
added " and ' to the list of invalid HTML tag characters
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sun Oct 7 07:03:32 2007
@@ -21,7 +21,7 @@
between HTML tags and XML tags based on the parser that was used to
parse or create them. HTML tags no longer reject any non-ASCII
characters in tag names but only spaces and the special characters
- '<>&/'.
+ ``<>&/"'``.
2.0alpha3 (2007-09-26)
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Sun Oct 7 07:03:32 2007
@@ -817,6 +817,8 @@
c_name[0] == c'<' or \
c_name[0] == c'>' or \
c_name[0] == c'/' or \
+ c_name[0] == c'"' or \
+ c_name[0] == c"'" or \
c_name[0] == c'\x09' or \
c_name[0] == c'\x0A' or \
c_name[0] == c'\x0B' or \
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Sun Oct 7 07:03:32 2007
@@ -74,6 +74,18 @@
el = Element('name')
self.assertRaises(ValueError, setattr, el, 'tag', 'p:name')
+ def test_element_name_quote(self):
+ Element = self.etree.Element
+ self.assertRaises(ValueError, Element, "p'name")
+ self.assertRaises(ValueError, Element, 'p"name')
+
+ self.assertRaises(ValueError, Element, "{test}p'name")
+ self.assertRaises(ValueError, Element, '{test}p"name')
+
+ el = Element('name')
+ self.assertRaises(ValueError, setattr, el, 'tag', "p'name")
+ self.assertRaises(ValueError, setattr, el, 'tag', 'p"name')
+
def test_element_name_space(self):
Element = self.etree.Element
self.assertRaises(ValueError, Element, ' name ')
@@ -99,6 +111,17 @@
self.assertRaises(ValueError, SubElement, el, 'p:name')
self.assertRaises(ValueError, SubElement, el, '{test}p:name')
+ def test_subelement_name_quote(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ el = Element('name')
+ self.assertRaises(ValueError, SubElement, el, "p'name")
+ self.assertRaises(ValueError, SubElement, el, "{test}p'name")
+
+ self.assertRaises(ValueError, SubElement, el, 'p"name')
+ self.assertRaises(ValueError, SubElement, el, '{test}p"name')
+
def test_subelement_name_space(self):
Element = self.etree.Element
SubElement = self.etree.SubElement
Modified: lxml/trunk/src/lxml/tests/test_htmlparser.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_htmlparser.py (original)
+++ lxml/trunk/src/lxml/tests/test_htmlparser.py Sun Oct 7 07:03:32 2007
@@ -64,6 +64,20 @@
pname.tag = 'p:name'
self.assertEquals(pname.tag, 'p:name')
+ def test_html_element_name_quote(self):
+ parser = self.etree.HTMLParser()
+ Element = parser.makeelement
+
+ self.assertRaises(ValueError, Element, 'p"name')
+ self.assertRaises(ValueError, Element, "na'me")
+ self.assertRaises(ValueError, Element, '{test}"name')
+ self.assertRaises(ValueError, Element, "{test}name'")
+
+ el = Element('name')
+ self.assertRaises(ValueError, setattr, el, 'tag', "pname'")
+ self.assertRaises(ValueError, setattr, el, 'tag', '"pname')
+ self.assertEquals(el.tag, "name")
+
def test_html_element_name_space(self):
parser = self.etree.HTMLParser()
Element = parser.makeelement
@@ -74,6 +88,7 @@
el = Element('name')
self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
+ self.assertEquals(el.tag, "name")
def test_html_subelement_name_empty(self):
parser = self.etree.HTMLParser()
@@ -97,6 +112,17 @@
pname = SubElement(el, '{test}p:name')
self.assertEquals(pname.tag, '{test}p:name')
+ def test_html_subelement_name_quote(self):
+ parser = self.etree.HTMLParser()
+ Element = parser.makeelement
+ SubElement = self.etree.SubElement
+
+ el = Element('name')
+ self.assertRaises(ValueError, SubElement, el, "name'")
+ self.assertRaises(ValueError, SubElement, el, 'na"me')
+ self.assertRaises(ValueError, SubElement, el, "{test}na'me")
+ self.assertRaises(ValueError, SubElement, el, '{test}"name')
+
def test_html_subelement_name_space(self):
parser = self.etree.HTMLParser()
Element = parser.makeelement
From scoder at codespeak.net Sun Oct 7 22:12:25 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 7 Oct 2007 22:12:25 +0200 (CEST)
Subject: [Lxml-checkins] r47270 - lxml/trunk/doc
Message-ID: <20071007201225.4C1318188@code0.codespeak.net>
Author: scoder
Date: Sun Oct 7 22:12:23 2007
New Revision: 47270
Modified:
lxml/trunk/doc/lxml2.txt
Log:
docs
Modified: lxml/trunk/doc/lxml2.txt
==============================================================================
--- lxml/trunk/doc/lxml2.txt (original)
+++ lxml/trunk/doc/lxml2.txt Sun Oct 7 22:12:23 2007
@@ -78,7 +78,7 @@
type annotation on serialisation, you can use the ``deannotate()`` function.
* The C-API function ``findOrBuildNodeNs()`` was replaced by the more generic
- ``findOrBuildNodeNsPrefix()``
+ ``findOrBuildNodeNsPrefix()`` that accepts an additional default prefix.
Enhancements
From scoder at codespeak.net Mon Oct 8 21:57:08 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 8 Oct 2007 21:57:08 +0200 (CEST)
Subject: [Lxml-checkins] r47317 - lxml/trunk/src/lxml
Message-ID: <20071008195708.A49C3815A@code0.codespeak.net>
Author: scoder
Date: Mon Oct 8 21:57:08 2007
New Revision: 47317
Modified:
lxml/trunk/src/lxml/apihelpers.pxi
Log:
handle exceptions in subelement creation
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Mon Oct 8 21:57:08 2007
@@ -171,15 +171,24 @@
python.PyErr_NoMemory()
tree.xmlAddChild(parent._c_node, c_node)
- if text is not None:
- _setNodeText(c_node, text)
- if tail is not None:
- _setTailText(c_node, tail)
-
- # add namespaces to node if necessary
- doc._setNodeNamespaces(c_node, ns_utf, nsmap)
- _initNodeAttributes(c_node, doc, attrib, extra_attrs)
- return _elementFactory(doc, c_node)
+ try:
+ if text is not None:
+ _setNodeText(c_node, text)
+ if tail is not None:
+ _setTailText(c_node, tail)
+
+ # add namespaces to node if necessary
+ doc._setNodeNamespaces(c_node, ns_utf, nsmap)
+ _initNodeAttributes(c_node, doc, attrib, extra_attrs)
+ return _elementFactory(doc, c_node)
+ except:
+ # free allocated c_node/c_doc unless Python does it for us
+ if c_node.doc is not c_doc:
+ # node not yet in document => will not be freed by document
+ if tail is not None:
+ _removeText(c_node.next) # tail
+ tree.xmlFreeNode(c_node)
+ raise
cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, extra):
"""Initialise the attributes of an element node.
From scoder at codespeak.net Mon Oct 8 22:00:20 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 8 Oct 2007 22:00:20 +0200 (CEST)
Subject: [Lxml-checkins] r47318 - in lxml/trunk: . src/lxml
Message-ID: <20071008200020.C380F80C9@code0.codespeak.net>
Author: scoder
Date: Mon Oct 8 22:00:20 2007
New Revision: 47318
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/etree.pyx
Log:
make namespace prefix counter a Python long to avoid crashes by counter overflow
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Mon Oct 8 22:00:20 2007
@@ -2,6 +2,21 @@
lxml changelog
==============
+Under development
+=================
+
+Features added
+--------------
+
+Bugs fixed
+----------
+
+* lxml.etree could crash when adding more than 10000 namespaces to a
+ document
+
+Other changes
+-------------
+
2.0alpha4 (2007-10-07)
======================
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Mon Oct 8 22:00:20 2007
@@ -229,7 +229,7 @@
When instances of this class are garbage collected, the libxml
document is cleaned up.
"""
- cdef int _ns_counter
+ cdef object _ns_counter
cdef xmlDoc* _c_doc
cdef _BaseParser _parser
@@ -295,7 +295,7 @@
return self._c_doc.URL
cdef buildNewPrefix(self):
- ns = python.PyString_FromFormat("ns%d", self._ns_counter)
+ ns = "ns%d" % self._ns_counter
self._ns_counter = self._ns_counter + 1
return ns
@@ -304,7 +304,6 @@
"""Get or create namespace structure for a node. Reuses the prefix if
possible.
"""
- cdef int i
cdef xmlNs* c_ns
cdef xmlNs* c_doc_ns
# look for existing ns
@@ -315,15 +314,12 @@
if c_prefix is NULL or \
tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is not NULL:
# try to simulate ElementTree's namespace prefix creation
- for i from 0 <= i < 10000:
+ while 1:
prefix = self.buildNewPrefix()
c_prefix = _cstr(prefix)
# make sure it's not used already
if tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is NULL:
break
- if i >= 10000:
- # XXX too many prefixes in use - this is pretty bad!
- return NULL
return tree.xmlNewNs(c_node, c_href, c_prefix)
@@ -333,8 +329,8 @@
c_ns = self._findOrBuildNodeNs(c_node, href, NULL)
tree.xmlSetNs(c_node, c_ns)
- cdef void _setNodeNamespaces(self, xmlNode* c_node,
- object node_ns_utf, object nsmap):
+ cdef int _setNodeNamespaces(self, xmlNode* c_node,
+ object node_ns_utf, object nsmap) except -1:
"""Lookup current namespace prefixes, then set namespace structure for
node and register new ns-prefix mappings.
@@ -347,7 +343,7 @@
if not nsmap:
if node_ns_utf is not None:
self._setNodeNs(c_node, _cstr(node_ns_utf))
- return
+ return 0
c_doc = self._c_doc
for prefix, href in nsmap.items():
@@ -368,6 +364,7 @@
if node_ns_utf is not None:
self._setNodeNs(c_node, _cstr(node_ns_utf))
+ return 0
cdef extern from "etree_defs.h":
# macro call to 't->tp_new()' for fast instantiation
@@ -377,7 +374,7 @@
cdef _Document result
result = NEW_DOCUMENT(_Document)
result._c_doc = c_doc
- result._ns_counter = 0
+ result._ns_counter = 0L
if parser is None:
parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
result._parser = parser
From scoder at codespeak.net Mon Oct 8 22:59:19 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 8 Oct 2007 22:59:19 +0200 (CEST)
Subject: [Lxml-checkins] r47319 - in lxml/branch/lxml-1.3: . src/lxml
Message-ID: <20071008205919.961DE812D@code0.codespeak.net>
Author: scoder
Date: Mon Oct 8 22:59:18 2007
New Revision: 47319
Modified:
lxml/branch/lxml-1.3/CHANGES.txt
lxml/branch/lxml-1.3/src/lxml/etree.pyx
Log:
make namespace prefix counter a Python long to avoid crashes by counter overflow
Modified: lxml/branch/lxml-1.3/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-1.3/CHANGES.txt (original)
+++ lxml/branch/lxml-1.3/CHANGES.txt Mon Oct 8 22:59:18 2007
@@ -11,6 +11,9 @@
Bugs fixed
----------
+* lxml.etree could crash when adding more than 10000 namespaces to a
+ document
+
* lxml failed to serialise namespace declarations of elements other than the
root node of a tree
Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/etree.pyx (original)
+++ lxml/branch/lxml-1.3/src/lxml/etree.pyx Mon Oct 8 22:59:18 2007
@@ -231,7 +231,7 @@
When instances of this class are garbage collected, the libxml
document is cleaned up.
"""
- cdef int _ns_counter
+ cdef object _ns_counter
cdef xmlDoc* _c_doc
cdef _BaseParser _parser
@@ -297,7 +297,7 @@
return self._c_doc.URL
cdef buildNewPrefix(self):
- ns = python.PyString_FromFormat("ns%d", self._ns_counter)
+ ns = "ns%d" % self._ns_counter
self._ns_counter = self._ns_counter + 1
return ns
@@ -306,7 +306,6 @@
"""Get or create namespace structure for a node. Reuses the prefix if
possible.
"""
- cdef int i
cdef xmlNs* c_ns
cdef xmlNs* c_doc_ns
# look for existing ns
@@ -317,15 +316,12 @@
if c_prefix is NULL or \
tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is not NULL:
# try to simulate ElementTree's namespace prefix creation
- for i from 0 <= i < 10000:
+ while 1:
prefix = self.buildNewPrefix()
c_prefix = _cstr(prefix)
# make sure it's not used already
if tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is NULL:
break
- if i >= 10000:
- # XXX too many prefixes in use - this is pretty bad!
- return NULL
return tree.xmlNewNs(c_node, c_href, c_prefix)
@@ -335,8 +331,8 @@
c_ns = self._findOrBuildNodeNs(c_node, href, NULL)
tree.xmlSetNs(c_node, c_ns)
- cdef void _setNodeNamespaces(self, xmlNode* c_node,
- object node_ns_utf, object nsmap):
+ cdef int _setNodeNamespaces(self, xmlNode* c_node,
+ object node_ns_utf, object nsmap) except -1:
"""Lookup current namespace prefixes, then set namespace structure for
node and register new ns-prefix mappings.
@@ -349,7 +345,7 @@
if not nsmap:
if node_ns_utf is not None:
self._setNodeNs(c_node, _cstr(node_ns_utf))
- return
+ return 0
c_doc = self._c_doc
for prefix, href in nsmap.items():
@@ -370,12 +366,13 @@
if node_ns_utf is not None:
self._setNodeNs(c_node, _cstr(node_ns_utf))
+ return 0
cdef _Document _documentFactory(xmlDoc* c_doc, _BaseParser parser):
cdef _Document result
result = _Document()
result._c_doc = c_doc
- result._ns_counter = 0
+ result._ns_counter = 0L
if parser is None:
parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
result._parser = parser
From scoder at codespeak.net Tue Oct 9 11:20:55 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 9 Oct 2007 11:20:55 +0200 (CEST)
Subject: [Lxml-checkins] r47326 - lxml/trunk/src/lxml
Message-ID: <20071009092055.7A8F080DA@code0.codespeak.net>
Author: scoder
Date: Tue Oct 9 11:20:53 2007
New Revision: 47326
Modified:
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/config.pxd
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/etreepublic.pxd
lxml/trunk/src/lxml/iterparse.pxi
lxml/trunk/src/lxml/objectify.pyx
lxml/trunk/src/lxml/objectpath.pxi
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/public-api.pxi
lxml/trunk/src/lxml/serializer.pxi
lxml/trunk/src/lxml/tree.pxd
lxml/trunk/src/lxml/xmlerror.pxi
Log:
use 'bint' instead of 'int' Pyrex type where appropriate
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Tue Oct 9 11:20:53 2007
@@ -448,7 +448,7 @@
element._c_node, _cstr(ns), NULL)
return '%s:%s' % (c_ns.prefix, tag)
-cdef int _hasChild(xmlNode* c_node):
+cdef bint _hasChild(xmlNode* c_node):
return c_node is not NULL and _findChildForwards(c_node, 0) is not NULL
cdef xmlNode* _findChild(xmlNode* c_node, Py_ssize_t index):
@@ -534,7 +534,7 @@
return NULL
return c_node
-cdef int _tagMatches(xmlNode* c_node, char* c_href, char* c_name):
+cdef bint _tagMatches(xmlNode* c_node, char* c_href, char* c_name):
"""Tests if the node matches namespace URI and tag name.
A node matches if it matches both c_href and c_name.
@@ -697,7 +697,7 @@
cdef char* s
cdef char* c_end
cdef char c
- cdef int is_non_ascii
+ cdef bint is_non_ascii
s = _cstr(pystring)
c_end = s + python.PyString_GET_SIZE(pystring)
is_non_ascii = 0
Modified: lxml/trunk/src/lxml/config.pxd
==============================================================================
--- lxml/trunk/src/lxml/config.pxd (original)
+++ lxml/trunk/src/lxml/config.pxd Tue Oct 9 11:20:53 2007
@@ -1,3 +1,3 @@
cdef extern from "etree_defs.h":
- cdef int ENABLE_THREADING
- cdef int ENABLE_SCHEMATRON
+ cdef bint ENABLE_THREADING
+ cdef bint ENABLE_SCHEMATRON
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Tue Oct 9 11:20:53 2007
@@ -853,7 +853,6 @@
return c
def __nonzero__(self):
- cdef xmlNode* c_node
import warnings
warnings.warn(
"The behavior of this method will change in future versions. "
@@ -861,7 +860,7 @@
FutureWarning
)
# emulate old behaviour
- return bool(_hasChild(self._c_node))
+ return _hasChild(self._c_node)
def __contains__(self, element):
cdef xmlNode* c_node
@@ -1384,22 +1383,22 @@
The keyword argument 'method' selects the output method: 'xml' or
'html'.
"""
- cdef int c_write_declaration
+ cdef bint write_declaration
self._assertHasRoot()
# suppress decl. in default case (purely for ElementTree compatibility)
if xml_declaration is not None:
- c_write_declaration = bool(xml_declaration)
+ write_declaration = xml_declaration
if encoding is None:
encoding = 'ASCII'
elif encoding is None:
encoding = 'ASCII'
- c_write_declaration = 0
+ write_declaration = 0
else:
encoding = encoding.upper()
- c_write_declaration = encoding not in \
+ write_declaration = encoding not in \
('US-ASCII', 'ASCII', 'UTF8', 'UTF-8')
_tofilelike(file, self._context_node, encoding, method,
- c_write_declaration, 1, bool(pretty_print))
+ write_declaration, 1, pretty_print)
def getpath(self, _Element element not None):
"""Returns a structural, absolute XPath expression to find that element.
@@ -2164,7 +2163,7 @@
"""Writes an element tree or element structure to sys.stdout. This function
should be used for debugging only.
"""
- _dumpToFile(sys.stdout, elem._c_node, bool(pretty_print))
+ _dumpToFile(sys.stdout, elem._c_node, pretty_print)
def tostring(element_or_tree, encoding=None, method="xml",
xml_declaration=None, pretty_print=False):
@@ -2178,26 +2177,25 @@
The keyword argument 'pretty_print' (bool) enables formatted XML.
- The keyword argument 'method' selects the output method: 'xml' or 'html'.
+ The keyword argument 'method' selects the output method: 'xml',
+ 'html' or plain 'text'.
"""
- cdef int write_declaration
- cdef int c_pretty_print
- c_pretty_print = bool(pretty_print)
+ cdef bint write_declaration
if xml_declaration is None:
# by default, write an XML declaration only for non-standard encodings
write_declaration = encoding is not None and encoding.upper() not in \
('ASCII', 'UTF-8', 'UTF8', 'US-ASCII')
else:
- write_declaration = bool(xml_declaration)
+ write_declaration = xml_declaration
if encoding is None:
encoding = 'ASCII'
if isinstance(element_or_tree, _Element):
return _tostring(<_Element>element_or_tree, encoding, method,
- write_declaration, 0, c_pretty_print)
+ write_declaration, 0, pretty_print)
elif isinstance(element_or_tree, _ElementTree):
return _tostring((<_ElementTree>element_or_tree)._context_node,
- encoding, method, write_declaration, 1, c_pretty_print)
+ encoding, method, write_declaration, 1, pretty_print)
else:
raise TypeError, "Type '%s' cannot be serialized." % type(element_or_tree)
@@ -2218,17 +2216,16 @@
therefore not necessarily suited for serialization to byte streams without
further treatment.
- The keyword argument 'pretty_print' (bool) enables formatted XML.
+ The boolean keyword argument 'pretty_print' enables formatted XML.
- The keyword argument 'method' selects the output method: 'xml' or 'html'.
+ The keyword argument 'method' selects the output method: 'xml',
+ 'html' or plain 'text'.
"""
- cdef int c_pretty_print
- c_pretty_print = bool(pretty_print)
if isinstance(element_or_tree, _Element):
- return _tounicode(<_Element>element_or_tree, method, 0, c_pretty_print)
+ return _tounicode(<_Element>element_or_tree, method, 0, pretty_print)
elif isinstance(element_or_tree, _ElementTree):
return _tounicode((<_ElementTree>element_or_tree)._context_node,
- method, 1, c_pretty_print)
+ method, 1, pretty_print)
else:
raise TypeError, "Type '%s' cannot be serialized." % type(element_or_tree)
Modified: lxml/trunk/src/lxml/etreepublic.pxd
==============================================================================
--- lxml/trunk/src/lxml/etreepublic.pxd (original)
+++ lxml/trunk/src/lxml/etreepublic.pxd Tue Oct 9 11:20:53 2007
@@ -5,7 +5,7 @@
cdef extern from "etree_defs.h":
# test if c_node is considered an Element (i.e. Element, Comment, etc.)
- cdef int _isElement(tree.xmlNode* c_node)
+ cdef bint _isElement(tree.xmlNode* c_node)
# return the namespace URI of the node or NULL
cdef char* _getNs(tree.xmlNode* node)
@@ -129,7 +129,7 @@
# XML node helper functions
# check if the element has at least one child
- cdef int hasChild(tree.xmlNode* c_node)
+ cdef bint hasChild(tree.xmlNode* c_node)
# find child element number 'index' (supports negative indexes)
cdef tree.xmlNode* findChild(tree.xmlNode* c_node,
@@ -191,10 +191,10 @@
cdef object namespacedNameFromNsName(char* c_ns, char* c_tag)
# check if the node has a text value (which may be '')
- cdef int hasText(tree.xmlNode* c_node)
+ cdef bint hasText(tree.xmlNode* c_node)
# check if the node has a tail value (which may be '')
- cdef int hasTail(tree.xmlNode* c_node)
+ cdef bint hasTail(tree.xmlNode* c_node)
# get the text content of an element (or None)
cdef object textOf(tree.xmlNode* c_node)
Modified: lxml/trunk/src/lxml/iterparse.pxi
==============================================================================
--- lxml/trunk/src/lxml/iterparse.pxi (original)
+++ lxml/trunk/src/lxml/iterparse.pxi Tue Oct 9 11:20:53 2007
@@ -292,7 +292,6 @@
filename = _encodeFilename(filename)
self._source = source
- html = bool(html)
if html:
# make sure we're not looking for namespaces
if 'start' in events:
Modified: lxml/trunk/src/lxml/objectify.pyx
==============================================================================
--- lxml/trunk/src/lxml/objectify.pyx (original)
+++ lxml/trunk/src/lxml/objectify.pyx Tue Oct 9 11:20:53 2007
@@ -1045,7 +1045,7 @@
cdef object _makeelement
cdef object _namespace
cdef object _nsmap
- cdef int _annotate
+ cdef bint _annotate
def __init__(self, namespace=None, nsmap=None, annotate=True,
makeelement=None):
if nsmap is None:
@@ -1055,7 +1055,7 @@
self._namespace = None
else:
self._namespace = "{%s}" % namespace
- self._annotate = bool(annotate)
+ self._annotate = annotate
if makeelement is not None:
assert callable(makeelement)
self._makeelement = makeelement
@@ -1077,15 +1077,15 @@
cdef object _tag
cdef object _nsmap
cdef object _element_factory
- cdef int _annotate
+ cdef bint _annotate
def __call__(self, *children, **attrib):
cdef _ObjectifyElementMakerCaller elementMaker
cdef python.PyObject* pytype
cdef _Element element
cdef _Element childElement
- cdef int has_children
- cdef int has_string_value
+ cdef bint has_children
+ cdef bint has_string_value
if self._element_factory is None:
element = _makeElement(self._tag, None, attrib, self._nsmap)
else:
@@ -1153,7 +1153,7 @@
################################################################################
# Recursive element dumping
-cdef int __RECURSIVE_STR
+cdef bint __RECURSIVE_STR
__RECURSIVE_STR = 0 # default: off
def enableRecursiveStr(on=True):
@@ -1161,7 +1161,7 @@
based on objectify.dump(element).
"""
global __RECURSIVE_STR
- __RECURSIVE_STR = bool(on)
+ __RECURSIVE_STR = on
def dump(_Element element not None):
"""Return a recursively generated string representation of an element.
@@ -1323,8 +1323,7 @@
"""
cdef _Element element
element = cetree.rootNodeOrRaise(element_or_tree)
- _annotate(element, 0, 1, bool(ignore_xsi), bool(ignore_old),
- None, empty_pytype)
+ _annotate(element, 0, 1, ignore_xsi, ignore_old, None, empty_pytype)
def xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False,
empty_type=None):
@@ -1350,8 +1349,7 @@
"""
cdef _Element element
element = cetree.rootNodeOrRaise(element_or_tree)
- _annotate(element, 1, 0, bool(ignore_old), bool(ignore_pytype),
- empty_type, None)
+ _annotate(element, 1, 0, ignore_old, ignore_pytype, empty_type, None)
def annotate(element_or_tree, ignore_old=True, ignore_xsi=False,
empty_pytype=None, empty_type=None, annotate_xsi=0,
@@ -1386,12 +1384,12 @@
"""
cdef _Element element
element = cetree.rootNodeOrRaise(element_or_tree)
- _annotate(element, annotate_xsi, annotate_pytype, bool(ignore_xsi),
- bool(ignore_old), empty_type, empty_pytype)
+ _annotate(element, annotate_xsi, annotate_pytype, ignore_xsi,
+ ignore_old, empty_type, empty_pytype)
-cdef _annotate(_Element element, int annotate_xsi, int annotate_pytype,
- int ignore_xsi, int ignore_pytype,
+cdef _annotate(_Element element, bint annotate_xsi, bint annotate_pytype,
+ bint ignore_xsi, bint ignore_pytype,
empty_type_name, empty_pytype_name):
cdef _Document doc
cdef tree.xmlNode* c_node
Modified: lxml/trunk/src/lxml/objectpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/objectpath.pxi (original)
+++ lxml/trunk/src/lxml/objectpath.pxi Tue Oct 9 11:20:53 2007
@@ -86,7 +86,7 @@
"""Parse object path string into a 'hrefOnameOhrefOnameOOO' string and an
index list. The index list is None if no index was used in the path.
"""
- cdef int has_dot
+ cdef bint has_dot
new_path = []
path = cetree.utf8(path.strip())
if path == '.':
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Tue Oct 9 11:20:53 2007
@@ -431,7 +431,7 @@
cdef object _handleParseResult(self, _BaseParser parser,
xmlDoc* result, filename):
cdef xmlDoc* c_doc
- cdef int recover
+ cdef bint recover
recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER
c_doc = _handleParseResult(self, self._c_ctxt, result,
filename, recover)
@@ -439,7 +439,7 @@
cdef xmlDoc* _handleParseResultDoc(self, _BaseParser parser,
xmlDoc* result, filename) except NULL:
- cdef int recover
+ cdef bint recover
recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER
return _handleParseResult(self, self._c_ctxt, result,
filename, recover)
@@ -481,8 +481,8 @@
cdef xmlDoc* _handleParseResult(_ParserContext context,
xmlparser.xmlParserCtxt* c_ctxt,
xmlDoc* result, filename,
- int recover) except NULL:
- cdef int well_formed
+ bint recover) except NULL:
+ cdef bint well_formed
if c_ctxt.myDoc is not NULL:
if c_ctxt.myDoc != result:
tree.xmlFreeDoc(c_ctxt.myDoc)
@@ -556,8 +556,8 @@
self._filename = filename
self._target = target
self._for_html = for_html
- self._remove_comments = bool(remove_comments)
- self._remove_pis = bool(remove_pis)
+ self._remove_comments = remove_comments
+ self._remove_pis = remove_pis
self._resolvers = _ResolverRegistry()
@@ -711,7 +711,6 @@
cdef python.PyThreadState* state
cdef xmlDoc* result
cdef xmlparser.xmlParserCtxt* pctxt
- cdef int recover
cdef Py_ssize_t py_buffer_len
cdef int buffer_len
cdef char* c_text
@@ -752,7 +751,6 @@
cdef python.PyThreadState* state
cdef xmlDoc* result
cdef xmlparser.xmlParserCtxt* pctxt
- cdef int recover
cdef char* c_encoding
if c_len > python.INT_MAX:
raise ParserError, "string is too long to parse it with libxml2"
@@ -788,7 +786,6 @@
cdef python.PyThreadState* state
cdef xmlDoc* result
cdef xmlparser.xmlParserCtxt* pctxt
- cdef int recover
cdef int orig_options
cdef char* c_encoding
result = NULL
@@ -825,7 +822,6 @@
cdef xmlDoc* result
cdef xmlparser.xmlParserCtxt* pctxt
cdef char* c_filename
- cdef int recover
if not filename:
filename = None
@@ -884,7 +880,6 @@
cdef char* c_encoding
cdef int buffer_len
cdef int error
- cdef int recover
if python.PyString_Check(data):
c_encoding = NULL
c_data = _cstr(data)
Modified: lxml/trunk/src/lxml/public-api.pxi
==============================================================================
--- lxml/trunk/src/lxml/public-api.pxi (original)
+++ lxml/trunk/src/lxml/public-api.pxi Tue Oct 9 11:20:53 2007
@@ -54,10 +54,10 @@
cdef public _Element rootNodeOrRaise(object input):
return _rootNodeOrRaise(input)
-cdef public int hasText(xmlNode* c_node):
+cdef public bint hasText(xmlNode* c_node):
return _hasText(c_node)
-cdef public int hasTail(xmlNode* c_node):
+cdef public bint hasTail(xmlNode* c_node):
return _hasTail(c_node)
cdef public object textOf(xmlNode* c_node):
@@ -106,7 +106,7 @@
char* c_href, char* c_name):
return _delAttributeFromNsName(c_element, c_href, c_name)
-cdef public int hasChild(xmlNode* c_node):
+cdef public bint hasChild(xmlNode* c_node):
return _hasChild(c_node)
cdef public xmlNode* findChild(xmlNode* c_node, Py_ssize_t index):
Modified: lxml/trunk/src/lxml/serializer.pxi
==============================================================================
--- lxml/trunk/src/lxml/serializer.pxi (original)
+++ lxml/trunk/src/lxml/serializer.pxi Tue Oct 9 11:20:53 2007
@@ -44,8 +44,8 @@
return python.PyUnicode_AsEncodedString(text, encoding, 'strict')
cdef _tostring(_Element element, encoding, method,
- int write_xml_declaration, int write_complete_document,
- int pretty_print):
+ bint write_xml_declaration, bint write_complete_document,
+ bint pretty_print):
"""Serialize an element to an encoded string representation of its XML
tree.
"""
@@ -96,7 +96,7 @@
return result
cdef _tounicode(_Element element, method,
- int write_complete_document, int pretty_print):
+ bint write_complete_document, bint pretty_print):
"""Serialize an element to the Python unicode representation of its XML
tree.
"""
@@ -133,9 +133,9 @@
cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer,
xmlNode* c_node, char* encoding, int c_method,
- int write_xml_declaration,
- int write_complete_document,
- int pretty_print):
+ bint write_xml_declaration,
+ bint write_complete_document,
+ bint pretty_print):
cdef xmlDoc* c_doc
cdef xmlNode* c_nsdecl_node
c_doc = c_node.doc
@@ -222,7 +222,7 @@
tree.xmlOutputBufferWrite(c_buffer, 3, "]>\n")
cdef void _writeTail(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
- char* encoding, int pretty_print):
+ char* encoding, bint pretty_print):
"Write the element tail."
c_node = c_node.next
while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE:
@@ -231,7 +231,7 @@
c_node = c_node.next
cdef void _writePrevSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
- char* encoding, int pretty_print):
+ char* encoding, bint pretty_print):
cdef xmlNode* c_sibling
if c_node.parent is not NULL and _isElement(c_node.parent):
return
@@ -247,7 +247,7 @@
c_sibling = c_sibling.next
cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
- char* encoding, int pretty_print):
+ char* encoding, bint pretty_print):
cdef xmlNode* c_sibling
if c_node.parent is not NULL and _isElement(c_node.parent):
return
@@ -307,8 +307,8 @@
return (<_FilelikeWriter>ctxt).close()
cdef _tofilelike(f, _Element element, encoding, method,
- int write_xml_declaration, int write_doctype,
- int pretty_print):
+ bint write_xml_declaration, bint write_doctype,
+ bint pretty_print):
cdef python.PyThreadState* state
cdef _FilelikeWriter writer
cdef tree.xmlOutputBuffer* c_buffer
@@ -400,7 +400,7 @@
# dump node to file (mainly for debug)
-cdef _dumpToFile(f, xmlNode* c_node, int pretty_print):
+cdef _dumpToFile(f, xmlNode* c_node, bint pretty_print):
cdef tree.xmlOutputBuffer* c_buffer
if not python.PyFile_Check(f):
raise ValueError, "Not a file"
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Tue Oct 9 11:20:53 2007
@@ -300,9 +300,9 @@
cdef void* xmlMalloc(size_t size)
cdef extern from "etree_defs.h":
- cdef int _isElement(xmlNode* node)
- cdef int _isElementOrXInclude(xmlNode* node)
+ cdef bint _isElement(xmlNode* node)
+ cdef bint _isElementOrXInclude(xmlNode* node)
cdef char* _getNs(xmlNode* node)
cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* tree_top,
- xmlNode* start_node, int inclusive)
+ xmlNode* start_node, bint inclusive)
cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* start_node)
Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi (original)
+++ lxml/trunk/src/lxml/xmlerror.pxi Tue Oct 9 11:20:53 2007
@@ -98,7 +98,7 @@
return ''
cdef void _receive(self, xmlerror.xmlError* error):
- cdef int is_error
+ cdef bint is_error
cdef _LogEntry entry
entry = _LogEntry()
entry._setError(error)
@@ -114,6 +114,7 @@
cdef void _receiveGeneric(self, int domain, int type, int level, int line,
message, filename):
+ cdef bint is_error
cdef _LogEntry entry
entry = _LogEntry()
entry._setGeneric(domain, type, level, line, message, filename)
@@ -184,7 +185,9 @@
return False
def __nonzero__(self):
- return bool(self._entries)
+ cdef bint result
+ result = self._entries
+ return result
def filter_domains(self, domains):
"""Filter the errors by the given domains and return a new error log
From scoder at codespeak.net Tue Oct 9 12:02:21 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 9 Oct 2007 12:02:21 +0200 (CEST)
Subject: [Lxml-checkins] r47330 - lxml/trunk/src/lxml
Message-ID: <20071009100221.2483081A8@code0.codespeak.net>
Author: scoder
Date: Tue Oct 9 12:02:20 2007
New Revision: 47330
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
keep prefix counter a C integer, change formatting string instead
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Tue Oct 9 12:02:20 2007
@@ -229,7 +229,8 @@
When instances of this class are garbage collected, the libxml
document is cleaned up.
"""
- cdef object _ns_counter
+ cdef unsigned int _ns_counter
+ cdef object _prefix_format
cdef xmlDoc* _c_doc
cdef _BaseParser _parser
@@ -295,8 +296,12 @@
return self._c_doc.URL
cdef buildNewPrefix(self):
- ns = "ns%d" % self._ns_counter
+ ns = python.PyString_FromFormat(
+ _cstr(self._prefix_format), self._ns_counter)
self._ns_counter = self._ns_counter + 1
+ if self._ns_counter == 0:
+ # overflow!
+ self._prefix_format = self._prefix_format + "A"
return ns
cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node,
@@ -374,7 +379,8 @@
cdef _Document result
result = NEW_DOCUMENT(_Document)
result._c_doc = c_doc
- result._ns_counter = 0L
+ result._ns_counter = 0
+ result._prefix_format = "ns%lu"
if parser is None:
parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
result._parser = parser
From scoder at codespeak.net Tue Oct 9 12:02:54 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 9 Oct 2007 12:02:54 +0200 (CEST)
Subject: [Lxml-checkins] r47331 - lxml/branch/lxml-1.3/src/lxml
Message-ID: <20071009100254.4B17581A8@code0.codespeak.net>
Author: scoder
Date: Tue Oct 9 12:02:53 2007
New Revision: 47331
Modified:
lxml/branch/lxml-1.3/src/lxml/etree.pyx
Log:
keep prefix counter a C integer, change formatting string instead
Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/etree.pyx (original)
+++ lxml/branch/lxml-1.3/src/lxml/etree.pyx Tue Oct 9 12:02:53 2007
@@ -231,7 +231,8 @@
When instances of this class are garbage collected, the libxml
document is cleaned up.
"""
- cdef object _ns_counter
+ cdef unsigned int _ns_counter
+ cdef object _prefix_format
cdef xmlDoc* _c_doc
cdef _BaseParser _parser
@@ -297,8 +298,12 @@
return self._c_doc.URL
cdef buildNewPrefix(self):
- ns = "ns%d" % self._ns_counter
+ ns = python.PyString_FromFormat(
+ _cstr(self._prefix_format), self._ns_counter)
self._ns_counter = self._ns_counter + 1
+ if self._ns_counter == 0:
+ # overflow!
+ self._prefix_format = self._prefix_format + "A"
return ns
cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node,
@@ -372,7 +377,8 @@
cdef _Document result
result = _Document()
result._c_doc = c_doc
- result._ns_counter = 0L
+ result._ns_counter = 0
+ result._prefix_format = "ns%lu"
if parser is None:
parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
result._parser = parser
From scoder at codespeak.net Tue Oct 9 14:56:29 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 9 Oct 2007 14:56:29 +0200 (CEST)
Subject: [Lxml-checkins] r47343 - lxml/trunk/doc
Message-ID: <20071009125629.8F4DE81B1@code0.codespeak.net>
Author: scoder
Date: Tue Oct 9 14:56:29 2007
New Revision: 47343
Modified:
lxml/trunk/doc/build.txt
Log:
typo
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Tue Oct 9 14:56:29 2007
@@ -223,7 +223,7 @@
version numbers::
STATIC_INCLUDE_DIRS = [
- "..\\libxml2-2.6.23.win32\\include ",
+ "..\\libxml2-2.6.23.win32\\include",
"..\\libxslt-1.1.15.win32\\include",
"..\\zlib-1.2.3.win32\\include",
"..\\iconv-1.9.1.win32\\include"
From scoder at codespeak.net Tue Oct 9 14:57:07 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 9 Oct 2007 14:57:07 +0200 (CEST)
Subject: [Lxml-checkins] r47344 - lxml/branch/lxml-1.3/doc
Message-ID: <20071009125707.98E2280C7@code0.codespeak.net>
Author: scoder
Date: Tue Oct 9 14:57:07 2007
New Revision: 47344
Modified:
lxml/branch/lxml-1.3/doc/build.txt
Log:
typo
Modified: lxml/branch/lxml-1.3/doc/build.txt
==============================================================================
--- lxml/branch/lxml-1.3/doc/build.txt (original)
+++ lxml/branch/lxml-1.3/doc/build.txt Tue Oct 9 14:57:07 2007
@@ -250,7 +250,7 @@
version numbers::
STATIC_INCLUDE_DIRS = [
- "..\\libxml2-2.6.23.win32\\include ",
+ "..\\libxml2-2.6.23.win32\\include",
"..\\libxslt-1.1.15.win32\\include",
"..\\zlib-1.2.3.win32\\include",
"..\\iconv-1.9.1.win32\\include"
From scoder at codespeak.net Tue Oct 9 15:03:40 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 9 Oct 2007 15:03:40 +0200 (CEST)
Subject: [Lxml-checkins] r47346 - lxml/trunk/doc
Message-ID: <20071009130340.0048481B1@code0.codespeak.net>
Author: scoder
Date: Tue Oct 9 15:03:40 2007
New Revision: 47346
Modified:
lxml/trunk/doc/build.txt
Log:
removed doc leftover
Modified: lxml/trunk/doc/build.txt
==============================================================================
--- lxml/trunk/doc/build.txt (original)
+++ lxml/trunk/doc/build.txt Tue Oct 9 15:03:40 2007
@@ -232,7 +232,6 @@
STATIC_LIBRARY_DIRS = [
"..\\libxml2-2.6.23.win32\\lib",
"..\\libxslt-1.1.15.win32\\lib",
- "..\\libxslt-1.1.15.win32\\lib",
"..\\zlib-1.2.3.win32\\lib",
"..\\iconv-1.9.1.win32\\lib"
]
From scoder at codespeak.net Tue Oct 9 15:03:54 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 9 Oct 2007 15:03:54 +0200 (CEST)
Subject: [Lxml-checkins] r47347 - lxml/branch/lxml-1.3/doc
Message-ID: <20071009130354.9DF7381B1@code0.codespeak.net>
Author: scoder
Date: Tue Oct 9 15:03:54 2007
New Revision: 47347
Modified:
lxml/branch/lxml-1.3/doc/build.txt
Log:
removed doc leftover
Modified: lxml/branch/lxml-1.3/doc/build.txt
==============================================================================
--- lxml/branch/lxml-1.3/doc/build.txt (original)
+++ lxml/branch/lxml-1.3/doc/build.txt Tue Oct 9 15:03:54 2007
@@ -259,7 +259,6 @@
STATIC_LIBRARY_DIRS = [
"..\\libxml2-2.6.23.win32\\lib",
"..\\libxslt-1.1.15.win32\\lib",
- "..\\libxslt-1.1.15.win32\\lib",
"..\\zlib-1.2.3.win32\\lib",
"..\\iconv-1.9.1.win32\\lib"
]
From scoder at codespeak.net Wed Oct 10 09:13:13 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 10 Oct 2007 09:13:13 +0200 (CEST)
Subject: [Lxml-checkins] r47366 - lxml/trunk/doc
Message-ID: <20071010071313.983FC81E0@code0.codespeak.net>
Author: scoder
Date: Wed Oct 10 09:13:10 2007
New Revision: 47366
Modified:
lxml/trunk/doc/lxmlhtml.txt
Log:
doc fix
Modified: lxml/trunk/doc/lxmlhtml.txt
==============================================================================
--- lxml/trunk/doc/lxmlhtml.txt (original)
+++ lxml/trunk/doc/lxmlhtml.txt Wed Oct 10 09:13:10 2007
@@ -2,9 +2,9 @@
lxml.html
=========
-Since version 2.0, lxml provides a dedicated package for dealing with HTML:
-``lxml.html``. It provides a special Element API for HTML elements, as well
-as a number of utilities for common tasks.
+Since version 2.0, lxml comes with a dedicated package for dealing
+with HTML: ``lxml.html``. It provides a special Element API for HTML
+elements, as well as a number of utilities for common tasks.
.. contents::
..
From scoder at codespeak.net Wed Oct 10 09:19:28 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 10 Oct 2007 09:19:28 +0200 (CEST)
Subject: [Lxml-checkins] r47367 - lxml/trunk/doc
Message-ID: <20071010071928.8AA9181E5@code0.codespeak.net>
Author: scoder
Date: Wed Oct 10 09:19:28 2007
New Revision: 47367
Modified:
lxml/trunk/doc/lxmlhtml.txt
Log:
doc fix
Modified: lxml/trunk/doc/lxmlhtml.txt
==============================================================================
--- lxml/trunk/doc/lxmlhtml.txt (original)
+++ lxml/trunk/doc/lxmlhtml.txt Wed Oct 10 09:19:28 2007
@@ -37,7 +37,8 @@
If you give a URL, or if the object has a ``.geturl()`` method (as
file-like objects from ``urllib.urlopen()`` have), then that URL
- is used as the base URL.
+ is used as the base URL. You can also provide an explicit
+ ``base_url`` keyword argument.
``document_fromstring(string)``:
Parses a document from the given string. This always creates a
From scoder at codespeak.net Wed Oct 10 11:22:38 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 10 Oct 2007 11:22:38 +0200 (CEST)
Subject: [Lxml-checkins] r47375 - lxml/trunk
Message-ID: <20071010092238.D7EDD81E6@code0.codespeak.net>
Author: scoder
Date: Wed Oct 10 11:22:37 2007
New Revision: 47375
Modified:
lxml/trunk/setup.py
lxml/trunk/setupinfo.py
lxml/trunk/versioninfo.py
Log:
Py3 syntax fixes in build scripts
Modified: lxml/trunk/setup.py
==============================================================================
--- lxml/trunk/setup.py (original)
+++ lxml/trunk/setup.py Wed Oct 10 11:22:37 2007
@@ -38,7 +38,7 @@
# create lxml-version.h file
svn_version = versioninfo.svn_version()
versioninfo.create_version_h(svn_version)
-print "Building lxml version", svn_version
+print("Building lxml version %s." % svn_version)
branch_link = """
Modified: lxml/trunk/setupinfo.py
==============================================================================
--- lxml/trunk/setupinfo.py (original)
+++ lxml/trunk/setupinfo.py Wed Oct 10 11:22:37 2007
@@ -3,7 +3,7 @@
try:
from Cython.Distutils import build_ext as build_pyx
- print "Building with Cython."
+ print("Building with Cython.")
CYTHON_INSTALLED = True
except ImportError:
CYTHON_INSTALLED = False
@@ -131,13 +131,21 @@
return macros
def flags(cmd):
- wf, rf, ef = os.popen3(cmd)
+ try:
+ import subprocess
+ except ImportError:
+ # Python 2.3
+ _, rf, ef = os.popen3(cmd)
+ else:
+ # Python 2.4+
+ p = subprocess.Popen(cmd, shell=True, close_fds=True,
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ rf, ef = p.stdout, p.stderr
errors = ef.read()
if errors:
- print "ERROR:", errors
- print "** make sure the development packages of libxml2 and libxslt are installed **"
- print
- return rf.read().split()
+ print("ERROR: %s" % errors)
+ print("** make sure the development packages of libxml2 and libxslt are installed **\n")
+ return str(rf.read()).split()
def has_option(name):
try:
Modified: lxml/trunk/versioninfo.py
==============================================================================
--- lxml/trunk/versioninfo.py (original)
+++ lxml/trunk/versioninfo.py Wed Oct 10 11:22:37 2007
@@ -35,7 +35,7 @@
if data.startswith('8'):
# SVN >= 1.4
- data = map(str.splitlines, data.split('\n\x0c\n'))
+ data = [ d.splitlines() for d in data.split('\n\x0c\n') ]
del data[0][0] # get rid of the '8'
dirurl = data[0][3]
try:
From ianb at codespeak.net Sun Oct 14 02:12:32 2007
From: ianb at codespeak.net (ianb at codespeak.net)
Date: Sun, 14 Oct 2007 02:12:32 +0200 (CEST)
Subject: [Lxml-checkins] r47440 - in lxml/trunk: . src/lxml
Message-ID: <20071014001232.1B0708165@code0.codespeak.net>
Author: ianb
Date: Sun Oct 14 02:12:31 2007
New Revision: 47440
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/doctestcompare.py
Log:
Improve doctestcompare a little: NOPARSE_MARKUP option to suppress its behavior, and xmlns=... now works
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sun Oct 14 02:12:31 2007
@@ -8,12 +8,20 @@
Features added
--------------
+* When using ``lxml.doctestcompare`` you can give the doctest option
+ ``NOPARSE_MARKUP`` (like ``# doctest: +NOPARSE_MARKUP``) to suppress
+ the special checking for one test.
+
Bugs fixed
----------
* lxml.etree could crash when adding more than 10000 namespaces to a
document
+* With ``lxml.doctestcompare`` if you do `` Hello Hello
World
World
Hello
World
Hello
World
Hello
World