From scoder at codespeak.net Tue Oct 13 09:15:03 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 13 Oct 2009 09:15:03 +0200 (CEST)
Subject: [Lxml-checkins] r68360 - in lxml/trunk: . src/lxml/tests
Message-ID: <20091013071503.F09A0168053@codespeak.net>
Author: scoder
Date: Tue Oct 13 09:15:02 2009
New Revision: 68360
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/tests/common_imports.py
lxml/trunk/src/lxml/tests/test_elementtree.py
Log:
r5272 at delle: sbehnel | 2009-10-01 12:25:59 +0200
enable compatible comparison tests for ET 1.2.x
Modified: lxml/trunk/src/lxml/tests/common_imports.py
==============================================================================
--- lxml/trunk/src/lxml/tests/common_imports.py (original)
+++ lxml/trunk/src/lxml/tests/common_imports.py Tue Oct 13 09:15:02 2009
@@ -22,9 +22,9 @@
ElementTree = None
if hasattr(ElementTree, 'VERSION'):
- if make_version_tuple(ElementTree.VERSION)[:2] < (1,3):
- # compatibility tests require ET 1.3+
- ElementTree = None
+ ET_VERSION = make_version_tuple(ElementTree.VERSION)
+else:
+ ET_VERSION = (0,0,0)
try:
import cElementTree # standard ET
@@ -35,9 +35,18 @@
cElementTree = None
if hasattr(cElementTree, 'VERSION'):
- if make_version_tuple(cElementTree.VERSION)[:2] <= (1,0):
- # compatibility tests do not run with cET 1.0.7
- cElementTree = None
+ CET_VERSION = make_version_tuple(cElementTree.VERSION)
+else:
+ CET_VERSION = (0,0,0)
+
+def filter_by_version(test_class, version_dict, current_version):
+ """Remove test methods that do not work with the current lib version.
+ """
+ find_required_version = version_dict.get
+ for name in dir(test_class):
+ expected_version = find_required_version(name, (0,0,0))
+ if expected_version > current_version:
+ setattr(test_class, name, None)
try:
import doctest
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Tue Oct 13 09:15:02 2009
@@ -15,14 +15,19 @@
if this_dir not in sys.path:
sys.path.insert(0, this_dir) # needed for Py3
-from common_imports import StringIO, BytesIO, etree, ElementTree, cElementTree
-from common_imports import fileInTestDir, canonicalize, HelperTestCase
+from common_imports import StringIO, BytesIO, etree
+from common_imports import ElementTree, cElementTree, ET_VERSION, CET_VERSION
+from common_imports import filter_by_version, fileInTestDir, canonicalize, HelperTestCase
from common_imports import _str, _bytes
+if cElementTree is not None and CET_VERSION <= (1,0,7):
+ cElementTree = None
+
+if ElementTree is not None:
+ print("Comparing with ElementTree %s" % getattr(ElementTree, "VERSION", "?"))
+
if cElementTree is not None:
- if tuple([int(n) for n in
- getattr(cElementTree, "VERSION", "0.0").split(".")]) <= (1,0,7):
- cElementTree = None
+ print("Comparing with cElementTree %s" % getattr(cElementTree, "VERSION", "?"))
try:
reversed
@@ -34,6 +39,8 @@
class ETreeTestCaseBase(HelperTestCase):
etree = None
+ required_versions_ET = {}
+ required_versions_cET = {}
def test_element(self):
for i in range(10):
@@ -658,6 +665,7 @@
self.assertEquals(0, len(root))
self.assertEquals('This is a text.', root.text)
+ required_versions_ET['test_fromstringlist'] = (1,3)
def test_fromstringlist(self):
fromstringlist = self.etree.fromstringlist
@@ -666,6 +674,7 @@
self.assertEquals(0, len(root))
self.assertEquals('This is a text.', root.text)
+ required_versions_ET['test_fromstringlist_characters'] = (1,3)
def test_fromstringlist_characters(self):
fromstringlist = self.etree.fromstringlist
@@ -673,6 +682,7 @@
self.assertEquals(0, len(root))
self.assertEquals('This is a text.', root.text)
+ required_versions_ET['test_fromstringlist_single'] = (1,3)
def test_fromstringlist_single(self):
fromstringlist = self.etree.fromstringlist
@@ -780,6 +790,7 @@
result.append(el1.tag)
self.assertEquals(['one','one', 'two', 'two', 'one', 'two'], result)
+ required_versions_ET['test_itertext'] = (1,3)
def test_itertext(self):
# ET 1.3+
XML = self.etree.XML
@@ -789,6 +800,7 @@
self.assertEquals(["RTEXT", "ATAIL", "CTEXT", "CTAIL"],
text)
+ required_versions_ET['test_itertext_child'] = (1,3)
def test_itertext_child(self):
# ET 1.3+
XML = self.etree.XML
@@ -868,6 +880,7 @@
_bytes('This is a test.' % (i, i)),
canonicalize(data))
+ required_versions_ET['test_write_method_html'] = (1,3)
def test_write_method_html(self):
ElementTree = self.etree.ElementTree
Element = self.etree.Element
@@ -887,6 +900,7 @@
self.assertEquals(_bytes('<html><body><p>html<br>test</p></body></html>'),
data)
+ required_versions_ET['test_write_method_text'] = (1,3)
def test_write_method_text(self):
ElementTree = self.etree.ElementTree
Element = self.etree.Element
@@ -1062,6 +1076,7 @@
a.tail)
self.assertXML(_bytes(''), a)
+ required_versions_ET['test_extend'] = (1,3)
def test_extend(self):
root = self.etree.Element('foo')
for i in range(3):
@@ -1099,6 +1114,7 @@
self.assertEquals(a[0].text, 'foo')
# ElementTree < 1.3 adds whitespace around comments
+ required_versions_ET['test_comment_text'] = (1,3)
def test_comment_text(self):
Element = self.etree.Element
SubElement = self.etree.SubElement
@@ -1119,8 +1135,9 @@
self.assertEquals(
_bytes(''),
tostring(a))
-
+
# ElementTree < 1.3 adds whitespace around comments
+ required_versions_ET['test_comment_whitespace'] = (1,3)
def test_comment_whitespace(self):
Element = self.etree.Element
SubElement = self.etree.SubElement
@@ -1654,6 +1671,7 @@
_bytes(''),
b)
+ required_versions_ET['test_iter'] = (1,3)
def test_iter(self):
Element = self.etree.Element
SubElement = self.etree.SubElement
@@ -2607,6 +2625,7 @@
self.assert_(tostring(b) == _bytes('Foo') or
tostring(b) == _bytes('Foo'))
+ required_versions_ET['test_tostring_method_html'] = (1,3)
def test_tostring_method_html(self):
tostring = self.etree.tostring
Element = self.etree.Element
@@ -2621,6 +2640,7 @@
self.assertEquals(_bytes('<html><body><p>html<br>test</p></body></html>'),
tostring(html, method="html"))
+ required_versions_ET['test_tostring_method_text'] = (1,3)
def test_tostring_method_text(self):
tostring = self.etree.tostring
Element = self.etree.Element
@@ -2784,6 +2804,7 @@
parse = self.etree.parse
self.assertRaises(TypeError, parse, None)
+ required_versions_ET['test_parse_error'] = (1,3)
def test_parse_error(self):
# ET < 1.3 raises ExpatError
parse = self.etree.parse
@@ -2791,6 +2812,7 @@
self.assertRaises(SyntaxError, parse, f)
f.close()
+ required_versions_ET['test_parse_error_from_file'] = (1,3)
def test_parse_error_from_file(self):
parse = self.etree.parse
# from file
@@ -2890,6 +2912,7 @@
self.assertEquals(_str('S?k p? nettet').encode('iso-8859-1'),
result)
+ required_versions_ET['test_parse_encoding_8bit_explicit'] = (1,3)
def test_parse_encoding_8bit_explicit(self):
XMLParser = self.etree.XMLParser
@@ -2905,6 +2928,7 @@
a = tree.getroot()
self.assertEquals(a.text, text)
+ required_versions_ET['test_parse_encoding_8bit_override'] = (1,3)
def test_parse_encoding_8bit_override(self):
XMLParser = self.etree.XMLParser
@@ -3302,11 +3326,28 @@
self.assertEquals(root[0].tag, "a")
self.assertEquals(root[0].get("test"), "works")
+ def test_feed_parser_unicode(self):
+ parser = self.etree.XMLParser()
+
+ parser.feed(_str('<'))
+ parser.feed(_str('a test="works"/'))
+ parser.feed(_str('>'))
+
+ root = parser.close()
+
+ self.assertEquals(root.tag, "root")
+ self.assertEquals(root[0].tag, "a")
+ self.assertEquals(root[0].get("test"), "works")
+
+ required_versions_ET['test_feed_parser_error_close_empty'] = (1,3)
def test_feed_parser_error_close_empty(self):
ParseError = self.etree.ParseError
parser = self.etree.XMLParser()
self.assertRaises(ParseError, parser.close)
+ required_versions_ET['test_feed_parser_error_close_incomplete'] = (1,3)
def test_feed_parser_error_close_incomplete(self):
ParseError = self.etree.ParseError
parser = self.etree.XMLParser()
@@ -3316,6 +3357,7 @@
self.assertRaises(ParseError, parser.close)
+ required_versions_ET['test_feed_parser_error_broken'] = (1,3)
def test_feed_parser_error_broken(self):
ParseError = self.etree.ParseError
parser = self.etree.XMLParser()
@@ -3330,6 +3372,7 @@
self.assertRaises(ParseError, parser.close)
+ required_versions_ET['test_feed_parser_error_position'] = (1,3)
def test_feed_parser_error_position(self):
ParseError = self.etree.ParseError
parser = self.etree.XMLParser()
@@ -3344,6 +3387,7 @@
# parser target interface
+ required_versions_ET['test_parser_target_property'] = (1,3)
def test_parser_target_property(self):
class Target(object):
pass
@@ -3488,6 +3532,7 @@
"end-sub", "start-sub", "end-sub", "end-root"],
events)
+ required_versions_ET['test_parser_target_entity_unknown'] = (1,3)
def test_parser_target_entity_unknown(self):
events = []
class Target(object):
@@ -3645,10 +3690,18 @@
class ElementTreeTestCase(ETreeTestCaseBase):
etree = ElementTree
+ filter_by_version(
+ ElementTreeTestCase,
+ ElementTreeTestCase.required_versions_ET, ET_VERSION)
+
if cElementTree:
class CElementTreeTestCase(ETreeTestCaseBase):
etree = cElementTree
+ filter_by_version(
+ CElementTreeTestCase,
+ CElementTreeTestCase.required_versions_cET, CET_VERSION)
+
def test_suite():
suite = unittest.TestSuite()
if etree:
From scoder at codespeak.net Tue Oct 13 09:15:10 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 13 Oct 2009 09:15:10 +0200 (CEST)
Subject: [Lxml-checkins] r68361 - in lxml/trunk: . src/lxml
Message-ID: <20091013071510.30A5B168058@codespeak.net>
Author: scoder
Date: Tue Oct 13 09:15:07 2009
New Revision: 68361
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/extensions.pxi
Log:
r5273 at delle: sbehnel | 2009-10-12 21:46:24 +0200
code simplification
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Tue Oct 13 09:15:07 2009
@@ -677,12 +677,12 @@
xpath.xmlXPathParserContext* ctxt, int nargs):
cdef _Document doc
cdef xpath.xmlXPathObject* obj
- cdef int i
cdef list args
+ cdef int i
doc = context._doc
try:
args = []
- for i from 0 <= i < nargs:
+ for i in xrange(nargs):
obj = xpath.valuePop(ctxt)
o = _unwrapXPathObject(obj, doc, context._build_smart_strings)
_freeXPathObject(obj)
From scoder at codespeak.net Tue Oct 13 09:15:13 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Tue, 13 Oct 2009 09:15:13 +0200 (CEST)
Subject: [Lxml-checkins] r68362 - in lxml/trunk: . src/lxml/html
src/lxml/html/tests
Message-ID: <20091013071513.E2976168058@codespeak.net>
Author: scoder
Date: Tue Oct 13 09:15:13 2009
New Revision: 68362
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/html/__init__.py
lxml/trunk/src/lxml/html/tests/test_forms.txt
Log:
r5274 at delle: sbehnel | 2009-10-13 08:48:17 +0200
let textarea.value represent the complete tag content
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Tue Oct 13 09:15:13 2009
@@ -22,6 +22,11 @@
Bugs fixed
----------
+* The ``.value`` attribute of ``textarea`` elements in lxml.html did
+ not represent the complete raw value (including child tags etc.). It
+ now serialises the complete content on read and replaces the
+ complete content by a string on write.
+
* Looking up and deleting attributes without a namespace could hit a
namespaced attribute of the same name instead.
Modified: lxml/trunk/src/lxml/html/__init__.py
==============================================================================
--- lxml/trunk/src/lxml/html/__init__.py (original)
+++ lxml/trunk/src/lxml/html/__init__.py Tue Oct 13 09:15:13 2009
@@ -965,11 +965,21 @@
"""
Get/set the value (which is the contents of this element)
"""
- return self.text or ''
+ content = self.text or ''
+ if self.tag.startswith("{%s}" % XHTML_NAMESPACE):
+ serialisation_method = 'xml'
+ else:
+ serialisation_method = 'html'
+ for el in self:
+ # it's rare that we actually get here, so let's not use ''.join()
+ content += etree.tostring(el, method=serialisation_method)
+ return content
def _value__set(self, value):
+ del self[:]
self.text = value
def _value__del(self):
self.text = ''
+ del self[:]
value = property(_value__get, _value__set, _value__del, doc=_value__get.__doc__)
HtmlElementClassLookup._default_element_classes['textarea'] = TextareaElement
Modified: lxml/trunk/src/lxml/html/tests/test_forms.txt
==============================================================================
--- lxml/trunk/src/lxml/html/tests/test_forms.txt (original)
+++ lxml/trunk/src/lxml/html/tests/test_forms.txt Tue Oct 13 09:15:13 2009
@@ -166,3 +166,21 @@
[('foo', 'bar')]
>>> tree.forms[0].fields.values()
['bar']
+
+>>> tree = lxml.html.fromstring('''
+...
+...
+...
+... ''')
+>>> tree.forms[0].fields.keys()
+['foo']
+>>> ta = tree.forms[0].inputs['foo']
+>>> print(ta.value)
+some text
content with tags
+>>> ta.value = 'abc<br>def'
+>>> print(ta.value)
+abc<br>def
+>>> len(ta)
+0
From scoder at codespeak.net Fri Oct 16 23:38:48 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 16 Oct 2009 23:38:48 +0200 (CEST)
Subject: [Lxml-checkins] r68550 - in lxml/trunk: . src/lxml
Message-ID: <20091016213848.B3629168067@codespeak.net>
Author: scoder
Date: Fri Oct 16 23:38:48 2009
New Revision: 68550
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/lxml.etree.pyx
Log:
r5279 at delle: sbehnel | 2009-10-16 23:30:29 +0200
tiny code fix
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Oct 16 23:38:48 2009
@@ -1606,11 +1606,11 @@
_copyNonElementSiblings(self._context_node._c_node, root._c_node)
doc = root._doc
c_doc = self._context_node._doc._c_doc
- if c_doc.intSubset and not doc._c_doc.intSubset:
+ if c_doc.intSubset is not NULL and doc._c_doc.intSubset is NULL:
doc._c_doc.intSubset = tree.xmlCopyDtd(c_doc.intSubset)
if doc._c_doc.intSubset is NULL:
python.PyErr_NoMemory()
- if c_doc.extSubset and not doc._c_doc.extSubset:
+ if c_doc.extSubset is not NULL and not doc._c_doc.extSubset is NULL:
doc._c_doc.extSubset = tree.xmlCopyDtd(c_doc.extSubset)
if doc._c_doc.extSubset is NULL:
python.PyErr_NoMemory()
From scoder at codespeak.net Fri Oct 16 23:38:54 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 16 Oct 2009 23:38:54 +0200 (CEST)
Subject: [Lxml-checkins] r68551 - in lxml/trunk: . src/lxml
Message-ID: <20091016213854.58F88168076@codespeak.net>
Author: scoder
Date: Fri Oct 16 23:38:52 2009
New Revision: 68551
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/parser.pxi
Log:
r5280 at delle: sbehnel | 2009-10-16 23:38:38 +0200
fix bug 410916: entity non-resolution in feed parser
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri Oct 16 23:38:52 2009
@@ -22,6 +22,9 @@
Bugs fixed
----------
+* The ``resolve_entities`` option did not work in the incremental feed
+ parser.
+
* The ``.value`` attribute of ``textarea`` elements in lxml.html did
not represent the complete raw value (including child tags etc.). It
now serialises the complete content on read and replaces the
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Fri Oct 16 23:38:52 2009
@@ -1094,6 +1094,15 @@
py_buffer_len -= buffer_len
c_data += buffer_len
+ if error and not pctxt.replaceEntities and not pctxt.validate:
+ # in this mode, we ignore errors about undefined entities
+ for entry in context._error_log.filter_from_errors():
+ if entry.type != ErrorTypes.WAR_UNDECLARED_ENTITY and \
+ entry.type != ErrorTypes.ERR_UNDECLARED_ENTITY:
+ break
+ else:
+ error = 0
+
if not recover and (error or not pctxt.wellFormed):
self._feed_parser_running = 0
try:
From scoder at codespeak.net Fri Oct 16 23:43:18 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 16 Oct 2009 23:43:18 +0200 (CEST)
Subject: [Lxml-checkins] r68552 - in lxml/branch/lxml-2.2: . src/lxml
Message-ID: <20091016214318.DBAA4168067@codespeak.net>
Author: scoder
Date: Fri Oct 16 23:43:17 2009
New Revision: 68552
Modified:
lxml/branch/lxml-2.2/ (props changed)
lxml/branch/lxml-2.2/CHANGES.txt
lxml/branch/lxml-2.2/src/lxml/parser.pxi
Log:
trunk merge: fix for bug 410916 - unresolved entities in feed parser
Modified: lxml/branch/lxml-2.2/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-2.2/CHANGES.txt (original)
+++ lxml/branch/lxml-2.2/CHANGES.txt Fri Oct 16 23:43:17 2009
@@ -2,7 +2,7 @@
lxml changelog
==============
-2.2.2 (2009-10-??)
+2.2.3 (2009-10-??)
==================
Features added
@@ -11,6 +11,9 @@
Bugs fixed
----------
+* The ``resolve_entities`` option did not work in the incremental feed
+ parser.
+
* Looking up and deleting attributes without a namespace could hit a
namespaced attribute of the same name instead.
Modified: lxml/branch/lxml-2.2/src/lxml/parser.pxi
==============================================================================
--- lxml/branch/lxml-2.2/src/lxml/parser.pxi (original)
+++ lxml/branch/lxml-2.2/src/lxml/parser.pxi Fri Oct 16 23:43:17 2009
@@ -1094,6 +1094,15 @@
py_buffer_len -= buffer_len
c_data += buffer_len
+ if error and not pctxt.replaceEntities and not pctxt.validate:
+ # in this mode, we ignore errors about undefined entities
+ for entry in context._error_log.filter_from_errors():
+ if entry.type != ErrorTypes.WAR_UNDECLARED_ENTITY and \
+ entry.type != ErrorTypes.ERR_UNDECLARED_ENTITY:
+ break
+ else:
+ error = 0
+
if not recover and (error or not pctxt.wellFormed):
self._feed_parser_running = 0
try:
From scoder at codespeak.net Sat Oct 17 01:18:39 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 17 Oct 2009 01:18:39 +0200 (CEST)
Subject: [Lxml-checkins] r68557 - in lxml/trunk: . src/lxml
Message-ID: <20091016231839.9A7B0168041@codespeak.net>
Author: scoder
Date: Sat Oct 17 01:18:39 2009
New Revision: 68557
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/docloader.pxi
lxml/trunk/src/lxml/iterparse.pxi
lxml/trunk/src/lxml/lxml.objectify.pyx
lxml/trunk/src/lxml/objectpath.pxi
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/python.pxd
Log:
r5283 at delle: sbehnel | 2009-10-17 01:16:46 +0200
fix PyBytes/PyString usage
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Sat Oct 17 01:18:39 2009
@@ -1233,12 +1233,12 @@
cdef object _utf8(object s):
cdef int invalid
- if python.PyString_CheckExact(s):
+ if python.PyBytes_CheckExact(s):
invalid = check_string_utf8(s)
elif python.PyUnicode_CheckExact(s) or python.PyUnicode_Check(s):
s = python.PyUnicode_AsUTF8String(s)
invalid = check_string_utf8(s) == -1
- elif python.PyString_Check(s):
+ elif python.PyBytes_Check(s):
invalid = check_string_utf8(s)
else:
raise TypeError, u"Argument must be string or unicode."
@@ -1275,7 +1275,7 @@
"""
if filename is None:
return None
- elif python.PyString_Check(filename):
+ elif python.PyBytes_Check(filename):
return filename
elif python.PyUnicode_Check(filename):
filename8 = python.PyUnicode_AsEncodedString(
@@ -1313,7 +1313,7 @@
cdef char* c_filename
if filename is None:
return None
- elif python.PyString_Check(filename):
+ elif python.PyBytes_Check(filename):
if not check_string_utf8(filename):
# plain ASCII!
return filename
Modified: lxml/trunk/src/lxml/docloader.pxi
==============================================================================
--- lxml/trunk/src/lxml/docloader.pxi (original)
+++ lxml/trunk/src/lxml/docloader.pxi Sat Oct 17 01:18:39 2009
@@ -49,7 +49,7 @@
cdef _InputDocument doc_ref
if python.PyUnicode_Check(string):
string = python.PyUnicode_AsUTF8String(string)
- elif not python.PyString_Check(string):
+ elif not python.PyBytes_Check(string):
raise TypeError, "argument must be a byte string or unicode string"
doc_ref = _InputDocument()
doc_ref._type = PARSER_DATA_STRING
Modified: lxml/trunk/src/lxml/iterparse.pxi
==============================================================================
--- lxml/trunk/src/lxml/iterparse.pxi (original)
+++ lxml/trunk/src/lxml/iterparse.pxi Sat Oct 17 01:18:39 2009
@@ -479,7 +479,7 @@
while python.PyList_GET_SIZE(context._events) == 0:
if c_stream is NULL:
data = self._source.read(__ITERPARSE_CHUNK_SIZE)
- if not python.PyString_Check(data):
+ if not python.PyBytes_Check(data):
self._source = None
raise TypeError, u"reading file objects must return plain strings"
c_data_len = python.PyString_GET_SIZE(data)
Modified: lxml/trunk/src/lxml/lxml.objectify.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.objectify.pyx (original)
+++ lxml/trunk/src/lxml/lxml.objectify.pyx Sat Oct 17 01:18:39 2009
@@ -949,7 +949,7 @@
cdef object _type
cdef list _schema_types
def __init__(self, name, type_check, type_class, stringify=None):
- if python.PyString_Check(name):
+ if python.PyBytes_Check(name):
name = python.PyUnicode_FromEncodedObject(name, 'ASCII', NULL)
elif not python.PyUnicode_Check(name):
raise TypeError, u"Type name must be a string"
@@ -1579,12 +1579,12 @@
doc = element._doc
if empty_type_name is not None:
- if python.PyString_Check(empty_type_name):
+ if python.PyBytes_Check(empty_type_name):
empty_type_name = python.PyUnicode_FromEncodedObject(
empty_type_name, "ASCII", NULL)
dict_result = python.PyDict_GetItem(_SCHEMA_TYPE_DICT, empty_type_name)
elif empty_pytype_name is not None:
- if python.PyString_Check(empty_pytype_name):
+ if python.PyBytes_Check(empty_pytype_name):
empty_pytype_name = python.PyUnicode_FromEncodedObject(
empty_pytype_name, "ASCII", NULL)
dict_result = python.PyDict_GetItem(_PYTYPE_DICT, empty_pytype_name)
Modified: lxml/trunk/src/lxml/objectpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/objectpath.pxi (original)
+++ lxml/trunk/src/lxml/objectpath.pxi Sat Oct 17 01:18:39 2009
@@ -93,7 +93,7 @@
"""
cdef bint has_dot
cdef list new_path = []
- if python.PyString_Check(path):
+ if python.PyBytes_Check(path):
path = python.PyUnicode_FromEncodedObject(path, 'ASCII', NULL)
path = path.strip()
if path == u'.':
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Sat Oct 17 01:18:39 2009
@@ -357,7 +357,7 @@
c_requested -= remaining
self._bytes = self._filelike.read(c_requested)
- if not python.PyString_Check(self._bytes):
+ if not python.PyBytes_Check(self._bytes):
if python.PyUnicode_Check(self._bytes):
if self._encoding is None:
self._bytes = python.PyUnicode_AsUTF8String(self._bytes)
@@ -1043,7 +1043,7 @@
cdef int buffer_len
cdef int error
cdef bint recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
- if python.PyString_Check(data):
+ if python.PyBytes_Check(data):
if self._default_encoding is None:
c_encoding = NULL
else:
@@ -1538,7 +1538,7 @@
# pass native unicode only if libxml2 can handle it
if _UNICODE_ENCODING is NULL:
text = python.PyUnicode_AsUTF8String(text)
- elif not python.PyString_Check(text):
+ elif not python.PyBytes_Check(text):
raise ValueError, u"can only parse strings"
if python.PyUnicode_Check(url):
url = python.PyUnicode_AsUTF8String(url)
Modified: lxml/trunk/src/lxml/python.pxd
==============================================================================
--- lxml/trunk/src/lxml/python.pxd (original)
+++ lxml/trunk/src/lxml/python.pxd Sat Oct 17 01:18:39 2009
@@ -23,8 +23,8 @@
cdef bint PyUnicode_Check(object obj)
cdef bint PyUnicode_CheckExact(object obj)
- cdef bint PyString_Check(object obj)
- cdef bint PyString_CheckExact(object obj)
+ cdef bint PyBytes_Check(object obj)
+ cdef bint PyBytes_CheckExact(object obj)
cdef object PyUnicode_FromEncodedObject(object s, char* encoding,
char* errors)
From scoder at codespeak.net Sat Oct 17 01:18:43 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 17 Oct 2009 01:18:43 +0200 (CEST)
Subject: [Lxml-checkins] r68558 - in lxml/trunk: . src/lxml src/lxml/tests
Message-ID: <20091016231843.C7827168058@codespeak.net>
Author: scoder
Date: Sat Oct 17 01:18:43 2009
New Revision: 68558
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/extensions.pxi
lxml/trunk/src/lxml/tests/test_xpathevaluator.py
lxml/trunk/src/lxml/xpath.pxi
Log:
r5284 at delle: sbehnel | 2009-10-17 01:18:28 +0200
ticket 446654: support strings in node-sets returned from extension functions
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Sat Oct 17 01:18:43 2009
@@ -457,12 +457,15 @@
################################################################################
# helper functions
-cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL:
+cdef xpath.xmlXPathObject* _wrapXPathObject(object obj, _Document doc,
+ _BaseContext context) except NULL:
cdef xpath.xmlNodeSet* resultSet
- cdef _Element node
+ cdef _Element fake_node = None
+ cdef xmlNode* c_node
+
if python.PyUnicode_Check(obj):
obj = _utf8(obj)
- if python.PyString_Check(obj):
+ if python.PyBytes_Check(obj):
return xpath.xmlXPathNewCString(_cstr(obj))
if python.PyBool_Check(obj):
return xpath.xmlXPathNewBoolean(obj)
@@ -474,13 +477,42 @@
resultSet = xpath.xmlXPathNodeSetCreate((<_Element>obj)._c_node)
elif python.PySequence_Check(obj):
resultSet = xpath.xmlXPathNodeSetCreate(NULL)
- for element in obj:
- if isinstance(element, _Element):
- node = <_Element>element
- xpath.xmlXPathNodeSetAdd(resultSet, node._c_node)
- else:
- xpath.xmlXPathFreeNodeSet(resultSet)
- raise XPathResultError, u"This is not a node: %r" % element
+ try:
+ for value in obj:
+ if isinstance(value, _Element):
+ if context is not None:
+ context._hold(value)
+ xpath.xmlXPathNodeSetAdd(resultSet, (<_Element>value)._c_node)
+ else:
+ if context is None or doc is None:
+ raise XPathResultError, \
+ u"Non-Element values not supported at this point - got %r" % value
+ # support strings by appending text nodes to an Element
+ if python.PyUnicode_Check(value):
+ value = _utf8(value)
+ if python.PyBytes_Check(value):
+ if fake_node is None:
+ fake_node = _makeElement("text-root", NULL, doc, None,
+ None, None, None, None, None)
+ context._hold(fake_node)
+ else:
+ # append a comment node to keep the text nodes separate
+ c_node = tree.xmlNewDocComment(doc._c_doc, "")
+ if c_node is NULL:
+ python.PyErr_NoMemory()
+ tree.xmlAddChild(fake_node._c_node, c_node)
+ context._hold(value)
+ c_node = tree.xmlNewDocText(doc._c_doc, _cstr(value))
+ if c_node is NULL:
+ python.PyErr_NoMemory()
+ tree.xmlAddChild(fake_node._c_node, c_node)
+ xpath.xmlXPathNodeSetAdd(resultSet, c_node)
+ else:
+ raise XPathResultError, \
+ u"This is not a supported node-set result: %r" % value
+ except:
+ xpath.xmlXPathFreeNodeSet(resultSet)
+ raise
else:
raise XPathResultError, u"Unknown return type: %s" % \
python._fqtypename(obj)
@@ -613,7 +645,7 @@
else:
is_text = not (is_tail or is_attribute)
- if python.PyString_CheckExact(string_value):
+ if python.PyBytes_CheckExact(string_value):
result = _ElementStringResult(string_value)
result._parent = parent
result.is_attribute = is_attribute
@@ -691,7 +723,7 @@
res = function(context, *args)
# wrap result for XPath consumption
- obj = _wrapXPathObject(res)
+ obj = _wrapXPathObject(res, doc, context)
# prevent Python from deallocating elements handed to libxml2
context._hold(res)
xpath.valuePush(ctxt, obj)
Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original)
+++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Sat Oct 17 01:18:43 2009
@@ -593,8 +593,14 @@
def tag(elem):
return elem.tag
+def tag_or_value(elem):
+ return getattr(elem, 'tag', elem)
+
def stringTest(ctxt, s1):
return "Hello "+s1
+
+def stringListTest(ctxt, s1):
+ return ["Hello "] + list(s1) + ["!"]
def floatTest(ctxt, f1):
return f1+4
@@ -616,7 +622,7 @@
return st1
def resultTypesTest(ctxt):
- return ["x","y"]
+ return [None,None]
def resultTypesTest2(ctxt):
return resultTypesTest
@@ -624,6 +630,7 @@
uri = "http://www.example.com/"
extension = {(None, 'stringTest'): stringTest,
+ (None, 'stringListTest'): stringListTest,
(None, 'floatTest'): floatTest,
(None, 'booleanTest'): booleanTest,
(None, 'setTest'): setTest,
@@ -655,6 +662,8 @@
['tag']
>>> list(map(tag, e("setTest2(/body/*)")))
['tag', 'section']
+ >>> list(map(tag_or_value, e("stringListTest(/body/tag)")))
+ ['Hello', 'tag', 'tag', 'tag', '!']
>>> e("argsTest1('a',1.5,true(),/body/tag)")
"a, 1.5, True, ['tag', 'tag', 'tag']"
>>> list(map(tag, e("argsTest2(/body/tag, /body/section)")))
@@ -662,7 +671,7 @@
>>> e("resultTypesTest()")
Traceback (most recent call last):
...
- XPathResultError: This is not a node: 'x'
+ XPathResultError: This is not a supported node-set result: None
>>> try:
... e("resultTypesTest2()")
... except etree.XPathResultError:
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Sat Oct 17 01:18:43 2009
@@ -79,16 +79,16 @@
for name, value in variable_dict.items():
name_utf = self._to_utf(name)
xpath.xmlXPathRegisterVariable(
- self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value))
+ self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value, None, None))
cdef registerVariable(self, name, value):
name_utf = self._to_utf(name)
xpath.xmlXPathRegisterVariable(
- self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value))
+ self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value, None, None))
cdef void _registerVariable(self, name_utf, value):
xpath.xmlXPathRegisterVariable(
- self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value))
+ self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value, None, None))
cdef void _setupDict(self, xpath.xmlXPathContext* xpathCtxt):
__GLOBAL_PARSER_CONTEXT.initXPathParserDict(xpathCtxt)
From scoder at codespeak.net Sat Oct 17 01:20:52 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 17 Oct 2009 01:20:52 +0200 (CEST)
Subject: [Lxml-checkins] r68559 - lxml/trunk
Message-ID: <20091016232052.20FDC16805A@codespeak.net>
Author: scoder
Date: Sat Oct 17 01:20:51 2009
New Revision: 68559
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
Log:
r5288 at delle: sbehnel | 2009-10-17 01:20:44 +0200
changelog
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sat Oct 17 01:20:51 2009
@@ -8,6 +8,9 @@
Features added
--------------
+* Support for strings (in addition to Elements) in node-sets returned
+ by extension functions.
+
* Forms that lack an ``action`` attribute default to the base URL of
the document on submit.
From scoder at codespeak.net Sat Oct 17 01:22:55 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 17 Oct 2009 01:22:55 +0200 (CEST)
Subject: [Lxml-checkins] r68560 - in lxml/trunk: . src/lxml/tests
Message-ID: <20091016232255.AC94F16805C@codespeak.net>
Author: scoder
Date: Sat Oct 17 01:22:55 2009
New Revision: 68560
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/tests/test_xpathevaluator.py
Log:
r5290 at delle: sbehnel | 2009-10-17 01:22:51 +0200
test fix
Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original)
+++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Sat Oct 17 01:22:55 2009
@@ -663,7 +663,7 @@
>>> list(map(tag, e("setTest2(/body/*)")))
['tag', 'section']
>>> list(map(tag_or_value, e("stringListTest(/body/tag)")))
- ['Hello', 'tag', 'tag', 'tag', '!']
+ ['Hello ', 'tag', 'tag', 'tag', '!']
>>> e("argsTest1('a',1.5,true(),/body/tag)")
"a, 1.5, True, ['tag', 'tag', 'tag']"
>>> list(map(tag, e("argsTest2(/body/tag, /body/section)")))
From scoder at codespeak.net Sat Oct 17 02:11:43 2009
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 17 Oct 2009 02:11:43 +0200 (CEST)
Subject: [Lxml-checkins] r68563 - in lxml/trunk: . src/lxml/html
src/lxml/html/tests
Message-ID: <20091017001143.24E1016803B@codespeak.net>
Author: scoder
Date: Sat Oct 17 02:11:42 2009
New Revision: 68563
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/html/__init__.py
lxml/trunk/src/lxml/html/tests/test_rewritelinks.txt
Log:
r5292 at delle: sbehnel | 2009-10-17 02:11:33 +0200
fix bug 449926: reverse URL iteration inside of text content to simplify replacements
Modified: lxml/trunk/src/lxml/html/__init__.py
==============================================================================
--- lxml/trunk/src/lxml/html/__init__.py (original)
+++ lxml/trunk/src/lxml/html/__init__.py Sat Oct 17 02:11:42 2009
@@ -308,6 +308,13 @@
Note: <base href> is *not* taken into account in any way. The
link you get is exactly the link in the document.
+
+ Note: multiple links inside of a single text string or
+ attribute value are returned in reversed order. This makes it
+ possible to replace or delete them from the text string value
+ based on their reported text positions. Otherwise, a
+ modification at one text position can change the positions of
+ links reported later on.
"""
link_attrs = defs.link_attrs
for el in self.iter():
@@ -347,15 +354,29 @@
## http://www.w3.org/TR/html401/struct/objects.html#adef-valuetype
yield (el, 'value', el.get('value'), 0)
if tag == 'style' and el.text:
- for match in _css_url_re.finditer(el.text):
- url, start = _unquote_match(match.group(1), match.start(1))
- yield (el, None, url, start)
- for match in _css_import_re.finditer(el.text):
- yield (el, None, match.group(1), match.start(1))
+ urls = [
+ _unquote_match(match.group(1), match.start(1))
+ for match in _css_url_re.finditer(el.text)
+ ] + [
+ (match.group(1), match.start(1))
+ for match in _css_import_re.finditer(el.text)
+ ]
+ if urls:
+ # sort by start pos to bring both match sets back into order
+ urls = [ (start, url) for (url, start) in urls ]
+ urls.sort()
+ # reverse the list to report correct positions despite
+ # modifications
+ urls.reverse()
+ for start, url in urls:
+ yield (el, None, url, start)
if 'style' in attribs:
- for match in _css_url_re.finditer(attribs['style']):
- url, start = _unquote_match(match.group(1), match.start(1))
- yield (el, 'style', url, start)
+ urls = list(_css_url_re.finditer(attribs['style']))
+ if urls:
+ # return in reversed order to simplify in-place modifications
+ for match in urls[::-1]:
+ url, start = _unquote_match(match.group(1), match.start(1))
+ yield (el, 'style', url, start)
def rewrite_links(self, link_repl_func, resolve_base_href=True,
base_href=None):
Modified: lxml/trunk/src/lxml/html/tests/test_rewritelinks.txt
==============================================================================
--- lxml/trunk/src/lxml/html/tests/test_rewritelinks.txt (original)
+++ lxml/trunk/src/lxml/html/tests/test_rewritelinks.txt Sat Oct 17 02:11:42 2009
@@ -1,3 +1,8 @@
+
+Setup::
+
+ >>> import lxml.html
+
We'll define a link translation function:
>>> base_href = 'http://old/base/path.html'
@@ -118,8 +123,8 @@
...
...