From scoder at codespeak.net Mon Jun 2 21:00:04 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 2 Jun 2008 21:00:04 +0200 (CEST) Subject: [Lxml-checkins] r55502 - lxml/trunk Message-ID: <20080602190004.6139D2D8002@codespeak.net> Author: scoder Date: Mon Jun 2 21:00:02 2008 New Revision: 55502 Modified: lxml/trunk/ (props changed) lxml/trunk/selftest.py lxml/trunk/selftest2.py Log: r4458 at delle: sbehnel | 2008-06-01 17:38:32 +0200 more test fixes Modified: lxml/trunk/selftest.py ============================================================================== --- lxml/trunk/selftest.py (original) +++ lxml/trunk/selftest.py Mon Jun 2 21:00:02 2008 @@ -42,7 +42,10 @@ encoding = options["encoding"] except KeyError: encoding = "utf-8" - return fix_compatibility( file.getvalue().decode(encoding) ) + result = fix_compatibility(file.getvalue().decode(encoding)) + if sys.version_info[0] < 3: + result = result.encode(encoding) + return result def summarize(elem): return elem.tag @@ -525,7 +528,7 @@ Test encoding issues. >>> elem = ElementTree.Element("tag") - >>> elem.text = u"abc" + >>> elem.text = u'abc' >>> serialize(elem) 'abc' >>> serialize(elem, encoding="utf-8") @@ -580,6 +583,9 @@ """ +if sys.version_info[0] >= 3: + encoding.__doc__ = encoding.__doc__.replace("u'", "'") + def methods(): r""" Test serialization methods. Modified: lxml/trunk/selftest2.py ============================================================================== --- lxml/trunk/selftest2.py (original) +++ lxml/trunk/selftest2.py Mon Jun 2 21:00:02 2008 @@ -27,7 +27,9 @@ else: encoding = "utf-8" tree.write(file) - result = file.getvalue().decode(encoding) + result = file.getvalue() + if sys.version_info[0] >= 3: + result = result.decode(encoding) result = result.replace(' />', '/>') if result[-1:] == '\n': result = result[:-1] @@ -147,7 +149,7 @@ Test encoding issues. >>> elem = ElementTree.Element("tag") - >>> elem.text = u"abc" + >>> elem.text = u'abc' >>> serialize(elem) 'abc' >>> serialize(elem, "utf-8") @@ -202,6 +204,9 @@ """ +if sys.version_info[0] >= 3: + encoding.__doc__ = encoding.__doc__.replace("u'", "'") + def qname(): """ Test QName handling. From scoder at codespeak.net Mon Jun 2 21:00:23 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 2 Jun 2008 21:00:23 +0200 (CEST) Subject: [Lxml-checkins] r55503 - in lxml/trunk: . src/lxml/tests Message-ID: <20080602190023.CE0292D8002@codespeak.net> Author: scoder Date: Mon Jun 2 21:00:21 2008 New Revision: 55503 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/tests/common_imports.py Log: r4459 at delle: sbehnel | 2008-06-02 15:55:58 +0200 cleanup Modified: lxml/trunk/src/lxml/tests/common_imports.py ============================================================================== --- lxml/trunk/src/lxml/tests/common_imports.py (original) +++ lxml/trunk/src/lxml/tests/common_imports.py Mon Jun 2 21:00:21 2008 @@ -179,7 +179,7 @@ data.seek(0) data.truncate() if amount: - self.data.write(result[amount:]) + append(result[amount:]) result = result[:amount] return result From scoder at codespeak.net Mon Jun 2 21:00:38 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 2 Jun 2008 21:00:38 +0200 (CEST) Subject: [Lxml-checkins] r55504 - in lxml/trunk: . src/lxml Message-ID: <20080602190038.1CD602D8002@codespeak.net> Author: scoder Date: Mon Jun 2 21:00:35 2008 New Revision: 55504 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/xslt.pxi Log: r4460 at delle: sbehnel | 2008-06-02 15:56:07 +0200 buffer setup fix Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Mon Jun 2 21:00:35 2008 @@ -682,7 +682,7 @@ buffer.len = self._buffer_len self._buffer_refcnt += 1 if flags & python.PyBUF_WRITABLE: - buffer.readonly = 1 + buffer.readonly = 0 else: buffer.readonly = 1 if flags & python.PyBUF_FORMAT: From scoder at codespeak.net Mon Jun 2 21:00:50 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 2 Jun 2008 21:00:50 +0200 (CEST) Subject: [Lxml-checkins] r55505 - in lxml/trunk: . src/lxml Message-ID: <20080602190050.01DFE2D8002@codespeak.net> Author: scoder Date: Mon Jun 2 21:00:49 2008 New Revision: 55505 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/parser.pxi Log: r4461 at delle: sbehnel | 2008-06-02 15:56:37 +0200 small fixes Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Mon Jun 2 21:00:49 2008 @@ -163,7 +163,7 @@ cdef Py_ssize_t l cdef char* buffer cdef char* enc - utext = python.PyUnicode_DecodeUTF8("", 7, 'strict') + utext = python.PyUnicode_DecodeUTF8("", 7, NULL) l = python.PyUnicode_GET_DATA_SIZE(utext) buffer = python.PyUnicode_AS_DATA(utext) enc = _findEncodingName(buffer, l) @@ -215,7 +215,8 @@ cdef object _url cdef object _bytes cdef _ExceptionContext _exc_context - cdef cstd.size_t _bytes_read + cdef Py_ssize_t _bytes_read + cdef bint _reading_unicode cdef char* _c_url def __init__(self, filelike, exc_context, url, encoding): self._exc_context = exc_context @@ -302,13 +303,14 @@ if remaining <= 0: self._bytes = self._filelike.read(c_size) if not python.PyString_Check(self._bytes): + self._bytes_read = -1 raise TypeError, \ u"reading file objects must return plain strings" remaining = python.PyString_GET_SIZE(self._bytes) - self._bytes_read = 0 if remaining == 0: self._bytes_read = -1 return 0 + self._bytes_read = 0 if c_size > remaining: c_size = remaining c_start = _cstr(self._bytes) + self._bytes_read From scoder at codespeak.net Mon Jun 2 21:01:02 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 2 Jun 2008 21:01:02 +0200 (CEST) Subject: [Lxml-checkins] r55506 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20080602190102.51FA22D8002@codespeak.net> Author: scoder Date: Mon Jun 2 21:01:00 2008 New Revision: 55506 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/tests/common_imports.py lxml/trunk/src/lxml/tests/test_etree.py Log: r4462 at delle: sbehnel | 2008-06-02 20:28:44 +0200 support for parsing from file-likes that return unicode strings, less GIL acquiring when parsing from file-likes Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Jun 2 21:01:00 2008 @@ -8,6 +8,9 @@ Features added -------------- +* Support for parsing from file-like objects that return unicode + strings. + * New function ``etree.cleanup_namespaces(el)`` that removes unused namespace declarations from a (sub)tree (experimental). Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Mon Jun 2 21:01:00 2008 @@ -35,6 +35,12 @@ cdef object os_path_join from os.path import join as os_path_join +cdef object BytesIO, StringIO +try: + from io import BytesIO, StringIO +except ImportError: + from StringIO import StringIO, StringIO as BytesIO + cdef object _elementpath import _elementpath Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Mon Jun 2 21:01:00 2008 @@ -216,7 +216,6 @@ cdef object _bytes cdef _ExceptionContext _exc_context cdef Py_ssize_t _bytes_read - cdef bint _reading_unicode cdef char* _c_url def __init__(self, filelike, exc_context, url, encoding): self._exc_context = exc_context @@ -292,31 +291,47 @@ return result - cdef int copyToBuffer(self, char* c_buffer, int c_size): + cdef int copyToBuffer(self, char* c_buffer, int c_requested): + cdef int c_byte_count cdef char* c_start cdef Py_ssize_t byte_count, remaining if self._bytes_read < 0: return 0 try: + c_byte_count = 0 byte_count = python.PyString_GET_SIZE(self._bytes) - remaining = byte_count - self._bytes_read - if remaining <= 0: - self._bytes = self._filelike.read(c_size) + remaining = byte_count - self._bytes_read + while c_requested > remaining: + c_start = _cstr(self._bytes) + self._bytes_read + cstd.memcpy(c_buffer, c_start, remaining) + c_byte_count += remaining + c_buffer += remaining + c_requested -= remaining + + self._bytes = self._filelike.read(c_requested) if not python.PyString_Check(self._bytes): - self._bytes_read = -1 - raise TypeError, \ - u"reading file objects must return plain strings" + if python.PyUnicode_Check(self._bytes): + if self._encoding is None: + self._bytes = python.PyUnicode_AsUTF8String(self._bytes) + else: + self._bytes = python.PyUnicode_AsEncodedString( + self._bytes, _cstr(self._encoding), NULL) + else: + raise TypeError, \ + u"reading from file-like objects must return byte strings or unicode strings" + remaining = python.PyString_GET_SIZE(self._bytes) if remaining == 0: self._bytes_read = -1 - return 0 + return c_byte_count self._bytes_read = 0 - if c_size > remaining: - c_size = remaining - c_start = _cstr(self._bytes) + self._bytes_read - self._bytes_read = self._bytes_read + c_size - cstd.memcpy(c_buffer, c_start, c_size) - return c_size + + if c_requested > 0: + c_start = _cstr(self._bytes) + self._bytes_read + cstd.memcpy(c_buffer, c_start, c_requested) + c_byte_count += c_requested + self._bytes_read += c_requested + return c_byte_count except: self._exc_context._store_raised() return -1 @@ -1322,10 +1337,17 @@ filename_utf = _encodeFilenameUTF8(filename) c_filename = _cstr(filename_utf) if python.PyUnicode_Check(text): + c_len = python.PyUnicode_GET_DATA_SIZE(text) + if c_len > python.INT_MAX: + return (<_BaseParser>parser)._parseDocFromFilelike( + StringIO(text), filename) return (<_BaseParser>parser)._parseUnicodeDoc(text, c_filename) else: + c_len = python.PyString_GET_SIZE(text) + if c_len > python.INT_MAX: + return (<_BaseParser>parser)._parseDocFromFilelike( + BytesIO(text), filename) c_text = _cstr(text) - c_len = python.PyString_GET_SIZE(text) return (<_BaseParser>parser)._parseDoc(c_text, c_len, c_filename) cdef xmlDoc* _parseDocFromFile(filename8, _BaseParser parser) except NULL: Modified: lxml/trunk/src/lxml/tests/common_imports.py ============================================================================== --- lxml/trunk/src/lxml/tests/common_imports.py (original) +++ lxml/trunk/src/lxml/tests/common_imports.py Mon Jun 2 21:01:00 2008 @@ -183,6 +183,25 @@ result = result[:amount] return result +class LargeFileLikeUnicode(LargeFileLike): + def __init__(self, charlen=100, depth=4, children=5): + LargeFileLike.__init__(self, charlen, depth, children) + self.data = StringIO() + self.chars = _str('a') * charlen + self.more = self.iterelements(depth) + + def iterelements(self, depth): + yield _str('') + depth -= 1 + if depth > 0: + for child in self.children: + for element in self.iterelements(depth): + yield element + yield self.chars + else: + yield self.chars + yield _str('') + def fileInTestDir(name): _testdir = os.path.dirname(__file__) return os.path.join(_testdir, name) Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Mon Jun 2 21:01:00 2008 @@ -14,8 +14,8 @@ sys.path.insert(0, this_dir) # needed for Py3 from common_imports import etree, StringIO, BytesIO, HelperTestCase, fileInTestDir -from common_imports import SillyFileLike, canonicalize, doctest, make_doctest -from common_imports import sorted, _str, _bytes +from common_imports import LargeFileLikeUnicode, doctest, make_doctest +from common_imports import canonicalize, sorted, _str, _bytes print("") print("TESTED VERSION: %s" % etree.__version__) @@ -1974,6 +1974,13 @@ root = etree.HTML(_bytes('')) self.assertEquals(root.base, "http://no/such/url") + def test_parse_fileobject_unicode(self): + # parse from a file object that returns unicode strings + f = LargeFileLikeUnicode() + tree = self.etree.parse(f) + root = tree.getroot() + self.assert_(root.tag.endswith('root')) + def test_dtd_io(self): # check that DTDs that go in also go back out xml = _bytes('''\ From scoder at codespeak.net Wed Jun 4 19:38:26 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 4 Jun 2008 19:38:26 +0200 (CEST) Subject: [Lxml-checkins] r55571 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20080604173826.95C481684C6@codespeak.net> Author: scoder Date: Wed Jun 4 19:38:25 2008 New Revision: 55571 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/classlookup.pxi lxml/trunk/src/lxml/tests/test_classlookup.py Log: r4468 at delle: sbehnel | 2008-06-04 14:40:14 +0200 fixed crash in lookup classes when subclasses forget to call super.__init__() Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Jun 4 19:38:25 2008 @@ -40,6 +40,9 @@ Bugs fixed ---------- +* Crash in Element class lookup classes when the __init__() method of + the super class is not called from Python subclasses. + * A number of problems related to unicode/byte string conversion of filenames and error messages were fixed. Modified: lxml/trunk/src/lxml/classlookup.pxi ============================================================================== --- lxml/trunk/src/lxml/classlookup.pxi (original) +++ lxml/trunk/src/lxml/classlookup.pxi Wed Jun 4 19:38:25 2008 @@ -70,7 +70,6 @@ cdef public class ElementClassLookup [ type LxmlElementClassLookupType, object LxmlElementClassLookup ]: u"""ElementClassLookup(self) - Superclass of Element class lookups. """ cdef _element_class_lookup_function _lookup_function @@ -86,6 +85,10 @@ """ cdef readonly ElementClassLookup fallback cdef _element_class_lookup_function _fallback_function + def __cinit__(self): + # fall back to default lookup + self._fallback_function = _lookupDefaultElementClass + def __init__(self, ElementClassLookup fallback=None): if fallback is not None: self._setFallback(fallback) @@ -127,8 +130,10 @@ cdef readonly object comment_class cdef readonly object pi_class cdef readonly object entity_class - def __init__(self, element=None, comment=None, pi=None, entity=None): + def __cinit__(self): self._lookup_function = _lookupDefaultElementClass + + def __init__(self, element=None, comment=None, pi=None, entity=None): if element is None: self.element_class = _Element elif issubclass(element, ElementBase): @@ -211,6 +216,9 @@ cdef object _pytag cdef char* _c_ns cdef char* _c_name + def __cinit__(self): + self._lookup_function = _attribute_class_lookup + def __init__(self, attribute_name, class_mapping, ElementClassLookup fallback=None): self._pytag = _getNsTag(attribute_name) @@ -223,7 +231,6 @@ self._class_mapping = dict(class_mapping) FallbackElementClassLookup.__init__(self, fallback) - self._lookup_function = _attribute_class_lookup cdef object _attribute_class_lookup(state, _Document doc, xmlNode* c_node): cdef AttributeBasedElementClassLookup lookup @@ -246,8 +253,7 @@ u"""ParserBasedElementClassLookup(self, fallback=None) Element class lookup based on the XML parser. """ - def __init__(self, ElementClassLookup fallback=None): - FallbackElementClassLookup.__init__(self, fallback) + def __cinit__(self): self._lookup_function = _parser_class_lookup cdef object _parser_class_lookup(state, _Document doc, xmlNode* c_node): @@ -276,8 +282,7 @@ If you return None from this method, the fallback will be called. """ - def __init__(self, ElementClassLookup fallback=None): - FallbackElementClassLookup.__init__(self, fallback) + def __cinit__(self): self._lookup_function = _custom_class_lookup def lookup(self, type, doc, namespace, name): @@ -362,8 +367,7 @@ See http://codespeak.net/lxml/element_classes.html """ - def __init__(self, ElementClassLookup fallback=None): - FallbackElementClassLookup.__init__(self, fallback) + def __cinit__(self): self._lookup_function = _python_class_lookup def lookup(self, doc, element): Modified: lxml/trunk/src/lxml/tests/test_classlookup.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_classlookup.py (original) +++ lxml/trunk/src/lxml/tests/test_classlookup.py Wed Jun 4 19:38:25 2008 @@ -185,6 +185,26 @@ del a self.assertEquals(root[0].tag, "a") + def test_lookup_without_fallback(self): + class Lookup(etree.CustomElementClassLookup): + def __init__(self): + # no super call here, so no fallback is set + pass + + def lookup(self, node_type, document, namespace, name): + return Foo + + class Foo(etree.ElementBase): + def custom(self): + return "test" + + parser = self.etree.XMLParser() + parser.set_element_class_lookup( Lookup() ) + + root = etree.XML('', parser) + + self.assertEquals("test", root.custom()) + def test_suite(): suite = unittest.TestSuite() From scoder at codespeak.net Wed Jun 4 22:00:30 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 4 Jun 2008 22:00:30 +0200 (CEST) Subject: [Lxml-checkins] r55578 - in lxml/branch/lxml-2.0: . src/lxml src/lxml/tests Message-ID: <20080604200030.775E6168403@codespeak.net> Author: scoder Date: Wed Jun 4 22:00:27 2008 New Revision: 55578 Modified: lxml/branch/lxml-2.0/CHANGES.txt lxml/branch/lxml-2.0/src/lxml/classlookup.pxi lxml/branch/lxml-2.0/src/lxml/lxml.pyclasslookup.pyx lxml/branch/lxml-2.0/src/lxml/tests/test_classlookup.py Log: merged in class lookup crash fix from trunk Modified: lxml/branch/lxml-2.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.0/CHANGES.txt (original) +++ lxml/branch/lxml-2.0/CHANGES.txt Wed Jun 4 22:00:27 2008 @@ -2,6 +2,22 @@ lxml changelog ============== +Under development +================= + +Features added +-------------- + +Bugs fixed +---------- + +* Crash in Element class lookup classes when the __init__() method of + the super class is not called from Python subclasses. + +Other changes +------------- + + 2.0.6 (2008-05-31) ================== Modified: lxml/branch/lxml-2.0/src/lxml/classlookup.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/classlookup.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/classlookup.pxi Wed Jun 4 22:00:27 2008 @@ -86,6 +86,10 @@ """ cdef readonly ElementClassLookup fallback cdef _element_class_lookup_function _fallback_function + def __cinit__(self): + # fall back to default lookup + self._fallback_function = _lookupDefaultElementClass + def __init__(self, ElementClassLookup fallback=None): if fallback is not None: self._setFallback(fallback) @@ -133,8 +137,10 @@ cdef readonly object comment_class cdef readonly object pi_class cdef readonly object entity_class - def __init__(self, element=None, comment=None, pi=None, entity=None): + def __cinit__(self): self._lookup_function = _lookupDefaultElementClass + + def __init__(self, element=None, comment=None, pi=None, entity=None): if element is None: self.element_class = _Element elif issubclass(element, ElementBase): @@ -213,6 +219,9 @@ cdef object _pytag cdef char* _c_ns cdef char* _c_name + def __cinit__(self): + self._lookup_function = _attribute_class_lookup + def __init__(self, attribute_name, class_mapping, ElementClassLookup fallback=None): self._pytag = _getNsTag(attribute_name) @@ -225,7 +234,6 @@ self._class_mapping = dict(class_mapping) FallbackElementClassLookup.__init__(self, fallback) - self._lookup_function = _attribute_class_lookup cdef object _attribute_class_lookup(state, _Document doc, xmlNode* c_node): cdef AttributeBasedElementClassLookup lookup @@ -245,8 +253,7 @@ """ParserBasedElementClassLookup(self, fallback=None) Element class lookup based on the XML parser. """ - def __init__(self, ElementClassLookup fallback=None): - FallbackElementClassLookup.__init__(self, fallback) + def __cinit__(self): self._lookup_function = _parser_class_lookup cdef object _parser_class_lookup(state, _Document doc, xmlNode* c_node): @@ -272,8 +279,7 @@ If you return None from this method, the fallback will be called. """ - def __init__(self, ElementClassLookup fallback=None): - FallbackElementClassLookup.__init__(self, fallback) + def __cinit__(self): self._lookup_function = _custom_class_lookup def lookup(self, type, doc, namespace, name): Modified: lxml/branch/lxml-2.0/src/lxml/lxml.pyclasslookup.pyx ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/lxml.pyclasslookup.pyx (original) +++ lxml/branch/lxml-2.0/src/lxml/lxml.pyclasslookup.pyx Wed Jun 4 22:00:27 2008 @@ -311,8 +311,7 @@ If you return None from this method, the fallback will be called. """ - def __init__(self, ElementClassLookup fallback=None): - FallbackElementClassLookup.__init__(self, fallback) + def __cinit__(self): self._lookup_function = _lookup_class def lookup(self, doc, element): Modified: lxml/branch/lxml-2.0/src/lxml/tests/test_classlookup.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/tests/test_classlookup.py (original) +++ lxml/branch/lxml-2.0/src/lxml/tests/test_classlookup.py Wed Jun 4 22:00:27 2008 @@ -153,6 +153,26 @@ self.assertFalse(hasattr(root, 'FIND_ME')) self.assertFalse(hasattr(root[0], 'FIND_ME')) + def test_lookup_without_fallback(self): + class Lookup(etree.CustomElementClassLookup): + def __init__(self): + # no super call here, so no fallback is set + pass + + def lookup(self, node_type, document, namespace, name): + return Foo + + class Foo(etree.ElementBase): + def custom(self): + return "test" + + parser = self.etree.XMLParser() + parser.set_element_class_lookup( Lookup() ) + + root = etree.XML('', parser) + + self.assertEquals("test", root.custom()) + def test_suite(): suite = unittest.TestSuite() From scoder at codespeak.net Wed Jun 4 22:20:12 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 4 Jun 2008 22:20:12 +0200 (CEST) Subject: [Lxml-checkins] r55579 - lxml/branch/lxml-2.0/doc Message-ID: <20080604202012.4A0E6168405@codespeak.net> Author: scoder Date: Wed Jun 4 22:20:10 2008 New Revision: 55579 Modified: lxml/branch/lxml-2.0/doc/capi.txt lxml/branch/lxml-2.0/doc/element_classes.txt Log: API doc fixes Modified: lxml/branch/lxml-2.0/doc/capi.txt ============================================================================== --- lxml/branch/lxml-2.0/doc/capi.txt (original) +++ lxml/branch/lxml-2.0/doc/capi.txt Wed Jun 4 22:20:10 2008 @@ -49,10 +49,10 @@ from etreepublic cimport ElementBase cdef class NewElementClass(ElementBase): - def setValue(self, myval): + def set_value(self, myval): self.set("my_attribute", myval) - etree.setElementClassLookup( + etree.set_element_class_lookup( DefaultElementClassLookup(element=NewElementClass)) Modified: lxml/branch/lxml-2.0/doc/element_classes.txt ============================================================================== --- lxml/branch/lxml-2.0/doc/element_classes.txt (original) +++ lxml/branch/lxml-2.0/doc/element_classes.txt Wed Jun 4 22:20:10 2008 @@ -89,7 +89,7 @@ >>> parser_lookup = etree.ElementDefaultClassLookup(element=HonkElement) >>> parser = etree.XMLParser() - >>> parser.setElementClassLookup(parser_lookup) + >>> parser.set_element_class_lookup(parser_lookup) There is one drawback of the parser based scheme: the ``Element()`` factory does not know about your specialised parser and creates a new document that @@ -153,7 +153,7 @@ >>> lookup = etree.ElementDefaultClassLookup() >>> parser = etree.XMLParser() - >>> parser.setElementClassLookup(lookup) + >>> parser.set_element_class_lookup(lookup) Note that the default for new parsers is to use the global fallback, which is also the default lookup (if not configured otherwise). @@ -167,7 +167,7 @@ False >>> lookup = etree.ElementDefaultClassLookup(element=HonkElement) - >>> parser.setElementClassLookup(lookup) + >>> parser.set_element_class_lookup(lookup) >>> el = parser.makeelement("myelement") >>> print isinstance(el, HonkElement) @@ -189,7 +189,7 @@ >>> lookup = etree.ElementNamespaceClassLookup() >>> parser = etree.XMLParser() - >>> parser.setElementClassLookup(lookup) + >>> parser.set_element_class_lookup(lookup) See the separate section on `implementing namespaces`_ below to learn how to make use of it. @@ -203,7 +203,7 @@ >>> fallback = etree.ElementDefaultClassLookup(element=HonkElement) >>> lookup = etree.ElementNamespaceClassLookup(fallback) - >>> parser.setElementClassLookup(lookup) + >>> parser.set_element_class_lookup(lookup) Attribute based lookup @@ -217,7 +217,7 @@ >>> lookup = etree.AttributeBasedElementClassLookup( ... 'id', id_class_mapping) >>> parser = etree.XMLParser() - >>> parser.setElementClassLookup(lookup) + >>> parser.set_element_class_lookup(lookup) Instead of a global setup of this scheme, you should consider using a per-parser setup. @@ -230,7 +230,7 @@ >>> lookup = etree.AttributeBasedElementClassLookup( ... 'id', id_class_mapping, fallback) >>> parser = etree.XMLParser() - >>> parser.setElementClassLookup(lookup) + >>> parser.set_element_class_lookup(lookup) Custom element class lookup @@ -244,7 +244,7 @@ ... return MyElementClass # defined elsewhere >>> parser = etree.XMLParser() - >>> parser.setElementClassLookup(MyLookup()) + >>> parser.set_element_class_lookup(MyLookup()) The ``lookup()`` method must either return None (which triggers the fallback mechanism) or a subclass of ``lxml.etree.ElementBase``. It can take any @@ -272,7 +272,7 @@ ... return MyElementClass # defined elsewhere >>> parser = etree.XMLParser() - >>> parser.setElementClassLookup(MyLookup()) + >>> parser.set_element_class_lookup(MyLookup()) As before, the first argument to the ``lookup()`` method is the opaque document instance that contains the Element. The second arguments is a @@ -304,7 +304,7 @@ >>> lookup = etree.ElementNamespaceClassLookup() >>> parser = etree.XMLParser() - >>> parser.setElementClassLookup(lookup) + >>> parser.set_element_class_lookup(lookup) >>> namespace = lookup.get_namespace('http://hui.de/honk') From scoder at codespeak.net Thu Jun 5 21:23:54 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 5 Jun 2008 21:23:54 +0200 (CEST) Subject: [Lxml-checkins] r55597 - in lxml/trunk: . doc Message-ID: <20080605192354.AE8C516840C@codespeak.net> Author: scoder Date: Thu Jun 5 21:23:52 2008 New Revision: 55597 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/capi.txt Log: r4472 at delle: sbehnel | 2008-06-05 09:36:37 +0200 doc cleanup Modified: lxml/trunk/doc/capi.txt ============================================================================== --- lxml/trunk/doc/capi.txt (original) +++ lxml/trunk/doc/capi.txt Thu Jun 5 21:23:52 2008 @@ -49,10 +49,10 @@ from etreepublic cimport ElementBase cdef class NewElementClass(ElementBase): - def setValue(self, myval): + def set_value(self, myval): self.set("my_attribute", myval) - etree.setElementClassLookup( + etree.set_element_class_lookup( DefaultElementClassLookup(element=NewElementClass)) From scoder at codespeak.net Thu Jun 5 21:23:58 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 5 Jun 2008 21:23:58 +0200 (CEST) Subject: [Lxml-checkins] r55598 - in lxml/trunk: . src/lxml Message-ID: <20080605192358.458E816840A@codespeak.net> Author: scoder Date: Thu Jun 5 21:23:57 2008 New Revision: 55598 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/apihelpers.pxi Log: r4473 at delle: sbehnel | 2008-06-05 09:41:11 +0200 cleanup in apihelpers.pxi, always inline some trivial functions Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Thu Jun 5 21:23:57 2008 @@ -1258,13 +1258,13 @@ raise ValueError, u"Empty tag name" return ns, tag -cdef int _pyXmlNameIsValid(name_utf8): +cdef inline int _pyXmlNameIsValid(name_utf8): return _xmlNameIsValid(_cstr(name_utf8)) -cdef int _pyHtmlNameIsValid(name_utf8): +cdef inline int _pyHtmlNameIsValid(name_utf8): return _htmlNameIsValid(_cstr(name_utf8)) -cdef int _xmlNameIsValid(char* c_name): +cdef inline int _xmlNameIsValid(char* c_name): return tree.xmlValidateNCName(c_name, 0) == 0 cdef int _htmlNameIsValid(char* c_name): @@ -1308,28 +1308,28 @@ cdef int _tagValidOrRaise(tag_utf) except -1: if not _pyXmlNameIsValid(tag_utf): raise ValueError, u"Invalid tag name %r" % \ - python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', 'strict') + python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', NULL) return 0 cdef int _htmlTagValidOrRaise(tag_utf) except -1: if not _pyHtmlNameIsValid(tag_utf): raise ValueError, u"Invalid HTML tag name %r" % \ - python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', 'strict') + python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', NULL) return 0 cdef int _attributeValidOrRaise(name_utf) except -1: if not _pyXmlNameIsValid(name_utf): raise ValueError, u"Invalid attribute name %r" % \ - python.PyUnicode_FromEncodedObject(name_utf, 'UTF-8', 'strict') + python.PyUnicode_FromEncodedObject(name_utf, 'UTF-8', NULL) return 0 cdef int _prefixValidOrRaise(tag_utf) except -1: if not _pyXmlNameIsValid(tag_utf): raise ValueError, u"Invalid namespace prefix %r" % \ - python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', 'strict') + python.PyUnicode_FromEncodedObject(tag_utf, 'UTF-8', NULL) return 0 -cdef object _namespacedName(xmlNode* c_node): +cdef inline object _namespacedName(xmlNode* c_node): return _namespacedNameFromNsName(_getNs(c_node), c_node.name) cdef object _namespacedNameFromNsName(char* href, char* name): From scoder at codespeak.net Thu Jun 5 21:24:03 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 5 Jun 2008 21:24:03 +0200 (CEST) Subject: [Lxml-checkins] r55599 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20080605192403.EBE4D168418@codespeak.net> Author: scoder Date: Thu Jun 5 21:24:03 2008 New Revision: 55599 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/tests/test_etree.py Log: r4474 at delle: sbehnel | 2008-06-05 10:03:49 +0200 raise a ValueError instead of an AssertionError for inacceptable .tag/namespace/etc. user input Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Jun 5 21:24:03 2008 @@ -57,6 +57,10 @@ Other changes ------------- +* Passing non-ASCII byte strings or invalid unicode strings as .tag, + namespaces, etc. will result in a ValueError instead of an + AssertionError (just like the tag well-formedness check). + * Up to several times faster attribute access (i.e. tree traversal) in lxml.objectify. Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Thu Jun 5 21:24:03 2008 @@ -1132,15 +1132,16 @@ cdef object _utf8(object s): if python.PyString_Check(s): - assert isutf8py(s) == 0, \ - u"All strings must be XML compatible: Unicode or ASCII, no NULL bytes" + if isutf8py(s): + raise ValueError, \ + u"All strings must be XML compatible: Unicode or ASCII, no NULL bytes" elif python.PyUnicode_Check(s): - # FIXME: we should test these strings, too ... s = python.PyUnicode_AsUTF8String(s) - assert isutf8py(s) != -1, \ - u"All strings must be XML compatible: Unicode or ASCII, no NULL bytes" + if isutf8py(s) == -1: + raise ValueError, \ + u"All strings must be XML compatible: Unicode or ASCII, no NULL bytes" else: - raise TypeError, "Argument must be string or unicode." + raise TypeError, u"Argument must be string or unicode." return s cdef bint _isFilePath(char* c_path): Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Thu Jun 5 21:24:03 2008 @@ -723,7 +723,7 @@ Entity = self.etree.Entity self.assertRaises(ValueError, Entity, 'a b c') self.assertRaises(ValueError, Entity, 'a,b') - self.assertRaises(AssertionError, Entity, 'a\0b') + self.assertRaises(ValueError, Entity, 'a\0b') self.assertRaises(ValueError, Entity, '#abc') self.assertRaises(ValueError, Entity, '#xxyz') @@ -1999,53 +1999,53 @@ Element = self.etree.Element a = Element('a') - self.assertRaises(AssertionError, setattr, a, "text", 'ha\0ho') - self.assertRaises(AssertionError, setattr, a, "tail", 'ha\0ho') + self.assertRaises(ValueError, setattr, a, "text", 'ha\0ho') + self.assertRaises(ValueError, setattr, a, "tail", 'ha\0ho') - self.assertRaises(AssertionError, Element, 'ha\0ho') + self.assertRaises(ValueError, Element, 'ha\0ho') def test_unicode_byte_zero(self): Element = self.etree.Element a = Element('a') - self.assertRaises(AssertionError, setattr, a, "text", + self.assertRaises(ValueError, setattr, a, "text", _str('ha\0ho')) - self.assertRaises(AssertionError, setattr, a, "tail", + self.assertRaises(ValueError, setattr, a, "tail", _str('ha\0ho')) - self.assertRaises(AssertionError, Element, + self.assertRaises(ValueError, Element, _str('ha\0ho')) def test_byte_invalid(self): Element = self.etree.Element a = Element('a') - self.assertRaises(AssertionError, setattr, a, "text", 'ha\x07ho') - self.assertRaises(AssertionError, setattr, a, "text", 'ha\x02ho') + self.assertRaises(ValueError, setattr, a, "text", 'ha\x07ho') + self.assertRaises(ValueError, setattr, a, "text", 'ha\x02ho') - self.assertRaises(AssertionError, setattr, a, "tail", 'ha\x07ho') - self.assertRaises(AssertionError, setattr, a, "tail", 'ha\x02ho') + self.assertRaises(ValueError, setattr, a, "tail", 'ha\x07ho') + self.assertRaises(ValueError, setattr, a, "tail", 'ha\x02ho') - self.assertRaises(AssertionError, Element, 'ha\x07ho') - self.assertRaises(AssertionError, Element, 'ha\x02ho') + self.assertRaises(ValueError, Element, 'ha\x07ho') + self.assertRaises(ValueError, Element, 'ha\x02ho') def test_unicode_byte_invalid(self): Element = self.etree.Element a = Element('a') - self.assertRaises(AssertionError, setattr, a, "text", + self.assertRaises(ValueError, setattr, a, "text", _str('ha\x07ho')) - self.assertRaises(AssertionError, setattr, a, "text", + self.assertRaises(ValueError, setattr, a, "text", _str('ha\x02ho')) - self.assertRaises(AssertionError, setattr, a, "tail", + self.assertRaises(ValueError, setattr, a, "tail", _str('ha\x07ho')) - self.assertRaises(AssertionError, setattr, a, "tail", + self.assertRaises(ValueError, setattr, a, "tail", _str('ha\x02ho')) - self.assertRaises(AssertionError, Element, + self.assertRaises(ValueError, Element, _str('ha\x07ho')) - self.assertRaises(AssertionError, Element, + self.assertRaises(ValueError, Element, _str('ha\x02ho')) def test_encoding_tostring_utf16(self): From scoder at codespeak.net Thu Jun 5 21:24:08 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 5 Jun 2008 21:24:08 +0200 (CEST) Subject: [Lxml-checkins] r55600 - in lxml/trunk: . src/lxml Message-ID: <20080605192408.BE575168419@codespeak.net> Author: scoder Date: Thu Jun 5 21:24:08 2008 New Revision: 55600 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/xinclude.pxi Log: r4475 at delle: sbehnel | 2008-06-05 15:12:08 +0200 GIL handling fix Modified: lxml/trunk/src/lxml/xinclude.pxi ============================================================================== --- lxml/trunk/src/lxml/xinclude.pxi (original) +++ lxml/trunk/src/lxml/xinclude.pxi Thu Jun 5 21:24:08 2008 @@ -31,6 +31,7 @@ # siblings. Tree traversal will simply ignore them as they are not # typed as elements. The included fragment is added between the two, # i.e. as a sibling, which does not conflict with traversal. + cdef int result self._error_log.connect() with nogil: if node._doc._parser is not None: From scoder at codespeak.net Thu Jun 5 21:24:20 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 5 Jun 2008 21:24:20 +0200 (CEST) Subject: [Lxml-checkins] r55601 - in lxml/trunk: . src/lxml Message-ID: <20080605192420.131F8168415@codespeak.net> Author: scoder Date: Thu Jun 5 21:24:19 2008 New Revision: 55601 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/iterparse.pxi lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/proxy.pxi lxml/trunk/src/lxml/tree.pxd lxml/trunk/src/lxml/xmlerror.pxi lxml/trunk/src/lxml/xpath.pxd lxml/trunk/src/lxml/xslt.pxd Log: r4476 at delle: sbehnel | 2008-06-05 21:21:58 +0200 nogil declaration fixes Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Thu Jun 5 21:24:19 2008 @@ -720,7 +720,7 @@ c_child = c_child.prev return NULL -cdef inline xmlNode* _textNodeOrSkip(xmlNode* c_node): +cdef inline xmlNode* _textNodeOrSkip(xmlNode* c_node) nogil: u"""Return the node if it's a text node. Skip over ignorable nodes in a series of text nodes. Return NULL if a non-ignorable node is found. Modified: lxml/trunk/src/lxml/iterparse.pxi ============================================================================== --- lxml/trunk/src/lxml/iterparse.pxi (original) +++ lxml/trunk/src/lxml/iterparse.pxi Thu Jun 5 21:24:19 2008 @@ -357,7 +357,7 @@ cdef object _source cdef object _buffer cdef int (*_parse_chunk)(xmlparser.xmlParserCtxt* ctxt, - char* chunk, int size, int terminate) + char* chunk, int size, int terminate) nogil def __init__(self, source, events=(u"end",), *, tag=None, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, remove_blank_text=False, Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Thu Jun 5 21:24:19 2008 @@ -394,7 +394,7 @@ return c_input cdef xmlparser.xmlParserInput* _local_resolver(char* c_url, char* c_pubid, - xmlparser.xmlParserCtxt* c_context): + xmlparser.xmlParserCtxt* c_context) nogil: # no Python objects here, may be called without thread context ! # when we declare a Python object, Pyrex will INCREF(None) ! cdef xmlparser.xmlParserInput* c_input Modified: lxml/trunk/src/lxml/proxy.pxi ============================================================================== --- lxml/trunk/src/lxml/proxy.pxi (original) +++ lxml/trunk/src/lxml/proxy.pxi Thu Jun 5 21:24:19 2008 @@ -193,7 +193,7 @@ ################################################################################ # fix _Document references and namespaces when a node changes documents -cdef void _copyParentNamespaces(xmlNode* c_from_node, xmlNode* c_to_node): +cdef void _copyParentNamespaces(xmlNode* c_from_node, xmlNode* c_to_node) nogil: u"""Copy the namespaces of all ancestors of c_from_node to c_to_node. """ cdef xmlNode* c_parent Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Thu Jun 5 21:24:19 2008 @@ -48,7 +48,7 @@ cdef extern from "libxml/hash.h": ctypedef struct xmlHashTable - ctypedef void xmlHashScanner(void* payload, void* data, char* name) nogil + ctypedef void xmlHashScanner(void* payload, void* data, char* name) # may require GIL! void xmlHashScan(xmlHashTable* table, xmlHashScanner f, void* data) nogil void* xmlHashLookup(xmlHashTable* table, char* name) nogil @@ -263,12 +263,12 @@ cdef int xmlOutputBufferClose(xmlOutputBuffer* out) nogil ctypedef int (*xmlInputReadCallback)(void* context, - char* buffer, int len) nogil - ctypedef int (*xmlInputCloseCallback)(void* context) nogil + char* buffer, int len) + ctypedef int (*xmlInputCloseCallback)(void* context) ctypedef int (*xmlOutputWriteCallback)(void* context, - char* buffer, int len) nogil - ctypedef int (*xmlOutputCloseCallback)(void* context) nogil + char* buffer, int len) + ctypedef int (*xmlOutputCloseCallback)(void* context) cdef xmlOutputBuffer* xmlAllocOutputBuffer( xmlCharEncodingHandler* encoder) nogil Modified: lxml/trunk/src/lxml/xmlerror.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlerror.pxi (original) +++ lxml/trunk/src/lxml/xmlerror.pxi Thu Jun 5 21:24:19 2008 @@ -13,7 +13,7 @@ __GLOBAL_ERROR_LOG.clear() # dummy function: no debug output at all -cdef void _nullGenericErrorFunc(void* ctxt, char* msg, ...): +cdef void _nullGenericErrorFunc(void* ctxt, char* msg, ...) nogil: pass # setup for global log: Modified: lxml/trunk/src/lxml/xpath.pxd ============================================================================== --- lxml/trunk/src/lxml/xpath.pxd (original) +++ lxml/trunk/src/lxml/xpath.pxd Thu Jun 5 21:24:19 2008 @@ -70,10 +70,10 @@ ctypedef struct xmlXPathCompExpr - ctypedef void (*xmlXPathFunction)(xmlXPathParserContext* ctxt, int nargs) + ctypedef void (*xmlXPathFunction)(xmlXPathParserContext* ctxt, int nargs) nogil ctypedef xmlXPathFunction (*xmlXPathFuncLookupFunc)(void* ctxt, char* name, - char* ns_uri) + char* ns_uri) nogil cdef xmlXPathContext* xmlXPathNewContext(tree.xmlDoc* doc) nogil cdef xmlXPathObject* xmlXPathEvalExpression(char* str, Modified: lxml/trunk/src/lxml/xslt.pxd ============================================================================== --- lxml/trunk/src/lxml/xslt.pxd (original) +++ lxml/trunk/src/lxml/xslt.pxd Thu Jun 5 21:24:19 2008 @@ -41,7 +41,7 @@ ctypedef void (*xsltTransformFunction)(xsltTransformContext* ctxt, xmlNode* context_node, xmlNode* inst, - void* precomp_unused) + void* precomp_unused) nogil cdef int xsltRegisterExtFunction(xsltTransformContext* ctxt, char* name, @@ -67,7 +67,7 @@ ctypedef xmlDoc* (*xsltDocLoaderFunc)(char* URI, xmlDict* dict, int options, void* ctxt, - xsltLoadType type) + xsltLoadType type) nogil cdef xsltDocLoaderFunc xsltDocDefaultLoader cdef void xsltSetLoaderFunc(xsltDocLoaderFunc f) nogil @@ -80,7 +80,7 @@ xsltTransformContext* context) nogil cdef void xsltProcessOneNode(xsltTransformContext* ctxt, xmlNode* contextNode, - xsltStackElem* params) + xsltStackElem* params) nogil cdef xsltTransformContext* xsltNewTransformContext(xsltStylesheet* style, xmlDoc* doc) nogil cdef void xsltFreeTransformContext(xsltTransformContext* context) nogil @@ -95,7 +95,7 @@ void* ctxt, void (*handler)(void* ctxt, char* msg, ...)) nogil cdef void xsltSetTransformErrorFunc( xsltTransformContext*, void* ctxt, - void (*handler)(void* ctxt, char* msg, ...)) nogil + void (*handler)(void* ctxt, char* msg, ...) nogil) nogil cdef void xsltTransformError(xsltTransformContext* ctxt, xsltStylesheet* style, xmlNode* node, char* msg, ...) @@ -110,7 +110,7 @@ ctypedef int (*xsltSecurityCheck)(xsltSecurityPrefs* sec, xsltTransformContext* ctxt, - char* value) + char* value) nogil cdef xsltSecurityPrefs* xsltNewSecurityPrefs() nogil cdef void xsltFreeSecurityPrefs(xsltSecurityPrefs* sec) nogil From scoder at codespeak.net Thu Jun 5 21:28:57 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 5 Jun 2008 21:28:57 +0200 (CEST) Subject: [Lxml-checkins] r55602 - in lxml/branch/lxml-2.0: . src/lxml Message-ID: <20080605192857.40BEB16840C@codespeak.net> Author: scoder Date: Thu Jun 5 21:28:56 2008 New Revision: 55602 Modified: lxml/branch/lxml-2.0/CHANGES.txt lxml/branch/lxml-2.0/src/lxml/xinclude.pxi Log: potential problem in XInclude: Python value handling while GIL is released Modified: lxml/branch/lxml-2.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.0/CHANGES.txt (original) +++ lxml/branch/lxml-2.0/CHANGES.txt Thu Jun 5 21:28:56 2008 @@ -11,6 +11,8 @@ Bugs fixed ---------- +* Potential threading problem in XInclude. + * Crash in Element class lookup classes when the __init__() method of the super class is not called from Python subclasses. Modified: lxml/branch/lxml-2.0/src/lxml/xinclude.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/xinclude.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/xinclude.pxi Thu Jun 5 21:28:56 2008 @@ -31,6 +31,7 @@ # siblings. Tree traversal will simply ignore them as they are not # typed as elements. The included fragment is added between the two, # i.e. as a sibling, which does not conflict with traversal. + cdef int result self._error_log.connect() with nogil: if node._doc._parser is not None: From scoder at codespeak.net Thu Jun 5 21:29:25 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 5 Jun 2008 21:29:25 +0200 (CEST) Subject: [Lxml-checkins] r55603 - lxml/branch/lxml-2.0/src/lxml Message-ID: <20080605192925.67BFE16840C@codespeak.net> Author: scoder Date: Thu Jun 5 21:29:24 2008 New Revision: 55603 Modified: lxml/branch/lxml-2.0/src/lxml/parser.pxi lxml/branch/lxml-2.0/src/lxml/proxy.pxi lxml/branch/lxml-2.0/src/lxml/tree.pxd lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi lxml/branch/lxml-2.0/src/lxml/xslt.pxd Log: nogil declaration fixes Modified: lxml/branch/lxml-2.0/src/lxml/parser.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/parser.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/parser.pxi Thu Jun 5 21:29:24 2008 @@ -344,7 +344,7 @@ return c_input cdef xmlparser.xmlParserInput* _local_resolver(char* c_url, char* c_pubid, - xmlparser.xmlParserCtxt* c_context): + xmlparser.xmlParserCtxt* c_context) nogil: # no Python objects here, may be called without thread context ! # when we declare a Python object, Pyrex will INCREF(None) ! cdef xmlparser.xmlParserInput* c_input Modified: lxml/branch/lxml-2.0/src/lxml/proxy.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/proxy.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/proxy.pxi Thu Jun 5 21:29:24 2008 @@ -191,7 +191,7 @@ ################################################################################ # fix _Document references and namespaces when a node changes documents -cdef void _copyParentNamespaces(xmlNode* c_from_node, xmlNode* c_to_node): +cdef void _copyParentNamespaces(xmlNode* c_from_node, xmlNode* c_to_node) nogil: """Copy the namespaces of all ancestors of c_from_node to c_to_node. """ cdef xmlNode* c_parent Modified: lxml/branch/lxml-2.0/src/lxml/tree.pxd ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/tree.pxd (original) +++ lxml/branch/lxml-2.0/src/lxml/tree.pxd Thu Jun 5 21:29:24 2008 @@ -48,7 +48,7 @@ cdef extern from "libxml/hash.h": ctypedef struct xmlHashTable - ctypedef void xmlHashScanner(void* payload, void* data, char* name) nogil + ctypedef void xmlHashScanner(void* payload, void* data, char* name) void xmlHashScan(xmlHashTable* table, xmlHashScanner f, void* data) nogil void* xmlHashLookup(xmlHashTable* table, char* name) nogil @@ -256,12 +256,12 @@ cdef int xmlOutputBufferClose(xmlOutputBuffer* out) nogil ctypedef int (*xmlInputReadCallback)(void* context, - char* buffer, int len) nogil - ctypedef int (*xmlInputCloseCallback)(void* context) nogil + char* buffer, int len) + ctypedef int (*xmlInputCloseCallback)(void* context) ctypedef int (*xmlOutputWriteCallback)(void* context, - char* buffer, int len) nogil - ctypedef int (*xmlOutputCloseCallback)(void* context) nogil + char* buffer, int len) + ctypedef int (*xmlOutputCloseCallback)(void* context) cdef xmlOutputBuffer* xmlAllocOutputBuffer( xmlCharEncodingHandler* encoder) nogil Modified: lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/xmlerror.pxi Thu Jun 5 21:29:24 2008 @@ -21,7 +21,7 @@ __GLOBAL_ERROR_LOG.clear() # dummy function: no debug output at all -cdef void _nullGenericErrorFunc(void* ctxt, char* msg, ...): +cdef void _nullGenericErrorFunc(void* ctxt, char* msg, ...) nogil: pass # setup for global log: Modified: lxml/branch/lxml-2.0/src/lxml/xslt.pxd ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/xslt.pxd (original) +++ lxml/branch/lxml-2.0/src/lxml/xslt.pxd Thu Jun 5 21:29:24 2008 @@ -54,7 +54,7 @@ ctypedef xmlDoc* (*xsltDocLoaderFunc)(char* URI, xmlDict* dict, int options, void* ctxt, - xsltLoadType type) + xsltLoadType type) nogil cdef xsltDocLoaderFunc xsltDocDefaultLoader cdef void xsltSetLoaderFunc(xsltDocLoaderFunc f) nogil From scoder at codespeak.net Thu Jun 5 21:29:50 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 5 Jun 2008 21:29:50 +0200 (CEST) Subject: [Lxml-checkins] r55604 - lxml/branch/lxml-2.0 Message-ID: <20080605192950.F1E3316840C@codespeak.net> Author: scoder Date: Thu Jun 5 21:29:50 2008 New Revision: 55604 Modified: lxml/branch/lxml-2.0/version.txt Log: version Modified: lxml/branch/lxml-2.0/version.txt ============================================================================== --- lxml/branch/lxml-2.0/version.txt (original) +++ lxml/branch/lxml-2.0/version.txt Thu Jun 5 21:29:50 2008 @@ -1 +1 @@ -2.0.6 +2.0.7 From scoder at codespeak.net Fri Jun 6 12:09:01 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 6 Jun 2008 12:09:01 +0200 (CEST) Subject: [Lxml-checkins] r55623 - lxml/branch/lxml-2.0/doc Message-ID: <20080606100901.7E7CA16844B@codespeak.net> Author: scoder Date: Fri Jun 6 12:09:00 2008 New Revision: 55623 Modified: lxml/branch/lxml-2.0/doc/extensions.txt Log: doc cleanup Modified: lxml/branch/lxml-2.0/doc/extensions.txt ============================================================================== --- lxml/branch/lxml-2.0/doc/extensions.txt (original) +++ lxml/branch/lxml-2.0/doc/extensions.txt Fri Jun 6 12:09:00 2008 @@ -46,11 +46,8 @@ Now we're going to create a document that we can run XPath expressions against:: - >>> from lxml import etree - >>> from StringIO import StringIO - >>> f = StringIO('Haegar') - >>> doc = etree.parse(f) - >>> root = doc.getroot() + >>> root = etree.XML('Haegar') + >>> doc = etree.ElementTree(root) Done. Now we can have XPath expressions call our new function:: @@ -148,7 +145,7 @@ >>> print e.evaluate('f:hello(local-name(/a))') Hello a - >>> xslt = etree.XSLT(etree.ElementTree(etree.XML(''' + >>> xslt = etree.XSLT(etree.XML(''' ... @@ -157,7 +154,7 @@ ... ... ... - ... '''))) + ... ''')) >>> print xslt(doc) Ola Haegar @@ -165,6 +162,7 @@ creation. While the following example involves no functions, the idea should still be clear:: + >>> from StringIO import StringIO >>> f = StringIO('') >>> ns_doc = etree.parse(f) >>> e = etree.XPathEvaluator(ns_doc) From scoder at codespeak.net Fri Jun 6 14:52:51 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 6 Jun 2008 14:52:51 +0200 (CEST) Subject: [Lxml-checkins] r55628 - in lxml/branch/lxml-2.0: . src/lxml src/lxml/tests Message-ID: <20080606125251.1546A16845E@codespeak.net> Author: scoder Date: Fri Jun 6 14:52:50 2008 New Revision: 55628 Modified: lxml/branch/lxml-2.0/CHANGES.txt lxml/branch/lxml-2.0/src/lxml/parser.pxi lxml/branch/lxml-2.0/src/lxml/tests/test_etree.py Log: merge from trunk: prevent non-ASCII character escaping in attribute values Modified: lxml/branch/lxml-2.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.0/CHANGES.txt (original) +++ lxml/branch/lxml-2.0/CHANGES.txt Fri Jun 6 14:52:50 2008 @@ -19,6 +19,9 @@ Other changes ------------- +* Non-ASCII characters in attribute values are no longer escaped on + serialisation. + 2.0.6 (2008-05-31) ================== Modified: lxml/branch/lxml-2.0/src/lxml/parser.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/parser.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/parser.pxi Fri Jun 6 14:52:50 2008 @@ -536,8 +536,11 @@ _raiseParseError(c_ctxt, filename, context._error_log) else: _raiseParseError(c_ctxt, filename, None) - elif result.URL is NULL and filename is not None: - result.URL = tree.xmlStrdup(_cstr(filename)) + else: + if result.URL is NULL and filename is not None: + result.URL = tree.xmlStrdup(_cstr(filename)) + if result.encoding is NULL: + result.encoding = tree.xmlStrdup("UTF-8") return result @@ -1270,6 +1273,8 @@ result = tree.xmlCopyDoc(c_doc, 0) if result is NULL: python.PyErr_NoMemory() + if result.encoding is NULL: + result.encoding = tree.xmlStrdup("UTF-8") __GLOBAL_PARSER_CONTEXT.initDocDict(result) return result Modified: lxml/branch/lxml-2.0/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/tests/test_etree.py (original) +++ lxml/branch/lxml-2.0/src/lxml/tests/test_etree.py Fri Jun 6 14:52:50 2008 @@ -1872,7 +1872,7 @@ xml = '' tree = etree.parse(StringIO(xml)) docinfo = tree.docinfo - self.assertEquals(docinfo.encoding, None) + self.assertEquals(docinfo.encoding, "UTF-8") self.assertEquals(docinfo.xml_version, "1.0") self.assertEquals(docinfo.public_id, None) self.assertEquals(docinfo.system_url, None) From scoder at codespeak.net Fri Jun 6 15:05:03 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 6 Jun 2008 15:05:03 +0200 (CEST) Subject: [Lxml-checkins] r55630 - in lxml/trunk: . doc Message-ID: <20080606130503.53D0416844B@codespeak.net> Author: scoder Date: Fri Jun 6 15:05:02 2008 New Revision: 55630 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/extensions.txt Log: r4485 at delle: sbehnel | 2008-06-06 11:16:03 +0200 doc cleanup Modified: lxml/trunk/doc/extensions.txt ============================================================================== --- lxml/trunk/doc/extensions.txt (original) +++ lxml/trunk/doc/extensions.txt Fri Jun 6 15:05:02 2008 @@ -83,10 +83,8 @@ .. sourcecode:: pycon - >>> from lxml import etree - >>> f = StringIO('Haegar') - >>> doc = etree.parse(f) - >>> root = doc.getroot() + >>> root = etree.XML('Haegar') + >>> doc = etree.ElementTree(root) Done. Now we can have XPath expressions call our new function: @@ -196,7 +194,7 @@ >>> print(e('f:hello(local-name(/a))')) Hello a - >>> xslt = etree.XSLT(etree.ElementTree(etree.XML(''' + >>> xslt = etree.XSLT(etree.XML(''' ... @@ -205,7 +203,7 @@ ... ... ... - ... '''))) + ... ''')) >>> print(xslt(doc)) Ola Haegar From scoder at codespeak.net Fri Jun 6 15:05:10 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 6 Jun 2008 15:05:10 +0200 (CEST) Subject: [Lxml-checkins] r55631 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20080606130510.1ACB316843E@codespeak.net> Author: scoder Date: Fri Jun 6 15:05:09 2008 New Revision: 55631 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/tests/test_etree.py Log: r4487 at delle: sbehnel | 2008-06-06 13:56:34 +0200 always set 'UTF-8' as document encoding if it wasn't provided - keeps the serialiser from escaping attribute values Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Jun 6 15:05:09 2008 @@ -57,6 +57,9 @@ Other changes ------------- +* Non-ASCII characters in attribute values are no longer escaped on + serialisation. + * Passing non-ASCII byte strings or invalid unicode strings as .tag, namespaces, etc. will result in a ValueError instead of an AssertionError (just like the tag well-formedness check). Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Fri Jun 6 15:05:09 2008 @@ -588,8 +588,11 @@ _raiseParseError(c_ctxt, filename, context._error_log) else: _raiseParseError(c_ctxt, filename, None) - elif result.URL is NULL and filename is not None: - result.URL = tree.xmlStrdup(_cstr(filename)) + else: + if result.URL is NULL and filename is not None: + result.URL = tree.xmlStrdup(_cstr(filename)) + if result.encoding is NULL: + result.encoding = tree.xmlStrdup("UTF-8") return result cdef int _fixHtmlDictNames(tree.xmlDict* c_dict, xmlDoc* c_doc) nogil: @@ -1366,6 +1369,8 @@ result = tree.xmlNewDoc(NULL) if result is NULL: python.PyErr_NoMemory() + if result.encoding is NULL: + result.encoding = tree.xmlStrdup("UTF-8") __GLOBAL_PARSER_CONTEXT.initDocDict(result) return result Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Fri Jun 6 15:05:09 2008 @@ -1931,7 +1931,7 @@ xml = _bytes('') tree = etree.parse(BytesIO(xml)) docinfo = tree.docinfo - self.assertEquals(docinfo.encoding, None) + self.assertEquals(docinfo.encoding, "UTF-8") self.assertEquals(docinfo.xml_version, "1.0") self.assertEquals(docinfo.public_id, None) self.assertEquals(docinfo.system_url, None) From scoder at codespeak.net Fri Jun 6 15:05:14 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 6 Jun 2008 15:05:14 +0200 (CEST) Subject: [Lxml-checkins] r55632 - lxml/trunk Message-ID: <20080606130514.AD30416844D@codespeak.net> Author: scoder Date: Fri Jun 6 15:05:14 2008 New Revision: 55632 Modified: lxml/trunk/ (props changed) lxml/trunk/selftest.py lxml/trunk/selftest2.py Log: r4488 at delle: sbehnel | 2008-06-06 15:03:09 +0200 enabled more ET compatibility tests Modified: lxml/trunk/selftest.py ============================================================================== --- lxml/trunk/selftest.py (original) +++ lxml/trunk/selftest.py Fri Jun 6 15:05:14 2008 @@ -535,8 +535,8 @@ 'abc' >>> serialize(elem, encoding="us-ascii") 'abc' - >>> serialize(elem, encoding="ISO-8859-1") - "\nabc" + >>> serialize(elem, encoding="iso-8859-1").lower() + "\nabc" >>> elem.text = "<&\"\'>" >>> serialize(elem) @@ -545,19 +545,19 @@ '<&"\'>' >>> serialize(elem, encoding="us-ascii") # cdata characters '<&"\'>' - >>> serialize(elem, encoding="ISO-8859-1") - '\n<&"\'>' + >>> serialize(elem, encoding="iso-8859-1").lower() + '\n<&"\'>' -## >>> elem.attrib["key"] = "<&\"\'>" -## >>> elem.text = None -## >>> serialize(elem) -## '' -## >>> serialize(elem, encoding="utf-8") -## '' -## >>> serialize(elem, encoding="us-ascii") -## '' -## >>> serialize(elem, encoding="iso-8859-1") -## '\n' + >>> elem.attrib["key"] = "<&\"\'>" + >>> elem.text = None + >>> serialize(elem) + '' + >>> serialize(elem, encoding="utf-8") + '' + >>> serialize(elem, encoding="us-ascii") + '' + >>> serialize(elem, encoding="iso-8859-1").lower() + '\n' >>> elem.text = u'\xe5\xf6\xf6<>' >>> elem.attrib.clear() @@ -567,20 +567,19 @@ '\xc3\xa5\xc3\xb6\xc3\xb6<>' >>> serialize(elem, encoding="us-ascii") 'åöö<>' - >>> serialize(elem, encoding="ISO-8859-1") - "\n\xe5\xf6\xf6<>" - -## >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>' -## >>> elem.text = None -## >>> serialize(elem) -## '' -## >>> serialize(elem, encoding="utf-8") -## '' -## >>> serialize(elem, encoding="us-ascii") -## '' -## >>> serialize(elem, encoding="ISO-8859-1") -## '\n' + >>> serialize(elem, encoding="iso-8859-1").lower() + "\n\xe5\xf6\xf6<>" + >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>' + >>> elem.text = None + >>> serialize(elem) + '' + >>> serialize(elem, encoding="utf-8") + '' + >>> serialize(elem, encoding="us-ascii") + '' + >>> serialize(elem, encoding="iso-8859-1").lower() + '\n' """ if sys.version_info[0] >= 3: Modified: lxml/trunk/selftest2.py ============================================================================== --- lxml/trunk/selftest2.py (original) +++ lxml/trunk/selftest2.py Fri Jun 6 15:05:14 2008 @@ -169,16 +169,16 @@ >>> serialize(elem, "iso-8859-1").lower() '\n<&"\'>' -## >>> elem.attrib["key"] = "<&\"\'>" -## >>> elem.text = None -## >>> serialize(elem) -## '' -## >>> serialize(elem, "utf-8") -## '' -## >>> serialize(elem, "us-ascii") -## '' -## >>> serialize(elem, "iso-8859-1") -## '\n' + >>> elem.attrib["key"] = "<&\"\'>" + >>> elem.text = None + >>> serialize(elem) + '' + >>> serialize(elem, "utf-8") + '' + >>> serialize(elem, "us-ascii") + '' + >>> serialize(elem, "iso-8859-1").lower() + '\n' >>> elem.text = u'\xe5\xf6\xf6<>' >>> elem.attrib.clear() @@ -191,16 +191,16 @@ >>> serialize(elem, "iso-8859-1").lower() "\n\xe5\xf6\xf6<>" -## >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>' -## >>> elem.text = None -## >>> serialize(elem) -## '' -## >>> serialize(elem, "utf-8") -## '' -## >>> serialize(elem, "us-ascii") -## '' -## >>> serialize(elem, "iso-8859-1") -## '\n' + >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>' + >>> elem.text = None + >>> serialize(elem) + '' + >>> serialize(elem, "utf-8") + '' + >>> serialize(elem, "us-ascii") + '' + >>> serialize(elem, "iso-8859-1").lower() + '\n' """ From scoder at codespeak.net Mon Jun 9 19:42:12 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 9 Jun 2008 19:42:12 +0200 (CEST) Subject: [Lxml-checkins] r55701 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20080609174212.BB7E82A0153@codespeak.net> Author: scoder Date: Mon Jun 9 19:42:11 2008 New Revision: 55701 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_etree.py Log: r4493 at delle: sbehnel | 2008-06-09 12:24:15 +0200 handle target parser result in ElementTree.parse() method Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Jun 9 19:42:11 2008 @@ -40,6 +40,8 @@ Bugs fixed ---------- +* ``ElementTree.parse()`` didn't handle target parser result. + * Crash in Element class lookup classes when the __init__() method of the super class is not called from Python subclasses. Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Mon Jun 9 19:42:11 2008 @@ -1505,13 +1505,15 @@ Updates self with the content of source and returns its root """ - cdef _Document doc - doc = _parseDocument(source, parser, base_url) - self._context_node = doc.getroot() - if self._context_node is None: - self._doc = doc - else: - self._doc = None + cdef _Document doc = None + try: + doc = _parseDocument(source, parser, base_url) + self._context_node = doc.getroot() + if self._context_node is None: + self._doc = doc + except _TargetParserResult, result_container: + # raises a TypeError if we don't get an _Element + self._context_node = result_container.result return self._context_node def _setroot(self, _Element root not None): Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Mon Jun 9 19:42:11 2008 @@ -3308,6 +3308,30 @@ self.assertEquals("DONE", done) self.assertEquals(["start", "end"], events) + def test_elementtree_parser_target(self): + assertEquals = self.assertEquals + assertFalse = self.assertFalse + Element = self.etree.Element + + events = [] + class Target(object): + def start(self, tag, attrib): + events.append("start") + assertFalse(attrib) + assertEquals("TAG", tag) + def end(self, tag): + events.append("end") + assertEquals("TAG", tag) + def close(self): + return Element("DONE") + + parser = self.etree.XMLParser(target=Target()) + tree = self.etree.ElementTree() + tree.parse(BytesIO(""), parser=parser) + + self.assertEquals("DONE", tree.getroot().tag) + self.assertEquals(["start", "end"], events) + def test_parser_target_attrib(self): assertEquals = self.assertEquals Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Mon Jun 9 19:42:11 2008 @@ -430,6 +430,29 @@ self.assertRaises( LookupError, self.etree.XMLParser, encoding="hopefully unknown") + def test_elementtree_parser_target_type_error(self): + assertEquals = self.assertEquals + assertFalse = self.assertFalse + + events = [] + class Target(object): + def start(self, tag, attrib): + events.append("start") + assertFalse(attrib) + assertEquals("TAG", tag) + def end(self, tag): + events.append("end") + assertEquals("TAG", tag) + def close(self): + return "DONE" # no Element! + + parser = self.etree.XMLParser(target=Target()) + tree = self.etree.ElementTree() + + self.assertRaises(TypeError, + tree.parse, BytesIO(""), parser=parser) + self.assertEquals(["start", "end"], events) + def test_parser_target_comment(self): events = [] class Target(object): From scoder at codespeak.net Mon Jun 9 19:42:18 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 9 Jun 2008 19:42:18 +0200 (CEST) Subject: [Lxml-checkins] r55702 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20080609174218.92DD42A0183@codespeak.net> Author: scoder Date: Mon Jun 9 19:42:17 2008 New Revision: 55702 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/lxml.objectify.pyx lxml/trunk/src/lxml/tests/test_objectify.py Log: r4494 at delle: sbehnel | 2008-06-09 13:21:39 +0200 support for pickling ElementTree objects Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Jun 9 19:42:17 2008 @@ -8,6 +8,8 @@ Features added -------------- +* Pickling ``ElementTree`` objects in lxml.objectify. + * Support for parsing from file-like objects that return unicode strings. Modified: lxml/trunk/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.objectify.pyx (original) +++ lxml/trunk/src/lxml/lxml.objectify.pyx Mon Jun 9 19:42:17 2008 @@ -1355,20 +1355,29 @@ ################################################################################ # Pickle support -cdef _setupPickle(reduceFunction): +def __unpickleElementTree(data): + return etree.ElementTree(fromstring(data)) + +cdef _setupPickle(elementReduceFunction, elementTreeReduceFunction): if python.IS_PYTHON3: import copyreg else: import copy_reg as copyreg copyreg.constructor(fromstring) - copyreg.pickle(ObjectifiedElement, reduceFunction, fromstring) + copyreg.constructor(__unpickleElementTree) + copyreg.pickle(ObjectifiedElement, + elementReduceFunction, fromstring) + copyreg.pickle(etree._ElementTree, + elementTreeReduceFunction, __unpickleElementTree) -def pickleReduce(obj): - u"pickleReduce(obj)" +def pickleReduceElement(obj): return (fromstring, (etree.tostring(obj),)) -_setupPickle(pickleReduce) -del pickleReduce +def pickleReduceElementTree(obj): + return (__unpickleElementTree, (etree.tostring(obj),)) + +_setupPickle(pickleReduceElement, pickleReduceElementTree) +del pickleReduceElement, pickleReduceElementTree ################################################################################ # Element class lookup Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Mon Jun 9 19:42:17 2008 @@ -2268,6 +2268,19 @@ etree.tostring(new_root), etree.tostring(root)) + def test_pickle_elementtree(self): + import pickle + + tree = etree.ElementTree(self.XML(xml_str + "")) + out = BytesIO() + pickle.dump(tree, out) + + new_tree = pickle.loads(out.getvalue()) + self.assert_(isinstance(new_tree, etree._ElementTree)) + self.assertEquals( + etree.tostring(new_tree), + etree.tostring(tree)) + # E-Factory tests, need to use sub-elements as root element is always # type-looked-up as ObjectifiedElement (no annotations) def test_efactory_int(self): From scoder at codespeak.net Mon Jun 9 19:42:23 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 9 Jun 2008 19:42:23 +0200 (CEST) Subject: [Lxml-checkins] r55703 - in lxml/trunk: . tools Message-ID: <20080609174223.5D0A32A0153@codespeak.net> Author: scoder Date: Mon Jun 9 19:42:22 2008 New Revision: 55703 Modified: lxml/trunk/ (props changed) lxml/trunk/tools/xpathgrep.py Log: r4495 at delle: sbehnel | 2008-06-09 18:55:07 +0200 extended xpathgrep.py script: option for passing namespaces, enabled unicode methods as XPath functions Modified: lxml/trunk/tools/xpathgrep.py ============================================================================== --- lxml/trunk/tools/xpathgrep.py (original) +++ lxml/trunk/tools/xpathgrep.py Mon Jun 9 19:42:22 2008 @@ -1,6 +1,11 @@ #!/usr/bin/env python -import lxml.etree as et +try: + import lxml.etree as et +except ImportError, e: + print >> sys.stderr, "ERR: %s." % e + sys.exit(5) + import sys, os.path, optparse, itertools SHORT_DESCRIPTION = "An XPath file finder for XML files." @@ -17,6 +22,8 @@ + + # find all leaf elements: @@ -58,6 +65,16 @@ $ SCRIPT 'not(/*/*/*)' test.xml True + * find all elements that belong to a specific namespace and have @num=2 + $ SCRIPT --ns e=http://www.example.org/ns/example '//e:*[@num="2"]' test.xml + + +By default, all Python builtins and string methods are available as +XPath functions through the ``py`` prefix. There is also a string +comparison function ``py:within(x, a, b)`` that tests the string x +for being in the lexicographical interval given by a and b as in ``a +<= x <= b``. + '''.replace('SCRIPT', os.path.basename(sys.argv[0])) REGEXP_NS = "http://exslt.org/regular-expressions" @@ -119,37 +136,47 @@ def register_builtins(): ns = et.FunctionNamespace(PYTHON_BUILTINS_NS) - for (name, builtin) in vars(__builtins__).iteritems(): - if callable(builtin): - if not name.startswith('_') and name == name.lower(): - ns[name] = builtin + tostring = et.tostring str_xpath = et.XPath("string()") - def lower(_, s): - if isinstance(s, list): - if not s: - return '' - s = s[0] - if not isinstance(s, basestring): - if isinstance(s, bool): - s = str(s) - else: - s = str_xpath(s) - return s.lower() - def upper(_, s): + def make_string(s): if isinstance(s, list): if not s: - return '' + return u'' s = s[0] - if not isinstance(s, basestring): - if isinstance(s, bool): - s = str(s) + if not isinstance(s, unicode): + if isinstance(s, et._Element): + s = tostring(s, method="text", encoding=unicode) + elif isinstance(s, (str, bool)): + s = unicode(s) else: - s = str_xpath(s) - return s.upper() + s = unicode(str_xpath(s)) + return s + + def wrap_builtin(b): + def wrapped_builtin(_, *args): + return b(*args) + return wrapped_builtin + + for (name, builtin) in vars(__builtins__).iteritems(): + if callable(builtin): + if not name.startswith('_') and name == name.lower(): + ns[name] = wrap_builtin(builtin) - ns["lower"] = lower - ns["upper"] = upper + def wrap_str_method(b): + def wrapped_method(_, *args): + args = tuple(map(make_string, args)) + return b(*args) + return wrapped_method + + for (name, method) in vars(unicode).iteritems(): + if callable(method): + if not name.startswith('_'): + ns[name] = wrap_str_method(method) + + def within(_, s, a, b): + return make_string(a) <= make_string(s) <= make_string(b) + ns["within"] = within def parse_options(): @@ -169,13 +196,18 @@ help="run XInclude on the file before XPath") parser.add_option("--no-python", action="store_false", dest="python", default=True, - help="disable Python builtins (prefix 'py')") + help="disable Python builtins and functions (prefix 'py')") parser.add_option("--no-regexp", action="store_false", dest="regexp", default=True, help="disable regular expressions (prefix 're')") parser.add_option("-q", "--quiet", action="store_false", dest="verbose", default=True, help="don't print status messages to stdout") + parser.add_option("-N", "--ns", + action="append", default=[], + dest="namespaces", + help="add a namespace declaration: --ns PREFIX=NS",) + options, args = parser.parse_args() @@ -190,9 +222,7 @@ return options, args -if __name__ == "__main__": - options, args = parse_options() - +def main(options, args): namespaces = {} if options.regexp: namespaces["re"] = REGEXP_NS @@ -200,6 +230,10 @@ register_builtins() namespaces["py"] = PYTHON_BUILTINS_NS + for ns in options.namespaces: + prefix, NS = ns.split("=", 1) + namespaces[prefix.strip()] = NS.strip() + xpath = et.XPath(args[0], namespaces=namespaces) found = False @@ -212,7 +246,18 @@ found |= find_in_file( filename, xpath, print_name, options.xinclude) - if found: - sys.exit(0) - else: - sys.exit(1) + return found + +if __name__ == "__main__": + try: + options, args = parse_options() + found = main(options, args) + if found: + sys.exit(0) + else: + sys.exit(1) + except et.XPathSyntaxError, e: + print >> sys.stderr, "Err: %s" % e + sys.exit(4) + except KeyboardInterrupt: + pass From scoder at codespeak.net Mon Jun 9 19:42:27 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 9 Jun 2008 19:42:27 +0200 (CEST) Subject: [Lxml-checkins] r55704 - in lxml/trunk: . tools Message-ID: <20080609174227.EC9FC2A0153@codespeak.net> Author: scoder Date: Mon Jun 9 19:42:27 2008 New Revision: 55704 Modified: lxml/trunk/ (props changed) lxml/trunk/tools/xpathgrep.py Log: r4496 at delle: sbehnel | 2008-06-09 19:23:48 +0200 some cleanup, option to prevent pretty printing Modified: lxml/trunk/tools/xpathgrep.py ============================================================================== --- lxml/trunk/tools/xpathgrep.py (original) +++ lxml/trunk/tools/xpathgrep.py Mon Jun 9 19:42:27 2008 @@ -71,10 +71,8 @@ By default, all Python builtins and string methods are available as XPath functions through the ``py`` prefix. There is also a string -comparison function ``py:within(x, a, b)`` that tests the string x -for being in the lexicographical interval given by a and b as in ``a -<= x <= b``. - +comparison function ``py:within(x, a, b)`` that tests the string x for +being lexicographically within the interval ``a <= x <= b``. '''.replace('SCRIPT', os.path.basename(sys.argv[0])) REGEXP_NS = "http://exslt.org/regular-expressions" @@ -82,21 +80,22 @@ parser = et.XMLParser(remove_blank_text=True) -def print_results(results): - if isinstance(results, basestring) or isinstance(results, bool): - print results - return - - for result in results: - if isinstance(result, basestring) or isinstance(result, bool): - print result - else: - print et.tostring( - result, - xml_declaration=False, - pretty_print=True) +def print_result(result, pretty_print): + if et.iselement(result): + result = et.tostring(result, xml_declaration=False, + pretty_print=pretty_print) + if pretty_print: + result = result[:-1] # strip newline at the end + print result + +def print_results(results, pretty_print): + if isinstance(results, list): + for result in results: + print_result(result, pretty_print) + else: + print_result(results, pretty_print) -def find_in_file(f, xpath, print_name=True, xinclude=False): +def find_in_file(f, xpath, print_name=True, xinclude=False, pretty_print=True): if hasattr(f, 'name'): filename = f.name else: @@ -127,7 +126,7 @@ if print_name: print ">> %s" % f if options.verbose: - print_results(results) + print_results(results, pretty_print) return True except Exception, e: print >> sys.stderr, "ERR: %r: %s: %s" % ( @@ -145,12 +144,10 @@ return u'' s = s[0] if not isinstance(s, unicode): - if isinstance(s, et._Element): + if et.iselement(s): s = tostring(s, method="text", encoding=unicode) - elif isinstance(s, (str, bool)): - s = unicode(s) else: - s = unicode(str_xpath(s)) + s = unicode(s) return s def wrap_builtin(b): @@ -203,6 +200,9 @@ parser.add_option("-q", "--quiet", action="store_false", dest="verbose", default=True, help="don't print status messages to stdout") + parser.add_option("-p", "--plain", + action="store_false", dest="pretty_print", default=True, + help="do not pretty-print the output") parser.add_option("-N", "--ns", action="append", default=[], dest="namespaces", @@ -239,12 +239,14 @@ found = False if len(args) == 1: found = find_in_file( - sys.stdin, xpath, print_name, options.xinclude) + sys.stdin, xpath, print_name, options.xinclude, + options.pretty_print) else: print_name = len(args) > 2 for filename in itertools.islice(args, 1, None): found |= find_in_file( - filename, xpath, print_name, options.xinclude) + filename, xpath, print_name, options.xinclude, + options.pretty_print) return found From scoder at codespeak.net Mon Jun 9 22:56:40 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 9 Jun 2008 22:56:40 +0200 (CEST) Subject: [Lxml-checkins] r55718 - in lxml/trunk: . tools Message-ID: <20080609205640.D5A722A0153@codespeak.net> Author: scoder Date: Mon Jun 9 22:56:38 2008 New Revision: 55718 Modified: lxml/trunk/ (props changed) lxml/trunk/tools/xpathgrep.py Log: r4501 at delle: sbehnel | 2008-06-09 22:51:52 +0200 copy+paste bug in xpathgrep.py Modified: lxml/trunk/tools/xpathgrep.py ============================================================================== --- lxml/trunk/tools/xpathgrep.py (original) +++ lxml/trunk/tools/xpathgrep.py Mon Jun 9 22:56:38 2008 @@ -239,7 +239,7 @@ found = False if len(args) == 1: found = find_in_file( - sys.stdin, xpath, print_name, options.xinclude, + sys.stdin, xpath, False, options.xinclude, options.pretty_print) else: print_name = len(args) > 2 From scoder at codespeak.net Mon Jun 9 22:56:45 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 9 Jun 2008 22:56:45 +0200 (CEST) Subject: [Lxml-checkins] r55719 - lxml/trunk Message-ID: <20080609205645.17E8B2A0153@codespeak.net> Author: scoder Date: Mon Jun 9 22:56:43 2008 New Revision: 55719 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r4502 at delle: sbehnel | 2008-06-09 22:52:34 +0200 changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Jun 9 22:56:43 2008 @@ -8,6 +8,8 @@ Features added -------------- +* Major overhaul of ``tools/xpathgrep.py`` script. + * Pickling ``ElementTree`` objects in lxml.objectify. * Support for parsing from file-like objects that return unicode From scoder at codespeak.net Wed Jun 11 20:06:22 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 11 Jun 2008 20:06:22 +0200 (CEST) Subject: [Lxml-checkins] r55771 - in lxml/branch/lxml-2.0: . src/lxml src/lxml/tests Message-ID: <20080611180622.9471D4981DA@codespeak.net> Author: scoder Date: Wed Jun 11 20:06:22 2008 New Revision: 55771 Modified: lxml/branch/lxml-2.0/CHANGES.txt lxml/branch/lxml-2.0/src/lxml/lxml.etree.pyx lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx lxml/branch/lxml-2.0/src/lxml/tests/test_elementtree.py lxml/branch/lxml-2.0/src/lxml/tests/test_etree.py lxml/branch/lxml-2.0/src/lxml/tests/test_objectify.py Log: trunk merge: fix ElementTree.parse() for parser target, pickle ElementTree objects Modified: lxml/branch/lxml-2.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.0/CHANGES.txt (original) +++ lxml/branch/lxml-2.0/CHANGES.txt Wed Jun 11 20:06:22 2008 @@ -8,9 +8,13 @@ Features added -------------- +* Pickling ``ElementTree`` objects in lxml.objectify. + Bugs fixed ---------- +* ``ElementTree.parse()`` didn't handle target parser result. + * Potential threading problem in XInclude. * Crash in Element class lookup classes when the __init__() method of Modified: lxml/branch/lxml-2.0/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/lxml.etree.pyx (original) +++ lxml/branch/lxml-2.0/src/lxml/lxml.etree.pyx Wed Jun 11 20:06:22 2008 @@ -1500,13 +1500,15 @@ Updates self with the content of source and returns its root """ - cdef _Document doc - doc = _parseDocument(source, parser, base_url) - self._context_node = doc.getroot() - if self._context_node is None: - self._doc = doc - else: - self._doc = None + cdef _Document doc = None + try: + doc = _parseDocument(source, parser, base_url) + self._context_node = doc.getroot() + if self._context_node is None: + self._doc = doc + except _TargetParserResult, result_container: + # raises a TypeError if we don't get an _Element + self._context_node = result_container.result return self._context_node def _setroot(self, _Element root not None): Modified: lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx (original) +++ lxml/branch/lxml-2.0/src/lxml/lxml.objectify.pyx Wed Jun 11 20:06:22 2008 @@ -1292,17 +1292,26 @@ ################################################################################ # Pickle support -cdef _setupPickle(reduceFunction): +def __unpickleElementTree(data): + return etree.ElementTree(fromstring(data)) + +cdef _setupPickle(elementReduceFunction, elementTreeReduceFunction): import copy_reg copy_reg.constructor(fromstring) - copy_reg.pickle(ObjectifiedElement, reduceFunction, fromstring) + copy_reg.constructor(__unpickleElementTree) + copy_reg.pickle(ObjectifiedElement, + elementReduceFunction, fromstring) + copy_reg.pickle(etree._ElementTree, + elementTreeReduceFunction, __unpickleElementTree) -def pickleReduce(obj): - "pickleReduce(obj)" +def pickleReduceElement(obj): return (fromstring, (etree.tostring(obj),)) -_setupPickle(pickleReduce) -del pickleReduce +def pickleReduceElementTree(obj): + return (__unpickleElementTree, (etree.tostring(obj),)) + +_setupPickle(pickleReduceElement, pickleReduceElementTree) +del pickleReduceElement, pickleReduceElementTree ################################################################################ # Element class lookup Modified: lxml/branch/lxml-2.0/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/tests/test_elementtree.py (original) +++ lxml/branch/lxml-2.0/src/lxml/tests/test_elementtree.py Wed Jun 11 20:06:22 2008 @@ -3142,6 +3142,30 @@ self.assertEquals("DONE", done) self.assertEquals(["start", "end"], events) + def test_elementtree_parser_target(self): + assertEquals = self.assertEquals + assertFalse = self.assertFalse + Element = self.etree.Element + + events = [] + class Target(object): + def start(self, tag, attrib): + events.append("start") + assertFalse(attrib) + assertEquals("TAG", tag) + def end(self, tag): + events.append("end") + assertEquals("TAG", tag) + def close(self): + return Element("DONE") + + parser = self.etree.XMLParser(target=Target()) + tree = self.etree.ElementTree() + tree.parse(StringIO(""), parser=parser) + + self.assertEquals("DONE", tree.getroot().tag) + self.assertEquals(["start", "end"], events) + def test_parser_target_attrib(self): assertEquals = self.assertEquals Modified: lxml/branch/lxml-2.0/src/lxml/tests/test_etree.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/tests/test_etree.py (original) +++ lxml/branch/lxml-2.0/src/lxml/tests/test_etree.py Wed Jun 11 20:06:22 2008 @@ -416,6 +416,29 @@ self.assertRaises( LookupError, self.etree.XMLParser, encoding="hopefully unknown") + def test_elementtree_parser_target_type_error(self): + assertEquals = self.assertEquals + assertFalse = self.assertFalse + + events = [] + class Target(object): + def start(self, tag, attrib): + events.append("start") + assertFalse(attrib) + assertEquals("TAG", tag) + def end(self, tag): + events.append("end") + assertEquals("TAG", tag) + def close(self): + return "DONE" # no Element! + + parser = self.etree.XMLParser(target=Target()) + tree = self.etree.ElementTree() + + self.assertRaises(TypeError, + tree.parse, StringIO(""), parser=parser) + self.assertEquals(["start", "end"], events) + def test_iterwalk_tag(self): iterwalk = self.etree.iterwalk root = self.etree.XML('') Modified: lxml/branch/lxml-2.0/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/tests/test_objectify.py (original) +++ lxml/branch/lxml-2.0/src/lxml/tests/test_objectify.py Wed Jun 11 20:06:22 2008 @@ -2190,6 +2190,19 @@ etree.tostring(new_root), etree.tostring(root)) + def test_pickle_elementtree(self): + import pickle + + tree = etree.ElementTree(self.XML(xml_str + "")) + out = StringIO() + pickle.dump(tree, out) + + new_tree = pickle.loads(out.getvalue()) + self.assert_(isinstance(new_tree, etree._ElementTree)) + self.assertEquals( + etree.tostring(new_tree), + etree.tostring(tree)) + # E-Factory tests, need to use sub-elements as root element is always # type-looked-up as ObjectifiedElement (no annotations) def test_efactory_int(self): From scoder at codespeak.net Thu Jun 12 11:41:46 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 12 Jun 2008 11:41:46 +0200 (CEST) Subject: [Lxml-checkins] r55777 - in lxml/trunk: . src/lxml Message-ID: <20080612094146.C936816A068@codespeak.net> Author: scoder Date: Thu Jun 12 11:41:44 2008 New Revision: 55777 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/extensions.pxi lxml/trunk/src/lxml/python.pxd Log: r4506 at delle: sbehnel | 2008-06-12 11:37:21 +0200 code simplification Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Thu Jun 12 11:41:44 2008 @@ -488,7 +488,7 @@ elif xpathObj.type == xpath.XPATH_NODESET: return _createNodeSetResult(xpathObj, doc) elif xpathObj.type == xpath.XPATH_BOOLEAN: - return python.PyBool_FromLong(xpathObj.boolval) + return xpathObj.boolval elif xpathObj.type == xpath.XPATH_NUMBER: return xpathObj.floatval elif xpathObj.type == xpath.XPATH_STRING: Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Thu Jun 12 11:41:44 2008 @@ -44,7 +44,6 @@ cdef object PyString_FromFormat(char* format, ...) cdef Py_ssize_t PyString_GET_SIZE(object s) - cdef object PyBool_FromLong(long value) cdef object PyNumber_Int(object value) cdef Py_ssize_t PyInt_AsSsize_t(object value) From scoder at codespeak.net Thu Jun 12 11:41:52 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 12 Jun 2008 11:41:52 +0200 (CEST) Subject: [Lxml-checkins] r55778 - in lxml/trunk: . src/lxml Message-ID: <20080612094152.2184316A06B@codespeak.net> Author: scoder Date: Thu Jun 12 11:41:50 2008 New Revision: 55778 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/extensions.pxi lxml/trunk/src/lxml/xpath.pxd Log: r4507 at delle: sbehnel | 2008-06-12 11:39:46 +0200 code simplification Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Thu Jun 12 11:41:50 2008 @@ -488,7 +488,7 @@ elif xpathObj.type == xpath.XPATH_NODESET: return _createNodeSetResult(xpathObj, doc) elif xpathObj.type == xpath.XPATH_BOOLEAN: - return xpathObj.boolval + return xpathObj.boolval elif xpathObj.type == xpath.XPATH_NUMBER: return xpathObj.floatval elif xpathObj.type == xpath.XPATH_STRING: Modified: lxml/trunk/src/lxml/xpath.pxd ============================================================================== --- lxml/trunk/src/lxml/xpath.pxd (original) +++ lxml/trunk/src/lxml/xpath.pxd Thu Jun 12 11:41:50 2008 @@ -47,7 +47,7 @@ ctypedef struct xmlXPathObject: xmlXPathObjectType type xmlNodeSet* nodesetval - int boolval + bint boolval double floatval char* stringval From lxml-checkins at codespeak.net Sun Jun 15 14:49:51 2008 From: lxml-checkins at codespeak.net (VIAGRA ® Official Site) Date: Sun, 15 Jun 2008 14:49:51 +0200 (CEST) Subject: [Lxml-checkins] Dear lxml-checkins@codespeak.net June 83% 0FF Message-ID: <20080615014755.2302.qmail@cpc3-gran2-0-0-cust513.nott.cable.ntl.com> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20080615/c07c0487/attachment.htm From lxml-checkins at codespeak.net Tue Jun 17 21:07:26 2008 From: lxml-checkins at codespeak.net (Jim Helton) Date: Tue, 17 Jun 2008 21:07:26 +0200 (CEST) Subject: [Lxml-checkins] Dear lxml-checkins@codespeak.net Savings ...3 Days Only Message-ID: <20080617100532.14477.qmail@manz-590c3319.pool.einsundeins.de> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20080617/2e8c034a/attachment.htm From scoder at codespeak.net Thu Jun 19 18:36:43 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 19 Jun 2008 18:36:43 +0200 (CEST) Subject: [Lxml-checkins] r55967 - in lxml/trunk: . src/lxml Message-ID: <20080619163643.77D16398005@codespeak.net> Author: scoder Date: Thu Jun 19 18:36:42 2008 New Revision: 55967 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/builder.py Log: r4510 at delle: sbehnel | 2008-06-16 14:34:42 +0200 docs Modified: lxml/trunk/src/lxml/builder.py ============================================================================== --- lxml/trunk/src/lxml/builder.py (original) +++ lxml/trunk/src/lxml/builder.py Thu Jun 19 18:36:42 2008 @@ -140,6 +140,17 @@

And finally, here is an embedded XHTML fragment.

+ + For namespace support, you can pass a namespace map (``nsmap``) + and/or a specific target ``namespace`` to the ElementMaker class:: + + >>> E = ElementMaker(namespace="http://my.ns/") + >>> print(ET.tostring( E.test )) + + + >>> E = ElementMaker(namespace="http://my.ns/", nsmap={'p':'http://my.ns/'}) + >>> print(ET.tostring( E.test )) + """ def __init__(self, typemap=None, From scoder at codespeak.net Thu Jun 19 18:36:50 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 19 Jun 2008 18:36:50 +0200 (CEST) Subject: [Lxml-checkins] r55968 - in lxml/trunk: . src/lxml src/lxml/html src/lxml/html/tests Message-ID: <20080619163650.CB60139B5A9@codespeak.net> Author: scoder Date: Thu Jun 19 18:36:50 2008 New Revision: 55968 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/ElementInclude.py lxml/trunk/src/lxml/html/__init__.py lxml/trunk/src/lxml/html/clean.py lxml/trunk/src/lxml/html/tests/test_feedparser_data.py lxml/trunk/src/lxml/html/tests/test_forms.txt lxml/trunk/src/lxml/html/tests/test_rewritelinks.txt Log: r4511 at delle: sbehnel | 2008-06-19 10:46:05 +0200 fixes for Py3k stdlib restructuring Modified: lxml/trunk/src/lxml/ElementInclude.py ============================================================================== --- lxml/trunk/src/lxml/ElementInclude.py (original) +++ lxml/trunk/src/lxml/ElementInclude.py Thu Jun 19 18:36:50 2008 @@ -52,12 +52,18 @@ from lxml import etree import copy -from urlparse import urljoin -from urllib2 import urlopen +try: + from urlparse import urljoin + from urllib2 import urlopen +except ImportError: + # Python 3 + from urllib.parse import urljoin + from urllib.request import urlopen try: set except NameError: + # Python 2.3 from sets import Set as set XINCLUDE = "{http://www.w3.org/2001/XInclude}" Modified: lxml/trunk/src/lxml/html/__init__.py ============================================================================== --- lxml/trunk/src/lxml/html/__init__.py (original) +++ lxml/trunk/src/lxml/html/__init__.py Thu Jun 19 18:36:50 2008 @@ -3,7 +3,11 @@ import threading import re -import urlparse +try: + from urlparse import urljoin +except ImportError: + # Python 3 + from urllib.parse import urljoin import copy from lxml import etree from lxml.html import defs @@ -269,7 +273,7 @@ if resolve_base_href: self.resolve_base_href() def link_repl(href): - return urlparse.urljoin(base_url, href) + return urljoin(base_url, href) self.rewrite_links(link_repl) def resolve_base_href(self): @@ -316,13 +320,13 @@ if attrib in attribs: value = el.get(attrib) if codebase is not None: - value = urlparse.urljoin(codebase, value) + value = urljoin(codebase, value) yield (el, attrib, value, 0) if 'archive' in attribs: for match in _archive_re.finditer(el.get('archive')): value = match.group(0) if codebase is not None: - value = urlparse.urljoin(codebase, value) + value = urljoin(codebase, value) yield (el, 'archive', value, match.start()) if tag == 'param': valuetype = el.get('valuetype') or '' @@ -751,7 +755,7 @@ base_url = self.base_url action = self.get('action') if base_url and action is not None: - return urlparse.urljoin(base_url, action) + return urljoin(base_url, action) else: return action def _action__set(self, value): Modified: lxml/trunk/src/lxml/html/clean.py ============================================================================== --- lxml/trunk/src/lxml/html/clean.py (original) +++ lxml/trunk/src/lxml/html/clean.py Thu Jun 19 18:36:50 2008 @@ -6,7 +6,11 @@ import re import copy -import urlparse +try: + from urlparse import urlsplit +except ImportError: + # Python 3 + from urllib.parse import urlsplit from lxml import etree from lxml.html import defs from lxml.html import fromstring, tostring, XHTML_NAMESPACE @@ -418,7 +422,7 @@ if (self.whitelist_tags is not None and el.tag not in self.whitelist_tags): return False - scheme, netloc, path, query, fragment = urlparse.urlsplit(url) + scheme, netloc, path, query, fragment = urlsplit(url) netloc = netloc.lower().split(':', 1)[0] if scheme not in ('http', 'https'): return False Modified: lxml/trunk/src/lxml/html/tests/test_feedparser_data.py ============================================================================== --- lxml/trunk/src/lxml/html/tests/test_feedparser_data.py (original) +++ lxml/trunk/src/lxml/html/tests/test_feedparser_data.py Thu Jun 19 18:36:50 2008 @@ -1,7 +1,11 @@ import sys import os import re -import rfc822 +try: + from rfc822 import Message +except ImportError: + # Python 3 + from email import message_from_file as Message import unittest from lxml.tests.common_imports import doctest if sys.version_info >= (2,4): @@ -28,7 +32,7 @@ def parse(self): f = open(self.filename, 'r') - headers = rfc822.Message(f) + headers = Message(f) c = f.read() f.close() if not headers.keys(): Modified: lxml/trunk/src/lxml/html/tests/test_forms.txt ============================================================================== --- lxml/trunk/src/lxml/html/tests/test_forms.txt (original) +++ lxml/trunk/src/lxml/html/tests/test_forms.txt Thu Jun 19 18:36:50 2008 @@ -120,8 +120,9 @@ ValueError: There is no option with the value 'asdf' >>> select.value_options ['1', '2', '3'] ->>> import urllib ->>> print(urllib.urlencode(f.form_values())) +>>> try: from urllib import urlencode +... except ImportError: from urllib.parse import urlencode +>>> print(urlencode(f.form_values())) hidden_field=new+value&text_field=text_value&single_checkbox=on&single_checkbox2=good&check_group=1&check_group=2&check_group=3&textarea_field=some+text&select1=&select2=1&select2=2&select2=3 >>> fields = f.fields >>> fields # doctest:+NOPARSE_MARKUP Modified: lxml/trunk/src/lxml/html/tests/test_rewritelinks.txt ============================================================================== --- lxml/trunk/src/lxml/html/tests/test_rewritelinks.txt (original) +++ lxml/trunk/src/lxml/html/tests/test_rewritelinks.txt Thu Jun 19 18:36:50 2008 @@ -1,7 +1,8 @@ We'll define a link translation function: >>> base_href = 'http://old/base/path.html' - >>> import urlparse + >>> try: import urlparse + ... except ImportError: import urllib.parse as urlparse >>> def relocate_href(link): ... link = urlparse.urljoin(base_href, link) ... if link.startswith('http://old'): From scoder at codespeak.net Thu Jun 19 18:36:55 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 19 Jun 2008 18:36:55 +0200 (CEST) Subject: [Lxml-checkins] r55969 - in lxml/trunk: . src/lxml/html/tests Message-ID: <20080619163655.5C56439B5DC@codespeak.net> Author: scoder Date: Thu Jun 19 18:36:54 2008 New Revision: 55969 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/html/tests/test_feedparser_data.py Log: r4512 at delle: sbehnel | 2008-06-19 11:45:11 +0200 another Py3k test fix Modified: lxml/trunk/src/lxml/html/tests/test_feedparser_data.py ============================================================================== --- lxml/trunk/src/lxml/html/tests/test_feedparser_data.py (original) +++ lxml/trunk/src/lxml/html/tests/test_feedparser_data.py Thu Jun 19 18:36:54 2008 @@ -35,6 +35,8 @@ headers = Message(f) c = f.read() f.close() + if not c.strip(): + c = headers.get_payload() if not headers.keys(): raise Exception( "File %s has no headers" % self.filename) From scoder at codespeak.net Thu Jun 19 18:36:59 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 19 Jun 2008 18:36:59 +0200 (CEST) Subject: [Lxml-checkins] r55970 - in lxml/trunk: . doc Message-ID: <20080619163659.BF6A639B5E2@codespeak.net> Author: scoder Date: Thu Jun 19 18:36:58 2008 New Revision: 55970 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/build.txt Log: r4513 at delle: sbehnel | 2008-06-19 11:46:26 +0200 require Cython 0.9.8 Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Thu Jun 19 18:36:58 2008 @@ -44,9 +44,9 @@ want to be an lxml developer, then you do need a working Cython installation. You can use EasyInstall_ to install it:: - easy_install Cython==0.9.6.14 + easy_install Cython==0.9.8 -lxml currently requires Cython 0.9.6.14, later versions were not +lxml currently requires Cython 0.9.8, later versions were not tested. From scoder at codespeak.net Thu Jun 19 18:37:04 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 19 Jun 2008 18:37:04 +0200 (CEST) Subject: [Lxml-checkins] r55971 - in lxml/trunk: . doc Message-ID: <20080619163704.A2A9539B59A@codespeak.net> Author: scoder Date: Thu Jun 19 18:37:04 2008 New Revision: 55971 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/doc/main.txt Log: r4514 at delle: sbehnel | 2008-06-19 13:54:07 +0200 prepare release of lxml 2.1beta3 Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Jun 19 18:37:04 2008 @@ -2,8 +2,8 @@ lxml changelog ============== -Under development -================= +2.1beta3 (2008-06-19) +===================== Features added -------------- @@ -25,7 +25,7 @@ unicode string or a tree, based on the type of the input. Previously, the result was always a byte string or a tree. -* Support for Python 2.6 and 3.0 (experimental!). +* Support for Python 2.6 and 3.0 beta. * File name handling now uses a heuristic to convert between byte strings (usually filenames) and unicode strings (usually URLs). Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Thu Jun 19 18:37:04 2008 @@ -146,8 +146,8 @@ source release. If you can't wait, consider trying a less recent release version first. -The latest version is `lxml 2.1beta2`_, released 2008-05-02 -(`changes for 2.1beta2`_). `Older versions`_ are listed below. +The latest version is `lxml 2.1beta3`_, released 2008-06-19 +(`changes for 2.1beta3`_). `Older versions`_ are listed below. Please take a look at the `installation instructions`_! @@ -215,7 +215,9 @@ Old Versions ------------ -.. _`PDF documentation`: lxmldoc-2.1beta2.pdf +.. _`PDF documentation`: lxmldoc-2.1beta3.pdf + +* `lxml 2.1beta2`_, released 2008-05-02 (`changes for 2.1beta2`_) * `lxml 2.1beta1`_, released 2008-04-15 (`changes for 2.1beta1`_) @@ -285,6 +287,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.1beta3`: lxml-2.1beta3.tgz .. _`lxml 2.1beta2`: lxml-2.1beta2.tgz .. _`lxml 2.1beta1`: lxml-2.1beta1.tgz .. _`lxml 2.1alpha1`: lxml-2.1alpha1.tgz @@ -320,6 +323,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.1beta3`: changes-2.1beta3.html .. _`changes for 2.1beta2`: changes-2.1beta2.html .. _`changes for 2.1beta1`: changes-2.1beta1.html .. _`changes for 2.1alpha1`: changes-2.1alpha1.html From scoder at codespeak.net Fri Jun 20 09:52:41 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Jun 2008 09:52:41 +0200 (CEST) Subject: [Lxml-checkins] r55981 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20080620075241.AB58B169F14@codespeak.net> Author: scoder Date: Fri Jun 20 09:52:39 2008 New Revision: 55981 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/cssselect.py lxml/trunk/src/lxml/tests/test_css.py Log: r4520 at delle: sbehnel | 2008-06-20 09:48:39 +0200 CSS selector parser accidentally merged non-adjacent classes Modified: lxml/trunk/src/lxml/cssselect.py ============================================================================== --- lxml/trunk/src/lxml/cssselect.py (original) +++ lxml/trunk/src/lxml/cssselect.py Fri Jun 20 09:52:39 2008 @@ -738,6 +738,8 @@ result = Pseudo(result, type, ident) continue else: + if peek == ' ': + stream.next() break # FIXME: not sure what "negation" is return result @@ -823,7 +825,10 @@ while 1: match = _whitespace_re.match(s, pos=pos) if match: + preceding_whitespace_pos = pos pos = match.end() + else: + preceding_whitespace_pos = 0 if pos >= len(s): return match = _count_re.match(s, pos=pos) @@ -839,6 +844,8 @@ pos += 2 continue if c in '>+~,.*=[]()|:#': + if c in '.#' and preceding_whitespace_pos > 0: + yield Token(' ', preceding_whitespace_pos) yield Token(c, pos) pos += 1 continue Modified: lxml/trunk/src/lxml/tests/test_css.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_css.py (original) +++ lxml/trunk/src/lxml/tests/test_css.py Fri Jun 20 09:52:39 2008 @@ -46,6 +46,14 @@ ('div.dialog', 51), ('div .dialog', 51), ('div.character, div.dialog', 99), + ('div.direction.dialog', 0), + ('div.dialog.direction', 0), + ('div.dialog.scene', 1), + ('div.scene.scene', 1), + ('div.scene .scene', 0), + ('div.direction .dialog ', 0), + ('div .dialog .direction', 4), + ('div.dialog .dialog .direction', 4), ('#speech5', 1), ('div#speech5', 1), ('div #speech5', 1), From scoder at codespeak.net Fri Jun 20 09:52:45 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Jun 2008 09:52:45 +0200 (CEST) Subject: [Lxml-checkins] r55982 - lxml/trunk Message-ID: <20080620075245.BDFF0169F15@codespeak.net> Author: scoder Date: Fri Jun 20 09:52:44 2008 New Revision: 55982 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r4521 at delle: sbehnel | 2008-06-20 09:48:46 +0200 changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Jun 20 09:52:44 2008 @@ -2,6 +2,22 @@ lxml changelog ============== +Under development +================= + +Features added +-------------- + +Bugs fixed +---------- + +* Multiple dot-separated classes in CSS selectors were not resolved + correctly. + +Other changes +------------- + + 2.1beta3 (2008-06-19) ===================== From scoder at codespeak.net Fri Jun 20 10:02:57 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Jun 2008 10:02:57 +0200 (CEST) Subject: [Lxml-checkins] r55983 - in lxml/branch/lxml-2.0: . src/lxml src/lxml/tests Message-ID: <20080620080257.8A89B169FD4@codespeak.net> Author: scoder Date: Fri Jun 20 10:02:56 2008 New Revision: 55983 Modified: lxml/branch/lxml-2.0/CHANGES.txt lxml/branch/lxml-2.0/src/lxml/cssselect.py lxml/branch/lxml-2.0/src/lxml/tests/test_css.py Log: CSS selector parser accidentally merged non-adjacent classes Modified: lxml/branch/lxml-2.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.0/CHANGES.txt (original) +++ lxml/branch/lxml-2.0/CHANGES.txt Fri Jun 20 10:02:56 2008 @@ -13,6 +13,9 @@ Bugs fixed ---------- +* Descending dot-separated classes in CSS selectors were not resolved + correctly. + * ``ElementTree.parse()`` didn't handle target parser result. * Potential threading problem in XInclude. Modified: lxml/branch/lxml-2.0/src/lxml/cssselect.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/cssselect.py (original) +++ lxml/branch/lxml-2.0/src/lxml/cssselect.py Fri Jun 20 10:02:56 2008 @@ -726,6 +726,8 @@ result = Pseudo(result, type, ident) continue else: + if peek == ' ': + stream.next() break # FIXME: not sure what "negation" is return result @@ -811,7 +813,10 @@ while 1: match = _whitespace_re.match(s, pos=pos) if match: + preceding_whitespace_pos = pos pos = match.end() + else: + preceding_whitespace_pos = 0 if pos >= len(s): return match = _count_re.match(s, pos=pos) @@ -827,6 +832,8 @@ pos += 2 continue if c in '>+~,.*=[]()|:#': + if c in '.#' and preceding_whitespace_pos > 0: + yield Token(' ', preceding_whitespace_pos) yield Token(c, pos) pos += 1 continue Modified: lxml/branch/lxml-2.0/src/lxml/tests/test_css.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/tests/test_css.py (original) +++ lxml/branch/lxml-2.0/src/lxml/tests/test_css.py Fri Jun 20 10:02:56 2008 @@ -41,6 +41,14 @@ ('div.dialog', 51), ('div .dialog', 51), ('div.character, div.dialog', 99), + ('div.direction.dialog', 0), + ('div.dialog.direction', 0), + ('div.dialog.scene', 1), + ('div.scene.scene', 1), + ('div.scene .scene', 0), + ('div.direction .dialog ', 0), + ('div .dialog .direction', 4), + ('div.dialog .dialog .direction', 4), ('#speech5', 1), ('div#speech5', 1), ('div #speech5', 1), From scoder at codespeak.net Fri Jun 20 10:27:10 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Jun 2008 10:27:10 +0200 (CEST) Subject: [Lxml-checkins] r55985 - lxml/branch/lxml-2.0 Message-ID: <20080620082710.C7F1816A08C@codespeak.net> Author: scoder Date: Fri Jun 20 10:27:09 2008 New Revision: 55985 Modified: lxml/branch/lxml-2.0/ (props changed) Log: more svn:ignore's From scoder at codespeak.net Fri Jun 20 10:28:09 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Jun 2008 10:28:09 +0200 (CEST) Subject: [Lxml-checkins] r55986 - in lxml/branch/lxml-2.0: benchmark doc doc/html doc/licenses fake_pyrex fake_pyrex/Pyrex fake_pyrex/Pyrex/Distutils samples src src/lxml src/lxml/html src/lxml/html/tests src/lxml/html/tests/feedparser-data src/lxml/html/tests/hackers-org-data src/lxml/tests src/lxml/tests/include tools Message-ID: <20080620082809.D358D16A098@codespeak.net> Author: scoder Date: Fri Jun 20 10:28:09 2008 New Revision: 55986 Modified: lxml/branch/lxml-2.0/benchmark/ (props changed) lxml/branch/lxml-2.0/doc/ (props changed) lxml/branch/lxml-2.0/doc/html/ (props changed) lxml/branch/lxml-2.0/doc/licenses/ (props changed) lxml/branch/lxml-2.0/fake_pyrex/ (props changed) lxml/branch/lxml-2.0/fake_pyrex/Pyrex/ (props changed) lxml/branch/lxml-2.0/fake_pyrex/Pyrex/Distutils/ (props changed) lxml/branch/lxml-2.0/samples/ (props changed) lxml/branch/lxml-2.0/src/ (props changed) lxml/branch/lxml-2.0/src/lxml/ (props changed) lxml/branch/lxml-2.0/src/lxml/html/ (props changed) lxml/branch/lxml-2.0/src/lxml/html/tests/ (props changed) lxml/branch/lxml-2.0/src/lxml/html/tests/feedparser-data/ (props changed) lxml/branch/lxml-2.0/src/lxml/html/tests/hackers-org-data/ (props changed) lxml/branch/lxml-2.0/src/lxml/tests/ (props changed) lxml/branch/lxml-2.0/src/lxml/tests/include/ (props changed) lxml/branch/lxml-2.0/tools/ (props changed) Log: more svn:ignore's From scoder at codespeak.net Fri Jun 20 11:02:52 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Jun 2008 11:02:52 +0200 (CEST) Subject: [Lxml-checkins] r55987 - lxml/tag/lxml-2.0.7 Message-ID: <20080620090252.3CD0A169EE7@codespeak.net> Author: scoder Date: Fri Jun 20 11:02:50 2008 New Revision: 55987 Added: lxml/tag/lxml-2.0.7/ - copied from r55986, lxml/branch/lxml-2.0/ Log: tag for lxml 2.0.7 From scoder at codespeak.net Fri Jun 20 11:04:49 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Jun 2008 11:04:49 +0200 (CEST) Subject: [Lxml-checkins] r55988 - lxml/tag/lxml-2.1beta3 Message-ID: <20080620090449.68FCA169EE7@codespeak.net> Author: scoder Date: Fri Jun 20 11:04:49 2008 New Revision: 55988 Added: lxml/tag/lxml-2.1beta3/ - copied from r55971, lxml/trunk/ Log: tag for lxml 2.1beta3 From scoder at codespeak.net Fri Jun 20 12:11:21 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Jun 2008 12:11:21 +0200 (CEST) Subject: [Lxml-checkins] r55989 - in lxml/branch/lxml-2.0: . doc Message-ID: <20080620101121.AAC0F169F33@codespeak.net> Author: scoder Date: Fri Jun 20 12:11:18 2008 New Revision: 55989 Modified: lxml/branch/lxml-2.0/CHANGES.txt lxml/branch/lxml-2.0/doc/main.txt Log: prepare release of lxml 2.0.7 Modified: lxml/branch/lxml-2.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.0/CHANGES.txt (original) +++ lxml/branch/lxml-2.0/CHANGES.txt Fri Jun 20 12:11:18 2008 @@ -2,8 +2,8 @@ lxml changelog ============== -Under development -================= +2.0.7 (2008-06-20) +================== Features added -------------- Modified: lxml/branch/lxml-2.0/doc/main.txt ============================================================================== --- lxml/branch/lxml-2.0/doc/main.txt (original) +++ lxml/branch/lxml-2.0/doc/main.txt Fri Jun 20 12:11:18 2008 @@ -146,8 +146,8 @@ source release. If you can't wait, consider trying a less recent release version first. -The latest version is `lxml 2.0.6`_, released 2008-05-31 -(`changes for 2.0.6`_). `Older versions`_ are listed below. +The latest version is `lxml 2.0.7`_, released 2008-06-20 +(`changes for 2.0.7`_). `Older versions`_ are listed below. Please take a look at the `installation instructions`_! @@ -215,7 +215,9 @@ Old Versions ------------ -.. _`PDF documentation`: lxmldoc-2.0.6.pdf +.. _`PDF documentation`: lxmldoc-2.0.7.pdf + +* `lxml 2.0.6`_, released 2008-05-31 (`changes for 2.0.6`_) * `lxml 2.0.5`_, released 2008-05-01 (`changes for 2.0.5`_) @@ -279,6 +281,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.0.7`: lxml-2.0.7.tgz .. _`lxml 2.0.6`: lxml-2.0.6.tgz .. _`lxml 2.0.5`: lxml-2.0.5.tgz .. _`lxml 2.0.4`: lxml-2.0.4.tgz @@ -311,6 +314,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.0.7`: changes-2.0.7.html .. _`changes for 2.0.6`: changes-2.0.6.html .. _`changes for 2.0.5`: changes-2.0.5.html .. _`changes for 2.0.4`: changes-2.0.4.html From scoder at codespeak.net Fri Jun 20 12:11:38 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Jun 2008 12:11:38 +0200 (CEST) Subject: [Lxml-checkins] r55990 - lxml/tag/lxml-2.0.7 Message-ID: <20080620101138.C1CE6169F36@codespeak.net> Author: scoder Date: Fri Jun 20 12:11:37 2008 New Revision: 55990 Removed: lxml/tag/lxml-2.0.7/ Log: moved tag for lxml 2.0.7 From scoder at codespeak.net Fri Jun 20 12:11:59 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Jun 2008 12:11:59 +0200 (CEST) Subject: [Lxml-checkins] r55991 - lxml/tag/lxml-2.0.7 Message-ID: <20080620101159.B7E20169F48@codespeak.net> Author: scoder Date: Fri Jun 20 12:11:59 2008 New Revision: 55991 Added: lxml/tag/lxml-2.0.7/ - copied from r55989, lxml/branch/lxml-2.0/ Log: tag for lxml 2.0.7 From scoder at codespeak.net Fri Jun 20 13:25:22 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Jun 2008 13:25:22 +0200 (CEST) Subject: [Lxml-checkins] r55992 - lxml/trunk Message-ID: <20080620112522.90D6A16A096@codespeak.net> Author: scoder Date: Fri Jun 20 13:25:20 2008 New Revision: 55992 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r4524 at delle: sbehnel | 2008-06-20 09:53:47 +0200 changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Jun 20 13:25:20 2008 @@ -11,7 +11,7 @@ Bugs fixed ---------- -* Multiple dot-separated classes in CSS selectors were not resolved +* Descending dot-separated classes in CSS selectors were not resolved correctly. Other changes From scoder at codespeak.net Fri Jun 20 13:25:30 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Jun 2008 13:25:30 +0200 (CEST) Subject: [Lxml-checkins] r55993 - in lxml/trunk: . doc Message-ID: <20080620112530.99CD516A098@codespeak.net> Author: scoder Date: Fri Jun 20 13:25:29 2008 New Revision: 55993 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/mklatex.py Log: r4525 at delle: sbehnel | 2008-06-20 13:06:25 +0200 PDF doc link fix Modified: lxml/trunk/doc/mklatex.py ============================================================================== --- lxml/trunk/doc/mklatex.py (original) +++ lxml/trunk/doc/mklatex.py Fri Jun 20 13:25:29 2008 @@ -178,20 +178,20 @@ titles = {} replace_interdoc_hyperrefs = re.compile( - r'\\href\{([^/}]+)[.]([^./}]+)\}\{([^}]+)\}').sub + r'\\href\{([^/}]+)[.]([^./}]+)\}').sub replace_docinternal_hyperrefs = re.compile( r'\\href\{\\#([^}]+)\}').sub def build_hyperref(match): - basename, extension, linktext = match.groups() + basename, extension = match.groups() outname = BASENAME_MAP.get(basename, basename) if '#' in extension: anchor = extension.split('#')[-1] - return r"\hyperref[%s]{%s}" % (anchor, linktext) + return r"\hyperref[%s]" % anchor elif extension != 'html': - return r'\href{http://codespeak.net/lxml/%s.%s}{%s}' % ( - outname, extension, linktext) + return r'\href{http://codespeak.net/lxml/%s.%s}' % ( + outname, extension) else: - return r"\hyperref[_part_%s.tex]{%s}" % (outname, linktext) + return r"\hyperref[_part_%s.tex]" % outname def fix_relative_hyperrefs(line): if r'\href' not in line: return line From scoder at codespeak.net Fri Jun 20 13:25:34 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Jun 2008 13:25:34 +0200 (CEST) Subject: [Lxml-checkins] r55994 - in lxml/trunk: . doc Message-ID: <20080620112534.AA61916A099@codespeak.net> Author: scoder Date: Fri Jun 20 13:25:34 2008 New Revision: 55994 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/mklatex.py Log: r4526 at delle: sbehnel | 2008-06-20 13:23:14 +0200 removed version section from first PDF chapter Modified: lxml/trunk/doc/mklatex.py ============================================================================== --- lxml/trunk/doc/mklatex.py (original) +++ lxml/trunk/doc/mklatex.py Fri Jun 20 13:25:34 2008 @@ -149,10 +149,12 @@ l = process_line(l) if skipping(l): # To-Do minitoc instead of tableofcontents - pass - else: + continue + elif "\hypertarget{old-versions}" in l: + break + elif "listcnt0" in l: l = l.replace("listcnt0", counter_text) - dest.write(l) + dest.write(l) if not title: raise Exception("Bueee, no title") From scoder at codespeak.net Fri Jun 20 13:26:34 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 20 Jun 2008 13:26:34 +0200 (CEST) Subject: [Lxml-checkins] r55995 - lxml/branch/lxml-2.0/doc Message-ID: <20080620112634.0B4D716A098@codespeak.net> Author: scoder Date: Fri Jun 20 13:26:33 2008 New Revision: 55995 Modified: lxml/branch/lxml-2.0/doc/mklatex.py Log: PDF changes from trunk Modified: lxml/branch/lxml-2.0/doc/mklatex.py ============================================================================== --- lxml/branch/lxml-2.0/doc/mklatex.py (original) +++ lxml/branch/lxml-2.0/doc/mklatex.py Fri Jun 20 13:26:33 2008 @@ -149,10 +149,12 @@ l = process_line(l) if skipping(l): # To-Do minitoc instead of tableofcontents - pass - else: + continue + elif "\hypertarget{old-versions}" in l: + break + elif "listcnt0" in l: l = l.replace("listcnt0", counter_text) - dest.write(l) + dest.write(l) if not title: raise Exception("Bueee, no title") @@ -178,20 +180,20 @@ titles = {} replace_interdoc_hyperrefs = re.compile( - r'\\href\{([^/}]+)[.]([^./}]+)\}\{([^}]+)\}').sub + r'\\href\{([^/}]+)[.]([^./}]+)\}').sub replace_docinternal_hyperrefs = re.compile( r'\\href\{\\#([^}]+)\}').sub def build_hyperref(match): - basename, extension, linktext = match.groups() + basename, extension = match.groups() outname = BASENAME_MAP.get(basename, basename) if '#' in extension: anchor = extension.split('#')[-1] - return r"\hyperref[%s]{%s}" % (anchor, linktext) + return r"\hyperref[%s]" % anchor elif extension != 'html': - return r'\href{http://codespeak.net/lxml/%s.%s}{%s}' % ( - outname, extension, linktext) + return r'\href{http://codespeak.net/lxml/%s.%s}' % ( + outname, extension) else: - return r"\hyperref[_part_%s.tex]{%s}" % (outname, linktext) + return r"\hyperref[_part_%s.tex]" % outname def fix_relative_hyperrefs(line): if r'\href' not in line: return line From scoder at codespeak.net Sat Jun 21 08:25:59 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 21 Jun 2008 08:25:59 +0200 (CEST) Subject: [Lxml-checkins] r56004 - in lxml/trunk: . src/lxml/html Message-ID: <20080621062559.ED55716A067@codespeak.net> Author: scoder Date: Sat Jun 21 08:25:58 2008 New Revision: 56004 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/html/__init__.py Log: r4538 at delle: sbehnel | 2008-06-21 07:43:34 +0200 API usage Modified: lxml/trunk/src/lxml/html/__init__.py ============================================================================== --- lxml/trunk/src/lxml/html/__init__.py (original) +++ lxml/trunk/src/lxml/html/__init__.py Sat Jun 21 08:25:58 2008 @@ -302,7 +302,7 @@ link you get is exactly the link in the document. """ link_attrs = defs.link_attrs - for el in self.getiterator(): + for el in self.iter(): attribs = el.attrib tag = _nons(el.tag) if tag != 'object': @@ -653,7 +653,7 @@ def _contains_block_level_tag(el): # FIXME: I could do this with XPath, but would that just be # unnecessarily slow? - for el in el.getiterator(): + for el in el.iter(): if _nons(el.tag) in defs.block_tags: return True return False From scoder at codespeak.net Sat Jun 21 08:26:05 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 21 Jun 2008 08:26:05 +0200 (CEST) Subject: [Lxml-checkins] r56005 - in lxml/trunk: . src/lxml/html src/lxml/html/tests Message-ID: <20080621062605.1674F16A068@codespeak.net> Author: scoder Date: Sat Jun 21 08:26:05 2008 New Revision: 56005 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/html/__init__.py lxml/trunk/src/lxml/html/tests/test_rewritelinks.txt Log: r4539 at delle: sbehnel | 2008-06-21 08:23:47 +0200 strip links in rewrite_links() to avoid whitespace problems Modified: lxml/trunk/src/lxml/html/__init__.py ============================================================================== --- lxml/trunk/src/lxml/html/__init__.py (original) +++ lxml/trunk/src/lxml/html/__init__.py Sat Jun 21 08:26:05 2008 @@ -372,7 +372,7 @@ elif resolve_base_href: self.resolve_base_href() for el, attrib, link, pos in self.iterlinks(): - new_link = link_repl_func(link) + new_link = link_repl_func(link.strip()) if new_link == link: continue if new_link is None: Modified: lxml/trunk/src/lxml/html/tests/test_rewritelinks.txt ============================================================================== --- lxml/trunk/src/lxml/html/tests/test_rewritelinks.txt (original) +++ lxml/trunk/src/lxml/html/tests/test_rewritelinks.txt Sat Jun 21 08:26:05 2008 @@ -116,3 +116,57 @@ a href="/other.html" td style="/td-bg.png"@22 img src="/logo.gif" + +An application of ``iterlinks()`` is ``make_links_absolute()``:: + + >>> from lxml.html import make_links_absolute + >>> print(make_links_absolute(''' + ... + ... + ... + ... + ... + ... + ... + ... + ... + ... + ...
+ ... + ... Hi world! + ...
+ ... ''', + ... base_url="http://my.little.server/url/")) + + + + + + + + + + + +
+ + Hi world! +
+ + From lxml-checkins at codespeak.net Sun Jun 22 05:40:23 2008 From: lxml-checkins at codespeak.net (VIAGRA ® Official Site) Date: Sun, 22 Jun 2008 05:40:23 +0200 (CEST) Subject: [Lxml-checkins] Dear lxml-checkins@codespeak.net June 89% 0FF Message-ID: <20080622083923.106881.qmail@CBL217-132-87-187.bb.netvision.net.il> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20080622/5638b79e/attachment.htm From scoder at codespeak.net Sun Jun 22 08:40:35 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 22 Jun 2008 08:40:35 +0200 (CEST) Subject: [Lxml-checkins] r56013 - in lxml/trunk: . doc Message-ID: <20080622064035.E7A78169E48@codespeak.net> Author: scoder Date: Sun Jun 22 08:40:33 2008 New Revision: 56013 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/main.txt Log: r4543 at delle: sbehnel | 2008-06-22 08:38:31 +0200 doc update after release of 2.0.7 Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Sun Jun 22 08:40:33 2008 @@ -223,6 +223,8 @@ * `lxml 2.1alpha1`_, released 2008-03-27 (`changes for 2.1alpha1`_) +* `lxml 2.0.7`_, released 2008-06-20 (`changes for 2.0.7`_) + * `lxml 2.0.6`_, released 2008-05-31 (`changes for 2.0.6`_) * `lxml 2.0.5`_, released 2008-05-01 (`changes for 2.0.5`_) @@ -291,6 +293,7 @@ .. _`lxml 2.1beta2`: lxml-2.1beta2.tgz .. _`lxml 2.1beta1`: lxml-2.1beta1.tgz .. _`lxml 2.1alpha1`: lxml-2.1alpha1.tgz +.. _`lxml 2.0.7`: lxml-2.0.7.tgz .. _`lxml 2.0.6`: lxml-2.0.6.tgz .. _`lxml 2.0.5`: lxml-2.0.5.tgz .. _`lxml 2.0.4`: lxml-2.0.4.tgz @@ -327,6 +330,7 @@ .. _`changes for 2.1beta2`: changes-2.1beta2.html .. _`changes for 2.1beta1`: changes-2.1beta1.html .. _`changes for 2.1alpha1`: changes-2.1alpha1.html +.. _`changes for 2.0.7`: changes-2.0.7.html .. _`changes for 2.0.6`: changes-2.0.6.html .. _`changes for 2.0.5`: changes-2.0.5.html .. _`changes for 2.0.4`: changes-2.0.4.html From scoder at codespeak.net Sun Jun 22 11:01:46 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 22 Jun 2008 11:01:46 +0200 (CEST) Subject: [Lxml-checkins] r56015 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20080622090146.413792D8005@codespeak.net> Author: scoder Date: Sun Jun 22 11:01:45 2008 New Revision: 56015 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/cssselect.py lxml/trunk/src/lxml/tests/test_css.txt Log: r4545 at delle: sbehnel | 2008-06-22 10:55:55 +0200 fix: cssselect parser skipped remaining expression after parsing function with parameters Modified: lxml/trunk/src/lxml/cssselect.py ============================================================================== --- lxml/trunk/src/lxml/cssselect.py (original) +++ lxml/trunk/src/lxml/cssselect.py Sun Jun 22 11:01:45 2008 @@ -657,9 +657,9 @@ result = parse_simple_selector(stream) while 1: peek = stream.peek() - if peek == ',' or peek == ')' or peek is None: + if peek == ',' or peek is None: return result - if stream.peek() in ('+', '>', '~'): + elif peek in ('+', '>', '~'): # A combinator combinator = stream.next() else: @@ -728,11 +728,11 @@ else: # FIXME: parse_simple_selector, or selector, or...? selector = parse_simple_selector(stream) - next = stream.next() - if not next == ')': - raise SelectorSyntaxError( - "Expected ), got %r and %r" - % (next, selector)) + next = stream.next() + if not next == ')': + raise SelectorSyntaxError( + "Expected ), got %r and %r" + % (next, selector)) result = Function(result, type, ident, selector) else: result = Pseudo(result, type, ident) Modified: lxml/trunk/src/lxml/tests/test_css.txt ============================================================================== --- lxml/trunk/src/lxml/tests/test_css.txt (original) +++ lxml/trunk/src/lxml/tests/test_css.txt Sun Jun 22 11:01:45 2008 @@ -16,6 +16,8 @@ Then of parsing: + >>> parse('td.foo, .bar') + Or([Class[Element[td].foo], CombinedSelector[Element[*] Class[Element[*].bar]]]) >>> parse('div, td.foo, div.bar span') Or([Element[div], Class[Element[td].foo], CombinedSelector[Class[Element[div].bar] Element[span]]]) >>> parse('div > p') @@ -32,6 +34,8 @@ Function[Element[div]:nth-child(10)] >>> parse('div:nth-of-type(10)') Function[Element[div]:nth-of-type(10)] + >>> parse('div div:nth-of-type(10) .aclass') + CombinedSelector[CombinedSelector[Element[div] Function[Element[div]:nth-of-type(10)]] Class[Element[*].aclass]] >>> parse('label:only') Pseudo[Element[label]:only] >>> parse('a:lang(fr)') @@ -78,6 +82,10 @@ */e[position() = 1] >>> xpath('E:nth-last-of-type(1)') */e[position() = last() - 1] + >>> xpath('E:nth-last-of-type(1)') + */e[position() = last() - 1] + >>> xpath('div E:nth-last-of-type(1) .aclass') + div/descendant::e[position() = last() - 1]/descendant::*[contains(concat(' ', normalize-space(@class), ' '), ' aclass ')] >>> xpath('E:first-child') */*[name() = 'e' and (position() = 1)] >>> xpath('E:last-child') From scoder at codespeak.net Sun Jun 22 11:01:51 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 22 Jun 2008 11:01:51 +0200 (CEST) Subject: [Lxml-checkins] r56016 - lxml/trunk Message-ID: <20080622090151.6B2912D8005@codespeak.net> Author: scoder Date: Sun Jun 22 11:01:50 2008 New Revision: 56016 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r4546 at delle: sbehnel | 2008-06-22 10:56:31 +0200 merged in changelog section of 2.0.7 Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Jun 22 11:01:50 2008 @@ -11,12 +11,37 @@ Bugs fixed ---------- +Other changes +------------- + + +2.0.7 (2008-06-20) +================== + +Features added +-------------- + +* Pickling ``ElementTree`` objects in lxml.objectify. + +Bugs fixed +---------- + * Descending dot-separated classes in CSS selectors were not resolved correctly. +* ``ElementTree.parse()`` didn't handle target parser result. + +* Potential threading problem in XInclude. + +* Crash in Element class lookup classes when the __init__() method of + the super class is not called from Python subclasses. + Other changes ------------- +* Non-ASCII characters in attribute values are no longer escaped on + serialisation. + 2.1beta3 (2008-06-19) ===================== From scoder at codespeak.net Sun Jun 22 11:01:55 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 22 Jun 2008 11:01:55 +0200 (CEST) Subject: [Lxml-checkins] r56017 - lxml/trunk Message-ID: <20080622090155.758902D8005@codespeak.net> Author: scoder Date: Sun Jun 22 11:01:54 2008 New Revision: 56017 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r4547 at delle: sbehnel | 2008-06-22 10:58:55 +0200 changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Jun 22 11:01:54 2008 @@ -11,6 +11,9 @@ Bugs fixed ---------- +* CSS selector parser dropped remaining expression after a function + with parameters. + Other changes ------------- From scoder at codespeak.net Sun Jun 22 11:03:13 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 22 Jun 2008 11:03:13 +0200 (CEST) Subject: [Lxml-checkins] r56018 - in lxml/branch/lxml-2.0: . src/lxml src/lxml/tests Message-ID: <20080622090313.99278168060@codespeak.net> Author: scoder Date: Sun Jun 22 11:03:12 2008 New Revision: 56018 Modified: lxml/branch/lxml-2.0/CHANGES.txt lxml/branch/lxml-2.0/src/lxml/cssselect.py lxml/branch/lxml-2.0/src/lxml/tests/test_css.txt Log: CSS parser fix from trunk Modified: lxml/branch/lxml-2.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.0/CHANGES.txt (original) +++ lxml/branch/lxml-2.0/CHANGES.txt Sun Jun 22 11:03:12 2008 @@ -2,6 +2,22 @@ lxml changelog ============== +Under development +================= + +Features added +-------------- + +Bugs fixed +---------- + +* CSS selector parser dropped remaining expression after a function + with parameters. + +Other changes +------------- + + 2.0.7 (2008-06-20) ================== Modified: lxml/branch/lxml-2.0/src/lxml/cssselect.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/cssselect.py (original) +++ lxml/branch/lxml-2.0/src/lxml/cssselect.py Sun Jun 22 11:03:12 2008 @@ -645,9 +645,9 @@ result = parse_simple_selector(stream) while 1: peek = stream.peek() - if peek == ',' or peek == ')' or peek is None: + if peek == ',' or peek is None: return result - if stream.peek() in ('+', '>', '~'): + elif peek in ('+', '>', '~'): # A combinator combinator = stream.next() else: @@ -716,11 +716,11 @@ else: # FIXME: parse_simple_selector, or selector, or...? selector = parse_simple_selector(stream) - next = stream.next() - if not next == ')': - raise SelectorSyntaxError( - "Expected ), got %r and %r" - % (next, selector)) + next = stream.next() + if not next == ')': + raise SelectorSyntaxError( + "Expected ), got %r and %r" + % (next, selector)) result = Function(result, type, ident, selector) else: result = Pseudo(result, type, ident) Modified: lxml/branch/lxml-2.0/src/lxml/tests/test_css.txt ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/tests/test_css.txt (original) +++ lxml/branch/lxml-2.0/src/lxml/tests/test_css.txt Sun Jun 22 11:03:12 2008 @@ -16,6 +16,8 @@ Then of parsing: + >>> parse('td.foo, .bar') + Or([Class[Element[td].foo], CombinedSelector[Element[*] Class[Element[*].bar]]]) >>> parse('div, td.foo, div.bar span') Or([Element[div], Class[Element[td].foo], CombinedSelector[Class[Element[div].bar] Element[span]]]) >>> parse('div > p') @@ -32,6 +34,8 @@ Function[Element[div]:nth-child(10)] >>> parse('div:nth-of-type(10)') Function[Element[div]:nth-of-type(10)] + >>> parse('div div:nth-of-type(10) .aclass') + CombinedSelector[CombinedSelector[Element[div] Function[Element[div]:nth-of-type(10)]] Class[Element[*].aclass]] >>> parse('label:only') Pseudo[Element[label]:only] >>> parse('a:lang(fr)') @@ -78,6 +82,10 @@ */e[position() = 1] >>> xpath('E:nth-last-of-type(1)') */e[position() = last() - 1] + >>> xpath('E:nth-last-of-type(1)') + */e[position() = last() - 1] + >>> xpath('div E:nth-last-of-type(1) .aclass') + div/descendant::e[position() = last() - 1]/descendant::*[contains(concat(' ', normalize-space(@class), ' '), ' aclass ')] >>> xpath('E:first-child') */*[name() = 'e' and (position() = 1)] >>> xpath('E:last-child') From scoder at codespeak.net Sun Jun 22 11:10:20 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 22 Jun 2008 11:10:20 +0200 (CEST) Subject: [Lxml-checkins] r56019 - in lxml/branch/lxml-2.0: . src/lxml/html src/lxml/html/tests Message-ID: <20080622091020.D4C422D8005@codespeak.net> Author: scoder Date: Sun Jun 22 11:10:18 2008 New Revision: 56019 Modified: lxml/branch/lxml-2.0/CHANGES.txt lxml/branch/lxml-2.0/src/lxml/html/__init__.py lxml/branch/lxml-2.0/src/lxml/html/tests/test_rewritelinks.txt Log: from trunk: strip links in lxml.html.rewrite_links() Modified: lxml/branch/lxml-2.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.0/CHANGES.txt (original) +++ lxml/branch/lxml-2.0/CHANGES.txt Sun Jun 22 11:10:18 2008 @@ -8,6 +8,9 @@ Features added -------------- +* ``lxml.html.rewrite_links()`` strips links to work around documents + with whitespace in URL attributes. + Bugs fixed ---------- Modified: lxml/branch/lxml-2.0/src/lxml/html/__init__.py ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/html/__init__.py (original) +++ lxml/branch/lxml-2.0/src/lxml/html/__init__.py Sun Jun 22 11:10:18 2008 @@ -319,7 +319,7 @@ elif resolve_base_href: self.resolve_base_href() for el, attrib, link, pos in self.iterlinks(): - new_link = link_repl_func(link) + new_link = link_repl_func(link.strip()) if new_link == link: continue if new_link is None: Modified: lxml/branch/lxml-2.0/src/lxml/html/tests/test_rewritelinks.txt ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/html/tests/test_rewritelinks.txt (original) +++ lxml/branch/lxml-2.0/src/lxml/html/tests/test_rewritelinks.txt Sun Jun 22 11:10:18 2008 @@ -115,3 +115,57 @@ a href="/other.html" td style="/td-bg.png"@22 img src="/logo.gif" + +An application of ``iterlinks()`` is ``make_links_absolute()``:: + + >>> from lxml.html import make_links_absolute + >>> print(make_links_absolute(''' + ... + ... + ... + ... + ... + ... + ... + ... + ... + ... + ...
+ ... + ... Hi world! + ...
+ ... ''', + ... base_url="http://my.little.server/url/")) + + + + + + + + + + + +
+ + Hi world! +
+ + From scoder at codespeak.net Sun Jun 22 11:17:57 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 22 Jun 2008 11:17:57 +0200 (CEST) Subject: [Lxml-checkins] r56020 - lxml/trunk Message-ID: <20080622091757.2D41B2D80C7@codespeak.net> Author: scoder Date: Sun Jun 22 11:17:56 2008 New Revision: 56020 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt Log: r4551 at delle: sbehnel | 2008-06-22 11:06:38 +0200 changelog Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Sun Jun 22 11:17:56 2008 @@ -8,6 +8,9 @@ Features added -------------- +* ``lxml.html.rewrite_links()`` strips links to work around documents + with whitespace in URL attributes. + Bugs fixed ---------- From scoder at codespeak.net Tue Jun 24 19:40:10 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 24 Jun 2008 19:40:10 +0200 (CEST) Subject: [Lxml-checkins] r56052 - in lxml/trunk: . doc Message-ID: <20080624174010.1F47A169EF3@codespeak.net> Author: scoder Date: Tue Jun 24 19:40:07 2008 New Revision: 56052 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/lxmlhtml.txt Log: r4555 at delle: sbehnel | 2008-06-24 19:37:58 +0200 credits Modified: lxml/trunk/doc/lxmlhtml.txt ============================================================================== --- lxml/trunk/doc/lxmlhtml.txt (original) +++ lxml/trunk/doc/lxmlhtml.txt Tue Jun 24 19:40:07 2008 @@ -2,6 +2,9 @@ lxml.html ========= +:Author: + Ian Bicking + Since version 2.0, lxml comes with a dedicated package for dealing with HTML: ``lxml.html``. It provides a special Element API for HTML elements, as well as a number of utilities for common tasks. From scoder at codespeak.net Wed Jun 25 21:57:28 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 25 Jun 2008 21:57:28 +0200 (CEST) Subject: [Lxml-checkins] r56077 - in lxml/trunk: . src/lxml src/lxml/tests Message-ID: <20080625195728.0F30D698042@codespeak.net> Author: scoder Date: Wed Jun 25 21:57:26 2008 New Revision: 56077 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/ElementInclude.py lxml/trunk/src/lxml/parser.pxi lxml/trunk/src/lxml/tests/test_etree.py lxml/trunk/src/lxml/tests/test_xmlschema.py lxml/trunk/src/lxml/xinclude.pxi lxml/trunk/src/lxml/xmlschema.pxi Log: r4557 at delle: sbehnel | 2008-06-25 09:25:08 +0200 applied and refactored schema/xinclude resolvers patch by Michael Ballback Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Jun 25 21:57:26 2008 @@ -8,6 +8,9 @@ Features added -------------- +* Custom resolvers are now used for XMLSchema includes/imports and + XInclude processing. + * ``lxml.html.rewrite_links()`` strips links to work around documents with whitespace in URL attributes. Modified: lxml/trunk/src/lxml/ElementInclude.py ============================================================================== --- lxml/trunk/src/lxml/ElementInclude.py (original) +++ lxml/trunk/src/lxml/ElementInclude.py Wed Jun 25 21:57:26 2008 @@ -91,7 +91,7 @@ # @throws IOError If the loader fails to load the resource. def default_loader(href, parse, encoding=None): - file = open(href) + file = open(href, 'rb') if parse == "xml": data = etree.parse(file).getroot() else: @@ -112,7 +112,7 @@ if "://" in href: f = urlopen(href) else: - f = open(href) + f = open(href, 'rb') data = f.read() f.close() if encoding: @@ -165,7 +165,7 @@ parser = elem.getroottree().parser include_elements = list( - elem.getiterator('{http://www.w3.org/2001/XInclude}*')) + elem.iter('{http://www.w3.org/2001/XInclude}*')) for e in include_elements: if e.tag == XINCLUDE_INCLUDE: Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Wed Jun 25 21:57:26 2008 @@ -3,6 +3,13 @@ cimport xmlparser cimport htmlparser +cdef class _ParserContext(_ResolverContext) +cdef class _SaxParserContext(_ParserContext) +cdef class _TargetParserContext(_SaxParserContext) +cdef class _ParserSchemaValidationContext +cdef class _Validator +cdef class XMLSchema(_Validator) + class ParseError(LxmlSyntaxError): u"""Syntax error while parsing an XML document. @@ -42,6 +49,11 @@ cdef tree.xmlDict* _c_dict cdef _BaseParser _default_parser + cdef object _implied_parser_contexts + + def __init__(self): + self._implied_parser_contexts = [] + def __dealloc__(self): if self._c_dict is not NULL: xmlparser.xmlDictFree(self._c_dict) @@ -131,6 +143,45 @@ # otherwise we'd free data that's in use => segfault self.initThreadDictRef(&result.dict) + cdef _ParserContext findImpliedContext(self): + u"""Return any current implied xml parser context for the current + thread. This is used when the resolver functions are called + with an xmlParserCtxt that was generated from within libxml2 + (i.e. without a _ParserContext) - which happens when parsing + schema and xinclude external references.""" + cdef _ParserDictionaryContext context + cdef _ParserContext implied_context + cdef Py_ssize_t count + + # see if we have a current implied parser + context = self._findThreadParserContext() + count = python.PyList_GET_SIZE(context._implied_parser_contexts) + if count != 0: + implied_context = python.PyList_GET_ITEM( + context._implied_parser_contexts, count - 1) + python.Py_INCREF(implied_context) # borrowed reference + return implied_context + return None + + cdef void pushImpliedContextFromParser(self, _BaseParser parser): + u"Push a new implied context object taken from the parser." + if parser is not None: + self.pushImpliedContext(parser._getParserContext()) + else: + self.pushImpliedContext(None) + + cdef void pushImpliedContext(self, _ParserContext parser_context): + u"Push a new implied context object." + cdef _ParserDictionaryContext context + context = self._findThreadParserContext() + python.PyList_Append(context._implied_parser_contexts, parser_context) + + cdef void popImpliedContext(self): + u"Pop the current implied context object." + cdef _ParserDictionaryContext context + context = self._findThreadParserContext() + context._implied_parser_contexts.pop() + cdef _ParserDictionaryContext __GLOBAL_PARSER_CONTEXT __GLOBAL_PARSER_CONTEXT = _ParserDictionaryContext() __GLOBAL_PARSER_CONTEXT.initMainParserContext() @@ -346,16 +397,25 @@ ## support for custom document loaders ############################################################ -cdef xmlparser.xmlParserInput* _parser_resolve_from_python( - char* c_url, char* c_pubid, xmlparser.xmlParserCtxt* c_context, - int* error) with gil: - # call the Python document loaders - cdef xmlparser.xmlParserInput* c_input +cdef xmlparser.xmlParserInput* _local_resolver(char* c_url, char* c_pubid, + xmlparser.xmlParserCtxt* c_context) with gil: cdef _ResolverContext context - cdef _InputDocument doc_ref + cdef xmlparser.xmlParserInput* c_input + cdef _InputDocument doc_ref cdef _FileReaderContext file_context - error[0] = 0 - context = <_ResolverContext>c_context._private + # if there is no _ParserContext associated with the xmlParserCtxt + # passed, check to see if the thread state object has an implied + # context. + if c_context._private is not NULL: + context = <_ResolverContext>c_context._private + else: + context = __GLOBAL_PARSER_CONTEXT.findImpliedContext() + + if context is None: + if __DEFAULT_ENTITY_LOADER is NULL: + return NULL + return __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context) + try: if c_url is NULL: url = None @@ -368,48 +428,31 @@ pubid = funicode(c_pubid) # always UTF-8 doc_ref = context._resolvers.resolve(url, pubid, context) - if doc_ref is None: - return NULL except: context._store_raised() - error[0] = 1 return NULL - c_input = NULL - data = None - if doc_ref._type == PARSER_DATA_STRING: - data = doc_ref._data_bytes - c_input = xmlparser.xmlNewStringInputStream( - c_context, _cstr(data)) - elif doc_ref._type == PARSER_DATA_FILENAME: - c_input = xmlparser.xmlNewInputFromFile( - c_context, _cstr(doc_ref._filename)) - elif doc_ref._type == PARSER_DATA_FILE: - file_context = _FileReaderContext(doc_ref._file, context, url) - c_input = file_context._createParserInput(c_context) - data = file_context - - if data is not None: - context._storage.add(data) - return c_input - -cdef xmlparser.xmlParserInput* _local_resolver(char* c_url, char* c_pubid, - xmlparser.xmlParserCtxt* c_context) nogil: - # no Python objects here, may be called without thread context ! - # when we declare a Python object, Pyrex will INCREF(None) ! - cdef xmlparser.xmlParserInput* c_input - cdef int error - if c_context._private is NULL: - if __DEFAULT_ENTITY_LOADER is NULL: - return NULL - return __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context) + if doc_ref is not None: + if doc_ref._type == PARSER_DATA_STRING: + data = doc_ref._data_bytes + c_input = xmlparser.xmlNewStringInputStream( + c_context, _cstr(data)) + elif doc_ref._type == PARSER_DATA_FILENAME: + c_input = xmlparser.xmlNewInputFromFile( + c_context, _cstr(doc_ref._filename)) + elif doc_ref._type == PARSER_DATA_FILE: + file_context = _FileReaderContext(doc_ref._file, context, url) + c_input = file_context._createParserInput(c_context) + data = file_context + else: + data = None + c_input = NULL - c_input = _parser_resolve_from_python(c_url, c_pubid, c_context, &error) + if data is not None: + context._storage.add(data) + if c_input is not NULL: + return c_input - if c_input is not NULL: - return c_input - if error: - return NULL if __DEFAULT_ENTITY_LOADER is NULL: return NULL return __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context) @@ -423,13 +466,6 @@ ## Parsers ############################################################ -cdef class _ParserContext(_ResolverContext) -cdef class _SaxParserContext(_ParserContext) -cdef class _TargetParserContext(_SaxParserContext) -cdef class _ParserSchemaValidationContext -cdef class _Validator -cdef class XMLSchema(_Validator) - cdef class _ParserContext(_ResolverContext): cdef _ErrorLog _error_log cdef _ParserSchemaValidationContext _validator Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Wed Jun 25 21:57:26 2008 @@ -2342,6 +2342,37 @@ 'a', tree.getroot()[1].tag) + def test_xinclude_resolver(self): + class res(etree.Resolver): + include_text = open(fileInTestDir('test.xml')).read() + called = {} + def resolve(self, url, id, context): + if url.endswith(".dtd"): + self.called["dtd"] = True + return self.resolve_filename( + fileInTestDir('test.dtd'), context) + elif url.endswith("test_xinclude.xml"): + self.called["input"] = True + return None # delegate to default resolver + else: + self.called["include"] = True + return self.resolve_string(self.include_text, context) + + res_instance = res() + parser = etree.XMLParser(load_dtd = True) + parser.resolvers.add(res_instance) + + tree = etree.parse(fileInTestDir('include/test_xinclude.xml'), + parser = parser) + + self.include(tree) + + called = res_instance.called.items() + called.sort() + self.assertEquals( + [("dtd", True), ("include", True), ("input", True)], + called) + class ETreeXIncludeTestCase(XIncludeTestCase): def include(self, tree): tree.xinclude() Modified: lxml/trunk/src/lxml/tests/test_xmlschema.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xmlschema.py (original) +++ lxml/trunk/src/lxml/tests/test_xmlschema.py Wed Jun 25 21:57:26 2008 @@ -152,7 +152,110 @@ self.assert_(tree_valid.xmlschema(schema)) self.assert_(not tree_invalid.xmlschema(schema)) - + # + # schema + resolvers tests&data: + # + + resolver_schema_int = BytesIO("""\ + + + +""") + + resolver_schema_int2 = BytesIO("""\ + + + +""") + + resolver_schema_ext = """\ + + + + +""" + + class simple_resolver(etree.Resolver): + def __init__(self, schema): + self.schema = schema + + def resolve(self, url, id, context): + assert url == 'XXX.xsd' + return self.resolve_string(self.schema, context) + + def test_xmlschema_resolvers(self): + """Test that resolvers work with schema.""" + parser = etree.XMLParser() + parser.resolvers.add(self.simple_resolver(self.resolver_schema_ext)) + schema_doc = etree.parse(self.resolver_schema_int, parser = parser) + schema = etree.XMLSchema(schema_doc) + + def test_xmlschema_resolvers_root(self): + """Test that the default resolver will get called if there's no + specific parser resolver.""" + root_resolver = self.simple_resolver(self.resolver_schema_ext) + etree.get_default_parser().resolvers.add(root_resolver) + schema_doc = etree.parse(self.resolver_schema_int) + schema = etree.XMLSchema(schema_doc) + etree.get_default_parser().resolvers.remove(root_resolver) + + def test_xmlschema_resolvers_noroot(self): + """Test that the default resolver will not get called when a more + specific resolver is registered.""" + + class res_root(etree.Resolver): + def resolve(self, url, id, context): + assert False + return None + + root_resolver = res_root() + etree.get_default_parser().resolvers.add(root_resolver) + + parser = etree.XMLParser() + parser.resolvers.add(self.simple_resolver(self.resolver_schema_ext)) + + schema_doc = etree.parse(self.resolver_schema_int, parser = parser) + schema = etree.XMLSchema(schema_doc) + etree.get_default_parser().resolvers.remove(root_resolver) + + def test_xmlschema_nested_resolvers(self): + """Test that resolvers work in a nested fashion.""" + + resolver_schema = self.resolver_schema_ext + + class res_nested(etree.Resolver): + def __init__(self, ext_schema): + self.ext_schema = ext_schema + + def resolve(self, url, id, context): + assert url == 'YYY.xsd' + return self.resolve_string(self.ext_schema, context) + + class res(etree.Resolver): + def __init__(self, ext_schema_1, ext_schema_2): + self.ext_schema_1 = ext_schema_1 + self.ext_schema_2 = ext_schema_2 + + def resolve(self, url, id, context): + assert url == 'XXX.xsd' + + new_parser = etree.XMLParser() + new_parser.resolvers.add(res_nested(self.ext_schema_2)) + new_schema_doc = etree.parse(self.ext_schema_1, parser = new_parser) + new_schema = etree.XMLSchema(new_schema_doc) + + return self.resolve_string(resolver_schema, context) + + parser = etree.XMLParser() + parser.resolvers.add(res(self.resolver_schema_int2, self.resolver_schema_ext)) + schema_doc = etree.parse(self.resolver_schema_int, parser = parser) + schema = etree.XMLSchema(schema_doc) + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeXMLSchemaTestCase)]) Modified: lxml/trunk/src/lxml/xinclude.pxi ============================================================================== --- lxml/trunk/src/lxml/xinclude.pxi (original) +++ lxml/trunk/src/lxml/xinclude.pxi Wed Jun 25 21:57:26 2008 @@ -33,12 +33,15 @@ # i.e. as a sibling, which does not conflict with traversal. cdef int result self._error_log.connect() + __GLOBAL_PARSER_CONTEXT.pushImpliedContextFromParser( + node._doc._parser) with nogil: if node._doc._parser is not None: result = xinclude.xmlXIncludeProcessTreeFlags( node._c_node, node._doc._parser._parse_options) else: result = xinclude.xmlXIncludeProcessTree(node._c_node) + __GLOBAL_PARSER_CONTEXT.popImpliedContext() self._error_log.disconnect() if result == -1: Modified: lxml/trunk/src/lxml/xmlschema.pxi ============================================================================== --- lxml/trunk/src/lxml/xmlschema.pxi (original) +++ lxml/trunk/src/lxml/xmlschema.pxi Wed Jun 25 21:57:26 2008 @@ -65,7 +65,14 @@ raise XMLSchemaParseError, u"No tree or file given" if parser_ctxt is not NULL: + # calling xmlSchemaParse on a schema with imports or + # includes will cause libxml2 to create an internal + # context for parsing, so push an implied context to route + # resolve requests to the document's parser + __GLOBAL_PARSER_CONTEXT.pushImpliedContextFromParser(doc._parser) self._c_schema = xmlschema.xmlSchemaParse(parser_ctxt) + __GLOBAL_PARSER_CONTEXT.popImpliedContext() + if _LIBXML_VERSION_INT >= 20624: xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt)