From ianb at codespeak.net Wed Dec 10 20:50:32 2008 From: ianb at codespeak.net (ianb at codespeak.net) Date: Wed, 10 Dec 2008 20:50:32 +0100 (CET) Subject: [Lxml-checkins] r60417 - in lxml/trunk: . src/lxml/html Message-ID: <20081210195032.884191684CD@codespeak.net> Author: ianb Date: Wed Dec 10 20:50:29 2008 New Revision: 60417 Modified: lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/html/__init__.py lxml/trunk/src/lxml/html/diff.py Log: Small cleanups to lxml.html.diff. Export lxml.html.parse Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Dec 10 20:50:29 2008 @@ -2,6 +2,22 @@ lxml changelog ============== +svn trunk +========= + +Bugs fixed +---------- + +* Fixed missing whitespace before ``Link:...`` in ``lxml.html.diff`` + +Other changes +------------- + +* Export ``lxml.html.parse`` + +* Allow ``lxml.html.diff.htmldiff`` to accept element objects, not + just HTML strings + 2.2alpha1 (2008-11-23) ====================== Modified: lxml/trunk/src/lxml/html/__init__.py ============================================================================== --- lxml/trunk/src/lxml/html/__init__.py (original) +++ lxml/trunk/src/lxml/html/__init__.py Wed Dec 10 20:50:29 2008 @@ -51,7 +51,7 @@ 'document_fromstring', 'fragment_fromstring', 'fragments_fromstring', 'fromstring', 'tostring', 'Element', 'defs', 'open_in_browser', 'submit_form', 'find_rel_links', 'find_class', 'make_links_absolute', - 'resolve_base_href', 'iterlinks', 'rewrite_links', 'open_in_browser'] + 'resolve_base_href', 'iterlinks', 'rewrite_links', 'open_in_browser', 'parse'] XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml" Modified: lxml/trunk/src/lxml/html/diff.py ============================================================================== --- lxml/trunk/src/lxml/html/diff.py (original) +++ lxml/trunk/src/lxml/html/diff.py Wed Dec 10 20:50:29 2008 @@ -1,6 +1,7 @@ import difflib from lxml import etree from lxml.html import fragment_fromstring +from lxml.etree import _Element import cgi import re @@ -507,7 +508,7 @@ hide_when_equal = True def html(self): - return 'Link: %s' % self + return ' Link: %s' % self def tokenize(html, include_hrefs=True): """ @@ -524,7 +525,10 @@ If include_hrefs is true, then the href attribute of tags is included as a special kind of diffable token.""" - body_el = parse_html(html, cleanup=True) + if isinstance(html, _Element): + body_el = html + else: + body_el = parse_html(html, cleanup=True) # Then we split the document into text chunks for each tag, word, and end tag: chunks = flatten_el(body_el, skip_tag=True, include_hrefs=include_hrefs) # Finally re-joining them into token objects: From scoder at codespeak.net Wed Dec 10 21:03:28 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 10 Dec 2008 21:03:28 +0100 (CET) Subject: [Lxml-checkins] r60418 - in lxml/trunk: . src/lxml Message-ID: <20081210200328.116621684CA@codespeak.net> Author: scoder Date: Wed Dec 10 21:03:27 2008 New Revision: 60418 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/extensions.pxi lxml/trunk/src/lxml/python.pxd lxml/trunk/src/lxml/xslt.pxi Log: r4906 at delle: sbehnel | 2008-12-07 21:19:42 +0100 avoid calling PyDict_Copy() directly Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Wed Dec 10 21:03:27 2008 @@ -104,7 +104,7 @@ context = self.__class__(namespaces, None, False, self._build_smart_strings) if self._extensions is not None: - context._extensions = python.PyDict_Copy(self._extensions) + context._extensions = self._extensions.copy() return context cdef object _to_utf(self, s): Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Wed Dec 10 21:03:27 2008 @@ -65,7 +65,7 @@ cdef PyObject* PyDict_GetItem(object d, object key) # cdef int PyDict_DelItem(object d, object key) except -1 cdef void PyDict_Clear(object d) - cdef object PyDict_Copy(object d) +# cdef object PyDict_Copy(object d) cdef object PyDictProxy_New(object d) # cdef int PyDict_Contains(object d, object key) except -1 # Python 2.4+ cdef Py_ssize_t PyDict_Size(object d) Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Wed Dec 10 21:03:27 2008 @@ -283,7 +283,7 @@ if isinstance(extension, XSLTExtension): if self._extension_elements is EMPTY_DICT: self._extension_elements = {} - extensions = python.PyDict_Copy(extensions) + extensions = extensions.copy() ns_utf = _utf8(ns_name_tuple[0]) name_utf = _utf8(ns_name_tuple[1]) self._extension_elements[(ns_utf, name_utf)] = extension From scoder at codespeak.net Wed Dec 10 21:03:40 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 10 Dec 2008 21:03:40 +0100 (CET) Subject: [Lxml-checkins] r60419 - in lxml/trunk: . src/lxml/html Message-ID: <20081210200340.A74D31684CD@codespeak.net> Author: scoder Date: Wed Dec 10 21:03:39 2008 New Revision: 60419 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/html/diff.py Log: r4909 at delle: sbehnel | 2008-12-10 21:02:11 +0100 cleanup Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Dec 10 21:03:39 2008 @@ -2,8 +2,11 @@ lxml changelog ============== -svn trunk -========= +Under development +================= + +Features added +-------------- Bugs fixed ---------- @@ -18,6 +21,7 @@ * Allow ``lxml.html.diff.htmldiff`` to accept element objects, not just HTML strings + 2.2alpha1 (2008-11-23) ====================== Modified: lxml/trunk/src/lxml/html/diff.py ============================================================================== --- lxml/trunk/src/lxml/html/diff.py (original) +++ lxml/trunk/src/lxml/html/diff.py Wed Dec 10 21:03:39 2008 @@ -1,7 +1,6 @@ import difflib from lxml import etree from lxml.html import fragment_fromstring -from lxml.etree import _Element import cgi import re @@ -525,7 +524,7 @@ If include_hrefs is true, then the href attribute of tags is included as a special kind of diffable token.""" - if isinstance(html, _Element): + if etree.iselement(html): body_el = html else: body_el = parse_html(html, cleanup=True) From ianb at codespeak.net Thu Dec 11 18:30:55 2008 From: ianb at codespeak.net (ianb at codespeak.net) Date: Thu, 11 Dec 2008 18:30:55 +0100 (CET) Subject: [Lxml-checkins] r60430 - lxml/trunk/doc Message-ID: <20081211173055.351D9168455@codespeak.net> Author: ianb Date: Thu Dec 11 18:30:52 2008 New Revision: 60430 Modified: lxml/trunk/doc/elementsoup.txt Log: tone down the degree to which BeautifulSoup is thought to parse better than libxml2 Modified: lxml/trunk/doc/elementsoup.txt ============================================================================== --- lxml/trunk/doc/elementsoup.txt (original) +++ lxml/trunk/doc/elementsoup.txt Thu Dec 11 18:30:52 2008 @@ -2,9 +2,11 @@ BeautifulSoup Parser ==================== -BeautifulSoup_ is a Python package that parses broken HTML. While libxml2 -(and thus lxml) can also parse broken HTML, BeautifulSoup is a bit more -forgiving and has superiour `support for encoding detection`_. +BeautifulSoup_ is a Python package that parses broken HTML. While +libxml2 (and thus lxml) can also parse broken HTML, BeautifulSoup is a +bit more forgiving in some cases (though it is also common that +lxml/libxml2 will parse common broken HTML better), and BeautifulSoup +and has superiour `support for encoding detection`_. .. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/ .. _`support for encoding detection`: http://www.crummy.com/software/BeautifulSoup/documentation.html#Beautiful%20Soup%20Gives%20You%20Unicode%2C%20Dammit From scoder at codespeak.net Fri Dec 12 19:16:25 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 12 Dec 2008 19:16:25 +0100 (CET) Subject: [Lxml-checkins] r60465 - lxml/trunk Message-ID: <20081212181625.B03EB168480@codespeak.net> Author: scoder Date: Fri Dec 12 19:16:23 2008 New Revision: 60465 Modified: lxml/trunk/ (props changed) lxml/trunk/buildlibxml.py Log: r4912 at delle: sbehnel | 2008-12-12 19:15:08 +0100 keep old env vars when setting Mac compile flags Modified: lxml/trunk/buildlibxml.py ============================================================================== --- lxml/trunk/buildlibxml.py (original) +++ lxml/trunk/buildlibxml.py Fri Dec 12 19:16:23 2008 @@ -171,11 +171,13 @@ # We compile Universal if we are on a machine > 10.3 major_version = int(os.uname()[2].split('.')[0]) if major_version > 7: - call_setup['env'] = { + env = os.environ.copy() + env.update({ 'CFLAGS' : "-arch ppc -arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk -O2", 'LDFLAGS' : "-arch ppc -arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk", 'MACOSX_DEPLOYMENT_TARGET' : "10.3" - } + }) + call_setup['env'] = env # We may loose the link to iconv, so make sure it's there static_binaries.append('-liconv') From scoder at codespeak.net Fri Dec 12 19:41:43 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 12 Dec 2008 19:41:43 +0100 (CET) Subject: [Lxml-checkins] r60467 - in lxml/trunk: . src/lxml/html Message-ID: <20081212184143.0C13816846C@codespeak.net> Author: scoder Date: Fri Dec 12 19:41:42 2008 New Revision: 60467 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/html/__init__.py Log: r4916 at delle: sbehnel | 2008-12-12 19:40:26 +0100 fix bug #307233 Modified: lxml/trunk/src/lxml/html/__init__.py ============================================================================== --- lxml/trunk/src/lxml/html/__init__.py (original) +++ lxml/trunk/src/lxml/html/__init__.py Fri Dec 12 19:41:42 2008 @@ -40,6 +40,8 @@ basestring = (str, bytes) def __fix_docstring(s): + if not s: + return s import sys if sys.version_info[0] >= 3: sub = re.compile(r"^(\s*)u'", re.M).sub From scoder at codespeak.net Fri Dec 12 22:15:05 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 12 Dec 2008 22:15:05 +0100 (CET) Subject: [Lxml-checkins] r60469 - in lxml/trunk: . src/lxml Message-ID: <20081212211505.953131683F1@codespeak.net> Author: scoder Date: Fri Dec 12 22:15:03 2008 New Revision: 60469 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/xpath.pxi Log: r4918 at delle: sbehnel | 2008-12-12 22:13:43 +0100 enable locking for XPath evaluators Modified: lxml/trunk/src/lxml/xpath.pxi ============================================================================== --- lxml/trunk/src/lxml/xpath.pxi (original) +++ lxml/trunk/src/lxml/xpath.pxi Fri Dec 12 22:15:03 2008 @@ -116,6 +116,10 @@ self._error_log = _ErrorLog() self._context = _XPathContext(namespaces, extensions, enable_regexp, None, smart_strings) + if config.ENABLE_THREADING: + self._eval_lock = python.PyThread_allocate_lock() + if self._eval_lock is NULL: + python.PyErr_NoMemory() property error_log: def __get__(self): From scoder at codespeak.net Fri Dec 12 22:18:31 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 12 Dec 2008 22:18:31 +0100 (CET) Subject: [Lxml-checkins] r60470 - in lxml/trunk: . src/lxml Message-ID: <20081212211831.E357816846C@codespeak.net> Author: scoder Date: Fri Dec 12 22:18:31 2008 New Revision: 60470 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/xpath.pxi Log: r4920 at delle: sbehnel | 2008-12-12 22:17:15 +0100 cleanup Modified: lxml/trunk/src/lxml/xpath.pxi ============================================================================== --- lxml/trunk/src/lxml/xpath.pxi (original) +++ lxml/trunk/src/lxml/xpath.pxi Fri Dec 12 22:18:31 2008 @@ -410,7 +410,6 @@ cdef xpath.xmlXPathObject* xpathObj cdef _Document document cdef _Element element - cdef _XPathContext context document = _documentOrRaise(_etree_or_element) element = _rootNodeOrRaise(_etree_or_element) From scoder at codespeak.net Fri Dec 12 22:27:29 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 12 Dec 2008 22:27:29 +0100 (CET) Subject: [Lxml-checkins] r60471 - lxml/branch/lxml-2.1/src/lxml Message-ID: <20081212212729.2EFD4168064@codespeak.net> Author: scoder Date: Fri Dec 12 22:27:27 2008 New Revision: 60471 Modified: lxml/branch/lxml-2.1/src/lxml/xpath.pxi Log: enable locking for XPath evaluators Modified: lxml/branch/lxml-2.1/src/lxml/xpath.pxi ============================================================================== --- lxml/branch/lxml-2.1/src/lxml/xpath.pxi (original) +++ lxml/branch/lxml-2.1/src/lxml/xpath.pxi Fri Dec 12 22:27:27 2008 @@ -116,6 +116,10 @@ self._error_log = _ErrorLog() self._context = _XPathContext(namespaces, extensions, enable_regexp, None, smart_strings) + if config.ENABLE_THREADING: + self._eval_lock = python.PyThread_allocate_lock() + if self._eval_lock is NULL: + python.PyErr_NoMemory() property error_log: def __get__(self): From scoder at codespeak.net Fri Dec 12 22:32:36 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 12 Dec 2008 22:32:36 +0100 (CET) Subject: [Lxml-checkins] r60472 - lxml/branch/lxml-2.0/src/lxml Message-ID: <20081212213236.61FEA168495@codespeak.net> Author: scoder Date: Fri Dec 12 22:32:35 2008 New Revision: 60472 Modified: lxml/branch/lxml-2.0/src/lxml/xpath.pxi Log: enable locking for XPath evaluators Modified: lxml/branch/lxml-2.0/src/lxml/xpath.pxi ============================================================================== --- lxml/branch/lxml-2.0/src/lxml/xpath.pxi (original) +++ lxml/branch/lxml-2.0/src/lxml/xpath.pxi Fri Dec 12 22:32:35 2008 @@ -113,6 +113,10 @@ self._error_log = _ErrorLog() self._context = _XPathContext(namespaces, extensions, enable_regexp, None) + if config.ENABLE_THREADING: + self._eval_lock = python.PyThread_allocate_lock() + if self._eval_lock is NULL: + python.PyErr_NoMemory() property error_log: def __get__(self): From scoder at codespeak.net Fri Dec 12 22:35:49 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 12 Dec 2008 22:35:49 +0100 (CET) Subject: [Lxml-checkins] r60473 - in lxml/branch/lxml-2.0: . doc Message-ID: <20081212213549.3834E1684D0@codespeak.net> Author: scoder Date: Fri Dec 12 22:35:48 2008 New Revision: 60473 Modified: lxml/branch/lxml-2.0/CHANGES.txt lxml/branch/lxml-2.0/doc/main.txt Log: prepare release of 2.0.11 Modified: lxml/branch/lxml-2.0/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.0/CHANGES.txt (original) +++ lxml/branch/lxml-2.0/CHANGES.txt Fri Dec 12 22:35:48 2008 @@ -2,6 +2,15 @@ lxml changelog ============== +2.0.11 (2008-12-12) +=================== + +Bugs fixed +---------- + +* Crash when using an XPath evaluator in multiple threads. + + 2.0.10 (2008-11-17) =================== Modified: lxml/branch/lxml-2.0/doc/main.txt ============================================================================== --- lxml/branch/lxml-2.0/doc/main.txt (original) +++ lxml/branch/lxml-2.0/doc/main.txt Fri Dec 12 22:35:48 2008 @@ -146,8 +146,8 @@ source release. If you can't wait, consider trying a less recent release version first. -The latest version is `lxml 2.0.10`_, released 2008-11-17 -(`changes for 2.0.10`_). `Older versions`_ are listed below. +The latest version is `lxml 2.0.11`_, released 2008-12-12 +(`changes for 2.0.11`_). `Older versions`_ are listed below. Please take a look at the `installation instructions`_! @@ -215,7 +215,9 @@ Old Versions ------------ -.. _`PDF documentation`: lxmldoc-2.0.10.pdf +.. _`PDF documentation`: lxmldoc-2.0.11.pdf + +* `lxml 2.0.10`_, released 2008-11-17 (`changes for 2.0.10`_) * `lxml 2.0.9`_, released 2008-09-05 (`changes for 2.0.9`_) @@ -287,6 +289,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.0.11`: lxml-2.0.11.tgz .. _`lxml 2.0.10`: lxml-2.0.10.tgz .. _`lxml 2.0.9`: lxml-2.0.9.tgz .. _`lxml 2.0.8`: lxml-2.0.8.tgz @@ -323,6 +326,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.0.11`: changes-2.0.11.html .. _`changes for 2.0.10`: changes-2.0.10.html .. _`changes for 2.0.9`: changes-2.0.9.html .. _`changes for 2.0.8`: changes-2.0.8.html From scoder at codespeak.net Fri Dec 12 22:36:06 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 12 Dec 2008 22:36:06 +0100 (CET) Subject: [Lxml-checkins] r60474 - in lxml/branch/lxml-2.1: . doc Message-ID: <20081212213606.829F3168495@codespeak.net> Author: scoder Date: Fri Dec 12 22:36:06 2008 New Revision: 60474 Modified: lxml/branch/lxml-2.1/CHANGES.txt lxml/branch/lxml-2.1/doc/main.txt Log: prepare release of 2.1.4 Modified: lxml/branch/lxml-2.1/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.1/CHANGES.txt (original) +++ lxml/branch/lxml-2.1/CHANGES.txt Fri Dec 12 22:36:06 2008 @@ -2,6 +2,15 @@ lxml changelog ============== +2.1.4 (2008-12-12) +================== + +Bugs fixed +---------- + +* Crash when using an XPath evaluator in multiple threads. + + 2.1.3 (2008-11-17) ================== Modified: lxml/branch/lxml-2.1/doc/main.txt ============================================================================== --- lxml/branch/lxml-2.1/doc/main.txt (original) +++ lxml/branch/lxml-2.1/doc/main.txt Fri Dec 12 22:36:06 2008 @@ -147,8 +147,8 @@ source release. If you can't wait, consider trying a less recent release version first. -The latest version is `lxml 2.1.3`_, released 2008-11-17 -(`changes for 2.1.3`_). `Older versions`_ are listed below. +The latest version is `lxml 2.1.4`_, released 2008-12-12 +(`changes for 2.1.4`_). `Older versions`_ are listed below. Please take a look at the `installation instructions`_! @@ -220,7 +220,9 @@ `2.0 `_ and the `current in-development version `_. -.. _`PDF documentation`: lxmldoc-2.1.3.pdf +.. _`PDF documentation`: lxmldoc-2.1.4.pdf + +* `lxml 2.1.3`_, released 2008-11-17 (`changes for 2.1.3`_) * `lxml 2.1.2`_, released 2008-09-05 (`changes for 2.1.2`_) @@ -236,6 +238,8 @@ * `lxml 2.1alpha1`_, released 2008-03-27 (`changes for 2.1alpha1`_) +* `lxml 2.0.11`_, released 2008-12-12 (`changes for 2.0.11`_) + * `lxml 2.0.10`_, released 2008-11-17 (`changes for 2.0.10`_) * `lxml 2.0.9`_, released 2008-09-05 (`changes for 2.0.9`_) @@ -308,6 +312,7 @@ * `lxml 0.5`_, released 2005-04-08 +.. _`lxml 2.1.4`: lxml-2.1.4.tgz .. _`lxml 2.1.3`: lxml-2.1.3.tgz .. _`lxml 2.1.2`: lxml-2.1.2.tgz .. _`lxml 2.1.1`: lxml-2.1.1.tgz @@ -352,6 +357,7 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.1.4`: changes-2.1.4.html .. _`changes for 2.1.3`: changes-2.1.3.html .. _`changes for 2.1.2`: changes-2.1.2.html .. _`changes for 2.1.1`: changes-2.1.1.html @@ -360,6 +366,7 @@ .. _`changes for 2.1beta2`: changes-2.1beta2.html .. _`changes for 2.1beta1`: changes-2.1beta1.html .. _`changes for 2.1alpha1`: changes-2.1alpha1.html +.. _`changes for 2.0.11`: changes-2.0.11.html .. _`changes for 2.0.10`: changes-2.0.10.html .. _`changes for 2.0.9`: changes-2.0.9.html .. _`changes for 2.0.8`: changes-2.0.8.html From scoder at codespeak.net Fri Dec 12 22:40:58 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 12 Dec 2008 22:40:58 +0100 (CET) Subject: [Lxml-checkins] r60475 - lxml/branch/lxml-2.0 Message-ID: <20081212214058.526F6168495@codespeak.net> Author: scoder Date: Fri Dec 12 22:40:57 2008 New Revision: 60475 Modified: lxml/branch/lxml-2.0/version.txt Log: version Modified: lxml/branch/lxml-2.0/version.txt ============================================================================== --- lxml/branch/lxml-2.0/version.txt (original) +++ lxml/branch/lxml-2.0/version.txt Fri Dec 12 22:40:57 2008 @@ -1 +1 @@ -2.0.10 +2.0.11 From scoder at codespeak.net Fri Dec 12 22:41:03 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 12 Dec 2008 22:41:03 +0100 (CET) Subject: [Lxml-checkins] r60476 - lxml/branch/lxml-2.1 Message-ID: <20081212214103.721EB168497@codespeak.net> Author: scoder Date: Fri Dec 12 22:41:03 2008 New Revision: 60476 Modified: lxml/branch/lxml-2.1/version.txt Log: version Modified: lxml/branch/lxml-2.1/version.txt ============================================================================== --- lxml/branch/lxml-2.1/version.txt (original) +++ lxml/branch/lxml-2.1/version.txt Fri Dec 12 22:41:03 2008 @@ -1 +1 @@ -2.1.3 +2.1.4 From scoder at codespeak.net Fri Dec 12 23:05:33 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 12 Dec 2008 23:05:33 +0100 (CET) Subject: [Lxml-checkins] r60477 - lxml/branch/lxml-2.1/doc Message-ID: <20081212220533.0E299168495@codespeak.net> Author: scoder Date: Fri Dec 12 23:05:33 2008 New Revision: 60477 Modified: lxml/branch/lxml-2.1/doc/main.txt Log: missing doc link Modified: lxml/branch/lxml-2.1/doc/main.txt ============================================================================== --- lxml/branch/lxml-2.1/doc/main.txt (original) +++ lxml/branch/lxml-2.1/doc/main.txt Fri Dec 12 23:05:33 2008 @@ -321,6 +321,7 @@ .. _`lxml 2.1beta2`: lxml-2.1beta2.tgz .. _`lxml 2.1beta1`: lxml-2.1beta1.tgz .. _`lxml 2.1alpha1`: lxml-2.1alpha1.tgz +.. _`lxml 2.0.11`: lxml-2.0.11.tgz .. _`lxml 2.0.10`: lxml-2.0.10.tgz .. _`lxml 2.0.9`: lxml-2.0.9.tgz .. _`lxml 2.0.8`: lxml-2.0.8.tgz From scoder at codespeak.net Fri Dec 12 23:15:17 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 12 Dec 2008 23:15:17 +0100 (CET) Subject: [Lxml-checkins] r60478 - in lxml/trunk: . doc Message-ID: <20081212221517.72D361684DE@codespeak.net> Author: scoder Date: Fri Dec 12 23:15:17 2008 New Revision: 60478 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/doc/main.txt lxml/trunk/setup.py lxml/trunk/version.txt Log: r4922 at delle: sbehnel | 2008-12-12 23:13:57 +0100 prepare release of 2.2beta1 Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Fri Dec 12 23:15:17 2008 @@ -2,24 +2,44 @@ lxml changelog ============== -Under development -================= +2.2beta1 (2008-12-12) +===================== Features added -------------- +* Allow ``lxml.html.diff.htmldiff`` to accept Element objects, not + just HTML strings. + Bugs fixed ---------- -* Fixed missing whitespace before ``Link:...`` in ``lxml.html.diff`` +* Crash when using an XPath evaluator in multiple threads. + +* Fixed missing whitespace before ``Link:...`` in ``lxml.html.diff``. Other changes ------------- -* Export ``lxml.html.parse`` +* Export ``lxml.html.parse``. + + +2.1.4 (2008-12-12) +================== + +Bugs fixed +---------- + +* Crash when using an XPath evaluator in multiple threads. + + +2.0.11 (2008-12-12) +=================== + +Bugs fixed +---------- -* Allow ``lxml.html.diff.htmldiff`` to accept element objects, not - just HTML strings +* Crash when using an XPath evaluator in multiple threads. 2.2alpha1 (2008-11-23) Modified: lxml/trunk/doc/main.txt ============================================================================== --- lxml/trunk/doc/main.txt (original) +++ lxml/trunk/doc/main.txt Fri Dec 12 23:15:17 2008 @@ -147,8 +147,8 @@ source release. If you can't wait, consider trying a less recent release version first. -The latest version is `lxml 2.2alpha1`_, released 2008-11-23 -(`changes for 2.2alpha1`_). `Older versions`_ are listed below. +The latest version is `lxml 2.2beta1`_, released 2008-12-12 +(`changes for 2.2beta1`_). `Older versions`_ are listed below. Please take a look at the `installation instructions`_! @@ -220,7 +220,9 @@ `2.0 `_ and the `current in-development version `_. -.. _`PDF documentation`: lxmldoc-2.2alpha1.pdf +.. _`PDF documentation`: lxmldoc-2.2beta1.pdf + +* `lxml 2.1.4`_, released 2008-12-12 (`changes for 2.1.4`_) * `lxml 2.1.3`_, released 2008-11-17 (`changes for 2.1.3`_) @@ -230,6 +232,8 @@ * `lxml 2.1`_, released 2008-07-09 (`changes for 2.1`_) +* `lxml 2.0.11`_, released 2008-12-12 (`changes for 2.0.11`_) + * `lxml 2.0.10`_, released 2008-11-17 (`changes for 2.0.10`_) * `lxml 2.0.9`_, released 2008-09-05 (`changes for 2.0.9`_) @@ -303,10 +307,13 @@ * `lxml 0.5`_, released 2005-04-08 .. _`lxml 2.2alpha1`: lxml-2.2alpha1.tgz +.. _`lxml 2.2beta1`: lxml-2.2beta1.tgz +.. _`lxml 2.1.4`: lxml-2.1.4.tgz .. _`lxml 2.1.3`: lxml-2.1.3.tgz .. _`lxml 2.1.2`: lxml-2.1.2.tgz .. _`lxml 2.1.1`: lxml-2.1.1.tgz .. _`lxml 2.1`: lxml-2.1.tgz +.. _`lxml 2.0.11`: lxml-2.0.11.tgz .. _`lxml 2.0.10`: lxml-2.0.10.tgz .. _`lxml 2.0.9`: lxml-2.0.9.tgz .. _`lxml 2.0.8`: lxml-2.0.8.tgz @@ -343,11 +350,14 @@ .. _`lxml 0.5.1`: lxml-0.5.1.tgz .. _`lxml 0.5`: lxml-0.5.tgz +.. _`changes for 2.2beta1`: changes-2.2beta1.html .. _`changes for 2.2alpha1`: changes-2.2alpha1.html +.. _`changes for 2.1.4`: changes-2.1.4.html .. _`changes for 2.1.3`: changes-2.1.3.html .. _`changes for 2.1.2`: changes-2.1.2.html .. _`changes for 2.1.1`: changes-2.1.1.html .. _`changes for 2.1`: changes-2.1.html +.. _`changes for 2.0.11`: changes-2.0.11.html .. _`changes for 2.0.10`: changes-2.0.10.html .. _`changes for 2.0.9`: changes-2.0.9.html .. _`changes for 2.0.8`: changes-2.0.8.html Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Fri Dec 12 23:15:17 2008 @@ -93,7 +93,13 @@ 'Intended Audience :: Developers', 'Intended Audience :: Information Technology', 'License :: OSI Approved :: BSD License', - 'Programming Language :: Python', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.3', + 'Programming Language :: Python :: 2.4', + 'Programming Language :: Python :: 2.5', + 'Programming Language :: Python :: 2.6', +# 'Programming Language :: Python :: 3', +# 'Programming Language :: Python :: 3.0', 'Programming Language :: C', 'Operating System :: OS Independent', 'Topic :: Text Processing :: Markup :: XML', Modified: lxml/trunk/version.txt ============================================================================== --- lxml/trunk/version.txt (original) +++ lxml/trunk/version.txt Fri Dec 12 23:15:17 2008 @@ -1 +1 @@ -2.2alpha1 +2.2beta1 From scoder at codespeak.net Fri Dec 12 23:34:19 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 12 Dec 2008 23:34:19 +0100 (CET) Subject: [Lxml-checkins] r60479 - in lxml/trunk: . doc Message-ID: <20081212223419.A18F1168057@codespeak.net> Author: scoder Date: Fri Dec 12 23:34:17 2008 New Revision: 60479 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/elementsoup.txt Log: r4931 at delle: sbehnel | 2008-12-12 23:32:48 +0100 docs Modified: lxml/trunk/doc/elementsoup.txt ============================================================================== --- lxml/trunk/doc/elementsoup.txt (original) +++ lxml/trunk/doc/elementsoup.txt Fri Dec 12 23:34:17 2008 @@ -2,21 +2,26 @@ BeautifulSoup Parser ==================== -BeautifulSoup_ is a Python package that parses broken HTML. While -libxml2 (and thus lxml) can also parse broken HTML, BeautifulSoup is a -bit more forgiving in some cases (though it is also common that -lxml/libxml2 will parse common broken HTML better), and BeautifulSoup -and has superiour `support for encoding detection`_. +BeautifulSoup_ is a Python package that parses broken HTML, just like +lxml supports it based on the parser of libxml2. BeautifulSoup uses a +different parsing approach. It is not a real HTML parser but uses +regular expressions to dive through tag soup. It is therefore more +forgiving in some cases and less good in others. It is not uncommon +that lxml/libxml2 parses and fixes broken HTML better, but +BeautifulSoup has superiour `support for encoding detection`_. It +very much depends on the input which parser works better. .. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/ .. _`support for encoding detection`: http://www.crummy.com/software/BeautifulSoup/documentation.html#Beautiful%20Soup%20Gives%20You%20Unicode%2C%20Dammit .. _ElementSoup: http://effbot.org/zone/element-soup.htm -lxml can benefit from the parsing capabilities of BeautifulSoup -through the ``lxml.html.soupparser`` module. It provides three main -functions: ``fromstring()`` and ``parse()`` to parse a string or file -using BeautifulSoup, and ``convert_tree()`` to convert an existing -BeautifulSoup tree into a list of top-level Elements. +To prevent users from having to choose their parser library in +advance, lxml can interface to the parsing capabilities of +BeautifulSoup through the ``lxml.html.soupparser`` module. It +provides three main functions: ``fromstring()`` and ``parse()`` to +parse a string or file using BeautifulSoup into an ``lxml.html`` +document, and ``convert_tree()`` to convert an existing BeautifulSoup +tree into a list of top-level Elements. Parsing with the soupparser From scoder at codespeak.net Fri Dec 12 23:45:57 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 12 Dec 2008 23:45:57 +0100 (CET) Subject: [Lxml-checkins] r60480 - in lxml/trunk: . src/lxml/html/tests Message-ID: <20081212224557.204E61684D6@codespeak.net> Author: scoder Date: Fri Dec 12 23:45:56 2008 New Revision: 60480 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/html/tests/test_diff.txt Log: r4933 at delle: sbehnel | 2008-12-12 23:44:42 +0100 test fix Modified: lxml/trunk/src/lxml/html/tests/test_diff.txt ============================================================================== --- lxml/trunk/src/lxml/html/tests/test_diff.txt (original) +++ lxml/trunk/src/lxml/html/tests/test_diff.txt Fri Dec 12 23:45:56 2008 @@ -59,8 +59,8 @@ images are treated like words: >>> pdiff('search', 'search') - search Link: http://google.com - Link: http://yahoo.com + search Link: http://google.com + Link: http://yahoo.com >>> pdiff('

Print this

', '

Print this

')

Print this

>>> pdiff('search', 'search') From scoder at codespeak.net Fri Dec 12 23:48:26 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 12 Dec 2008 23:48:26 +0100 (CET) Subject: [Lxml-checkins] r60482 - lxml/tag/lxml-2.1.4 Message-ID: <20081212224826.5479B1684D6@codespeak.net> Author: scoder Date: Fri Dec 12 23:48:25 2008 New Revision: 60482 Added: lxml/tag/lxml-2.1.4/ - copied from r60481, lxml/branch/lxml-2.1/ Log: tag for lxml 2.1.4 From scoder at codespeak.net Fri Dec 12 23:48:56 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 12 Dec 2008 23:48:56 +0100 (CET) Subject: [Lxml-checkins] r60483 - lxml/tag/lxml-2.0.11 Message-ID: <20081212224856.8BF5E1684D6@codespeak.net> Author: scoder Date: Fri Dec 12 23:48:56 2008 New Revision: 60483 Added: lxml/tag/lxml-2.0.11/ - copied from r60482, lxml/branch/lxml-2.0/ Log: tag for lxml 2.0.11 From scoder at codespeak.net Wed Dec 17 22:42:07 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 17 Dec 2008 22:42:07 +0100 (CET) Subject: [Lxml-checkins] r60554 - lxml/trunk Message-ID: <20081217214207.31C0416843F@codespeak.net> Author: scoder Date: Wed Dec 17 22:42:05 2008 New Revision: 60554 Modified: lxml/trunk/ (props changed) lxml/trunk/setupinfo.py Log: r4935 at delle: sbehnel | 2008-12-17 22:40:49 +0100 win32 build fixes Modified: lxml/trunk/setupinfo.py ============================================================================== --- lxml/trunk/setupinfo.py (original) +++ lxml/trunk/setupinfo.py Wed Dec 17 22:42:05 2008 @@ -26,10 +26,14 @@ def decode_input(data): return data -def env_var(name, sep=None): +def env_var(name): value = os.getenv(name) if value: - return decode_input(value).split(sep) + value = decode_input(value) + if os.platform == 'win32' and ';' in value: + return value.split(';') + else: + return value.split() else: return [] @@ -139,9 +143,10 @@ def libraries(): if sys.platform in ('win32',): - libs = ['libxslt', 'libexslt', 'libxml2', 'iconv', 'zlib', 'WS2_32'] + libs = ['libxslt', 'libexslt', 'libxml2', 'iconv'] if OPTION_STATIC: libs = ['%s_a' % lib for lib in libs] + libs.extend(['zlib', 'WS2_32']) elif OPTION_STATIC: libs = ['z', 'm'] else: From lxml-checkins at codespeak.net Tue Dec 23 19:20:34 2008 From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net) Date: Tue, 23 Dec 2008 19:20:34 +0100 (CET) Subject: [Lxml-checkins] Our friend is busted! Message-ID: <20081223182034.C41B51684D8@codespeak.net> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20081223/b7761adc/attachment.htm From lxml-checkins at codespeak.net Thu Dec 25 16:27:33 2008 From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net) Date: Thu, 25 Dec 2008 16:27:33 +0100 (CET) Subject: [Lxml-checkins] Take her from behind with steel Message-ID: <20081225152733.98310168482@codespeak.net> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20081225/ef23459c/attachment.htm From scoder at codespeak.net Thu Dec 25 20:29:50 2008 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 25 Dec 2008 20:29:50 +0100 (CET) Subject: [Lxml-checkins] r60705 - lxml/trunk Message-ID: <20081225192950.B1D4816841C@codespeak.net> Author: scoder Date: Thu Dec 25 20:29:49 2008 New Revision: 60705 Modified: lxml/trunk/ (props changed) lxml/trunk/buildlibxml.py Log: r4939 at delle: sbehnel | 2008-12-25 20:28:26 +0100 fix finding header files when building libxml2/libxslt Modified: lxml/trunk/buildlibxml.py ============================================================================== --- lxml/trunk/buildlibxml.py (original) +++ lxml/trunk/buildlibxml.py Thu Dec 25 20:29:49 2008 @@ -213,6 +213,7 @@ lib_dir = os.path.join(prefix, 'lib') static_include_dirs.extend([ + os.path.join(prefix, 'include'), os.path.join(prefix, 'include', 'libxml2'), os.path.join(prefix, 'include', 'libxslt'), os.path.join(prefix, 'include', 'libexslt')]) From lxml-checkins at codespeak.net Mon Dec 29 04:38:57 2008 From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net) Date: Mon, 29 Dec 2008 04:38:57 +0100 (CET) Subject: [Lxml-checkins] Go for hours without stopping Message-ID: <20081229033857.98B2516845E@codespeak.net> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20081229/c3c17855/attachment.htm From lxml-checkins at codespeak.net Mon Dec 29 19:52:53 2008 From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net) Date: Mon, 29 Dec 2008 19:52:53 +0100 (CET) Subject: [Lxml-checkins] the ultimate New Years party Message-ID: <20081229185253.71EBF168402@codespeak.net> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20081229/523b4654/attachment.htm From lxml-checkins at codespeak.net Tue Dec 30 14:20:10 2008 From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net) Date: Tue, 30 Dec 2008 14:20:10 +0100 (CET) Subject: [Lxml-checkins] Attain the size you've always dreamed of Message-ID: <20081230132010.110841684BF@codespeak.net> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20081230/e4c682df/attachment.htm From lxml-checkins at codespeak.net Tue Dec 30 22:22:36 2008 From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net) Date: Tue, 30 Dec 2008 22:22:36 +0100 (CET) Subject: [Lxml-checkins] Finally, nobody laughs at my manhood Message-ID: <20081230212236.53B6A168495@codespeak.net> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20081230/8d21ec08/attachment.htm From lxml-checkins at codespeak.net Wed Dec 31 16:58:47 2008 From: lxml-checkins at codespeak.net (lxml-checkins at codespeak.net) Date: Wed, 31 Dec 2008 16:58:47 +0100 (CET) Subject: [Lxml-checkins] Crush the opposition with this Message-ID: <20081231155847.D3024168418@codespeak.net> An HTML attachment was scrubbed... URL: http://codespeak.net/pipermail/lxml-checkins/attachments/20081231/eec1a21a/attachment.htm