From ianb at codespeak.net Wed Dec 10 20:50:32 2008
From: ianb at codespeak.net (ianb at codespeak.net)
Date: Wed, 10 Dec 2008 20:50:32 +0100 (CET)
Subject: [Lxml-checkins] r60417 - in lxml/trunk: . src/lxml/html
Message-ID: <20081210195032.884191684CD@codespeak.net>
Author: ianb
Date: Wed Dec 10 20:50:29 2008
New Revision: 60417
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/html/__init__.py
lxml/trunk/src/lxml/html/diff.py
Log:
Small cleanups to lxml.html.diff.
Export lxml.html.parse
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed Dec 10 20:50:29 2008
@@ -2,6 +2,22 @@
lxml changelog
==============
+svn trunk
+=========
+
+Bugs fixed
+----------
+
+* Fixed missing whitespace before ``Link:...`` in ``lxml.html.diff``
+
+Other changes
+-------------
+
+* Export ``lxml.html.parse``
+
+* Allow ``lxml.html.diff.htmldiff`` to accept element objects, not
+ just HTML strings
+
2.2alpha1 (2008-11-23)
======================
Modified: lxml/trunk/src/lxml/html/__init__.py
==============================================================================
--- lxml/trunk/src/lxml/html/__init__.py (original)
+++ lxml/trunk/src/lxml/html/__init__.py Wed Dec 10 20:50:29 2008
@@ -51,7 +51,7 @@
'document_fromstring', 'fragment_fromstring', 'fragments_fromstring', 'fromstring',
'tostring', 'Element', 'defs', 'open_in_browser', 'submit_form',
'find_rel_links', 'find_class', 'make_links_absolute',
- 'resolve_base_href', 'iterlinks', 'rewrite_links', 'open_in_browser']
+ 'resolve_base_href', 'iterlinks', 'rewrite_links', 'open_in_browser', 'parse']
XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"
Modified: lxml/trunk/src/lxml/html/diff.py
==============================================================================
--- lxml/trunk/src/lxml/html/diff.py (original)
+++ lxml/trunk/src/lxml/html/diff.py Wed Dec 10 20:50:29 2008
@@ -1,6 +1,7 @@
import difflib
from lxml import etree
from lxml.html import fragment_fromstring
+from lxml.etree import _Element
import cgi
import re
@@ -507,7 +508,7 @@
hide_when_equal = True
def html(self):
- return 'Link: %s' % self
+ return ' Link: %s' % self
def tokenize(html, include_hrefs=True):
"""
@@ -524,7 +525,10 @@
If include_hrefs is true, then the href attribute of tags is
included as a special kind of diffable token."""
- body_el = parse_html(html, cleanup=True)
+ if isinstance(html, _Element):
+ body_el = html
+ else:
+ body_el = parse_html(html, cleanup=True)
# Then we split the document into text chunks for each tag, word, and end tag:
chunks = flatten_el(body_el, skip_tag=True, include_hrefs=include_hrefs)
# Finally re-joining them into token objects:
From scoder at codespeak.net Wed Dec 10 21:03:28 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 10 Dec 2008 21:03:28 +0100 (CET)
Subject: [Lxml-checkins] r60418 - in lxml/trunk: . src/lxml
Message-ID: <20081210200328.116621684CA@codespeak.net>
Author: scoder
Date: Wed Dec 10 21:03:27 2008
New Revision: 60418
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/extensions.pxi
lxml/trunk/src/lxml/python.pxd
lxml/trunk/src/lxml/xslt.pxi
Log:
r4906 at delle: sbehnel | 2008-12-07 21:19:42 +0100
avoid calling PyDict_Copy() directly
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Wed Dec 10 21:03:27 2008
@@ -104,7 +104,7 @@
context = self.__class__(namespaces, None, False,
self._build_smart_strings)
if self._extensions is not None:
- context._extensions = python.PyDict_Copy(self._extensions)
+ context._extensions = self._extensions.copy()
return context
cdef object _to_utf(self, s):
Modified: lxml/trunk/src/lxml/python.pxd
==============================================================================
--- lxml/trunk/src/lxml/python.pxd (original)
+++ lxml/trunk/src/lxml/python.pxd Wed Dec 10 21:03:27 2008
@@ -65,7 +65,7 @@
cdef PyObject* PyDict_GetItem(object d, object key)
# cdef int PyDict_DelItem(object d, object key) except -1
cdef void PyDict_Clear(object d)
- cdef object PyDict_Copy(object d)
+# cdef object PyDict_Copy(object d)
cdef object PyDictProxy_New(object d)
# cdef int PyDict_Contains(object d, object key) except -1 # Python 2.4+
cdef Py_ssize_t PyDict_Size(object d)
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Wed Dec 10 21:03:27 2008
@@ -283,7 +283,7 @@
if isinstance(extension, XSLTExtension):
if self._extension_elements is EMPTY_DICT:
self._extension_elements = {}
- extensions = python.PyDict_Copy(extensions)
+ extensions = extensions.copy()
ns_utf = _utf8(ns_name_tuple[0])
name_utf = _utf8(ns_name_tuple[1])
self._extension_elements[(ns_utf, name_utf)] = extension
From scoder at codespeak.net Wed Dec 10 21:03:40 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 10 Dec 2008 21:03:40 +0100 (CET)
Subject: [Lxml-checkins] r60419 - in lxml/trunk: . src/lxml/html
Message-ID: <20081210200340.A74D31684CD@codespeak.net>
Author: scoder
Date: Wed Dec 10 21:03:39 2008
New Revision: 60419
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/html/diff.py
Log:
r4909 at delle: sbehnel | 2008-12-10 21:02:11 +0100
cleanup
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Wed Dec 10 21:03:39 2008
@@ -2,8 +2,11 @@
lxml changelog
==============
-svn trunk
-=========
+Under development
+=================
+
+Features added
+--------------
Bugs fixed
----------
@@ -18,6 +21,7 @@
* Allow ``lxml.html.diff.htmldiff`` to accept element objects, not
just HTML strings
+
2.2alpha1 (2008-11-23)
======================
Modified: lxml/trunk/src/lxml/html/diff.py
==============================================================================
--- lxml/trunk/src/lxml/html/diff.py (original)
+++ lxml/trunk/src/lxml/html/diff.py Wed Dec 10 21:03:39 2008
@@ -1,7 +1,6 @@
import difflib
from lxml import etree
from lxml.html import fragment_fromstring
-from lxml.etree import _Element
import cgi
import re
@@ -525,7 +524,7 @@
If include_hrefs is true, then the href attribute of tags is
included as a special kind of diffable token."""
- if isinstance(html, _Element):
+ if etree.iselement(html):
body_el = html
else:
body_el = parse_html(html, cleanup=True)
From ianb at codespeak.net Thu Dec 11 18:30:55 2008
From: ianb at codespeak.net (ianb at codespeak.net)
Date: Thu, 11 Dec 2008 18:30:55 +0100 (CET)
Subject: [Lxml-checkins] r60430 - lxml/trunk/doc
Message-ID: <20081211173055.351D9168455@codespeak.net>
Author: ianb
Date: Thu Dec 11 18:30:52 2008
New Revision: 60430
Modified:
lxml/trunk/doc/elementsoup.txt
Log:
tone down the degree to which BeautifulSoup is thought to parse better than libxml2
Modified: lxml/trunk/doc/elementsoup.txt
==============================================================================
--- lxml/trunk/doc/elementsoup.txt (original)
+++ lxml/trunk/doc/elementsoup.txt Thu Dec 11 18:30:52 2008
@@ -2,9 +2,11 @@
BeautifulSoup Parser
====================
-BeautifulSoup_ is a Python package that parses broken HTML. While libxml2
-(and thus lxml) can also parse broken HTML, BeautifulSoup is a bit more
-forgiving and has superiour `support for encoding detection`_.
+BeautifulSoup_ is a Python package that parses broken HTML. While
+libxml2 (and thus lxml) can also parse broken HTML, BeautifulSoup is a
+bit more forgiving in some cases (though it is also common that
+lxml/libxml2 will parse common broken HTML better), and BeautifulSoup
+has superior `support for encoding detection`_.
.. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/
.. _`support for encoding detection`: http://www.crummy.com/software/BeautifulSoup/documentation.html#Beautiful%20Soup%20Gives%20You%20Unicode%2C%20Dammit
From scoder at codespeak.net Fri Dec 12 19:16:25 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 12 Dec 2008 19:16:25 +0100 (CET)
Subject: [Lxml-checkins] r60465 - lxml/trunk
Message-ID: <20081212181625.B03EB168480@codespeak.net>
Author: scoder
Date: Fri Dec 12 19:16:23 2008
New Revision: 60465
Modified:
lxml/trunk/ (props changed)
lxml/trunk/buildlibxml.py
Log:
r4912 at delle: sbehnel | 2008-12-12 19:15:08 +0100
keep old env vars when setting Mac compile flags
Modified: lxml/trunk/buildlibxml.py
==============================================================================
--- lxml/trunk/buildlibxml.py (original)
+++ lxml/trunk/buildlibxml.py Fri Dec 12 19:16:23 2008
@@ -171,11 +171,13 @@
# We compile Universal if we are on a machine > 10.3
major_version = int(os.uname()[2].split('.')[0])
if major_version > 7:
- call_setup['env'] = {
+ env = os.environ.copy()
+ env.update({
'CFLAGS' : "-arch ppc -arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk -O2",
'LDFLAGS' : "-arch ppc -arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk",
'MACOSX_DEPLOYMENT_TARGET' : "10.3"
- }
+ })
+ call_setup['env'] = env
# We may lose the link to iconv, so make sure it's there
static_binaries.append('-liconv')
From scoder at codespeak.net Fri Dec 12 19:41:43 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 12 Dec 2008 19:41:43 +0100 (CET)
Subject: [Lxml-checkins] r60467 - in lxml/trunk: . src/lxml/html
Message-ID: <20081212184143.0C13816846C@codespeak.net>
Author: scoder
Date: Fri Dec 12 19:41:42 2008
New Revision: 60467
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/html/__init__.py
Log:
r4916 at delle: sbehnel | 2008-12-12 19:40:26 +0100
fix bug #307233
Modified: lxml/trunk/src/lxml/html/__init__.py
==============================================================================
--- lxml/trunk/src/lxml/html/__init__.py (original)
+++ lxml/trunk/src/lxml/html/__init__.py Fri Dec 12 19:41:42 2008
@@ -40,6 +40,8 @@
basestring = (str, bytes)
def __fix_docstring(s):
+ if not s:
+ return s
import sys
if sys.version_info[0] >= 3:
sub = re.compile(r"^(\s*)u'", re.M).sub
From scoder at codespeak.net Fri Dec 12 22:15:05 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 12 Dec 2008 22:15:05 +0100 (CET)
Subject: [Lxml-checkins] r60469 - in lxml/trunk: . src/lxml
Message-ID: <20081212211505.953131683F1@codespeak.net>
Author: scoder
Date: Fri Dec 12 22:15:03 2008
New Revision: 60469
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/xpath.pxi
Log:
r4918 at delle: sbehnel | 2008-12-12 22:13:43 +0100
enable locking for XPath evaluators
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Fri Dec 12 22:15:03 2008
@@ -116,6 +116,10 @@
self._error_log = _ErrorLog()
self._context = _XPathContext(namespaces, extensions,
enable_regexp, None, smart_strings)
+ if config.ENABLE_THREADING:
+ self._eval_lock = python.PyThread_allocate_lock()
+ if self._eval_lock is NULL:
+ python.PyErr_NoMemory()
property error_log:
def __get__(self):
From scoder at codespeak.net Fri Dec 12 22:18:31 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 12 Dec 2008 22:18:31 +0100 (CET)
Subject: [Lxml-checkins] r60470 - in lxml/trunk: . src/lxml
Message-ID: <20081212211831.E357816846C@codespeak.net>
Author: scoder
Date: Fri Dec 12 22:18:31 2008
New Revision: 60470
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/xpath.pxi
Log:
r4920 at delle: sbehnel | 2008-12-12 22:17:15 +0100
cleanup
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Fri Dec 12 22:18:31 2008
@@ -410,7 +410,6 @@
cdef xpath.xmlXPathObject* xpathObj
cdef _Document document
cdef _Element element
- cdef _XPathContext context
document = _documentOrRaise(_etree_or_element)
element = _rootNodeOrRaise(_etree_or_element)
From scoder at codespeak.net Fri Dec 12 22:27:29 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 12 Dec 2008 22:27:29 +0100 (CET)
Subject: [Lxml-checkins] r60471 - lxml/branch/lxml-2.1/src/lxml
Message-ID: <20081212212729.2EFD4168064@codespeak.net>
Author: scoder
Date: Fri Dec 12 22:27:27 2008
New Revision: 60471
Modified:
lxml/branch/lxml-2.1/src/lxml/xpath.pxi
Log:
enable locking for XPath evaluators
Modified: lxml/branch/lxml-2.1/src/lxml/xpath.pxi
==============================================================================
--- lxml/branch/lxml-2.1/src/lxml/xpath.pxi (original)
+++ lxml/branch/lxml-2.1/src/lxml/xpath.pxi Fri Dec 12 22:27:27 2008
@@ -116,6 +116,10 @@
self._error_log = _ErrorLog()
self._context = _XPathContext(namespaces, extensions,
enable_regexp, None, smart_strings)
+ if config.ENABLE_THREADING:
+ self._eval_lock = python.PyThread_allocate_lock()
+ if self._eval_lock is NULL:
+ python.PyErr_NoMemory()
property error_log:
def __get__(self):
From scoder at codespeak.net Fri Dec 12 22:32:36 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 12 Dec 2008 22:32:36 +0100 (CET)
Subject: [Lxml-checkins] r60472 - lxml/branch/lxml-2.0/src/lxml
Message-ID: <20081212213236.61FEA168495@codespeak.net>
Author: scoder
Date: Fri Dec 12 22:32:35 2008
New Revision: 60472
Modified:
lxml/branch/lxml-2.0/src/lxml/xpath.pxi
Log:
enable locking for XPath evaluators
Modified: lxml/branch/lxml-2.0/src/lxml/xpath.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/xpath.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/xpath.pxi Fri Dec 12 22:32:35 2008
@@ -113,6 +113,10 @@
self._error_log = _ErrorLog()
self._context = _XPathContext(namespaces, extensions,
enable_regexp, None)
+ if config.ENABLE_THREADING:
+ self._eval_lock = python.PyThread_allocate_lock()
+ if self._eval_lock is NULL:
+ python.PyErr_NoMemory()
property error_log:
def __get__(self):
From scoder at codespeak.net Fri Dec 12 22:35:49 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 12 Dec 2008 22:35:49 +0100 (CET)
Subject: [Lxml-checkins] r60473 - in lxml/branch/lxml-2.0: . doc
Message-ID: <20081212213549.3834E1684D0@codespeak.net>
Author: scoder
Date: Fri Dec 12 22:35:48 2008
New Revision: 60473
Modified:
lxml/branch/lxml-2.0/CHANGES.txt
lxml/branch/lxml-2.0/doc/main.txt
Log:
prepare release of 2.0.11
Modified: lxml/branch/lxml-2.0/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-2.0/CHANGES.txt (original)
+++ lxml/branch/lxml-2.0/CHANGES.txt Fri Dec 12 22:35:48 2008
@@ -2,6 +2,15 @@
lxml changelog
==============
+2.0.11 (2008-12-12)
+===================
+
+Bugs fixed
+----------
+
+* Crash when using an XPath evaluator in multiple threads.
+
+
2.0.10 (2008-11-17)
===================
Modified: lxml/branch/lxml-2.0/doc/main.txt
==============================================================================
--- lxml/branch/lxml-2.0/doc/main.txt (original)
+++ lxml/branch/lxml-2.0/doc/main.txt Fri Dec 12 22:35:48 2008
@@ -146,8 +146,8 @@
source release. If you can't wait, consider trying a less recent
release version first.
-The latest version is `lxml 2.0.10`_, released 2008-11-17
-(`changes for 2.0.10`_). `Older versions`_ are listed below.
+The latest version is `lxml 2.0.11`_, released 2008-12-12
+(`changes for 2.0.11`_). `Older versions`_ are listed below.
Please take a look at the `installation instructions`_!
@@ -215,7 +215,9 @@
Old Versions
------------
-.. _`PDF documentation`: lxmldoc-2.0.10.pdf
+.. _`PDF documentation`: lxmldoc-2.0.11.pdf
+
+* `lxml 2.0.10`_, released 2008-11-17 (`changes for 2.0.10`_)
* `lxml 2.0.9`_, released 2008-09-05 (`changes for 2.0.9`_)
@@ -287,6 +289,7 @@
* `lxml 0.5`_, released 2005-04-08
+.. _`lxml 2.0.11`: lxml-2.0.11.tgz
.. _`lxml 2.0.10`: lxml-2.0.10.tgz
.. _`lxml 2.0.9`: lxml-2.0.9.tgz
.. _`lxml 2.0.8`: lxml-2.0.8.tgz
@@ -323,6 +326,7 @@
.. _`lxml 0.5.1`: lxml-0.5.1.tgz
.. _`lxml 0.5`: lxml-0.5.tgz
+.. _`changes for 2.0.11`: changes-2.0.11.html
.. _`changes for 2.0.10`: changes-2.0.10.html
.. _`changes for 2.0.9`: changes-2.0.9.html
.. _`changes for 2.0.8`: changes-2.0.8.html
From scoder at codespeak.net Fri Dec 12 22:36:06 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 12 Dec 2008 22:36:06 +0100 (CET)
Subject: [Lxml-checkins] r60474 - in lxml/branch/lxml-2.1: . doc
Message-ID: <20081212213606.829F3168495@codespeak.net>
Author: scoder
Date: Fri Dec 12 22:36:06 2008
New Revision: 60474
Modified:
lxml/branch/lxml-2.1/CHANGES.txt
lxml/branch/lxml-2.1/doc/main.txt
Log:
prepare release of 2.1.4
Modified: lxml/branch/lxml-2.1/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-2.1/CHANGES.txt (original)
+++ lxml/branch/lxml-2.1/CHANGES.txt Fri Dec 12 22:36:06 2008
@@ -2,6 +2,15 @@
lxml changelog
==============
+2.1.4 (2008-12-12)
+==================
+
+Bugs fixed
+----------
+
+* Crash when using an XPath evaluator in multiple threads.
+
+
2.1.3 (2008-11-17)
==================
Modified: lxml/branch/lxml-2.1/doc/main.txt
==============================================================================
--- lxml/branch/lxml-2.1/doc/main.txt (original)
+++ lxml/branch/lxml-2.1/doc/main.txt Fri Dec 12 22:36:06 2008
@@ -147,8 +147,8 @@
source release. If you can't wait, consider trying a less recent
release version first.
-The latest version is `lxml 2.1.3`_, released 2008-11-17
-(`changes for 2.1.3`_). `Older versions`_ are listed below.
+The latest version is `lxml 2.1.4`_, released 2008-12-12
+(`changes for 2.1.4`_). `Older versions`_ are listed below.
Please take a look at the `installation instructions`_!
@@ -220,7 +220,9 @@
`2.0 Link: http://yahoo.com
+ search Link: http://google.com
+ Link: http://yahoo.com
>>> pdiff('
Print this 
Print this
')Print this 