foo"))
+ ''
+
+Also the parameters the functions accept are different.
+
+
+Function Reference
+==================
+
+``parse(filename_url_or_file)``:
+ Parses the named file or url, or if the object has a ``.read()``
+ method, parses from that.
+
+``document_fromstring(html, guess_charset=True)``:
+ Parses a document from the given string. This always creates a
+ correct HTML document, which means the parent node is ``<html>``,
+ and there is a body and possibly a head.
+
+ If a bytestring is passed and ``guess_charset`` is true the chardet
+ library (if installed) will guess the charset if ambiguities exist.
+
+``fragment_fromstring(string, create_parent=False, guess_charset=False)``:
+ Returns an HTML fragment from a string. The fragment must contain
+ just a single element, unless ``create_parent`` is given;
+ e.g., ``fragment_fromstring(string, create_parent='div')`` will
+ wrap the element in a ``<div>``. If ``create_parent`` is true the
+ default parent tag (div) is used.
+
+ If a bytestring is passed and ``guess_charset`` is true the chardet
+ library (if installed) will guess the charset if ambiguities exist.
+
+``fragments_fromstring(string, no_leading_text=False, guess_charset=False, parser=None)``:
+ Returns a list of the elements found in the fragment. The first item in
+ the list may be a string. If ``no_leading_text`` is true, then it will
+ be an error if there is leading text, and it will always be a list of
+ only elements.
+
+ If a bytestring is passed and ``guess_charset`` is true the chardet
+ library (if installed) will guess the charset if ambiguities exist.
+
+``fromstring(string)``:
+ Returns ``document_fromstring`` or ``fragment_fromstring``, based
+ on whether the string looks like a full document, or just a
+ fragment.
+
+Additionally all parsing functions accept a ``parser`` keyword argument
+that can be set to a custom parser instance. To create custom parsers
+you can subclass the ``HTMLParser`` and ``XHTMLParser`` from the same
+module. Note that these are the parser classes provided by html5lib.
Added: lxml/trunk/src/lxml/html/_html5builder.py
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/html/_html5builder.py Wed Jul 16 08:58:10 2008
@@ -0,0 +1,96 @@
+"""
+This module implements a tree builder for html5lib that generates lxml
+html element trees. This module uses camelCase as it follows the
+html5lib style guide.
+"""
+
+from html5lib.treebuilders import _base, etree as etree_builders
+from lxml import html, etree
+
+
+class DocumentType(object):
+
+ def __init__(self, name, publicId, systemId):
+ self.name = name
+ self.publicId = publicId
+ self.systemId = systemId
+
+class Document(object):
+
+ def __init__(self):
+ self._elementTree = None
+ self.childNodes = []
+
+ def appendChild(self, element):
+ self._elementTree.getroot().addnext(element._element)
+
+
+class TreeBuilder(_base.TreeBuilder):
+ documentClass = Document
+ doctypeClass = DocumentType
+ elementClass = None
+ commentClass = None
+ fragmentClass = Document
+
+ def __init__(self):
+ html_builder = etree_builders.getETreeModule(html, fullTree=False)
+ etree_builder = etree_builders.getETreeModule(etree, fullTree=False)
+ self.elementClass = html_builder.Element
+ self.commentClass = etree_builder.Comment
+ _base.TreeBuilder.__init__(self)
+
+ def reset(self):
+ _base.TreeBuilder.reset(self)
+ self.rootInserted = False
+ self.initialComments = []
+ self.doctype = None
+
+ def getDocument(self):
+ return self.document._elementTree
+
+ def getFragment(self):
+ fragment = []
+ element = self.openElements[0]._element
+ if element.text:
+ fragment.append(element.text)
+ fragment.extend(element.getchildren())
+ if element.tail:
+ fragment.append(element.tail)
+ return fragment
+
+ def insertDoctype(self, name, publicId, systemId):
+ doctype = self.doctypeClass(name, publicId, systemId)
+ self.doctype = doctype
+
+ def insertComment(self, data, parent=None):
+ if not self.rootInserted:
+ self.initialComments.append(data)
+ else:
+ _base.TreeBuilder.insertComment(self, data, parent)
+
+ def insertRoot(self, name):
+ buf = []
+ if self.doctype and self.doctype.name:
+ buf.append('')
+ buf.append('')
+ root = html.fromstring(u''.join(buf))
+
+ # Append the initial comments:
+ for comment in self.initialComments:
+ root.addprevious(etree.Comment(comment))
+
+ # Create the root document and add the ElementTree to it
+ self.document = self.documentClass()
+ self.document._elementTree = root.getroottree()
+
+ # Add the root element to the internal child/open data structures
+ root_element = self.elementClass(name)
+ root_element._element = root
+ self.document.childNodes.append(root_element)
+ self.openElements.append(root_element)
+
+ self.rootInserted = True
Added: lxml/trunk/src/lxml/html/html5parser.py
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/html/html5parser.py Wed Jul 16 08:58:10 2008
@@ -0,0 +1,164 @@
+"""
+An interface to html5lib.
+"""
+
+import urllib
+from html5lib import HTMLParser as _HTMLParser, XHTMLParser as _XHTMLParser
+from lxml import etree
+from lxml.html import _contains_block_level_tag, XHTML_NAMESPACE
+from lxml.html._html5builder import TreeBuilder
+
+# python3 compatibility
+try:
+ _strings = basestring
+except NameError:
+ _strings = (bytes, str)
+
+
+class HTMLParser(_HTMLParser):
+ """An html5lib HTML parser with lxml as tree."""
+
+ def __init__(self, strict=False):
+ _HTMLParser.__init__(self, strict=strict, tree=TreeBuilder)
+
+
+class XHTMLParser(_XHTMLParser):
+ """An html5lib XHTML Parser with lxml as tree."""
+
+ def __init__(self, strict=False):
+ _XHTMLParser.__init__(self, strict=strict, tree=TreeBuilder)
+
+
+def _find_tag(tree, tag):
+ elem = tree.find(tag)
+ if elem is not None:
+ return elem
+ return tree.find('{%s}%s' % (XHTML_NAMESPACE, tag))
+
+
+def document_fromstring(html, guess_charset=True, parser=None):
+ """Parse a whole document from a string."""
+ if not isinstance(html, _strings):
+ raise TypeError('string required')
+
+ if parser is None:
+ parser = html_parser
+
+ return parser.parse(html, useChardet=guess_charset).getroot()
+
+
+def fragments_fromstring(html, no_leading_text=False,
+ guess_charset=False, parser=None):
+ """Parses several HTML elements, returning a list of elements.
+
+ The first item in the list may be a string. If no_leading_text is true,
+ then it will be an error if there is leading text, and it will always be
+ a list of only elements.
+
+ If `guess_charset` is `True` and the text was not unicode but a
+ bytestring, the `chardet` library will perform charset guessing on the
+ string.
+ """
+ if not isinstance(html, _strings):
+ raise TypeError('string required')
+
+ if parser is None:
+ parser = html_parser
+
+ children = parser.parseFragment(html, 'div', useChardet=guess_charset)
+ if children and isinstance(children[0], _strings):
+ if no_leading_text:
+ if children[0].strip():
+ raise etree.ParserError('There is leading text: %r' %
+ children[0])
+ del children[0]
+ return children
+
+
+def fragment_fromstring(html, create_parent=False,
+ guess_charset=False, parser=None):
+ """Parses a single HTML element; it is an error if there is more than
+ one element, or if anything but whitespace precedes or follows the
+ element.
+
+ If create_parent is true (or is a tag name) then a parent node
+ will be created to encapsulate the HTML in a single element.
+ """
+ if not isinstance(html, _strings):
+ raise TypeError('string required')
+
+ if create_parent:
+ container = create_parent or 'div'
+ html = '<%s>%s%s>' % (container, html, container)
+
+ children = fragments_fromstring(html, True, guess_charset, parser)
+ if not children:
+ raise etree.ParserError('No elements found')
+ if len(children) > 1:
+ raise etree.ParserError('Multiple elements found')
+
+ result = children[0]
+ if result.tail and result.tail.strip():
+ raise etree.ParserError('Element followed by text: %r' % el.tail)
+ result.tail = None
+ return result
+
+
+def fromstring(html, guess_charset=True, parser=None):
+ """Parse the html, returning a single element/document.
+
+ This tries to minimally parse the chunk of text, without knowing if it
+ is a fragment or a document.
+
+ base_url will set the document's base_url attribute (and the tree's docinfo.URL)
+ """
+ if not isinstance(html, _strings):
+ raise TypeError('string required')
+ doc = document_fromstring(html, parser=parser,
+ guess_charset=guess_charset)
+
+ # document starts with doctype or <html>, full document!
+ start = html[:50].lstrip().lower()
+ if start.startswith(' implies too much structure.
+ if _contains_block_level_tag(body):
+ body.tag = 'div'
+ else:
+ body.tag = 'span'
+ return body
+
+
+def parse(filename_url_or_file, guess_charset=True, parser=None):
+ """Parse a filename, URL, or file-like object into an HTML document
+ tree. Note: this returns a tree, not an element. Use
+ ``parse(...).getroot()`` to get the document root.
+ """
+ if parser is None:
+ parser = html_parser
+ if isinstance(filename_url_or_file, basestring):
+ fp = urllib.urlopen(filename_url_or_file)
+ else:
+ fp = filename_url_or_file
+ return parser.parse(html, useChardet=guess_charset)
+
+
+html_parser = HTMLParser()
+xhtml_parser = XHTMLParser()
From scoder at codespeak.net Fri Jul 18 10:08:22 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 18 Jul 2008 10:08:22 +0200 (CEST)
Subject: [Lxml-checkins] r56632 - in lxml/trunk: . benchmark
Message-ID: <20080718080822.DE8F616A060@codespeak.net>
Author: scoder
Date: Fri Jul 18 10:08:11 2008
New Revision: 56632
Modified:
lxml/trunk/ (props changed)
lxml/trunk/benchmark/bench_xpath.py
Log:
r4639 at delle: sbehnel | 2008-07-18 09:23:35 +0200
new benchmarks to compare multiple .iter(tag) loops to an or-ed XPath expression
Modified: lxml/trunk/benchmark/bench_xpath.py
==============================================================================
--- lxml/trunk/benchmark/bench_xpath.py (original)
+++ lxml/trunk/benchmark/bench_xpath.py Fri Jul 18 10:08:11 2008
@@ -3,7 +3,7 @@
from StringIO import StringIO
import benchbase
-from benchbase import with_attributes, with_text, onlylib, serialized, children
+from benchbase import with_attributes, with_text, onlylib, serialized, children, nochange
############################################################
# Benchmarks
@@ -43,6 +43,24 @@
@nochange
@onlylib('lxe')
@children
+ def bench_multiple_xpath_or(self, children):
+ xpath = self.etree.XPath(".//p:a00001|.//p:b00001|.//p:c00001",
+ namespaces={'p':'cdefg'})
+ for child in children:
+ xpath(child)
+
+ @nochange
+ @onlylib('lxe')
+ @children
+ def bench_multiple_iter_tag(self, children):
+ for child in children:
+ list(child.iter("{cdefg}a00001"))
+ list(child.iter("{cdefg}b00001"))
+ list(child.iter("{cdefg}c00001"))
+
+ @nochange
+ @onlylib('lxe')
+ @children
def bench_xpath_old_extensions(self, children):
def return_child(_, elements):
if elements:
From scoder at codespeak.net Sat Jul 19 10:22:17 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 19 Jul 2008 10:22:17 +0200 (CEST)
Subject: [Lxml-checkins] r56656 - in lxml/trunk: . doc
Message-ID: <20080719082217.691532A805E@codespeak.net>
Author: scoder
Date: Sat Jul 19 10:22:14 2008
New Revision: 56656
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/FAQ.txt
Log:
r4641 at delle: sbehnel | 2008-07-19 10:19:32 +0200
FAQ: new entry on testing Element types, some fixes
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Sat Jul 19 10:22:14 2008
@@ -138,7 +138,8 @@
As an XML library, lxml is often used under the hood of in-house
server applications, such as web servers or applications that
-facilitate some kind of document management. Therefore, it is hard to
+facilitate some kind of document management. Many people who deploy
+Zope_ or Plone_ use it together with lxml. Therefore, it is hard to
get an idea of who uses it, and the following list of 'users and
projects we know of' is definitely not a complete list of lxml's
users.
@@ -166,6 +167,8 @@
And don't miss the quotes by our generally happy_ users_, and other
`sites that link to lxml`_.
+.. _Zope: http://www.zope.org/
+.. _Plone: http://www.plone.org/
.. _cssutils: http://code.google.com/p/cssutils/source/browse/trunk/examples/style.py?r=917
.. _Deliverance: http://www.openplans.org/projects/deliverance/project-home
.. _`Enfold Proxy 4`: http://www.enfoldsystems.com/Products/Proxy/4
@@ -251,11 +254,11 @@
>>> print(etree.tostring(root))
TEXT
- >>> etree.tail = "TAIL"
+ >>> root.tail = "TAIL"
>>> print(etree.tostring(root))
TEXTTAIL
- >>> etree.tail = None
+ >>> root.tail = None
>>> print(etree.tostring(root))
TEXT
@@ -274,6 +277,22 @@
places, as most HTML algorithms benefit from a tail-free behaviour.
+How can I find out if an Element is a comment or PI?
+----------------------------------------------------
+
+.. sourcecode:: pycon
+
+ >>> from lxml import etree
+ >>> root = etree.XML("<?my PI?><root><!-- empty --></root>")
+
+ >>> root.tag
+ 'root'
+ >>> root.getprevious().tag is etree.PI
+ True
+ >>> root[0].tag is etree.Comment
+ True
+
+
Installation
============
@@ -723,7 +742,7 @@
.. sourcecode:: pycon
>>> parser = etree.XMLParser(remove_blank_text=True)
- >>> tree = etree.parse(file, parser)
+ >>> tree = etree.parse("test.xml", parser)
This will allow the parser to drop blank text nodes when constructing the
tree. If you now call a serialization function to pretty print this tree,
From scoder at codespeak.net Sat Jul 19 10:22:20 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 19 Jul 2008 10:22:20 +0200 (CEST)
Subject: [Lxml-checkins] r56657 - in lxml/trunk: . src/lxml/tests
Message-ID: <20080719082220.DC7E12A805E@codespeak.net>
Author: scoder
Date: Sat Jul 19 10:22:19 2008
New Revision: 56657
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/tests/test_etree.py
Log:
r4642 at delle: sbehnel | 2008-07-19 10:19:49 +0200
run doctests in FAQ during etree testing
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Sat Jul 19 10:22:19 2008
@@ -2404,6 +2404,8 @@
suite.addTests(
[make_doctest('../../../doc/api.txt')])
suite.addTests(
+ [make_doctest('../../../doc/FAQ.txt')])
+ suite.addTests(
[make_doctest('../../../doc/parsing.txt')])
suite.addTests(
[make_doctest('../../../doc/resolvers.txt')])
From scoder at codespeak.net Sat Jul 19 10:24:44 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 19 Jul 2008 10:24:44 +0200 (CEST)
Subject: [Lxml-checkins] r56658 - in lxml/branch/lxml-2.1: doc src/lxml/tests
Message-ID: <20080719082444.418E92A805E@codespeak.net>
Author: scoder
Date: Sat Jul 19 10:24:43 2008
New Revision: 56658
Modified:
lxml/branch/lxml-2.1/doc/FAQ.txt
lxml/branch/lxml-2.1/src/lxml/tests/test_etree.py
Log:
merged FAQ updates from trunk
Modified: lxml/branch/lxml-2.1/doc/FAQ.txt
==============================================================================
--- lxml/branch/lxml-2.1/doc/FAQ.txt (original)
+++ lxml/branch/lxml-2.1/doc/FAQ.txt Sat Jul 19 10:24:43 2008
@@ -138,7 +138,8 @@
As an XML library, lxml is often used under the hood of in-house
server applications, such as web servers or applications that
-facilitate some kind of document management. Therefore, it is hard to
+facilitate some kind of document management. Many people who deploy
+Zope_ or Plone_ use it together with lxml. Therefore, it is hard to
get an idea of who uses it, and the following list of 'users and
projects we know of' is definitely not a complete list of lxml's
users.
@@ -166,6 +167,8 @@
And don't miss the quotes by our generally happy_ users_, and other
`sites that link to lxml`_.
+.. _Zope: http://www.zope.org/
+.. _Plone: http://www.plone.org/
.. _cssutils: http://code.google.com/p/cssutils/source/browse/trunk/examples/style.py?r=917
.. _Deliverance: http://www.openplans.org/projects/deliverance/project-home
.. _`Enfold Proxy 4`: http://www.enfoldsystems.com/Products/Proxy/4
@@ -251,11 +254,11 @@
>>> print(etree.tostring(root))
TEXT
- >>> etree.tail = "TAIL"
+ >>> root.tail = "TAIL"
>>> print(etree.tostring(root))
TEXTTAIL
- >>> etree.tail = None
+ >>> root.tail = None
>>> print(etree.tostring(root))
TEXT
@@ -274,6 +277,22 @@
places, as most HTML algorithms benefit from a tail-free behaviour.
+How can I find out if an Element is a comment or PI?
+----------------------------------------------------
+
+.. sourcecode:: pycon
+
+ >>> from lxml import etree
+ >>> root = etree.XML("<?my PI?><root><!-- empty --></root>")
+
+ >>> root.tag
+ 'root'
+ >>> root.getprevious().tag is etree.PI
+ True
+ >>> root[0].tag is etree.Comment
+ True
+
+
Installation
============
@@ -723,7 +742,7 @@
.. sourcecode:: pycon
>>> parser = etree.XMLParser(remove_blank_text=True)
- >>> tree = etree.parse(file, parser)
+ >>> tree = etree.parse("test.xml", parser)
This will allow the parser to drop blank text nodes when constructing the
tree. If you now call a serialization function to pretty print this tree,
Modified: lxml/branch/lxml-2.1/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/branch/lxml-2.1/src/lxml/tests/test_etree.py (original)
+++ lxml/branch/lxml-2.1/src/lxml/tests/test_etree.py Sat Jul 19 10:24:43 2008
@@ -2404,6 +2404,8 @@
suite.addTests(
[make_doctest('../../../doc/api.txt')])
suite.addTests(
+ [make_doctest('../../../doc/FAQ.txt')])
+ suite.addTests(
[make_doctest('../../../doc/parsing.txt')])
suite.addTests(
[make_doctest('../../../doc/resolvers.txt')])
From scoder at codespeak.net Sat Jul 19 11:53:24 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 19 Jul 2008 11:53:24 +0200 (CEST)
Subject: [Lxml-checkins] r56661 - lxml/branch/lxml-2.1/doc
Message-ID: <20080719095324.DE0D7169FF6@codespeak.net>
Author: scoder
Date: Sat Jul 19 11:53:24 2008
New Revision: 56661
Modified:
lxml/branch/lxml-2.1/doc/FAQ.txt
Log:
make doctest happy
Modified: lxml/branch/lxml-2.1/doc/FAQ.txt
==============================================================================
--- lxml/branch/lxml-2.1/doc/FAQ.txt (original)
+++ lxml/branch/lxml-2.1/doc/FAQ.txt Sat Jul 19 11:53:24 2008
@@ -726,6 +726,14 @@
Parsing and Serialisation
=========================
+..
+ making doctest happy:
+
+ >>> try: from StringIO import StringIO
+ ... except ImportError: from io import StringIO # Py3
+ >>> filename = StringIO("<root/>")
+
+
Why doesn't the ``pretty_print`` option reformat my XML output?
---------------------------------------------------------------
@@ -742,7 +750,7 @@
.. sourcecode:: pycon
>>> parser = etree.XMLParser(remove_blank_text=True)
- >>> tree = etree.parse("test.xml", parser)
+ >>> tree = etree.parse(filename, parser)
This will allow the parser to drop blank text nodes when constructing the
tree. If you now call a serialization function to pretty print this tree,
From scoder at codespeak.net Sat Jul 19 11:53:39 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 19 Jul 2008 11:53:39 +0200 (CEST)
Subject: [Lxml-checkins] r56662 - in lxml/trunk: . doc
Message-ID: <20080719095339.2EAB2169FF6@codespeak.net>
Author: scoder
Date: Sat Jul 19 11:53:38 2008
New Revision: 56662
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/FAQ.txt
Log:
r4645 at delle: sbehnel | 2008-07-19 11:51:16 +0200
make doctest happy
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Sat Jul 19 11:53:38 2008
@@ -726,6 +726,14 @@
Parsing and Serialisation
=========================
+..
+ making doctest happy:
+
+ >>> try: from StringIO import StringIO
+ ... except ImportError: from io import StringIO # Py3
+ >>> filename = StringIO("<root/>")
+
+
Why doesn't the ``pretty_print`` option reformat my XML output?
---------------------------------------------------------------
@@ -742,7 +750,7 @@
.. sourcecode:: pycon
>>> parser = etree.XMLParser(remove_blank_text=True)
- >>> tree = etree.parse("test.xml", parser)
+ >>> tree = etree.parse(filename, parser)
This will allow the parser to drop blank text nodes when constructing the
tree. If you now call a serialization function to pretty print this tree,
From scoder at codespeak.net Thu Jul 24 08:11:40 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 24 Jul 2008 08:11:40 +0200 (CEST)
Subject: [Lxml-checkins] r56751 - in lxml/trunk: . src/lxml
Message-ID: <20080724061140.C8912169E01@codespeak.net>
Author: scoder
Date: Thu Jul 24 08:11:37 2008
New Revision: 56751
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/xslt.pxi
Log:
r4649 at delle: sbehnel | 2008-07-20 12:49:24 +0200
cleanup
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Thu Jul 24 08:11:37 2008
@@ -417,11 +417,11 @@
def __get__(self):
return self._error_log.copy()
- def apply(self, _input, *, profile_run=False, **_kw):
- u"""apply(self, _input, profile_run=False, **_kw)
+ def apply(self, _input, *, profile_run=False, **kw):
+ u"""apply(self, _input, profile_run=False, **kw)
:deprecated: call the object, not this method."""
- return self(_input, profile_run=profile_run, **_kw)
+ return self(_input, profile_run=profile_run, **kw)
def tostring(self, _ElementTree result_tree):
u"""tostring(self, result_tree)
@@ -438,8 +438,8 @@
def __copy__(self):
return _copyXSLT(self)
- def __call__(self, _input, *, profile_run=False, **_kw):
- u"""__call__(self, _input, profile_run=False, **_kw)
+ def __call__(self, _input, *, profile_run=False, **kw):
+ u"""__call__(self, _input, profile_run=False, **kw)
Execute the XSL transformation on a tree or Element.
@@ -483,7 +483,7 @@
transform_ctxt._private = resolver_context
c_result = self._run_transform(
- c_doc, _kw, context, transform_ctxt)
+ c_doc, kw, context, transform_ctxt)
if transform_ctxt.state != xslt.XSLT_STATE_OK:
if c_result is not NULL:
From scoder at codespeak.net Thu Jul 24 08:11:46 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 24 Jul 2008 08:11:46 +0200 (CEST)
Subject: [Lxml-checkins] r56752 - in lxml/trunk: . src/lxml
Message-ID: <20080724061146.17ACB169E04@codespeak.net>
Author: scoder
Date: Thu Jul 24 08:11:45 2008
New Revision: 56752
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/etree_defs.h
lxml/trunk/src/lxml/tree.pxd
Log:
r4650 at delle: sbehnel | 2008-07-23 22:46:57 +0200
new macros to traverse all nodes in the tree, not only elements
Modified: lxml/trunk/src/lxml/etree_defs.h
==============================================================================
--- lxml/trunk/src/lxml/etree_defs.h (original)
+++ lxml/trunk/src/lxml/etree_defs.h Thu Jul 24 08:11:45 2008
@@ -160,7 +160,9 @@
*
* Calls the code block between the BEGIN and END macros for all elements
* below c_tree_top (exclusively), starting at c_node (inclusively iff
- * 'inclusive' is 1).
+ * 'inclusive' is 1). The _ELEMENT_ variants will only stop on nodes
+ * that match _isElement(), the normal variant will stop on every node
+ * except text nodes.
*
* To traverse the node and all of its children and siblings in Pyrex, call
* cdef xmlNode* some_node
@@ -187,63 +189,80 @@
* should not segfault !
*/
-#define _ADVANCE_TO_NEXT_ELEMENT(c_node) \
- while ((c_node != 0) && (!_isElement(c_node))) \
+#define _LX__ELEMENT_MATCH(c_node, only_elements) \
+ ((only_elements) ? (_isElement(c_node)) : ((c_node)->type != XML_TEXT_NODE))
+
+#define _LX__ADVANCE_TO_NEXT(c_node, only_elements) \
+ while ((c_node != 0) && (!_LX__ELEMENT_MATCH(c_node, only_elements))) \
c_node = c_node->next;
-#define _TRAVERSE_TO_NEXT_ELEMENT(c_stop_node, c_node) \
-{ \
- /* walk through children first */ \
- xmlNode* ___next = c_node->children; \
- _ADVANCE_TO_NEXT_ELEMENT(___next) \
- if ((___next == 0) && (c_node != c_stop_node)) { \
- /* try siblings */ \
- ___next = c_node->next; \
- _ADVANCE_TO_NEXT_ELEMENT(___next) \
- /* back off through parents */ \
- while (___next == 0) { \
- c_node = c_node->parent; \
- if (c_node == 0) \
- break; \
- if (c_node == c_stop_node) \
- break; \
- if (!_isElement(c_node)) \
- break; \
- /* we already traversed the parents -> siblings */ \
- ___next = c_node->next; \
- _ADVANCE_TO_NEXT_ELEMENT(___next) \
- } \
- } \
- c_node = ___next; \
+#define _LX__TRAVERSE_TO_NEXT(c_stop_node, c_node, only_elements) \
+{ \
+ /* walk through children first */ \
+ xmlNode* _lx__next = c_node->children; \
+ _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \
+ if ((_lx__next == 0) && (c_node != c_stop_node)) { \
+ /* try siblings */ \
+ _lx__next = c_node->next; \
+ _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \
+ /* back off through parents */ \
+ while (_lx__next == 0) { \
+ c_node = c_node->parent; \
+ if (c_node == 0) \
+ break; \
+ if (c_node == c_stop_node) \
+ break; \
+ if ((only_elements) && !_isElement(c_node)) \
+ break; \
+ /* we already traversed the parents -> siblings */ \
+ _lx__next = c_node->next; \
+ _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \
+ } \
+ } \
+ c_node = _lx__next; \
}
-#define BEGIN_FOR_EACH_ELEMENT_FROM(c_tree_top, c_node, inclusive) \
-{ \
- if (c_node != 0) { \
- const xmlNode* ___tree_top = (c_tree_top); \
- /* make sure we start at an element */ \
- if (!_isElement(c_node)) { \
- /* we skip the node, so 'inclusive' is irrelevant */ \
- if (c_node == ___tree_top) \
- c_node = 0; /* nothing to traverse */ \
- else { \
- c_node = c_node->next; \
- _ADVANCE_TO_NEXT_ELEMENT(c_node) \
- } \
- } else if (! (inclusive)) { \
- /* skip the first node */ \
- _TRAVERSE_TO_NEXT_ELEMENT(___tree_top, c_node) \
- } \
- \
- /* now run the user code on the elements we find */ \
- while (c_node != 0) { \
+#define _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, only_elements) \
+{ \
+ if (c_node != 0) { \
+ const xmlNode* _lx__tree_top = (c_tree_top); \
+ const int _lx__only_elements = (only_elements); \
+ /* make sure we start at an element */ \
+ if (!_LX__ELEMENT_MATCH(c_node, _lx__only_elements)) { \
+ /* we skip the node, so 'inclusive' is irrelevant */ \
+ if (c_node == _lx__tree_top) \
+ c_node = 0; /* nothing to traverse */ \
+ else { \
+ c_node = c_node->next; \
+ _LX__ADVANCE_TO_NEXT(c_node, _lx__only_elements) \
+ } \
+ } else if (! (inclusive)) { \
+ /* skip the first node */ \
+ _LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \
+ } \
+ \
+ /* now run the user code on the elements we find */ \
+ while (c_node != 0) { \
/* here goes the code to be run for each element */
-#define END_FOR_EACH_ELEMENT_FROM(c_node) \
- _TRAVERSE_TO_NEXT_ELEMENT(___tree_top, c_node) \
- } \
- } \
+#define _LX__END_FOR_EACH_FROM(c_node) \
+ _LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \
+ } \
+ } \
}
+#define BEGIN_FOR_EACH_ELEMENT_FROM(c_tree_top, c_node, inclusive) \
+ _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 1)
+
+#define END_FOR_EACH_ELEMENT_FROM(c_node) \
+ _LX__END_FOR_EACH_FROM(c_node)
+
+#define BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive) \
+ _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 0)
+
+#define END_FOR_EACH_FROM(c_node) \
+ _LX__END_FOR_EACH_FROM(c_node)
+
+
#endif /* HAS_ETREE_DEFS_H */
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Thu Jul 24 08:11:45 2008
@@ -322,3 +322,7 @@
xmlNode* start_node,
bint inclusive) nogil
cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* start_node) nogil
+ cdef void BEGIN_FOR_EACH_FROM(xmlNode* tree_top,
+ xmlNode* start_node,
+ bint inclusive) nogil
+ cdef void END_FOR_EACH_FROM(xmlNode* start_node) nogil
From scoder at codespeak.net Thu Jul 24 08:11:51 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 24 Jul 2008 08:11:51 +0200 (CEST)
Subject: [Lxml-checkins] r56753 - in lxml/trunk: . src/lxml
Message-ID: <20080724061151.5FA5A169E0A@codespeak.net>
Author: scoder
Date: Thu Jul 24 08:11:50 2008
New Revision: 56753
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/proxy.pxi
lxml/trunk/src/lxml/xslt.pxi
Log:
r4651 at delle: sbehnel | 2008-07-24 08:06:43 +0200
more fine-grained dictionary name cleanup on threading: include attribute values and content
Modified: lxml/trunk/src/lxml/proxy.pxi
==============================================================================
--- lxml/trunk/src/lxml/proxy.pxi (original)
+++ lxml/trunk/src/lxml/proxy.pxi Thu Jul 24 08:11:50 2008
@@ -398,7 +398,7 @@
# 3) fix the names in the tree in case we moved it to a different thread
if doc._c_doc.dict is not c_source_doc.dict:
- fixThreadDictNames(c_start_node, doc._c_doc.dict)
+ fixThreadDictNames(c_start_node, c_source_doc.dict, doc._c_doc.dict)
# free now unused namespace declarations
if c_del_ns_list is not NULL:
@@ -413,26 +413,75 @@
return 0
-cdef void fixThreadDictNames(xmlNode* c_element, tree.xmlDict* c_dict) nogil:
+cdef void fixThreadDictNames(xmlNode* c_element,
+ tree.xmlDict* c_src_dict,
+ tree.xmlDict* c_dict) nogil:
# re-assign the names of tags and attributes
#
# this should only be called when the element is based on a
# different libxml2 tag name dictionary
- cdef xmlNode* c_node
- cdef char* c_name
- if not tree._isElementOrXInclude(c_element):
+ if c_element.type == tree.XML_DOCUMENT_NODE or \
+ c_element.type == tree.XML_HTML_DOCUMENT_NODE:
+ # may define "xml" namespace
+ fixThreadDictNsForNode(c_element, c_src_dict, c_dict)
+ c_element = c_element.children
+ while c_element is not NULL:
+ fixThreadDictNames(c_element, c_src_dict, c_dict)
+ c_element = c_element.next
+ return
+ elif not tree._isElementOrXInclude(c_element):
return
- tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_element, c_element, 1)
- c_name = tree.xmlDictLookup(c_dict, c_element.name, -1)
+ tree.BEGIN_FOR_EACH_FROM(c_element, c_element, 1)
+ if c_element.name is not NULL:
+ fixThreadDictNameForNode(c_element, c_dict)
+ if c_element.type == tree.XML_ELEMENT_NODE:
+ fixThreadDictNamesForAttributes(
+ c_element.properties, c_src_dict, c_dict)
+ tree.END_FOR_EACH_FROM(c_element)
+
+cdef inline void fixThreadDictNamesForAttributes(tree.xmlAttr* c_attr,
+ tree.xmlDict* c_src_dict,
+ tree.xmlDict* c_dict) nogil:
+ cdef xmlNode* c_child
+ cdef xmlNode* c_node
+ c_node = c_attr
+ while c_node is not NULL:
+ fixThreadDictNameForNode(c_node, c_dict)
+ fixThreadDictContentForNode(c_node, c_src_dict, c_dict)
+ # libxml2 keeps some (!) attribute values in the dict
+ c_child = c_node.children
+ while c_child is not NULL:
+ fixThreadDictNameForNode(c_child, c_dict)
+ fixThreadDictContentForNode(c_child, c_src_dict, c_dict)
+ c_child = c_child.next
+ c_node = c_node.next
+
+cdef inline void fixThreadDictNameForNode(xmlNode* c_node,
+ tree.xmlDict* c_dict) nogil:
+ cdef char* c_name
# c_name can be NULL on memory error, but we don't handle that here
+ c_name = tree.xmlDictLookup(c_dict, c_node.name, -1)
if c_name is not NULL:
- c_element.name = c_name
- if c_element.type == tree.XML_ELEMENT_NODE:
- c_node = c_element.properties
- while c_node is not NULL:
- c_name = tree.xmlDictLookup(c_dict, c_node.name, -1)
- if c_name is not NULL:
- c_node.name = c_name
- c_node = c_node.next
- tree.END_FOR_EACH_ELEMENT_FROM(c_element)
+ c_node.name = c_name
+
+cdef inline void fixThreadDictContentForNode(xmlNode* c_node,
+ tree.xmlDict* c_src_dict,
+ tree.xmlDict* c_dict) nogil:
+ if c_node.content is not NULL:
+ if tree.xmlDictOwns(c_src_dict, c_node.content):
+ # result can be NULL on memory error, but we don't handle that here
+ c_node.content = tree.xmlDictLookup(c_dict, c_node.content, -1)
+
+cdef inline void fixThreadDictNsForNode(xmlNode* c_node,
+ tree.xmlDict* c_src_dict,
+ tree.xmlDict* c_dict) nogil:
+ cdef xmlNs* c_ns = c_node.nsDef
+ while c_ns is not NULL:
+ if c_ns.href is not NULL:
+ if tree.xmlDictOwns(c_src_dict, c_ns.href):
+ c_ns.href = tree.xmlDictLookup(c_dict, c_ns.href, -1)
+ if c_ns.prefix is not NULL:
+ if tree.xmlDictOwns(c_src_dict, c_ns.prefix):
+ c_ns.prefix = tree.xmlDictLookup(c_dict, c_ns.prefix, -1)
+ c_ns = c_ns.next
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Thu Jul 24 08:11:50 2008
@@ -458,6 +458,7 @@
cdef xmlDoc* c_result
cdef xmlDoc* c_doc
cdef xmlNode* c_node
+ cdef tree.xmlDict* c_dict
input_doc = _documentOrRaise(_input)
root_node = _rootNodeOrRaise(_input)
@@ -533,13 +534,21 @@
result_doc = _documentFactory(c_result, input_doc._parser)
- if not _checkThreadDict(c_result.dict):
- # fix document dictionary
- c_node = _findChildForwards(c_result, 0)
- if c_node is not NULL:
- __GLOBAL_PARSER_CONTEXT.initThreadDictRef(&c_result.dict)
- with nogil:
- fixThreadDictNames(c_node, c_result.dict)
+ c_dict = c_result.dict
+ __GLOBAL_PARSER_CONTEXT.initThreadDictRef(&c_result.dict)
+ if c_dict is not c_result.dict or \
+ self._c_style.doc.dict is not c_result.dict or \
+ input_doc._c_doc.dict is not c_result.dict:
+ with nogil:
+ if c_dict is not c_result.dict:
+ fixThreadDictNames(c_result,
+ c_dict, c_result.dict)
+ if self._c_style.doc.dict is not c_result.dict:
+ fixThreadDictNames(c_result,
+ self._c_style.doc.dict, c_result.dict)
+ if input_doc._c_doc.dict is not c_result.dict:
+ fixThreadDictNames(c_result,
+ input_doc._c_doc.dict, c_result.dict)
return _xsltResultTreeFactory(result_doc, self, profile_doc)
From scoder at codespeak.net Thu Jul 24 08:11:55 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 24 Jul 2008 08:11:55 +0200 (CEST)
Subject: [Lxml-checkins] r56754 - lxml/trunk
Message-ID: <20080724061155.CF4EE169E0B@codespeak.net>
Author: scoder
Date: Thu Jul 24 08:11:55 2008
New Revision: 56754
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
Log:
r4652 at delle: sbehnel | 2008-07-24 08:07:57 +0200
changelog
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Thu Jul 24 08:11:55 2008
@@ -13,6 +13,9 @@
Bugs fixed
----------
+* Crash when parsing XSLT stylesheets in a thread and using them in
+ another.
+
* Encoding problem when including text with ElementInclude under
Python 3.
From scoder at codespeak.net Thu Jul 24 08:47:21 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 24 Jul 2008 08:47:21 +0200 (CEST)
Subject: [Lxml-checkins] r56755 - lxml/branch/lxml-2.0
Message-ID: <20080724064721.82828169E32@codespeak.net>
Author: scoder
Date: Thu Jul 24 08:47:20 2008
New Revision: 56755
Modified:
lxml/branch/lxml-2.0/version.txt
Log:
version
Modified: lxml/branch/lxml-2.0/version.txt
==============================================================================
--- lxml/branch/lxml-2.0/version.txt (original)
+++ lxml/branch/lxml-2.0/version.txt Thu Jul 24 08:47:20 2008
@@ -1 +1 @@
-2.0.7
+2.0.8
From scoder at codespeak.net Thu Jul 24 08:49:43 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 24 Jul 2008 08:49:43 +0200 (CEST)
Subject: [Lxml-checkins] r56756 - in lxml/branch/lxml-2.0: . src/lxml
src/lxml/tests
Message-ID: <20080724064943.D3255169E37@codespeak.net>
Author: scoder
Date: Thu Jul 24 08:49:43 2008
New Revision: 56756
Modified:
lxml/branch/lxml-2.0/CHANGES.txt
lxml/branch/lxml-2.0/src/lxml/etree_defs.h
lxml/branch/lxml-2.0/src/lxml/proxy.pxi
lxml/branch/lxml-2.0/src/lxml/tests/test_threading.py
lxml/branch/lxml-2.0/src/lxml/tree.pxd
lxml/branch/lxml-2.0/src/lxml/xslt.pxi
Log:
merged XSLT threading fix from trunk
Modified: lxml/branch/lxml-2.0/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-2.0/CHANGES.txt (original)
+++ lxml/branch/lxml-2.0/CHANGES.txt Thu Jul 24 08:49:43 2008
@@ -14,6 +14,9 @@
Bugs fixed
----------
+* Crash when parsing XSLT stylesheets in a thread and using them in
+ another.
+
* CSS selector parser dropped remaining expression after a function
with parameters.
Modified: lxml/branch/lxml-2.0/src/lxml/etree_defs.h
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/etree_defs.h (original)
+++ lxml/branch/lxml-2.0/src/lxml/etree_defs.h Thu Jul 24 08:49:43 2008
@@ -139,7 +139,9 @@
*
* Calls the code block between the BEGIN and END macros for all elements
* below c_tree_top (exclusively), starting at c_node (inclusively iff
- * 'inclusive' is 1).
+ * 'inclusive' is 1). The _ELEMENT_ variants will only stop on nodes
+ * that match _isElement(), the normal variant will stop on every node
+ * except text nodes.
*
* To traverse the node and all of its children and siblings in Pyrex, call
* cdef xmlNode* some_node
@@ -166,63 +168,80 @@
* should not segfault !
*/
-#define _ADVANCE_TO_NEXT_ELEMENT(c_node) \
- while ((c_node != 0) && (!_isElement(c_node))) \
+#define _LX__ELEMENT_MATCH(c_node, only_elements) \
+ ((only_elements) ? (_isElement(c_node)) : ((c_node)->type != XML_TEXT_NODE))
+
+#define _LX__ADVANCE_TO_NEXT(c_node, only_elements) \
+ while ((c_node != 0) && (!_LX__ELEMENT_MATCH(c_node, only_elements))) \
c_node = c_node->next;
-#define _TRAVERSE_TO_NEXT_ELEMENT(c_stop_node, c_node) \
-{ \
- /* walk through children first */ \
- xmlNode* ___next = c_node->children; \
- _ADVANCE_TO_NEXT_ELEMENT(___next) \
- if ((___next == 0) && (c_node != c_stop_node)) { \
- /* try siblings */ \
- ___next = c_node->next; \
- _ADVANCE_TO_NEXT_ELEMENT(___next) \
- /* back off through parents */ \
- while (___next == 0) { \
- c_node = c_node->parent; \
- if (c_node == 0) \
- break; \
- if (c_node == c_stop_node) \
- break; \
- if (!_isElement(c_node)) \
- break; \
- /* we already traversed the parents -> siblings */ \
- ___next = c_node->next; \
- _ADVANCE_TO_NEXT_ELEMENT(___next) \
- } \
- } \
- c_node = ___next; \
+#define _LX__TRAVERSE_TO_NEXT(c_stop_node, c_node, only_elements) \
+{ \
+ /* walk through children first */ \
+ xmlNode* _lx__next = c_node->children; \
+ _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \
+ if ((_lx__next == 0) && (c_node != c_stop_node)) { \
+ /* try siblings */ \
+ _lx__next = c_node->next; \
+ _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \
+ /* back off through parents */ \
+ while (_lx__next == 0) { \
+ c_node = c_node->parent; \
+ if (c_node == 0) \
+ break; \
+ if (c_node == c_stop_node) \
+ break; \
+ if ((only_elements) && !_isElement(c_node)) \
+ break; \
+ /* we already traversed the parents -> siblings */ \
+ _lx__next = c_node->next; \
+ _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \
+ } \
+ } \
+ c_node = _lx__next; \
}
-#define BEGIN_FOR_EACH_ELEMENT_FROM(c_tree_top, c_node, inclusive) \
-{ \
- if (c_node != 0) { \
- const xmlNode* ___tree_top = (c_tree_top); \
- /* make sure we start at an element */ \
- if (!_isElement(c_node)) { \
- /* we skip the node, so 'inclusive' is irrelevant */ \
- if (c_node == ___tree_top) \
- c_node = 0; /* nothing to traverse */ \
- else { \
- c_node = c_node->next; \
- _ADVANCE_TO_NEXT_ELEMENT(c_node) \
- } \
- } else if (! (inclusive)) { \
- /* skip the first node */ \
- _TRAVERSE_TO_NEXT_ELEMENT(___tree_top, c_node) \
- } \
- \
- /* now run the user code on the elements we find */ \
- while (c_node != 0) { \
+#define _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, only_elements) \
+{ \
+ if (c_node != 0) { \
+ const xmlNode* _lx__tree_top = (c_tree_top); \
+ const int _lx__only_elements = (only_elements); \
+ /* make sure we start at an element */ \
+ if (!_LX__ELEMENT_MATCH(c_node, _lx__only_elements)) { \
+ /* we skip the node, so 'inclusive' is irrelevant */ \
+ if (c_node == _lx__tree_top) \
+ c_node = 0; /* nothing to traverse */ \
+ else { \
+ c_node = c_node->next; \
+ _LX__ADVANCE_TO_NEXT(c_node, _lx__only_elements) \
+ } \
+ } else if (! (inclusive)) { \
+ /* skip the first node */ \
+ _LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \
+ } \
+ \
+ /* now run the user code on the elements we find */ \
+ while (c_node != 0) { \
/* here goes the code to be run for each element */
-#define END_FOR_EACH_ELEMENT_FROM(c_node) \
- _TRAVERSE_TO_NEXT_ELEMENT(___tree_top, c_node) \
- } \
- } \
+#define _LX__END_FOR_EACH_FROM(c_node) \
+ _LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \
+ } \
+ } \
}
+#define BEGIN_FOR_EACH_ELEMENT_FROM(c_tree_top, c_node, inclusive) \
+ _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 1)
+
+#define END_FOR_EACH_ELEMENT_FROM(c_node) \
+ _LX__END_FOR_EACH_FROM(c_node)
+
+#define BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive) \
+ _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 0)
+
+#define END_FOR_EACH_FROM(c_node) \
+ _LX__END_FOR_EACH_FROM(c_node)
+
+
#endif /* HAS_ETREE_DEFS_H */
Modified: lxml/branch/lxml-2.0/src/lxml/proxy.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/proxy.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/proxy.pxi Thu Jul 24 08:49:43 2008
@@ -5,7 +5,7 @@
# the Python class
cdef inline _Element getProxy(xmlNode* c_node):
- """Get a proxy for a given node.
+ u"""Get a proxy for a given node.
"""
#print "getProxy for:", c_node
if c_node is not NULL and c_node._private is not NULL:
@@ -17,7 +17,7 @@
return c_node._private is not NULL
cdef inline int _registerProxy(_Element proxy) except -1:
- """Register a proxy and type for the node it's proxying for.
+ u"""Register a proxy and type for the node it's proxying for.
"""
cdef xmlNode* c_node
# cannot register for NULL
@@ -32,31 +32,33 @@
python.Py_INCREF(proxy._doc)
cdef inline int _unregisterProxy(_Element proxy) except -1:
- """Unregister a proxy for the node it's proxying for.
+ u"""Unregister a proxy for the node it's proxying for.
"""
cdef xmlNode* c_node
c_node = proxy._c_node
- assert c_node._private is proxy, "Tried to unregister unknown proxy"
+ assert c_node._private is proxy, u"Tried to unregister unknown proxy"
c_node._private = NULL
return 0
cdef inline void _releaseProxy(_Element proxy):
- """An additional DECREF for the document.
+ u"""An additional DECREF for the document.
"""
python.Py_XDECREF(proxy._gc_doc)
proxy._gc_doc = NULL
cdef inline void _updateProxyDocument(xmlNode* c_node, _Document doc):
- """Replace the document reference of a proxy.
+ u"""Replace the document reference of a proxy.
This may deallocate the original document of the proxy!
"""
+ cdef _Document old_doc
cdef _Element element = <_Element>c_node._private
if element._doc is not doc:
- python.Py_INCREF(doc)
- python.Py_DECREF(element._doc)
+ old_doc = element._doc
element._doc = doc
+ python.Py_INCREF(doc)
element._gc_doc = doc
+ python.Py_DECREF(old_doc)
################################################################################
# temporarily make a node the root node of its document
@@ -116,7 +118,7 @@
tree.xmlFreeDoc(c_doc)
cdef _Element _fakeDocElementFactory(_Document doc, xmlNode* c_element):
- """Special element factory for cases where we need to create a fake
+ u"""Special element factory for cases where we need to create a fake
root document, but still need to instantiate arbitrary nodes from
it. If we instantiate the fake root node, things will turn bad
when it's destroyed.
@@ -135,7 +137,7 @@
# support for freeing tree elements when proxy objects are destroyed
cdef int attemptDeallocation(xmlNode* c_node):
- """Attempt deallocation of c_node (or higher up in tree).
+ u"""Attempt deallocation of c_node (or higher up in tree).
"""
cdef xmlNode* c_top
# could be we actually aren't referring to the tree at all
@@ -151,7 +153,7 @@
return 0
cdef xmlNode* getDeallocationTop(xmlNode* c_node):
- """Return the top of the tree that can be deallocated, or NULL.
+ u"""Return the top of the tree that can be deallocated, or NULL.
"""
cdef xmlNode* c_current
cdef xmlNode* c_top
@@ -192,7 +194,7 @@
# fix _Document references and namespaces when a node changes documents
cdef void _copyParentNamespaces(xmlNode* c_from_node, xmlNode* c_to_node) nogil:
- """Copy the namespaces of all ancestors of c_from_node to c_to_node.
+ u"""Copy the namespaces of all ancestors of c_from_node to c_to_node.
"""
cdef xmlNode* c_parent
cdef xmlNs* c_ns
@@ -245,7 +247,7 @@
cdef int _stripRedundantNamespaceDeclarations(
xmlNode* c_element, _nscache* c_ns_cache, xmlNs** c_del_ns_list) except -1:
- """Removes namespace declarations from an element that are already
+ u"""Removes namespace declarations from an element that are already
defined in its parents. Does not free the xmlNs's, just prepends
them to the c_del_ns_list.
"""
@@ -276,7 +278,7 @@
cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,
xmlNode* c_element) except -1:
- """Fix the xmlNs pointers of a node and its subtree that were moved.
+ u"""Fix the xmlNs pointers of a node and its subtree that were moved.
Mainly copied from libxml2's xmlReconciliateNs(). Expects libxml2 doc
pointers of node to be correct already, but fixes _Document references.
@@ -313,18 +315,10 @@
cdef xmlNs* c_nsdef
cdef xmlNs* c_del_ns_list
cdef cstd.size_t i
- cdef tree.xmlDict* c_dict
if not tree._isElementOrXInclude(c_element):
return 0
- # we need to copy the names of tags and attributes iff the element
- # is based on a different libxml2 tag name dictionary
- if doc._c_doc.dict is not c_source_doc.dict:
- c_dict = doc._c_doc.dict
- else:
- c_dict = NULL
-
c_start_node = c_element
c_del_ns_list = NULL
@@ -356,14 +350,6 @@
_appendToNsCache(&c_ns_cache, c_node.ns, c_ns)
c_node.ns = c_ns
- # 3) re-assign names from the target dict
- if c_dict is not NULL:
- c_name = tree.xmlDictLookup(c_dict, c_node.name, -1)
- # c_name can be NULL on memory error, but we don't
- # handle that here
- if c_name is not NULL:
- c_node.name = c_name
-
if c_node is c_element:
# after the element, continue with its attributes
c_node = c_element.properties
@@ -412,6 +398,10 @@
break # all done
c_element = c_node
+ # 3) fix the names in the tree in case we moved it to a different thread
+ if doc._c_doc.dict is not c_source_doc.dict:
+ fixThreadDictNames(c_start_node, c_source_doc.dict, doc._c_doc.dict)
+
# free now unused namespace declarations
if c_del_ns_list is not NULL:
tree.xmlFreeNsList(c_del_ns_list)
@@ -423,3 +413,77 @@
cstd.free(c_ns_cache.old)
return 0
+
+
+cdef void fixThreadDictNames(xmlNode* c_element,
+ tree.xmlDict* c_src_dict,
+ tree.xmlDict* c_dict) nogil:
+ # re-assign the names of tags and attributes
+ #
+ # this should only be called when the element is based on a
+ # different libxml2 tag name dictionary
+ if c_element.type == tree.XML_DOCUMENT_NODE or \
+ c_element.type == tree.XML_HTML_DOCUMENT_NODE:
+ # may define "xml" namespace
+ fixThreadDictNsForNode(c_element, c_src_dict, c_dict)
+ c_element = c_element.children
+ while c_element is not NULL:
+ fixThreadDictNames(c_element, c_src_dict, c_dict)
+ c_element = c_element.next
+ return
+ elif not tree._isElementOrXInclude(c_element):
+ return
+
+ tree.BEGIN_FOR_EACH_FROM(c_element, c_element, 1)
+ if c_element.name is not NULL:
+ fixThreadDictNameForNode(c_element, c_dict)
+ if c_element.type == tree.XML_ELEMENT_NODE:
+ fixThreadDictNamesForAttributes(
+ c_element.properties, c_src_dict, c_dict)
+ tree.END_FOR_EACH_FROM(c_element)
+
+cdef inline void fixThreadDictNamesForAttributes(tree.xmlAttr* c_attr,
+ tree.xmlDict* c_src_dict,
+ tree.xmlDict* c_dict) nogil:
+ cdef xmlNode* c_child
+ cdef xmlNode* c_node
+ c_node = c_attr
+ while c_node is not NULL:
+ fixThreadDictNameForNode(c_node, c_dict)
+ fixThreadDictContentForNode(c_node, c_src_dict, c_dict)
+ # libxml2 keeps some (!) attribute values in the dict
+ c_child = c_node.children
+ while c_child is not NULL:
+ fixThreadDictNameForNode(c_child, c_dict)
+ fixThreadDictContentForNode(c_child, c_src_dict, c_dict)
+ c_child = c_child.next
+ c_node = c_node.next
+
+cdef inline void fixThreadDictNameForNode(xmlNode* c_node,
+ tree.xmlDict* c_dict) nogil:
+ cdef char* c_name
+ # c_name can be NULL on memory error, but we don't handle that here
+ c_name = tree.xmlDictLookup(c_dict, c_node.name, -1)
+ if c_name is not NULL:
+ c_node.name = c_name
+
+cdef inline void fixThreadDictContentForNode(xmlNode* c_node,
+ tree.xmlDict* c_src_dict,
+ tree.xmlDict* c_dict) nogil:
+ if c_node.content is not NULL:
+ if tree.xmlDictOwns(c_src_dict, c_node.content):
+ # result can be NULL on memory error, but we don't handle that here
+ c_node.content = tree.xmlDictLookup(c_dict, c_node.content, -1)
+
+cdef inline void fixThreadDictNsForNode(xmlNode* c_node,
+ tree.xmlDict* c_src_dict,
+ tree.xmlDict* c_dict) nogil:
+ cdef xmlNs* c_ns = c_node.nsDef
+ while c_ns is not NULL:
+ if c_ns.href is not NULL:
+ if tree.xmlDictOwns(c_src_dict, c_ns.href):
+ c_ns.href = tree.xmlDictLookup(c_dict, c_ns.href, -1)
+ if c_ns.prefix is not NULL:
+ if tree.xmlDictOwns(c_src_dict, c_ns.prefix):
+ c_ns.prefix = tree.xmlDictLookup(c_dict, c_ns.prefix, -1)
+ c_ns = c_ns.next
Modified: lxml/branch/lxml-2.0/src/lxml/tests/test_threading.py
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/tests/test_threading.py (original)
+++ lxml/branch/lxml-2.0/src/lxml/tests/test_threading.py Thu Jul 24 08:49:43 2008
@@ -75,6 +75,35 @@
self.assertEquals('BCB',
tostring(root))
+ def test_thread_create_xslt(self):
+ XML = self.etree.XML
+ tostring = self.etree.tostring
+ root = XML('BC')
+
+ stylesheets = []
+
+ def run_thread():
+ style = XML('''\
+
+
+
+
+
+
+
+ ''')
+ stylesheets.append( etree.XSLT(style) )
+
+ self._run_thread(run_thread)
+
+ st = stylesheets[0]
+ result = tostring( st(root) )
+
+ self.assertEquals('BC ',
+ result)
+
def test_thread_mix(self):
XML = self.etree.XML
Element = self.etree.Element
Modified: lxml/branch/lxml-2.0/src/lxml/tree.pxd
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/tree.pxd (original)
+++ lxml/branch/lxml-2.0/src/lxml/tree.pxd Thu Jul 24 08:49:43 2008
@@ -315,3 +315,7 @@
xmlNode* start_node,
bint inclusive) nogil
cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* start_node) nogil
+ cdef void BEGIN_FOR_EACH_FROM(xmlNode* tree_top,
+ xmlNode* start_node,
+ bint inclusive) nogil
+ cdef void END_FOR_EACH_FROM(xmlNode* start_node) nogil
Modified: lxml/branch/lxml-2.0/src/lxml/xslt.pxi
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/xslt.pxi (original)
+++ lxml/branch/lxml-2.0/src/lxml/xslt.pxi Thu Jul 24 08:49:43 2008
@@ -388,11 +388,7 @@
cdef xslt.xsltTransformContext* transform_ctxt
cdef xmlDoc* c_result
cdef xmlDoc* c_doc
-
- if not _checkThreadDict(self._c_style.doc.dict):
- if profile_run is not False:
- _kw['profile_run'] = profile_run
- return _copyXSLT(self)(_input, **_kw)
+ cdef tree.xmlDict* c_dict
input_doc = _documentOrRaise(_input)
root_node = _rootNodeOrRaise(_input)
@@ -460,6 +456,23 @@
resolver_context.clear()
result_doc = _documentFactory(c_result, input_doc._parser)
+
+ c_dict = c_result.dict
+ __GLOBAL_PARSER_CONTEXT.initThreadDictRef(&c_result.dict)
+ if c_dict is not c_result.dict or \
+ self._c_style.doc.dict is not c_result.dict or \
+ input_doc._c_doc.dict is not c_result.dict:
+ with nogil:
+ if c_dict is not c_result.dict:
+ fixThreadDictNames(c_result,
+ c_dict, c_result.dict)
+ if self._c_style.doc.dict is not c_result.dict:
+ fixThreadDictNames(c_result,
+ self._c_style.doc.dict, c_result.dict)
+ if input_doc._c_doc.dict is not c_result.dict:
+ fixThreadDictNames(c_result,
+ input_doc._c_doc.dict, c_result.dict)
+
return _xsltResultTreeFactory(result_doc, self, profile_doc)
cdef xmlDoc* _run_transform(self, xmlDoc* c_input_doc,
From scoder at codespeak.net Thu Jul 24 08:52:09 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 24 Jul 2008 08:52:09 +0200 (CEST)
Subject: [Lxml-checkins] r56757 - in lxml/trunk: . src/lxml
Message-ID: <20080724065209.B6AED169E3A@codespeak.net>
Author: scoder
Date: Thu Jul 24 08:52:09 2008
New Revision: 56757
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/xslt.pxi
Log:
r4657 at delle: sbehnel | 2008-07-24 08:42:49 +0200
cleanup
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Thu Jul 24 08:52:09 2008
@@ -457,7 +457,6 @@
cdef xslt.xsltTransformContext* transform_ctxt
cdef xmlDoc* c_result
cdef xmlDoc* c_doc
- cdef xmlNode* c_node
cdef tree.xmlDict* c_dict
input_doc = _documentOrRaise(_input)
From scoder at codespeak.net Thu Jul 24 08:52:15 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 24 Jul 2008 08:52:15 +0200 (CEST)
Subject: [Lxml-checkins] r56758 - in lxml/trunk: . src/lxml/tests
Message-ID: <20080724065215.8C0E6169E3D@codespeak.net>
Author: scoder
Date: Thu Jul 24 08:52:14 2008
New Revision: 56758
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/tests/test_threading.py
Log:
r4658 at delle: sbehnel | 2008-07-24 08:46:28 +0200
test case for XSLT threading crash
Modified: lxml/trunk/src/lxml/tests/test_threading.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_threading.py (original)
+++ lxml/trunk/src/lxml/tests/test_threading.py Thu Jul 24 08:52:14 2008
@@ -79,6 +79,35 @@
self.assertEquals(_bytes('BCB'),
tostring(root))
+ def test_thread_create_xslt(self):
+ XML = self.etree.XML
+ tostring = self.etree.tostring
+ root = XML(_bytes('BC'))
+
+ stylesheets = []
+
+ def run_thread():
+ style = XML(_bytes('''\
+
+
+
+
+
+
+
+ '''))
+ stylesheets.append( etree.XSLT(style) )
+
+ self._run_thread(run_thread)
+
+ st = stylesheets[0]
+ result = tostring( st(root) )
+
+ self.assertEquals(_bytes('BC '),
+ result)
+
def test_thread_mix(self):
XML = self.etree.XML
Element = self.etree.Element
From scoder at codespeak.net Thu Jul 24 09:00:44 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 24 Jul 2008 09:00:44 +0200 (CEST)
Subject: [Lxml-checkins] r56759 - in lxml/branch/lxml-2.1/src/lxml: . tests
Message-ID: <20080724070044.976122A01A5@codespeak.net>
Author: scoder
Date: Thu Jul 24 09:00:44 2008
New Revision: 56759
Modified:
lxml/branch/lxml-2.1/src/lxml/etree_defs.h
lxml/branch/lxml-2.1/src/lxml/proxy.pxi
lxml/branch/lxml-2.1/src/lxml/tests/test_threading.py
lxml/branch/lxml-2.1/src/lxml/tree.pxd
lxml/branch/lxml-2.1/src/lxml/xslt.pxi
Log:
trunk merge of XSLT threading fix
Modified: lxml/branch/lxml-2.1/src/lxml/etree_defs.h
==============================================================================
--- lxml/branch/lxml-2.1/src/lxml/etree_defs.h (original)
+++ lxml/branch/lxml-2.1/src/lxml/etree_defs.h Thu Jul 24 09:00:44 2008
@@ -160,7 +160,9 @@
*
* Calls the code block between the BEGIN and END macros for all elements
* below c_tree_top (exclusively), starting at c_node (inclusively iff
- * 'inclusive' is 1).
+ * 'inclusive' is 1). The _ELEMENT_ variants will only stop on nodes
+ * that match _isElement(), the normal variant will stop on every node
+ * except text nodes.
*
* To traverse the node and all of its children and siblings in Pyrex, call
* cdef xmlNode* some_node
@@ -187,63 +189,80 @@
* should not segfault !
*/
-#define _ADVANCE_TO_NEXT_ELEMENT(c_node) \
- while ((c_node != 0) && (!_isElement(c_node))) \
+#define _LX__ELEMENT_MATCH(c_node, only_elements) \
+ ((only_elements) ? (_isElement(c_node)) : ((c_node)->type != XML_TEXT_NODE))
+
+#define _LX__ADVANCE_TO_NEXT(c_node, only_elements) \
+ while ((c_node != 0) && (!_LX__ELEMENT_MATCH(c_node, only_elements))) \
c_node = c_node->next;
-#define _TRAVERSE_TO_NEXT_ELEMENT(c_stop_node, c_node) \
-{ \
- /* walk through children first */ \
- xmlNode* ___next = c_node->children; \
- _ADVANCE_TO_NEXT_ELEMENT(___next) \
- if ((___next == 0) && (c_node != c_stop_node)) { \
- /* try siblings */ \
- ___next = c_node->next; \
- _ADVANCE_TO_NEXT_ELEMENT(___next) \
- /* back off through parents */ \
- while (___next == 0) { \
- c_node = c_node->parent; \
- if (c_node == 0) \
- break; \
- if (c_node == c_stop_node) \
- break; \
- if (!_isElement(c_node)) \
- break; \
- /* we already traversed the parents -> siblings */ \
- ___next = c_node->next; \
- _ADVANCE_TO_NEXT_ELEMENT(___next) \
- } \
- } \
- c_node = ___next; \
+#define _LX__TRAVERSE_TO_NEXT(c_stop_node, c_node, only_elements) \
+{ \
+ /* walk through children first */ \
+ xmlNode* _lx__next = c_node->children; \
+ _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \
+ if ((_lx__next == 0) && (c_node != c_stop_node)) { \
+ /* try siblings */ \
+ _lx__next = c_node->next; \
+ _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \
+ /* back off through parents */ \
+ while (_lx__next == 0) { \
+ c_node = c_node->parent; \
+ if (c_node == 0) \
+ break; \
+ if (c_node == c_stop_node) \
+ break; \
+ if ((only_elements) && !_isElement(c_node)) \
+ break; \
+ /* we already traversed the parents -> siblings */ \
+ _lx__next = c_node->next; \
+ _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \
+ } \
+ } \
+ c_node = _lx__next; \
}
-#define BEGIN_FOR_EACH_ELEMENT_FROM(c_tree_top, c_node, inclusive) \
-{ \
- if (c_node != 0) { \
- const xmlNode* ___tree_top = (c_tree_top); \
- /* make sure we start at an element */ \
- if (!_isElement(c_node)) { \
- /* we skip the node, so 'inclusive' is irrelevant */ \
- if (c_node == ___tree_top) \
- c_node = 0; /* nothing to traverse */ \
- else { \
- c_node = c_node->next; \
- _ADVANCE_TO_NEXT_ELEMENT(c_node) \
- } \
- } else if (! (inclusive)) { \
- /* skip the first node */ \
- _TRAVERSE_TO_NEXT_ELEMENT(___tree_top, c_node) \
- } \
- \
- /* now run the user code on the elements we find */ \
- while (c_node != 0) { \
+#define _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, only_elements) \
+{ \
+ if (c_node != 0) { \
+ const xmlNode* _lx__tree_top = (c_tree_top); \
+ const int _lx__only_elements = (only_elements); \
+ /* make sure we start at an element */ \
+ if (!_LX__ELEMENT_MATCH(c_node, _lx__only_elements)) { \
+ /* we skip the node, so 'inclusive' is irrelevant */ \
+ if (c_node == _lx__tree_top) \
+ c_node = 0; /* nothing to traverse */ \
+ else { \
+ c_node = c_node->next; \
+ _LX__ADVANCE_TO_NEXT(c_node, _lx__only_elements) \
+ } \
+ } else if (! (inclusive)) { \
+ /* skip the first node */ \
+ _LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \
+ } \
+ \
+ /* now run the user code on the elements we find */ \
+ while (c_node != 0) { \
/* here goes the code to be run for each element */
-#define END_FOR_EACH_ELEMENT_FROM(c_node) \
- _TRAVERSE_TO_NEXT_ELEMENT(___tree_top, c_node) \
- } \
- } \
+#define _LX__END_FOR_EACH_FROM(c_node) \
+ _LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \
+ } \
+ } \
}
+#define BEGIN_FOR_EACH_ELEMENT_FROM(c_tree_top, c_node, inclusive) \
+ _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 1)
+
+#define END_FOR_EACH_ELEMENT_FROM(c_node) \
+ _LX__END_FOR_EACH_FROM(c_node)
+
+#define BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive) \
+ _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 0)
+
+#define END_FOR_EACH_FROM(c_node) \
+ _LX__END_FOR_EACH_FROM(c_node)
+
+
#endif /* HAS_ETREE_DEFS_H */
Modified: lxml/branch/lxml-2.1/src/lxml/proxy.pxi
==============================================================================
--- lxml/branch/lxml-2.1/src/lxml/proxy.pxi (original)
+++ lxml/branch/lxml-2.1/src/lxml/proxy.pxi Thu Jul 24 09:00:44 2008
@@ -400,7 +400,7 @@
# 3) fix the names in the tree in case we moved it to a different thread
if doc._c_doc.dict is not c_source_doc.dict:
- fixThreadDictNames(c_start_node, doc._c_doc.dict)
+ fixThreadDictNames(c_start_node, c_source_doc.dict, doc._c_doc.dict)
# free now unused namespace declarations
if c_del_ns_list is not NULL:
@@ -415,26 +415,75 @@
return 0
-cdef void fixThreadDictNames(xmlNode* c_element, tree.xmlDict* c_dict) nogil:
+cdef void fixThreadDictNames(xmlNode* c_element,
+ tree.xmlDict* c_src_dict,
+ tree.xmlDict* c_dict) nogil:
# re-assign the names of tags and attributes
#
# this should only be called when the element is based on a
# different libxml2 tag name dictionary
- cdef xmlNode* c_node
- cdef char* c_name
- if not tree._isElementOrXInclude(c_element):
+ if c_element.type == tree.XML_DOCUMENT_NODE or \
+ c_element.type == tree.XML_HTML_DOCUMENT_NODE:
+ # may define "xml" namespace
+ fixThreadDictNsForNode(c_element, c_src_dict, c_dict)
+ c_element = c_element.children
+ while c_element is not NULL:
+ fixThreadDictNames(c_element, c_src_dict, c_dict)
+ c_element = c_element.next
+ return
+ elif not tree._isElementOrXInclude(c_element):
return
- tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_element, c_element, 1)
- c_name = tree.xmlDictLookup(c_dict, c_element.name, -1)
+ tree.BEGIN_FOR_EACH_FROM(c_element, c_element, 1)
+ if c_element.name is not NULL:
+ fixThreadDictNameForNode(c_element, c_dict)
+ if c_element.type == tree.XML_ELEMENT_NODE:
+ fixThreadDictNamesForAttributes(
+ c_element.properties, c_src_dict, c_dict)
+ tree.END_FOR_EACH_FROM(c_element)
+
+cdef inline void fixThreadDictNamesForAttributes(tree.xmlAttr* c_attr,
+ tree.xmlDict* c_src_dict,
+ tree.xmlDict* c_dict) nogil:
+ cdef xmlNode* c_child
+ cdef xmlNode* c_node
+ c_node = c_attr
+ while c_node is not NULL:
+ fixThreadDictNameForNode(c_node, c_dict)
+ fixThreadDictContentForNode(c_node, c_src_dict, c_dict)
+ # libxml2 keeps some (!) attribute values in the dict
+ c_child = c_node.children
+ while c_child is not NULL:
+ fixThreadDictNameForNode(c_child, c_dict)
+ fixThreadDictContentForNode(c_child, c_src_dict, c_dict)
+ c_child = c_child.next
+ c_node = c_node.next
+
+cdef inline void fixThreadDictNameForNode(xmlNode* c_node,
+ tree.xmlDict* c_dict) nogil:
+ cdef char* c_name
# c_name can be NULL on memory error, but we don't handle that here
+ c_name = tree.xmlDictLookup(c_dict, c_node.name, -1)
if c_name is not NULL:
- c_element.name = c_name
- if c_element.type == tree.XML_ELEMENT_NODE:
- c_node = c_element.properties
- while c_node is not NULL:
- c_name = tree.xmlDictLookup(c_dict, c_node.name, -1)
- if c_name is not NULL:
- c_node.name = c_name
- c_node = c_node.next
- tree.END_FOR_EACH_ELEMENT_FROM(c_element)
+ c_node.name = c_name
+
+cdef inline void fixThreadDictContentForNode(xmlNode* c_node,
+ tree.xmlDict* c_src_dict,
+ tree.xmlDict* c_dict) nogil:
+ if c_node.content is not NULL:
+ if tree.xmlDictOwns(c_src_dict, c_node.content):
+ # result can be NULL on memory error, but we don't handle that here
+ c_node.content = tree.xmlDictLookup(c_dict, c_node.content, -1)
+
+cdef inline void fixThreadDictNsForNode(xmlNode* c_node,
+ tree.xmlDict* c_src_dict,
+ tree.xmlDict* c_dict) nogil:
+ cdef xmlNs* c_ns = c_node.nsDef
+ while c_ns is not NULL:
+ if c_ns.href is not NULL:
+ if tree.xmlDictOwns(c_src_dict, c_ns.href):
+ c_ns.href = tree.xmlDictLookup(c_dict, c_ns.href, -1)
+ if c_ns.prefix is not NULL:
+ if tree.xmlDictOwns(c_src_dict, c_ns.prefix):
+ c_ns.prefix = tree.xmlDictLookup(c_dict, c_ns.prefix, -1)
+ c_ns = c_ns.next
Modified: lxml/branch/lxml-2.1/src/lxml/tests/test_threading.py
==============================================================================
--- lxml/branch/lxml-2.1/src/lxml/tests/test_threading.py (original)
+++ lxml/branch/lxml-2.1/src/lxml/tests/test_threading.py Thu Jul 24 09:00:44 2008
@@ -79,6 +79,35 @@
self.assertEquals(_bytes('BCB'),
tostring(root))
+ def test_thread_create_xslt(self):
+ XML = self.etree.XML
+ tostring = self.etree.tostring
+ root = XML(_bytes('BC'))
+
+ stylesheets = []
+
+ def run_thread():
+ style = XML(_bytes('''\
+
+
+
+
+
+
+
+ '''))
+ stylesheets.append( etree.XSLT(style) )
+
+ self._run_thread(run_thread)
+
+ st = stylesheets[0]
+ result = tostring( st(root) )
+
+ self.assertEquals(_bytes('BC '),
+ result)
+
def test_thread_mix(self):
XML = self.etree.XML
Element = self.etree.Element
Modified: lxml/branch/lxml-2.1/src/lxml/tree.pxd
==============================================================================
--- lxml/branch/lxml-2.1/src/lxml/tree.pxd (original)
+++ lxml/branch/lxml-2.1/src/lxml/tree.pxd Thu Jul 24 09:00:44 2008
@@ -322,3 +322,7 @@
xmlNode* start_node,
bint inclusive) nogil
cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* start_node) nogil
+ cdef void BEGIN_FOR_EACH_FROM(xmlNode* tree_top,
+ xmlNode* start_node,
+ bint inclusive) nogil
+ cdef void END_FOR_EACH_FROM(xmlNode* start_node) nogil
Modified: lxml/branch/lxml-2.1/src/lxml/xslt.pxi
==============================================================================
--- lxml/branch/lxml-2.1/src/lxml/xslt.pxi (original)
+++ lxml/branch/lxml-2.1/src/lxml/xslt.pxi Thu Jul 24 09:00:44 2008
@@ -417,11 +417,11 @@
def __get__(self):
return self._error_log.copy()
- def apply(self, _input, *, profile_run=False, **_kw):
- u"""apply(self, _input, profile_run=False, **_kw)
+ def apply(self, _input, *, profile_run=False, **kw):
+ u"""apply(self, _input, profile_run=False, **kw)
:deprecated: call the object, not this method."""
- return self(_input, profile_run=profile_run, **_kw)
+ return self(_input, profile_run=profile_run, **kw)
def tostring(self, _ElementTree result_tree):
u"""tostring(self, result_tree)
@@ -438,8 +438,8 @@
def __copy__(self):
return _copyXSLT(self)
- def __call__(self, _input, *, profile_run=False, **_kw):
- u"""__call__(self, _input, profile_run=False, **_kw)
+ def __call__(self, _input, *, profile_run=False, **kw):
+ u"""__call__(self, _input, profile_run=False, **kw)
Execute the XSL transformation on a tree or Element.
@@ -457,7 +457,7 @@
cdef xslt.xsltTransformContext* transform_ctxt
cdef xmlDoc* c_result
cdef xmlDoc* c_doc
- cdef xmlNode* c_node
+ cdef tree.xmlDict* c_dict
input_doc = _documentOrRaise(_input)
root_node = _rootNodeOrRaise(_input)
@@ -483,7 +483,7 @@
transform_ctxt._private = resolver_context
c_result = self._run_transform(
- c_doc, _kw, context, transform_ctxt)
+ c_doc, kw, context, transform_ctxt)
if transform_ctxt.state != xslt.XSLT_STATE_OK:
if c_result is not NULL:
@@ -533,13 +533,21 @@
result_doc = _documentFactory(c_result, input_doc._parser)
- if not _checkThreadDict(c_result.dict):
- # fix document dictionary
- c_node = _findChildForwards(c_result, 0)
- if c_node is not NULL:
- __GLOBAL_PARSER_CONTEXT.initThreadDictRef(&c_result.dict)
- with nogil:
- fixThreadDictNames(c_node, c_result.dict)
+ c_dict = c_result.dict
+ __GLOBAL_PARSER_CONTEXT.initThreadDictRef(&c_result.dict)
+ if c_dict is not c_result.dict or \
+ self._c_style.doc.dict is not c_result.dict or \
+ input_doc._c_doc.dict is not c_result.dict:
+ with nogil:
+ if c_dict is not c_result.dict:
+ fixThreadDictNames(c_result,
+ c_dict, c_result.dict)
+ if self._c_style.doc.dict is not c_result.dict:
+ fixThreadDictNames(c_result,
+ self._c_style.doc.dict, c_result.dict)
+ if input_doc._c_doc.dict is not c_result.dict:
+ fixThreadDictNames(c_result,
+ input_doc._c_doc.dict, c_result.dict)
return _xsltResultTreeFactory(result_doc, self, profile_doc)
From scoder at codespeak.net Thu Jul 24 09:02:44 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 24 Jul 2008 09:02:44 +0200 (CEST)
Subject: [Lxml-checkins] r56760 - in lxml/branch/lxml-2.1: . doc
Message-ID: <20080724070244.3AA112A0194@codespeak.net>
Author: scoder
Date: Thu Jul 24 09:02:42 2008
New Revision: 56760
Modified:
lxml/branch/lxml-2.1/CHANGES.txt
lxml/branch/lxml-2.1/doc/main.txt
lxml/branch/lxml-2.1/version.txt
Log:
prepare release of 2.1.1
Modified: lxml/branch/lxml-2.1/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-2.1/CHANGES.txt (original)
+++ lxml/branch/lxml-2.1/CHANGES.txt Thu Jul 24 09:02:42 2008
@@ -2,8 +2,8 @@
lxml changelog
==============
-Under development
-=================
+2.1.1 (2008-07-24)
+==================
Features added
--------------
@@ -11,6 +11,9 @@
Bugs fixed
----------
+* Crash when parsing XSLT stylesheets in a thread and using them in
+ another.
+
* Encoding problem when including text with ElementInclude under
Python 3.
Modified: lxml/branch/lxml-2.1/doc/main.txt
==============================================================================
--- lxml/branch/lxml-2.1/doc/main.txt (original)
+++ lxml/branch/lxml-2.1/doc/main.txt Thu Jul 24 09:02:42 2008
@@ -147,8 +147,8 @@
source release. If you can't wait, consider trying a less recent
release version first.
-The latest version is `lxml 2.1`_, released 2008-07-09
-(`changes for 2.1`_). `Older versions`_ are listed below.
+The latest version is `lxml 2.1.1`_, released 2008-07-24
+(`changes for 2.1.1`_). `Older versions`_ are listed below.
Please take a look at the `installation instructions`_!
@@ -216,7 +216,9 @@
Old Versions
------------
-.. _`PDF documentation`: lxmldoc-2.1.pdf
+.. _`PDF documentation`: lxmldoc-2.1.1.pdf
+
+* `lxml 2.1`_, released 2008-07-09 (`changes for 2.1`_)
* `lxml 2.1beta3`_, released 2008-06-19 (`changes for 2.1beta3`_)
@@ -292,6 +294,7 @@
* `lxml 0.5`_, released 2005-04-08
+.. _`lxml 2.1.1`: lxml-2.1.1.tgz
.. _`lxml 2.1`: lxml-2.1.tgz
.. _`lxml 2.1beta3`: lxml-2.1beta3.tgz
.. _`lxml 2.1beta2`: lxml-2.1beta2.tgz
@@ -330,6 +333,7 @@
.. _`lxml 0.5.1`: lxml-0.5.1.tgz
.. _`lxml 0.5`: lxml-0.5.tgz
+.. _`changes for 2.1.1`: changes-2.1.1.html
.. _`changes for 2.1`: changes-2.1.html
.. _`changes for 2.1beta3`: changes-2.1beta3.html
.. _`changes for 2.1beta2`: changes-2.1beta2.html
Modified: lxml/branch/lxml-2.1/version.txt
==============================================================================
--- lxml/branch/lxml-2.1/version.txt (original)
+++ lxml/branch/lxml-2.1/version.txt Thu Jul 24 09:02:42 2008
@@ -1 +1 @@
-2.1
+2.1.1
From scoder at codespeak.net Thu Jul 24 09:07:50 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 24 Jul 2008 09:07:50 +0200 (CEST)
Subject: [Lxml-checkins] r56761 - lxml/branch/lxml-2.0/src/lxml
Message-ID: <20080724070750.9823F2A0194@codespeak.net>
Author: scoder
Date: Thu Jul 24 09:07:50 2008
New Revision: 56761
Modified:
lxml/branch/lxml-2.0/src/lxml/tree.pxd
Log:
incomplete merge fix
Modified: lxml/branch/lxml-2.0/src/lxml/tree.pxd
==============================================================================
--- lxml/branch/lxml-2.0/src/lxml/tree.pxd (original)
+++ lxml/branch/lxml-2.0/src/lxml/tree.pxd Thu Jul 24 09:07:50 2008
@@ -56,6 +56,7 @@
# libxml/dict.h appears to be broken to include in C
ctypedef struct xmlDict
cdef char* xmlDictLookup(xmlDict* dict, char* name, int len) nogil
+ cdef int xmlDictOwns(xmlDict* dict, char* name) nogil
cdef extern from "libxml/tree.h":
ctypedef struct xmlDoc
From scoder at codespeak.net Thu Jul 24 09:10:29 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 24 Jul 2008 09:10:29 +0200 (CEST)
Subject: [Lxml-checkins] r56762 - in lxml/branch/lxml-2.0: . doc
Message-ID: <20080724071029.B77FE2A0194@codespeak.net>
Author: scoder
Date: Thu Jul 24 09:10:29 2008
New Revision: 56762
Modified:
lxml/branch/lxml-2.0/CHANGES.txt
lxml/branch/lxml-2.0/doc/main.txt
Log:
prepare release of 2.0.8
Modified: lxml/branch/lxml-2.0/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-2.0/CHANGES.txt (original)
+++ lxml/branch/lxml-2.0/CHANGES.txt Thu Jul 24 09:10:29 2008
@@ -2,8 +2,8 @@
lxml changelog
==============
-Under development
-=================
+2.0.8 (2008-07-24)
+==================
Features added
--------------
Modified: lxml/branch/lxml-2.0/doc/main.txt
==============================================================================
--- lxml/branch/lxml-2.0/doc/main.txt (original)
+++ lxml/branch/lxml-2.0/doc/main.txt Thu Jul 24 09:10:29 2008
@@ -146,8 +146,8 @@
source release. If you can't wait, consider trying a less recent
release version first.
-The latest version is `lxml 2.0.7`_, released 2008-06-20
-(`changes for 2.0.7`_). `Older versions`_ are listed below.
+The latest version is `lxml 2.0.8`_, released 2008-07-24
+(`changes for 2.0.8`_). `Older versions`_ are listed below.
Please take a look at the `installation instructions`_!
@@ -215,7 +215,9 @@
Old Versions
------------
-.. _`PDF documentation`: lxmldoc-2.0.7.pdf
+.. _`PDF documentation`: lxmldoc-2.0.8.pdf
+
+* `lxml 2.0.7`_, released 2008-06-20 (`changes for 2.0.7`_)
* `lxml 2.0.6`_, released 2008-05-31 (`changes for 2.0.6`_)
@@ -281,6 +283,7 @@
* `lxml 0.5`_, released 2005-04-08
+.. _`lxml 2.0.8`: lxml-2.0.8.tgz
.. _`lxml 2.0.7`: lxml-2.0.7.tgz
.. _`lxml 2.0.6`: lxml-2.0.6.tgz
.. _`lxml 2.0.5`: lxml-2.0.5.tgz
@@ -314,6 +317,7 @@
.. _`lxml 0.5.1`: lxml-0.5.1.tgz
.. _`lxml 0.5`: lxml-0.5.tgz
+.. _`changes for 2.0.8`: changes-2.0.8.html
.. _`changes for 2.0.7`: changes-2.0.7.html
.. _`changes for 2.0.6`: changes-2.0.6.html
.. _`changes for 2.0.5`: changes-2.0.5.html
From scoder at codespeak.net Fri Jul 25 19:33:00 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 25 Jul 2008 19:33:00 +0200 (CEST)
Subject: [Lxml-checkins] r56790 - in lxml/trunk: . doc
Message-ID: <20080725173300.61742169F56@codespeak.net>
Author: scoder
Date: Fri Jul 25 19:32:58 2008
New Revision: 56790
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/mkhtml.py
Log:
r4702 at delle: sbehnel | 2008-07-25 19:32:31 +0200
cleanup
Modified: lxml/trunk/doc/mkhtml.py
==============================================================================
--- lxml/trunk/doc/mkhtml.py (original)
+++ lxml/trunk/doc/mkhtml.py Fri Jul 25 19:32:58 2008
@@ -60,7 +60,7 @@
def merge_menu(tree, menu, name):
menu_root = copy.deepcopy(menu)
tree.getroot()[1][0].insert(0, menu_root) # html->body->div[class=document]
- for el in menu_root.getiterator():
+ for el in menu_root.iter():
tag = el.tag
if tag[0] != '{':
el.tag = "{http://www.w3.org/1999/xhtml}" + tag
From scoder at codespeak.net Fri Jul 25 19:33:09 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 25 Jul 2008 19:33:09 +0200 (CEST)
Subject: [Lxml-checkins] r56791 - in lxml/trunk: . doc
Message-ID: <20080725173309.510F0169F36@codespeak.net>
Author: scoder
Date: Fri Jul 25 19:33:08 2008
New Revision: 56791
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/main.txt
Log:
r4703 at delle: sbehnel | 2008-07-25 19:32:55 +0200
link to other web site versions
Modified: lxml/trunk/doc/main.txt
==============================================================================
--- lxml/trunk/doc/main.txt (original)
+++ lxml/trunk/doc/main.txt Fri Jul 25 19:33:08 2008
@@ -216,6 +216,10 @@
Old Versions
------------
+See the web sites of lxml `1.3 <http://codespeak.net/lxml/1.3/>`_,
+`2.0 <http://codespeak.net/lxml/2.0/>`_ and the `current
+in-development version <http://codespeak.net/lxml/dev/>`_.
+
.. _`PDF documentation`: lxmldoc-2.1.pdf
* `lxml 2.1beta3`_, released 2008-06-19 (`changes for 2.1beta3`_)
From scoder at codespeak.net Sun Jul 27 12:04:40 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 27 Jul 2008 12:04:40 +0200 (CEST)
Subject: [Lxml-checkins] r56806 - lxml/branch/lxml-2.1/doc
Message-ID: <20080727100440.459D0169E54@codespeak.net>
Author: scoder
Date: Sun Jul 27 12:04:36 2008
New Revision: 56806
Modified:
lxml/branch/lxml-2.1/doc/main.txt
lxml/branch/lxml-2.1/doc/mkhtml.py
Log:
doc merge from trunk
Modified: lxml/branch/lxml-2.1/doc/main.txt
==============================================================================
--- lxml/branch/lxml-2.1/doc/main.txt (original)
+++ lxml/branch/lxml-2.1/doc/main.txt Sun Jul 27 12:04:36 2008
@@ -216,6 +216,10 @@
Old Versions
------------
+See the web sites of lxml `1.3 <http://codespeak.net/lxml/1.3/>`_,
+`2.0 <http://codespeak.net/lxml/2.0/>`_ and the `current
+in-development version <http://codespeak.net/lxml/dev/>`_.
+
.. _`PDF documentation`: lxmldoc-2.1.1.pdf
* `lxml 2.1`_, released 2008-07-09 (`changes for 2.1`_)
Modified: lxml/branch/lxml-2.1/doc/mkhtml.py
==============================================================================
--- lxml/branch/lxml-2.1/doc/mkhtml.py (original)
+++ lxml/branch/lxml-2.1/doc/mkhtml.py Sun Jul 27 12:04:36 2008
@@ -60,7 +60,7 @@
def merge_menu(tree, menu, name):
menu_root = copy.deepcopy(menu)
tree.getroot()[1][0].insert(0, menu_root) # html->body->div[class=document]
- for el in menu_root.getiterator():
+ for el in menu_root.iter():
tag = el.tag
if tag[0] != '{':
el.tag = "{http://www.w3.org/1999/xhtml}" + tag
From scoder at codespeak.net Sun Jul 27 22:20:26 2008
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sun, 27 Jul 2008 22:20:26 +0200 (CEST)
Subject: [Lxml-checkins] r56808 - in lxml/trunk: . doc
Message-ID: <20080727202026.4FA2116851E@codespeak.net>
Author: scoder
Date: Sun Jul 27 22:20:24 2008
New Revision: 56808
Modified:
lxml/trunk/ (props changed)
lxml/trunk/doc/mklatex.py
Log:
r4710 at delle: sbehnel | 2008-07-27 22:20:15 +0200
PDF fix
Modified: lxml/trunk/doc/mklatex.py
==============================================================================
--- lxml/trunk/doc/mklatex.py (original)
+++ lxml/trunk/doc/mklatex.py Sun Jul 27 22:20:24 2008
@@ -183,6 +183,8 @@
r'\\href\{([^/}]+)[.]([^./}]+)\}').sub
replace_docinternal_hyperrefs = re.compile(
r'\\href\{\\#([^}]+)\}').sub
+ replace_image_paths = re.compile(
+ r'^(\\includegraphics{)').sub
def build_hyperref(match):
basename, extension = match.groups()
outname = BASENAME_MAP.get(basename, basename)
@@ -195,6 +197,7 @@
else:
return r"\hyperref[_part_%s.tex]" % outname
def fix_relative_hyperrefs(line):
+ line = replace_image_paths(r'\1../html/', line)
if r'\href' not in line:
return line
line = replace_interdoc_hyperrefs(build_hyperref, line)
|