[Lxml-checkins] r46484 - in lxml/trunk: . doc src/lxml
scoder at codespeak.net
scoder at codespeak.net
Tue Sep 11 21:55:39 CEST 2007
Author: scoder
Date: Tue Sep 11 21:55:37 2007
New Revision: 46484
Added:
lxml/trunk/cython-with-GIL-simple.patch
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/tutorial.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/extensions.pxi
lxml/trunk/src/lxml/parser.pxi
Log:
cleanup in parser code, ET-compatible target parser interface (SAX-like), tutorial section on parsing
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Tue Sep 11 21:55:37 2007
@@ -8,6 +8,9 @@
Features added
--------------
+* ElementTree-like parser target interface as described in
+ http://effbot.org/elementtree/elementtree-xmlparser.htm
+
* ElementTree-like feed parser interface on XMLParser and HTMLParser
(``feed()`` and ``close()`` methods)
Added: lxml/trunk/cython-with-GIL-simple.patch
==============================================================================
--- (empty file)
+++ lxml/trunk/cython-with-GIL-simple.patch Tue Sep 11 21:55:37 2007
@@ -0,0 +1,236 @@
+diff -r 43be72844df4 Compiler/Code.py
+--- a/Compiler/Code.py Mon Sep 03 20:07:01 2007 +0200
++++ b/Compiler/Code.py Mon Sep 10 20:13:13 2007 +0200
+@@ -284,6 +284,13 @@ class CCodeWriter:
+ # code = "((PyObject*)%s)" % code
+ self.put_init_to_py_none(code, entry.type)
+
++ def put_py_gil_state_ensure(self, cname):
++ self.putln("PyGILState_STATE %s;" % cname)
++ self.putln("%s = PyGILState_Ensure();" % cname)
++
++ def put_py_gil_state_release(self, cname):
++ self.putln("PyGILState_Release(%s);" % cname)
++
+ def put_pymethoddef(self, entry, term):
+ if entry.doc:
+ doc_code = entry.doc_cname
+diff -r 43be72844df4 Compiler/ExprNodes.py
+--- a/Compiler/ExprNodes.py Mon Sep 03 20:07:01 2007 +0200
++++ b/Compiler/ExprNodes.py Mon Sep 10 22:47:05 2007 +0200
+@@ -473,7 +473,7 @@ class ExprNode(Node):
+ else: # neither src nor dst are py types
+ # Added the string comparison, since for c types that
+ # is enough, but SageX gets confused when the types are
+- # in different files.
++ # in different files.
+ if not (str(src.type) == str(dst_type) or dst_type.assignable_from(src_type)):
+ error(self.pos, "Cannot assign type '%s' to '%s'" %
+ (src.type, dst_type))
+diff -r 43be72844df4 Compiler/Naming.py
+--- a/Compiler/Naming.py Mon Sep 03 20:07:01 2007 +0200
++++ b/Compiler/Naming.py Mon Sep 10 20:13:13 2007 +0200
+@@ -53,5 +53,6 @@ stringtab_cname = pyrex_prefix + "strin
+ stringtab_cname = pyrex_prefix + "string_tab"
+ vtabslot_cname = pyrex_prefix + "vtab"
+ c_api_tab_cname = pyrex_prefix + "c_api_tab"
++gilstate_cname = pyrex_prefix + "state"
+
+ extern_c_macro = pyrex_prefix.upper() + "EXTERN_C"
+diff -r 43be72844df4 Compiler/Nodes.py
+--- a/Compiler/Nodes.py Mon Sep 03 20:07:01 2007 +0200
++++ b/Compiler/Nodes.py Mon Sep 10 20:13:13 2007 +0200
+@@ -282,6 +282,7 @@ class CFuncDeclaratorNode(CDeclaratorNod
+ # has_varargs boolean
+ # exception_value ConstNode
+ # exception_check boolean True if PyErr_Occurred check needed
++ # with_gil boolean True if GIL should be grabbed/released
+
+ def analyse(self, return_type, env):
+ func_type_args = []
+@@ -317,7 +318,8 @@ class CFuncDeclaratorNode(CDeclaratorNod
+ exc_check = self.exception_check
+ func_type = PyrexTypes.CFuncType(
+ return_type, func_type_args, self.has_varargs,
+- exception_value = exc_val, exception_check = exc_check)
++ exception_value = exc_val, exception_check = exc_check,
++ with_gil = self.with_gil)
+ return self.base.analyse(func_type, env)
+
+
+@@ -572,6 +574,8 @@ class FuncDefNode(StatNode, BlockNode):
+ self.generate_keyword_list(code)
+ # ----- Extern library function declarations
+ lenv.generate_library_function_declarations(code)
++ # ----- Grab GIL
++ self.generate_grab_gil(code)
+ # ----- Fetch arguments
+ self.generate_argument_parsing_code(code)
+ self.generate_argument_increfs(lenv, code)
+@@ -623,6 +627,9 @@ class FuncDefNode(StatNode, BlockNode):
+ code.put_var_decrefs(lenv.var_entries, used_only = 1)
+ code.put_var_decrefs(lenv.arg_entries)
+ self.put_stararg_decrefs(code)
++ # ----- Release GIL
++ self.generate_release_gil(code)
++ # ----- Return
+ if not self.return_type.is_void:
+ retval_code = Naming.retval_cname
+ #if self.return_type.is_extension_type:
+@@ -651,6 +658,12 @@ class FuncDefNode(StatNode, BlockNode):
+ code.put_var_incref(entry)
+
+ def generate_execution_code(self, code):
++ pass
++
++ def generate_grab_gil(self, code):
++ pass
++
++ def generate_release_gil(self, code):
+ pass
+
+
+@@ -756,7 +769,19 @@ class CFuncDefNode(FuncDefNode):
+ else:
+ error(arg.pos, "Cannot test type of extern C class "
+ "without type object name specification")
+-
++
++ def generate_grab_gil(self, code):
++ if self.entry.type.with_gil:
++ code.putln("")
++ code.put_py_gil_state_ensure(Naming.gilstate_cname)
++ code.putln("")
++
++ def generate_release_gil(self, code):
++ if self.entry.type.with_gil:
++ code.putln("")
++ code.put_py_gil_state_release(Naming.gilstate_cname)
++ code.putln("")
++
+ def error_value(self):
+ if self.return_type.is_pyobject:
+ return "0"
+diff -r 43be72844df4 Compiler/Parsing.py
+--- a/Compiler/Parsing.py Mon Sep 03 20:07:01 2007 +0200
++++ b/Compiler/Parsing.py Tue Sep 11 21:06:49 2007 +0200
+@@ -5,7 +5,7 @@ import os, re
+ import os, re
+ from string import join, replace
+ from types import ListType, TupleType
+-from Scanning import PyrexScanner
++from Scanning import PyrexScanner, function_contexts
+ import Nodes
+ import ExprNodes
+ from ModuleNode import ModuleNode
+@@ -1462,10 +1462,10 @@ def p_c_declarator(s, empty = 0, is_type
+ args = p_c_arg_list(s, in_pyfunc = 0, cmethod_flag = cmethod_flag)
+ ellipsis = p_optional_ellipsis(s)
+ s.expect(')')
+- exc_val, exc_check = p_exception_value_clause(s)
++ options = p_c_func_options(s)
+ result = Nodes.CFuncDeclaratorNode(pos,
+ base = result, args = args, has_varargs = ellipsis,
+- exception_value = exc_val, exception_check = exc_check)
++ **options)
+ cmethod_flag = 0
+ return result
+
+@@ -1483,6 +1483,37 @@ def p_exception_value_clause(s):
+ s.next()
+ exc_val = p_simple_expr(s) #p_exception_value(s)
+ return exc_val, exc_check
++
++def p_c_with(s):
++ if s.sy == 'with':
++ s.next()
++ return p_ident_list(s)
++ return ()
++
++def p_c_func_options(s):
++ exc_val = None
++ exc_check = 0
++ contexts = []
++
++ if s.sy == 'except':
++ exc_val, exc_check = p_exception_value_clause(s)
++ contexts = p_c_with(s)
++ elif s.sy == 'with':
++ contexts = p_c_with(s)
++ exc_val, exc_check = p_exception_value_clause(s)
++
++ for context in contexts:
++ if context not in function_contexts:
++ s.error("Unknown context: " + context)
++ return None
++
++ ret = {
++ 'exception_value': exc_val,
++ 'exception_check': exc_check,
++ 'with_gil': 'GIL' in contexts,
++ }
++
++ return ret
+
+ #def p_exception_value(s):
+ # sign = ""
+diff -r 43be72844df4 Compiler/PyrexTypes.py
+--- a/Compiler/PyrexTypes.py Mon Sep 03 20:07:01 2007 +0200
++++ b/Compiler/PyrexTypes.py Tue Sep 11 12:07:03 2007 +0200
+@@ -488,16 +488,18 @@ class CFuncType(CType):
+ # has_varargs boolean
+ # exception_value string
+ # exception_check boolean True if PyErr_Occurred check needed
++ # with_gil boolean True if GIL should be grabbed/released
+
+ is_cfunction = 1
+
+ def __init__(self, return_type, args, has_varargs,
+- exception_value = None, exception_check = 0):
++ exception_value = None, exception_check = 0, with_gil = False):
+ self.return_type = return_type
+ self.args = args
+ self.has_varargs = has_varargs
+ self.exception_value = exception_value
+ self.exception_check = exception_check
++ self.with_gil = with_gil
+
+ def __repr__(self):
+ arg_reprs = map(repr, self.args)
+@@ -580,6 +582,7 @@ class CFuncType(CType):
+ if not arg_decl_code and not pyrex:
+ arg_decl_code = "void"
+ exc_clause = ""
++ with_gil_clause = ""
+ if pyrex or for_display:
+ if self.exception_value and self.exception_check:
+ exc_clause = " except? %s" % self.exception_value
+@@ -587,8 +590,11 @@ class CFuncType(CType):
+ exc_clause = " except %s" % self.exception_value
+ elif self.exception_check:
+ exc_clause = " except *"
++ if self.with_gil:
++ with_gil_clause = " with GIL"
+ return self.return_type.declaration_code(
+- "(%s(%s)%s)" % (entity_code, arg_decl_code, exc_clause),
++ "(%s(%s)%s%s)" % (entity_code, arg_decl_code,
++ exc_clause, with_gil_clause),
+ for_display, dll_linkage, pyrex)
+
+
+diff -r 43be72844df4 Compiler/Scanning.py
+--- a/Compiler/Scanning.py Mon Sep 03 20:07:01 2007 +0200
++++ b/Compiler/Scanning.py Tue Sep 11 21:05:33 2007 +0200
+@@ -138,7 +138,11 @@ reserved_words = [
+ "raise", "import", "exec", "try", "except", "finally",
+ "while", "if", "elif", "else", "for", "in", "assert",
+ "and", "or", "not", "is", "in", "lambda", "from",
+- "NULL", "cimport", "by"
++ "NULL", "cimport", "by", "with"
++]
++
++function_contexts = [ # allowed arguments to the "with" option
++ "GIL"
+ ]
+
+ class Method:
Modified: lxml/trunk/doc/tutorial.txt
==============================================================================
--- lxml/trunk/doc/tutorial.txt (original)
+++ lxml/trunk/doc/tutorial.txt Tue Sep 11 21:55:37 2007
@@ -6,24 +6,32 @@
Stefan Behnel
This tutorial briefly overviews the main concepts of the `ElementTree API`_ as
-implemented by lxml.etree, and some simple enhancements that make your life as
-a programmer easier.
+implemented by ``lxml.etree``, and some simple enhancements that make your
+life as a programmer easier.
.. _`ElementTree API`: http://effbot.org/zone/element-index.htm#documentation
.. contents::
..
- 1 Elements and ElementTrees
- 1.1 The Element class
- 1.2 The ElementTree class
- 2 Parsing and XML literals
- 2.1 The XML() function
- 2.2 The parse() function
- 3 Namespaces
- 4 The find*() methods
- 4.1 findall()
- 4.2 find()
- 4.3 findtext()
+ 1 The Element class
+ 1.1 Elements are lists
+ 1.2 Elements carry attributes
+ 1.3 Elements contain text
+ 1.4 Tree iteration
+ 2 The ElementTree class
+ 3 Parsing from strings and files
+ 3.1 The fromstring() function
+ 3.2 The XML() function
+ 3.3 The parse() function
+ 3.4 Parser objects
+ 3.5 Incremental parsing
+ 3.6 Event-driven parsing
+ 4 Namespaces
+ 5 The E-factory
+ 6 ElementPath
+ 6.1 findall()
+ 6.2 find()
+ 6.3 findtext()
A common way to import ``lxml.etree`` is as follows::
@@ -380,15 +388,208 @@
made lxml lose DTD information in an input-output cycle.
-Parsing files and XML literals
+Parsing from strings and files
==============================
+``lxml.etree`` supports parsing XML in a number of ways and from all important
+sources, namely strings, files and file-like objects. The main parse
+functions are ``fromstring()`` and ``parse()``, both called with the source as
+first argument. By default, they use the standard parser, but you can always
+pass a different parser as second argument.
+
+
+The fromstring() function
+-------------------------
+
+The ``fromstring()`` function is the easiest way to parse a string::
+
+ >>> some_xml_data = "<root>data</root>"
+
+ >>> root = etree.fromstring(some_xml_data)
+ >>> print root.tag
+ root
+ >>> print etree.tostring(root)
+ <root>data</root>
+
+
The XML() function
------------------
+The ``XML()`` function behaves like the ``fromstring()`` function, but is
+commonly used to write XML literals right into the source::
+
+ >>> root = etree.XML("<root>data</root>")
+ >>> print root.tag
+ root
+ >>> print etree.tostring(root)
+ <root>data</root>
+
+
The parse() function
--------------------
+The ``parse()`` function is used to parse from files and file-like objects::
+
+ >>> some_file_like = StringIO("<root>data</root>")
+
+ >>> tree = etree.parse(some_file_like)
+
+ >>> print etree.tostring(tree)
+ <root>data</root>
+
+Note that ``parse()`` returns an ElementTree object, not an Element object as
+the string parser functions do::
+
+ >>> root = tree.getroot()
+ >>> print root.tag
+ root
+ >>> print etree.tostring(root)
+ <root>data</root>
+
+
+Parser objects
+--------------
+
+By default, ``lxml.etree`` uses a standard parser with a default setup. If
+you want to configure the parser, you can create a new instance::
+
+ >>> parser = etree.XMLParser(remove_blank_text=True) # lxml.etree only!
+
+This creates a parser that removes empty text between tags while parsing,
+which can reduce the size of the tree and avoid dangling tail text if you know
+that whitespace-only content is not meaningful for your data. An example::
+
+ >>> root = etree.XML("<root> <a/> <b> </b> </root>", parser)
+
+ >>> print etree.tostring(root)
+ <root><a/><b> </b></root>
+
+Note that the whitespace content inside the ``<b>`` tag was not removed, as
+content at leaf elements tends to be data content (even if blank). You can
+easily remove it in an additional step by traversing the tree::
+
+ >>> for element in root.getiterator("*"):
+ ... if element.text is not None and not element.text.strip():
+ ... element.text = None
+
+ >>> print etree.tostring(root)
+ <root><a/><b/></root>
+
+See ``help(etree.XMLParser)`` to find out about the available parser options.
+
+
+Incremental parsing
+-------------------
+
+``lxml.etree`` provides two ways for incremental step-by-step parsing. One is
+through file-like objects, where it calls the ``read()`` method repeatedly.
+This is best used where the data arrives from a source like ``urllib`` or any
+other file-like object that can provide data on request. Note that the parser
+will block and wait until data becomes available in this case::
+
+ >>> class DataSource:
+ ... data = iter(["<roo", "t><", "a/", "><", "/root>"])
+ ... def read(self, requested_size):
+ ... try:
+ ... return self.data.next()
+ ... except StopIteration:
+ ... return ""
+
+ >>> root = etree.parse(DataSource())
+
+ >>> print etree.tostring(root)
+ <root><a/></root>
+
+The second way is through a feed parser interface, given by the ``feed(data)``
+and ``close()`` methods::
+
+ >>> parser = etree.XMLParser()
+
+ >>> parser.feed("<roo")
+ >>> parser.feed("t><")
+ >>> parser.feed("a/")
+ >>> parser.feed("><")
+ >>> parser.feed("/root>")
+
+ >>> root = parser.close()
+
+ >>> print etree.tostring(root)
+ <root><a/></root>
+
+Here, you can interrupt the parsing process at any time and continue it later
+on with another call to the ``feed()`` method. This comes in handy if you
+want to avoid blocking calls to the parser, e.g. in frameworks like Twisted,
+or whenever data comes in slowly or in chunks and you want to do other things
+while waiting for the next chunk.
+
+You can reuse the parser by calling its ``feed()`` method again::
+
+ >>> parser.feed("<root/>")
+ >>> root = parser.close()
+ >>> print etree.tostring(root)
+ <root/>
+
+
+Event-driven parsing
+--------------------
+
+Sometimes, all you need from a document is a small fraction somewhere deep
+inside the tree, so parsing the whole tree into memory, traversing it and
+dropping it can be too much overhead. ``lxml.etree`` supports this use case
+with two event-driven parser interfaces, one that generates parser events
+while building the tree (``iterparse``), and one that does not build the tree
+at all, and instead calls feedback methods on a target object in a SAX-like
+fashion.
+
+Here is a simple ``iterparse()`` example::
+
+ >>> some_file_like = StringIO("<root><a>data</a></root>")
+
+ >>> for event, element in etree.iterparse(some_file_like):
+ ... print "%s, %4s, %s" % (event, element.tag, element.text)
+ end, a, data
+ end, root, None
+
+By default, ``iterparse()`` only generates events when it is done parsing an
+element, but you can control this through the ``events`` keyword argument::
+
+ >>> some_file_like = StringIO("<root><a>data</a></root>")
+
+ >>> for event, element in etree.iterparse(some_file_like,
+ ... events=("start", "end")):
+ ... print "%5s, %4s, %s" % (event, element.tag, element.text)
+ start, root, None
+ start, a, data
+ end, a, data
+ end, root, None
+
+Note that the text, tail and children of an Element are not necessarily there
+yet when receiving the ``start`` event. Only the ``end`` event guarantees
+that the Element has been parsed completely. It also allows you to
+``clear()`` or modify the content of an Element to save memory.
+
+If memory is a real bottleneck, or if building the tree is not desired at all,
+the target parser interface of ``lxml.etree`` can be used. It creates
+SAX-like events by calling the methods of a target object. By implementing
+some or all of these methods, you can control which events are generated::
+
+ >>> class ParserTarget:
+ ... events = []
+ ... def start(self, tag, attrib):
+ ... self.events.append(("start", tag, attrib))
+ ... def close(self):
+ ... return self.events
+
+ >>> parser = etree.XMLParser(target=ParserTarget())
+ >>> events = etree.fromstring('<root test="true"/>', parser)
+
+ >>> for event in events:
+ ... print 'event: %s - tag: %s' % (event[0], event[1])
+ ... for attr, value in event[2].iteritems():
+ ... print ' * %s = %s' % (attr, value)
+ event: start - tag: root
+ * test = true
+
Namespaces
==========
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Tue Sep 11 21:55:37 2007
@@ -1992,7 +1992,10 @@
if element is not None:
doc = element._doc
elif file is not None:
- doc = _parseDocument(file, parser)
+ try:
+ doc = _parseDocument(file, parser)
+ except _TargetParserResult, result_container:
+ return result_container.result
else:
c_doc = _newDoc()
doc = _documentFactory(c_doc, parser)
@@ -2015,8 +2018,11 @@
parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
if not isinstance(parser, HTMLParser):
parser = __DEFAULT_HTML_PARSER
- doc = _parseMemoryDocument(text, base_url, parser)
- return doc.getroot()
+ try:
+ doc = _parseMemoryDocument(text, base_url, parser)
+ return doc.getroot()
+ except _TargetParserResult, result_container:
+ return result_container.result
def XML(text, _BaseParser parser=None, base_url=None):
"""Parses an XML document from a string constant. This function can be used
@@ -2036,8 +2042,11 @@
parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
if not isinstance(parser, XMLParser):
parser = __DEFAULT_XML_PARSER
- doc = _parseMemoryDocument(text, base_url, parser)
- return doc.getroot()
+ try:
+ doc = _parseMemoryDocument(text, base_url, parser)
+ return doc.getroot()
+ except _TargetParserResult, result_container:
+ return result_container.result
def fromstring(text, _BaseParser parser=None, base_url=None):
"""Parses an XML document from a string.
@@ -2052,8 +2061,11 @@
cdef _Document doc
if parser is None:
parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
- doc = _parseMemoryDocument(text, base_url, parser)
- return doc.getroot()
+ try:
+ doc = _parseMemoryDocument(text, base_url, parser)
+ return doc.getroot()
+ except _TargetParserResult, result_container:
+ return result_container.result
def iselement(element):
"""Checks if an object appears to be a valid element object.
@@ -2124,8 +2136,11 @@
is provided as second argument, the default parser is used.
"""
cdef _Document doc
- doc = _parseDocument(source, parser)
- return ElementTree(doc.getroot())
+ try:
+ doc = _parseDocument(source, parser)
+ return ElementTree(doc.getroot())
+ except _TargetParserResult, result_container:
+ return result_container.result
################################################################################
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Tue Sep 11 21:55:37 2007
@@ -578,14 +578,8 @@
# lookup the function by name and call it
-cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt, int nargs):
- cdef python.PyGILState_STATE gil_state
- gil_state = python.PyGILState_Ensure()
- _call_python_xpath_function(ctxt, nargs)
- python.PyGILState_Release(gil_state)
-
-cdef void _call_python_xpath_function(xpath.xmlXPathParserContext* ctxt,
- int nargs):
+cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt,
+ int nargs) with GIL:
cdef xpath.xmlXPathContext* rctxt
cdef _BaseContext context
rctxt = ctxt.context
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Tue Sep 11 21:55:37 2007
@@ -261,10 +261,8 @@
cdef int copyToBuffer(self, char* c_buffer, int c_size):
cdef char* c_start
cdef Py_ssize_t byte_count, remaining
- cdef python.PyGILState_STATE gil_state
if self._bytes_read < 0:
return 0
- gil_state = python.PyGILState_Ensure()
try:
byte_count = python.PyString_GET_SIZE(self._bytes)
remaining = byte_count - self._bytes_read
@@ -276,21 +274,18 @@
self._bytes_read = 0
if remaining == 0:
self._bytes_read = -1
- python.PyGILState_Release(gil_state)
return 0
if c_size > remaining:
c_size = remaining
c_start = _cstr(self._bytes) + self._bytes_read
- python.PyGILState_Release(gil_state)
self._bytes_read = self._bytes_read + c_size
cstd.memcpy(c_buffer, c_start, c_size)
return c_size
except:
self._exc_context._store_raised()
- python.PyGILState_Release(gil_state)
return -1
-cdef int _readFilelikeParser(void* ctxt, char* c_buffer, int c_size):
+cdef int _readFilelikeParser(void* ctxt, char* c_buffer, int c_size) with GIL:
return (<_FileReaderContext>ctxt).copyToBuffer(c_buffer, c_size)
############################################################
@@ -298,7 +293,8 @@
############################################################
cdef xmlparser.xmlParserInput* _parser_resolve_from_python(
- char* c_url, char* c_pubid, xmlparser.xmlParserCtxt* c_context, int* error):
+ char* c_url, char* c_pubid, xmlparser.xmlParserCtxt* c_context,
+ int* error) with GIL:
# call the Python document loaders
cdef xmlparser.xmlParserInput* c_input
cdef _ResolverContext context
@@ -351,16 +347,13 @@
# no Python objects here, may be called without thread context !
# when we declare a Python object, Pyrex will INCREF(None) !
cdef xmlparser.xmlParserInput* c_input
- cdef python.PyGILState_STATE gil_state
cdef int error
if c_context._private is NULL:
if __DEFAULT_ENTITY_LOADER is NULL:
return NULL
return __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context)
- gil_state = python.PyGILState_Ensure()
c_input = _parser_resolve_from_python(c_url, c_pubid, c_context, &error)
- python.PyGILState_Release(gil_state)
if c_input is not NULL:
return c_input
@@ -404,10 +397,7 @@
recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER
return _handleParseResult(self, self._c_ctxt, result,
filename, recover)
-
-cdef class _InternalParserContext(_ParserContext):
- """Parser context for internal single-shot parsing
- """
+
cdef int _raiseParseError(xmlparser.xmlParserCtxt* ctxt, filename,
_ErrorLog error_log) except 0:
@@ -530,7 +520,7 @@
if target is not None:
return _TargetParserContext(target)
else:
- return _InternalParserContext()
+ return _ParserContext()
cdef xmlparser.xmlParserCtxt* _newParserCtxt(self):
if self._parser_type == LXML_HTML_PARSER:
@@ -841,16 +831,14 @@
cdef xmlparser.xmlParserCtxt* pctxt
cdef xmlDoc* c_doc
cdef _Document doc
- cdef int is_target_parser, error
if not self._feed_parser_running:
raise XMLSyntaxError, "no element found"
pctxt = self._parser_ctxt
self._feed_parser_running = 0
if self._parser_type == LXML_HTML_PARSER:
- error = htmlparser.htmlParseChunk(pctxt, NULL, 0, 1)
+ htmlparser.htmlParseChunk(pctxt, NULL, 0, 1)
else:
- error = xmlparser.xmlParseChunk(pctxt, NULL, 0, 1)
- is_target_parser = isinstance(self._context, _TargetParserContext)
+ xmlparser.xmlParseChunk(pctxt, NULL, 0, 1)
try:
result = self._context._handleParseResult(
self, pctxt.myDoc, None)
@@ -1150,7 +1138,6 @@
if recursive:
state = python.PyEval_SaveThread()
result = tree.xmlCopyDoc(c_doc, recursive)
- _bugFixURL(c_doc, result)
if recursive:
python.PyEval_RestoreThread(state)
__GLOBAL_PARSER_CONTEXT.initDocDict(result)
@@ -1162,7 +1149,6 @@
cdef xmlDoc* result
cdef xmlNode* c_node
result = tree.xmlCopyDoc(c_doc, 0) # non recursive
- _bugFixURL(c_doc, result)
__GLOBAL_PARSER_CONTEXT.initDocDict(result)
state = python.PyEval_SaveThread()
c_node = tree.xmlDocCopyNode(c_new_root, result, 1) # recursive
@@ -1178,14 +1164,6 @@
_copyTail(c_node.next, c_root)
return c_root
-cdef void _bugFixURL(xmlDoc* c_source_doc, xmlDoc* c_target_doc):
- """libxml2 <= 2.6.17 had a bug that prevented it from copying the document
- URL in xmlDocCopy()"""
- if c_source_doc.URL is not NULL and _LIBXML_VERSION_INT < 20618:
- if c_target_doc.URL is not NULL:
- tree.xmlFree(c_target_doc.URL)
- c_target_doc.URL = tree.xmlStrdup(c_source_doc.URL)
-
############################################################
## API level helper functions for _Document creation
More information about the lxml-checkins
mailing list