[Lxml-checkins] r46484 - in lxml/trunk: . doc src/lxml

scoder at codespeak.net scoder at codespeak.net
Tue Sep 11 21:55:39 CEST 2007


Author: scoder
Date: Tue Sep 11 21:55:37 2007
New Revision: 46484

Added:
   lxml/trunk/cython-with-GIL-simple.patch
Modified:
   lxml/trunk/CHANGES.txt
   lxml/trunk/doc/tutorial.txt
   lxml/trunk/src/lxml/etree.pyx
   lxml/trunk/src/lxml/extensions.pxi
   lxml/trunk/src/lxml/parser.pxi
Log:
cleanup in parser code, ET-compatible target parser interface (SAX-like), tutorial section on parsing

Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt	(original)
+++ lxml/trunk/CHANGES.txt	Tue Sep 11 21:55:37 2007
@@ -8,6 +8,9 @@
 Features added
 --------------
 
+* ElementTree-like parser target interface as described in
+  http://effbot.org/elementtree/elementtree-xmlparser.htm
+
 * ElementTree-like feed parser interface on XMLParser and HTMLParser
   (``feed()`` and ``close()`` methods)
 

Added: lxml/trunk/cython-with-GIL-simple.patch
==============================================================================
--- (empty file)
+++ lxml/trunk/cython-with-GIL-simple.patch	Tue Sep 11 21:55:37 2007
@@ -0,0 +1,236 @@
+diff -r 43be72844df4 Compiler/Code.py
+--- a/Compiler/Code.py	Mon Sep 03 20:07:01 2007 +0200
++++ b/Compiler/Code.py	Mon Sep 10 20:13:13 2007 +0200
+@@ -284,6 +284,13 @@ class CCodeWriter:
+         #	code = "((PyObject*)%s)" % code
+         self.put_init_to_py_none(code, entry.type)
+ 
++    def put_py_gil_state_ensure(self, cname):
++        self.putln("PyGILState_STATE %s;" % cname)
++        self.putln("%s = PyGILState_Ensure();" % cname)
++
++    def put_py_gil_state_release(self, cname):
++        self.putln("PyGILState_Release(%s);" % cname)
++
+     def put_pymethoddef(self, entry, term):
+         if entry.doc:
+             doc_code = entry.doc_cname
+diff -r 43be72844df4 Compiler/ExprNodes.py
+--- a/Compiler/ExprNodes.py	Mon Sep 03 20:07:01 2007 +0200
++++ b/Compiler/ExprNodes.py	Mon Sep 10 22:47:05 2007 +0200
+@@ -473,7 +473,7 @@ class ExprNode(Node):
+         else: # neither src nor dst are py types
+             # Added the string comparison, since for c types that
+             # is enough, but SageX gets confused when the types are
+-            # in different files. 
++            # in different files.
+             if not (str(src.type) == str(dst_type) or dst_type.assignable_from(src_type)):
+                 error(self.pos, "Cannot assign type '%s' to '%s'" %
+                     (src.type, dst_type))
+diff -r 43be72844df4 Compiler/Naming.py
+--- a/Compiler/Naming.py	Mon Sep 03 20:07:01 2007 +0200
++++ b/Compiler/Naming.py	Mon Sep 10 20:13:13 2007 +0200
+@@ -53,5 +53,6 @@ stringtab_cname  = pyrex_prefix + "strin
+ stringtab_cname  = pyrex_prefix + "string_tab"
+ vtabslot_cname   = pyrex_prefix + "vtab"
+ c_api_tab_cname  = pyrex_prefix + "c_api_tab"
++gilstate_cname   = pyrex_prefix + "state"
+ 
+ extern_c_macro  = pyrex_prefix.upper() + "EXTERN_C"
+diff -r 43be72844df4 Compiler/Nodes.py
+--- a/Compiler/Nodes.py	Mon Sep 03 20:07:01 2007 +0200
++++ b/Compiler/Nodes.py	Mon Sep 10 20:13:13 2007 +0200
+@@ -282,6 +282,7 @@ class CFuncDeclaratorNode(CDeclaratorNod
+     # has_varargs      boolean
+     # exception_value  ConstNode
+     # exception_check  boolean    True if PyErr_Occurred check needed
++    # with_gil         boolean    True if GIL should be grabbed/released
+ 
+     def analyse(self, return_type, env):
+         func_type_args = []
+@@ -317,7 +318,8 @@ class CFuncDeclaratorNode(CDeclaratorNod
+             exc_check = self.exception_check
+         func_type = PyrexTypes.CFuncType(
+             return_type, func_type_args, self.has_varargs, 
+-            exception_value = exc_val, exception_check = exc_check)
++            exception_value = exc_val, exception_check = exc_check,
++            with_gil = self.with_gil)
+         return self.base.analyse(func_type, env)
+ 
+ 
+@@ -572,6 +574,8 @@ class FuncDefNode(StatNode, BlockNode):
+         self.generate_keyword_list(code)
+         # ----- Extern library function declarations
+         lenv.generate_library_function_declarations(code)
++        # ----- Grab GIL
++        self.generate_grab_gil(code)
+         # ----- Fetch arguments
+         self.generate_argument_parsing_code(code)
+         self.generate_argument_increfs(lenv, code)
+@@ -623,6 +627,9 @@ class FuncDefNode(StatNode, BlockNode):
+         code.put_var_decrefs(lenv.var_entries, used_only = 1)
+         code.put_var_decrefs(lenv.arg_entries)
+         self.put_stararg_decrefs(code)
++        # ----- Release GIL
++        self.generate_release_gil(code)
++        # ----- Return
+         if not self.return_type.is_void:
+             retval_code = Naming.retval_cname
+             #if self.return_type.is_extension_type:
+@@ -651,6 +658,12 @@ class FuncDefNode(StatNode, BlockNode):
+             code.put_var_incref(entry)
+ 
+     def generate_execution_code(self, code):
++        pass
++
++    def generate_grab_gil(self, code):
++        pass
++
++    def generate_release_gil(self, code):
+         pass
+ 
+ 
+@@ -756,7 +769,19 @@ class CFuncDefNode(FuncDefNode):
+         else:
+             error(arg.pos, "Cannot test type of extern C class "
+                 "without type object name specification")
+-    
++
++    def generate_grab_gil(self, code):
++        if self.entry.type.with_gil:
++            code.putln("")
++            code.put_py_gil_state_ensure(Naming.gilstate_cname)
++            code.putln("")
++
++    def generate_release_gil(self, code):
++        if self.entry.type.with_gil:
++            code.putln("")
++            code.put_py_gil_state_release(Naming.gilstate_cname)
++            code.putln("")
++
+     def error_value(self):
+         if self.return_type.is_pyobject:
+             return "0"
+diff -r 43be72844df4 Compiler/Parsing.py
+--- a/Compiler/Parsing.py	Mon Sep 03 20:07:01 2007 +0200
++++ b/Compiler/Parsing.py	Tue Sep 11 21:06:49 2007 +0200
+@@ -5,7 +5,7 @@ import os, re
+ import os, re
+ from string import join, replace
+ from types import ListType, TupleType
+-from Scanning import PyrexScanner
++from Scanning import PyrexScanner, function_contexts
+ import Nodes
+ import ExprNodes
+ from ModuleNode import ModuleNode
+@@ -1462,10 +1462,10 @@ def p_c_declarator(s, empty = 0, is_type
+                 args = p_c_arg_list(s, in_pyfunc = 0, cmethod_flag = cmethod_flag)
+                 ellipsis = p_optional_ellipsis(s)
+                 s.expect(')')
+-                exc_val, exc_check = p_exception_value_clause(s)
++                options = p_c_func_options(s)
+                 result = Nodes.CFuncDeclaratorNode(pos, 
+                     base = result, args = args, has_varargs = ellipsis,
+-                    exception_value = exc_val, exception_check = exc_check)
++                    **options)
+             cmethod_flag = 0
+     return result
+ 
+@@ -1483,6 +1483,37 @@ def p_exception_value_clause(s):
+                 s.next()
+             exc_val = p_simple_expr(s) #p_exception_value(s)
+     return exc_val, exc_check
++
++def p_c_with(s):
++    if s.sy == 'with':
++        s.next()
++        return p_ident_list(s)
++    return ()
++
++def p_c_func_options(s):
++    exc_val = None
++    exc_check = 0
++    contexts = []
++
++    if s.sy == 'except':
++        exc_val, exc_check = p_exception_value_clause(s)
++        contexts = p_c_with(s)
++    elif s.sy == 'with':
++        contexts = p_c_with(s)
++        exc_val, exc_check = p_exception_value_clause(s)
++
++    for context in contexts:
++        if context not in function_contexts:
++            s.error("Unknown context: " + context)
++            return None
++
++    ret = {
++        'exception_value': exc_val,
++        'exception_check': exc_check,
++        'with_gil': 'GIL' in contexts,
++        }
++
++    return ret
+ 
+ #def p_exception_value(s):
+ #	sign = ""
+diff -r 43be72844df4 Compiler/PyrexTypes.py
+--- a/Compiler/PyrexTypes.py	Mon Sep 03 20:07:01 2007 +0200
++++ b/Compiler/PyrexTypes.py	Tue Sep 11 12:07:03 2007 +0200
+@@ -488,16 +488,18 @@ class CFuncType(CType):
+     #  has_varargs      boolean
+     #  exception_value  string
+     #  exception_check  boolean  True if PyErr_Occurred check needed
++    #  with_gil         boolean  True if GIL should be grabbed/released
+     
+     is_cfunction = 1
+     
+     def __init__(self, return_type, args, has_varargs,
+-            exception_value = None, exception_check = 0):
++            exception_value = None, exception_check = 0, with_gil = False):
+         self.return_type = return_type
+         self.args = args
+         self.has_varargs = has_varargs
+         self.exception_value = exception_value
+         self.exception_check = exception_check
++        self.with_gil = with_gil
+     
+     def __repr__(self):
+         arg_reprs = map(repr, self.args)
+@@ -580,6 +582,7 @@ class CFuncType(CType):
+         if not arg_decl_code and not pyrex:
+             arg_decl_code = "void"
+         exc_clause = ""
++        with_gil_clause = ""
+         if pyrex or for_display:
+             if self.exception_value and self.exception_check:
+                 exc_clause = " except? %s" % self.exception_value
+@@ -587,8 +590,11 @@ class CFuncType(CType):
+                 exc_clause = " except %s" % self.exception_value
+             elif self.exception_check:
+                 exc_clause = " except *"
++            if self.with_gil:
++                with_gil_clause = " with GIL"
+         return self.return_type.declaration_code(
+-            "(%s(%s)%s)" % (entity_code, arg_decl_code, exc_clause),
++            "(%s(%s)%s%s)" % (entity_code, arg_decl_code,
++                              exc_clause, with_gil_clause),
+             for_display, dll_linkage, pyrex)
+ 
+ 
+diff -r 43be72844df4 Compiler/Scanning.py
+--- a/Compiler/Scanning.py	Mon Sep 03 20:07:01 2007 +0200
++++ b/Compiler/Scanning.py	Tue Sep 11 21:05:33 2007 +0200
+@@ -138,7 +138,11 @@ reserved_words = [
+     "raise", "import", "exec", "try", "except", "finally",
+     "while", "if", "elif", "else", "for", "in", "assert",
+     "and", "or", "not", "is", "in", "lambda", "from",
+-    "NULL", "cimport", "by"
++    "NULL", "cimport", "by", "with"
++]
++
++function_contexts = [ # allowed arguments to the "with" option
++    "GIL"
+ ]
+ 
+ class Method:

Modified: lxml/trunk/doc/tutorial.txt
==============================================================================
--- lxml/trunk/doc/tutorial.txt	(original)
+++ lxml/trunk/doc/tutorial.txt	Tue Sep 11 21:55:37 2007
@@ -6,24 +6,32 @@
   Stefan Behnel
 
 This tutorial briefly overviews the main concepts of the `ElementTree API`_ as
-implemented by lxml.etree, and some simple enhancements that make your life as
-a programmer easier.
+implemented by ``lxml.etree``, and some simple enhancements that make your
+life as a programmer easier.
 
 .. _`ElementTree API`: http://effbot.org/zone/element-index.htm#documentation
 
 .. contents::
 .. 
-   1  Elements and ElementTrees
-     1.1  The Element class
-     1.2  The ElementTree class
-   2  Parsing and XML literals
-     2.1  The XML() function
-     2.2  The parse() function
-   3  Namespaces
-   4  The find*() methods
-     4.1  findall()
-     4.2  find()
-     4.3  findtext()
+   1  The Element class
+     1.1  Elements are lists
+     1.2  Elements carry attributes
+     1.3  Elements contain text
+     1.4  Tree iteration
+   2  The ElementTree class
+   3  Parsing from strings and files
+     3.1  The fromstring() function
+     3.2  The XML() function
+     3.3  The parse() function
+     3.4  Parser objects
+     3.5  Incremental parsing
+     3.6  Event-driven parsing
+   4  Namespaces
+   5  The E-factory
+   6  ElementPath
+     6.1  findall()
+     6.2  find()
+     6.3  findtext()
 
 
 A common way to import ``lxml.etree`` is as follows::
@@ -380,15 +388,208 @@
 made lxml loose DTD information in an input-output cycle.
 
 
-Parsing files and XML literals
+Parsing from strings and files
 ==============================
 
+``lxml.etree`` supports parsing XML in a number of ways and from all important
+sources, namely strings, files and file-like objects.  The main parse
+functions are ``fromstring()`` and ``parse()``, both called with the source as
+first argument.  By default, they use the standard parser, but you can always
+pass a different parser as second argument.
+
+
+The fromstring() function
+-------------------------
+
+The ``fromstring()`` function is the easiest way to parse a string::
+
+    >>> some_xml_data = "<root>data</root>"
+
+    >>> root = etree.fromstring(some_xml_data)
+    >>> print root.tag
+    root
+    >>> print etree.tostring(root)
+    <root>data</root>
+
+
 The XML() function
 ------------------
 
+The ``XML()`` function behaves like the ``fromstring()`` function, but is
+commonly used to write XML literals right into the source::
+
+    >>> root = etree.XML("<root>data</root>")
+    >>> print root.tag
+    root
+    >>> print etree.tostring(root)
+    <root>data</root>
+
+
 The parse() function
 --------------------
 
+The ``parse()`` function is used to parse from files and file-like objects::
+
+    >>> some_file_like = StringIO("<root>data</root>")
+
+    >>> tree = etree.parse(some_file_like)
+
+    >>> print etree.tostring(tree)
+    <root>data</root>
+
+Note that ``parse()`` returns an ElementTree object, not an Element object as
+the string parser functions do::
+
+    >>> root = tree.getroot()
+    >>> print root.tag
+    root
+    >>> print etree.tostring(root)
+    <root>data</root>
+
+
+Parser objects
+--------------
+
+By default, ``lxml.etree`` uses a standard parser with a default setup.  If
+you want to configure the parser, you can create a new instance::
+
+    >>> parser = etree.XMLParser(remove_blank_text=True) # lxml.etree only!
+
+This creates a parser that removes empty text between tags while parsing,
+which can reduce the size of the tree and avoid dangling tail text if you know
+that whitespace-only content is not meaningful for your data.  An example::
+
+    >>> root = etree.XML("<root>  <a/>   <b>  </b>     </root>", parser)
+
+    >>> print etree.tostring(root)
+    <root><a/><b>  </b></root>
+
+Note that the whitespace content inside the ``<b>`` tag was not removed, as
+content at leaf elements tends to be data content (even if blank).  You can
+easily remove it in an additional step by traversing the tree::
+
+    >>> for element in root.getiterator("*"):
+    ...     if element.text is not None and not element.text.strip():
+    ...         element.text = None
+
+    >>> print etree.tostring(root)
+    <root><a/><b/></root>
+
+See ``help(etree.XMLParser)`` to find out about the available parser options.
+
+
+Incremental parsing
+-------------------
+
+``lxml.etree`` provides two ways for incremental step-by-step parsing.  One is
+through file-like objects, where it calls the ``read()`` method repeatedly.
+This is best used where the data arrives from a source like ``urllib`` or any
+other file-like object that can provide data on request.  Note that the parser
+will block and wait until data becomes available in this case::
+
+    >>> class DataSource:
+    ...     data = iter(["<roo", "t><", "a/", "><", "/root>"])
+    ...     def read(self, requested_size):
+    ...         try:
+    ...             return self.data.next()
+    ...         except StopIteration:
+    ...             return ""
+
+    >>> root = etree.parse(DataSource())
+
+    >>> print etree.tostring(root)
+    <root><a/></root>
+
+The second way is through a feed parser interface, given by the ``feed(data)``
+and ``close()`` methods::
+
+    >>> parser = etree.XMLParser()
+
+    >>> parser.feed("<roo")
+    >>> parser.feed("t><")
+    >>> parser.feed("a/")
+    >>> parser.feed("><")
+    >>> parser.feed("/root>")
+
+    >>> root = parser.close()
+
+    >>> print etree.tostring(root)
+    <root><a/></root>
+
+Here, you can interrupt the parsing process at any time and continue it later
+on with another call to the ``feed()`` method.  This comes in handy if you
+want to avoid blocking calls to the parser, e.g. in frameworks like Twisted,
+or whenever data comes in slowly or in chunks and you want to do other things
+while waiting for the next chunk.
+
+You can reuse the parser by calling its ``feed()`` method again::
+
+    >>> parser.feed("<root/>")
+    >>> root = parser.close()
+    >>> print etree.tostring(root)
+    <root/>
+
+
+Event-driven parsing
+--------------------
+
+Sometimes, all you need from a document is a small fraction somewhere deep
+inside the tree, so parsing the whole tree into memory, traversing it and
+dropping it can be too much overhead.  ``lxml.etree`` supports this use case
+with two event-driven parser interfaces, one that generates parser events
+while building the tree (``iterparse``), and one that does not build the tree
+at all, and instead calls feedback methods on a target object in a SAX-like
+fashion.
+
+Here is a simple ``iterparse()`` example::
+
+    >>> some_file_like = StringIO("<root><a>data</a></root>")
+
+    >>> for event, element in etree.iterparse(some_file_like):
+    ...     print "%s, %4s, %s" % (event, element.tag, element.text)
+    end,    a, data
+    end, root, None
+
+By default, ``iterparse()`` only generates events when it is done parsing an
+element, but you can control this through the ``events`` keyword argument::
+
+    >>> some_file_like = StringIO("<root><a>data</a></root>")
+
+    >>> for event, element in etree.iterparse(some_file_like,
+    ...                                       events=("start", "end")):
+    ...     print "%5s, %4s, %s" % (event, element.tag, element.text)
+    start, root, None
+    start,    a, data
+      end,    a, data
+      end, root, None
+
+Note that the text, tail and children of an Element are not necessarily there
+yet when receiving the ``start`` event.  Only the ``end`` event guarantees
+that the Element has been parsed completely.  It also allows you to ``clear()``
+or modify the content of an Element to save memory.
+
+If memory is a real bottleneck, or if building the tree is not desired at all,
+the target parser interface of ``lxml.etree`` can be used.  It creates
+SAX-like events by calling the methods of a target object.  By implementing
+some or all of these methods, you can control which events are generated::
+
+    >>> class ParserTarget:
+    ...     events = []
+    ...     def start(self, tag, attrib):
+    ...         self.events.append(("start", tag, attrib))
+    ...     def close(self):
+    ...         return self.events
+
+    >>> parser = etree.XMLParser(target=ParserTarget())
+    >>> events = etree.fromstring('<root test="true"/>', parser)
+
+    >>> for event in events:
+    ...     print 'event: %s - tag: %s' % (event[0], event[1])
+    ...     for attr, value in event[2].iteritems():
+    ...         print ' * %s = %s' % (attr, value)
+    event: start - tag: root
+     * test = true
+
 
 Namespaces
 ==========

Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx	(original)
+++ lxml/trunk/src/lxml/etree.pyx	Tue Sep 11 21:55:37 2007
@@ -1992,7 +1992,10 @@
     if element is not None:
         doc  = element._doc
     elif file is not None:
-        doc = _parseDocument(file, parser)
+        try:
+            doc = _parseDocument(file, parser)
+        except _TargetParserResult, result_container:
+            return result_container.result
     else:
         c_doc = _newDoc()
         doc = _documentFactory(c_doc, parser)
@@ -2015,8 +2018,11 @@
         parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
         if not isinstance(parser, HTMLParser):
             parser = __DEFAULT_HTML_PARSER
-    doc = _parseMemoryDocument(text, base_url, parser)
-    return doc.getroot()
+    try:
+        doc = _parseMemoryDocument(text, base_url, parser)
+        return doc.getroot()
+    except _TargetParserResult, result_container:
+        return result_container.result
 
 def XML(text, _BaseParser parser=None, base_url=None):
     """Parses an XML document from a string constant. This function can be used
@@ -2036,8 +2042,11 @@
         parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
         if not isinstance(parser, XMLParser):
             parser = __DEFAULT_XML_PARSER
-    doc = _parseMemoryDocument(text, base_url, parser)
-    return doc.getroot()
+    try:
+        doc = _parseMemoryDocument(text, base_url, parser)
+        return doc.getroot()
+    except _TargetParserResult, result_container:
+        return result_container.result
 
 def fromstring(text, _BaseParser parser=None, base_url=None):
     """Parses an XML document from a string.
@@ -2052,8 +2061,11 @@
     cdef _Document doc
     if parser is None:
         parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
-    doc = _parseMemoryDocument(text, base_url, parser)
-    return doc.getroot()
+    try:
+        doc = _parseMemoryDocument(text, base_url, parser)
+        return doc.getroot()
+    except _TargetParserResult, result_container:
+        return result_container.result
 
 def iselement(element):
     """Checks if an object appears to be a valid element object.
@@ -2124,8 +2136,11 @@
     is provided as second argument, the default parser is used.
     """
     cdef _Document doc
-    doc = _parseDocument(source, parser)
-    return ElementTree(doc.getroot())
+    try:
+        doc = _parseDocument(source, parser)
+        return ElementTree(doc.getroot())
+    except _TargetParserResult, result_container:
+        return result_container.result
 
 
 ################################################################################

Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi	(original)
+++ lxml/trunk/src/lxml/extensions.pxi	Tue Sep 11 21:55:37 2007
@@ -578,14 +578,8 @@
 
 # lookup the function by name and call it
 
-cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt, int nargs):
-    cdef python.PyGILState_STATE gil_state
-    gil_state = python.PyGILState_Ensure()
-    _call_python_xpath_function(ctxt, nargs)
-    python.PyGILState_Release(gil_state)
-
-cdef void _call_python_xpath_function(xpath.xmlXPathParserContext* ctxt,
-                                      int nargs):
+cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt,
+                               int nargs) with GIL:
     cdef xpath.xmlXPathContext* rctxt
     cdef _BaseContext context
     rctxt = ctxt.context

Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi	(original)
+++ lxml/trunk/src/lxml/parser.pxi	Tue Sep 11 21:55:37 2007
@@ -261,10 +261,8 @@
     cdef int copyToBuffer(self, char* c_buffer, int c_size):
         cdef char* c_start
         cdef Py_ssize_t byte_count, remaining
-        cdef python.PyGILState_STATE gil_state
         if self._bytes_read < 0:
             return 0
-        gil_state = python.PyGILState_Ensure()
         try:
             byte_count = python.PyString_GET_SIZE(self._bytes)
             remaining = byte_count - self._bytes_read
@@ -276,21 +274,18 @@
                 self._bytes_read = 0
                 if remaining == 0:
                     self._bytes_read = -1
-                    python.PyGILState_Release(gil_state)
                     return 0
             if c_size > remaining:
                 c_size = remaining
             c_start = _cstr(self._bytes) + self._bytes_read
-            python.PyGILState_Release(gil_state)
             self._bytes_read = self._bytes_read + c_size
             cstd.memcpy(c_buffer, c_start, c_size)
             return c_size
         except:
             self._exc_context._store_raised()
-            python.PyGILState_Release(gil_state)
             return -1
 
-cdef int _readFilelikeParser(void* ctxt, char* c_buffer, int c_size):
+cdef int _readFilelikeParser(void* ctxt, char* c_buffer, int c_size) with GIL:
     return (<_FileReaderContext>ctxt).copyToBuffer(c_buffer, c_size)
 
 ############################################################
@@ -298,7 +293,8 @@
 ############################################################
 
 cdef  xmlparser.xmlParserInput* _parser_resolve_from_python(
-    char* c_url, char* c_pubid, xmlparser.xmlParserCtxt* c_context, int* error):
+    char* c_url, char* c_pubid, xmlparser.xmlParserCtxt* c_context,
+    int* error) with GIL:
     # call the Python document loaders
     cdef xmlparser.xmlParserInput* c_input
     cdef _ResolverContext context
@@ -351,16 +347,13 @@
     # no Python objects here, may be called without thread context !
     # when we declare a Python object, Pyrex will INCREF(None) !
     cdef xmlparser.xmlParserInput* c_input
-    cdef python.PyGILState_STATE gil_state
     cdef int error
     if c_context._private is NULL:
         if __DEFAULT_ENTITY_LOADER is NULL:
             return NULL
         return __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context)
 
-    gil_state = python.PyGILState_Ensure()
     c_input = _parser_resolve_from_python(c_url, c_pubid, c_context, &error)
-    python.PyGILState_Release(gil_state)
 
     if c_input is not NULL:
         return c_input
@@ -404,10 +397,7 @@
         recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER
         return _handleParseResult(self, self._c_ctxt, result,
                                    filename, recover)
-
-cdef class _InternalParserContext(_ParserContext):
-    """Parser context for internal single-shot parsing
-    """
+    
 
 cdef int _raiseParseError(xmlparser.xmlParserCtxt* ctxt, filename,
                           _ErrorLog error_log) except 0:
@@ -530,7 +520,7 @@
         if target is not None:
             return _TargetParserContext(target)
         else:
-            return _InternalParserContext()
+            return _ParserContext()
 
     cdef xmlparser.xmlParserCtxt* _newParserCtxt(self):
         if self._parser_type == LXML_HTML_PARSER:
@@ -841,16 +831,14 @@
         cdef xmlparser.xmlParserCtxt* pctxt
         cdef xmlDoc* c_doc
         cdef _Document doc
-        cdef int is_target_parser, error
         if not self._feed_parser_running:
             raise XMLSyntaxError, "no element found"
         pctxt = self._parser_ctxt
         self._feed_parser_running = 0
         if self._parser_type == LXML_HTML_PARSER:
-            error = htmlparser.htmlParseChunk(pctxt, NULL, 0, 1)
+            htmlparser.htmlParseChunk(pctxt, NULL, 0, 1)
         else:
-            error = xmlparser.xmlParseChunk(pctxt, NULL, 0, 1)
-        is_target_parser = isinstance(self._context, _TargetParserContext)
+            xmlparser.xmlParseChunk(pctxt, NULL, 0, 1)
         try:
             result = self._context._handleParseResult(
                 self, pctxt.myDoc, None)
@@ -1150,7 +1138,6 @@
     if recursive:
         state = python.PyEval_SaveThread()
     result = tree.xmlCopyDoc(c_doc, recursive)
-    _bugFixURL(c_doc, result)
     if recursive:
         python.PyEval_RestoreThread(state)
     __GLOBAL_PARSER_CONTEXT.initDocDict(result)
@@ -1162,7 +1149,6 @@
     cdef xmlDoc* result
     cdef xmlNode* c_node
     result = tree.xmlCopyDoc(c_doc, 0) # non recursive
-    _bugFixURL(c_doc, result)
     __GLOBAL_PARSER_CONTEXT.initDocDict(result)
     state = python.PyEval_SaveThread()
     c_node = tree.xmlDocCopyNode(c_new_root, result, 1) # recursive
@@ -1178,14 +1164,6 @@
     _copyTail(c_node.next, c_root)
     return c_root
 
-cdef void _bugFixURL(xmlDoc* c_source_doc, xmlDoc* c_target_doc):
-    """libxml2 <= 2.6.17 had a bug that prevented it from copying the document
-    URL in xmlDocCopy()"""
-    if c_source_doc.URL is not NULL and _LIBXML_VERSION_INT < 20618:
-        if c_target_doc.URL is not NULL:
-            tree.xmlFree(c_target_doc.URL)
-        c_target_doc.URL = tree.xmlStrdup(c_source_doc.URL)
-
 
 ############################################################
 ## API level helper functions for _Document creation


More information about the lxml-checkins mailing list