[Lxml-checkins] r54566 - in lxml/trunk: . src/lxml

scoder at codespeak.net scoder at codespeak.net
Thu May 8 17:45:02 CEST 2008


Author: scoder
Date: Thu May  8 17:45:01 2008
New Revision: 54566

Modified:
   lxml/trunk/   (props changed)
   lxml/trunk/CHANGES.txt
   lxml/trunk/src/lxml/cstd.pxd
   lxml/trunk/src/lxml/iterparse.pxi
Log:
 r4192 at delle:  sbehnel | 2008-05-07 20:32:28 +0200
 free GIL in iterparse() only when reading from a plain file


Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt	(original)
+++ lxml/trunk/CHANGES.txt	Thu May  8 17:45:01 2008
@@ -8,6 +8,9 @@
 Features added
 --------------
 
+* Running ``iterparse()`` on a plain file (or filename) frees the GIL
+  on reading.
+
 * Conversion functions ``html_to_xhtml()`` and ``xhtml_to_html()`` in
   lxml.html (experimental).
 

Modified: lxml/trunk/src/lxml/cstd.pxd
==============================================================================
--- lxml/trunk/src/lxml/cstd.pxd	(original)
+++ lxml/trunk/src/lxml/cstd.pxd	Thu May  8 17:45:01 2008
@@ -1,9 +1,4 @@
 
-cdef extern from "stdio.h":
-    ctypedef struct FILE
-    cdef int sprintf(char* str, char* format, ...) nogil
-    cdef int printf(char* str) nogil
-
 cdef extern from "string.h":
     ctypedef int size_t
     cdef int strlen(char* s) nogil
@@ -15,6 +10,15 @@
     cdef void* memcpy(void* dest, void* src, size_t len) nogil
     cdef void* memset(void* s, int c, size_t len) nogil
 
+cdef extern from "stdio.h":
+    ctypedef struct FILE
+    cdef size_t fread(void *ptr, size_t size, size_t nmemb,
+                      FILE *stream) nogil
+    cdef int feof(FILE *stream) nogil
+    cdef int ferror(FILE *stream) nogil
+    cdef int sprintf(char* str, char* format, ...) nogil
+    cdef int printf(char* str) nogil
+
 cdef extern from "stdlib.h":
     cdef void* malloc(size_t size) nogil
     cdef void* realloc(void* ptr, size_t size) nogil

Modified: lxml/trunk/src/lxml/iterparse.pxi
==============================================================================
--- lxml/trunk/src/lxml/iterparse.pxi	(original)
+++ lxml/trunk/src/lxml/iterparse.pxi	Thu May  8 17:45:01 2008
@@ -235,7 +235,7 @@
 cdef void _iterparseSaxStart(void* ctxt, char* localname, char* prefix,
                              char* URI, int nb_namespaces, char** namespaces,
                              int nb_attributes, int nb_defaulted,
-                             char** attributes) with gil:
+                             char** attributes):
     cdef xmlparser.xmlParserCtxt* c_ctxt
     cdef _IterparseContext context
     c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
@@ -246,7 +246,7 @@
         nb_attributes, nb_defaulted, attributes)
     _pushSaxStartEvent(context, c_ctxt.node)
 
-cdef void _iterparseSaxEnd(void* ctxt, char* localname, char* prefix, char* URI) with gil:
+cdef void _iterparseSaxEnd(void* ctxt, char* localname, char* prefix, char* URI):
     cdef xmlparser.xmlParserCtxt* c_ctxt
     cdef _IterparseContext context
     c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
@@ -254,7 +254,7 @@
     _pushSaxEndEvent(context, c_ctxt.node)
     context._origSaxEnd(ctxt, localname, prefix, URI)
 
-cdef void _iterparseSaxStartNoNs(void* ctxt, char* name, char** attributes) with gil:
+cdef void _iterparseSaxStartNoNs(void* ctxt, char* name, char** attributes):
     cdef xmlparser.xmlParserCtxt* c_ctxt
     cdef _IterparseContext context
     c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
@@ -262,7 +262,7 @@
     context._origSaxStartNoNs(ctxt, name, attributes)
     _pushSaxStartEvent(context, c_ctxt.node)
 
-cdef void _iterparseSaxEndNoNs(void* ctxt, char* name) with gil:
+cdef void _iterparseSaxEndNoNs(void* ctxt, char* name):
     cdef xmlparser.xmlParserCtxt* c_ctxt
     cdef _IterparseContext context
     c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
@@ -270,7 +270,7 @@
     _pushSaxEndEvent(context, c_ctxt.node)
     context._origSaxEndNoNs(ctxt, name)
 
-cdef void _iterparseSaxComment(void* ctxt, char* text) with gil:
+cdef void _iterparseSaxComment(void* ctxt, char* text):
     cdef xmlNode* c_node
     cdef xmlparser.xmlParserCtxt* c_ctxt
     cdef _IterparseContext context
@@ -281,7 +281,7 @@
     if c_node is not NULL:
         _pushSaxEvent(context, "comment", c_node)
 
-cdef void _iterparseSaxPI(void* ctxt, char* target, char* data) with gil:
+cdef void _iterparseSaxPI(void* ctxt, char* target, char* data):
     cdef xmlNode* c_node
     cdef xmlparser.xmlParserCtxt* c_ctxt
     cdef _IterparseContext context
@@ -351,6 +351,7 @@
     cdef object _events
     cdef readonly object root
     cdef object _source
+    cdef object _buffer
     cdef int (*_parse_chunk)(xmlparser.xmlParserCtxt* ctxt,
                              char* chunk, int size, int terminate)
     def __init__(self, source, events=("end",), *, tag=None,
@@ -434,9 +435,10 @@
     def __next__(self):
         cdef _IterparseContext context
         cdef xmlparser.xmlParserCtxt* pctxt
+        cdef cstd.FILE* c_stream
         cdef char* c_data
         cdef Py_ssize_t c_data_len
-        cdef int error
+        cdef int error, done
         if self._source is None:
             raise StopIteration
 
@@ -449,24 +451,41 @@
 
         del context._events[:]
         pctxt = context._c_ctxt
-        error = 0
+        error = done = 0
+        c_stream = python.PyFile_AsFile(self._source)
         while python.PyList_GET_SIZE(context._events) == 0:
-            data = self._source.read(__ITERPARSE_CHUNK_SIZE)
-            if not python.PyString_Check(data):
-                self._source = None
-                raise TypeError, "reading file objects must return plain strings"
-            c_data_len = python.PyString_GET_SIZE(data)
-            if c_data_len == 0:
-                c_data = NULL
-            else:
+            if c_stream is NULL:
+                data = self._source.read(__ITERPARSE_CHUNK_SIZE)
+                if not python.PyString_Check(data):
+                    self._source = None
+                    raise TypeError, "reading file objects must return plain strings"
+                c_data_len = python.PyString_GET_SIZE(data)
                 c_data = _cstr(data)
-            with nogil:
-                error = self._parse_chunk(
-                    pctxt, c_data, c_data_len, (c_data_len == 0))
-            if error or c_data_len == 0:
+                done = (c_data_len == 0)
+                error = self._parse_chunk(pctxt, c_data, c_data_len, done)
+            else:
+                if self._buffer is None:
+                    self._buffer = python.PyString_FromStringAndSize(
+                        NULL, __ITERPARSE_CHUNK_SIZE)
+                c_data = _cstr(self._buffer)
+                with nogil:
+                    c_data_len = cstd.fread(
+                        c_data, 1, __ITERPARSE_CHUNK_SIZE, c_stream)
+                    if c_data_len < __ITERPARSE_CHUNK_SIZE:
+                        if cstd.ferror(c_stream):
+                            error = 1
+                        elif cstd.feof(c_stream):
+                            done = 1
+                if not error:
+                    error = self._parse_chunk(
+                        pctxt, c_data, c_data_len, done)
+            if error or done:
+                self._buffer = None
                 break
-        if error or (context._validator is not None and
-                     not context._validator.isvalid()):
+
+        if not error and context._validator is not None:
+            error = not context._validator.isvalid()
+        if error:
             self._source = None
             del context._events[:]
             context._assureDocGetsFreed()


More information about the lxml-checkins mailing list