[Lxml-checkins] r54569 - in lxml/trunk: . src/lxml
scoder at codespeak.net
scoder at codespeak.net
Thu May 8 17:45:15 CEST 2008
Author: scoder
Date: Thu May 8 17:45:14 2008
New Revision: 54569
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/parser.pxi
Log:
r4195 at delle: sbehnel | 2008-05-08 17:43:13 +0200
when parsing from a plain file, free the GIL and do not pass through Python
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Thu May 8 17:45:14 2008
@@ -8,6 +8,8 @@
Features added
--------------
+* Parsing from a plain file object frees the GIL.
+
* Running ``iterparse()`` on a plain file (or filename) frees the GIL
on reading.
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Thu May 8 17:45:14 2008
@@ -222,45 +222,67 @@
self._bytes = ''
self._bytes_read = 0
+ cdef xmlparser.xmlParserInputBuffer* _createParserInputBuffer(self):
+ cdef cstd.FILE* c_stream
+ cdef xmlparser.xmlParserInputBuffer* c_buffer
+ c_buffer = xmlparser.xmlAllocParserInputBuffer(0)
+ c_stream = python.PyFile_AsFile(self._filelike)
+ if c_stream is NULL:
+ c_buffer.readcallback = _readFilelikeParser
+ c_buffer.context = <python.PyObject*>self
+ else:
+ c_buffer.readcallback = _readFileParser
+ c_buffer.context = c_stream
+ return c_buffer
+
cdef xmlparser.xmlParserInput* _createParserInput(
self, xmlparser.xmlParserCtxt* ctxt):
cdef xmlparser.xmlParserInputBuffer* c_buffer
- c_buffer = xmlparser.xmlAllocParserInputBuffer(0)
- c_buffer.context = <python.PyObject*>self
- c_buffer.readcallback = _readFilelikeParser
+ c_buffer = self._createParserInputBuffer()
return xmlparser.xmlNewIOInputStream(ctxt, c_buffer, 0)
+ cdef tree.xmlDtd* _readDtd(self):
+ cdef xmlparser.xmlParserInputBuffer* c_buffer
+ c_buffer = self._createParserInputBuffer()
+ with nogil:
+ return xmlparser.xmlIOParseDTD(NULL, c_buffer, 0)
+
cdef xmlDoc* _readDoc(self, xmlparser.xmlParserCtxt* ctxt, int options):
cdef xmlDoc* result
cdef char* c_encoding
+ cdef cstd.FILE* c_stream
+ cdef xmlparser.xmlInputReadCallback c_read_callback
+ cdef xmlparser.xmlInputCloseCallback c_close_callback
+ cdef void* c_callback_context
if self._encoding is None:
c_encoding = NULL
else:
c_encoding = _cstr(self._encoding)
+ c_stream = python.PyFile_AsFile(self._filelike)
+ if c_stream is NULL:
+ c_read_callback = _readFilelikeParser
+ c_callback_context = <python.PyObject*>self
+ else:
+ c_read_callback = _readFileParser
+ c_callback_context = c_stream
+
with nogil:
if ctxt.html:
result = htmlparser.htmlCtxtReadIO(
- ctxt, _readFilelikeParser, NULL, <python.PyObject*>self,
- self._c_url, c_encoding, options)
+ ctxt, c_read_callback, NULL, c_callback_context,
+ self._c_url, c_encoding, options)
if result is not NULL:
if _fixHtmlDictNames(ctxt.dict, result) < 0:
tree.xmlFreeDoc(result)
result = NULL
else:
result = xmlparser.xmlCtxtReadIO(
- ctxt, _readFilelikeParser, NULL, <python.PyObject*>self,
+ ctxt, c_read_callback, NULL, c_callback_context,
self._c_url, c_encoding, options)
- return result
- cdef tree.xmlDtd* _readDtd(self):
- cdef xmlparser.xmlParserInputBuffer* c_buffer
- c_buffer = xmlparser.xmlAllocParserInputBuffer(0)
- c_buffer.context = <python.PyObject*>self
- c_buffer.readcallback = _readFilelikeParser
- with nogil:
- return xmlparser.xmlIOParseDTD(NULL, c_buffer, 0)
+ return result
cdef int copyToBuffer(self, char* c_buffer, int c_size):
cdef char* c_start
@@ -293,6 +315,9 @@
cdef int _readFilelikeParser(void* ctxt, char* c_buffer, int c_size) with gil:
return (<_FileReaderContext>ctxt).copyToBuffer(c_buffer, c_size)
+cdef int _readFileParser(void* ctxt, char* c_buffer, int c_size) nogil:
+ return cstd.fread(c_buffer, 1, c_size, <cstd.FILE*>ctxt)
+
############################################################
## support for custom document loaders
############################################################
More information about the lxml-checkins mailing list