[Lxml-checkins] r54565 - in lxml/trunk: . src/lxml
scoder at codespeak.net
scoder at codespeak.net
Thu May 8 17:44:56 CEST 2008
Author: scoder
Date: Thu May 8 17:44:56 2008
New Revision: 54565
Modified:
lxml/trunk/ (props changed)
lxml/trunk/src/lxml/iterparse.pxi
Log:
r4191 at delle: sbehnel | 2008-05-07 19:28:06 +0200
large cleanup in iterparse code
Modified: lxml/trunk/src/lxml/iterparse.pxi
==============================================================================
--- lxml/trunk/src/lxml/iterparse.pxi (original)
+++ lxml/trunk/src/lxml/iterparse.pxi Thu May 8 17:44:56 2008
@@ -1,7 +1,6 @@
-# iterparse -- incremental parsing
+# iterparse -- event-driven parsing
-cdef object __ITERPARSE_CHUNK_SIZE
-__ITERPARSE_CHUNK_SIZE = 32768
+DEF __ITERPARSE_CHUNK_SIZE = 32768
ctypedef enum _IterparseEventFilter:
ITERPARSE_FILTER_START = 1
@@ -201,91 +200,97 @@
self._c_ctxt.myDoc = NULL
-cdef inline void _pushSaxStartEvent(xmlparser.xmlParserCtxt* c_ctxt,
+cdef inline void _pushSaxStartEvent(_IterparseContext context,
xmlNode* c_node):
- cdef _IterparseContext context
- context = <_IterparseContext>c_ctxt._private
try:
- if c_ctxt.html:
- _fixHtmlDictNodeNames(c_ctxt.dict, c_node)
+ if context._c_ctxt.html:
+ _fixHtmlDictNodeNames(context._c_ctxt.dict, c_node)
context.startNode(c_node)
except:
- if c_ctxt.errNo == xmlerror.XML_ERR_OK:
- c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
- c_ctxt.disableSAX = 1
+ if context._c_ctxt.errNo == xmlerror.XML_ERR_OK:
+ context._c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
+ context._c_ctxt.disableSAX = 1
context._store_raised()
-cdef inline void _pushSaxEndEvent(xmlparser.xmlParserCtxt* c_ctxt,
+cdef inline void _pushSaxEndEvent(_IterparseContext context,
xmlNode* c_node):
- cdef _IterparseContext context
- context = <_IterparseContext>c_ctxt._private
try:
context.endNode(c_node)
except:
- if c_ctxt.errNo == xmlerror.XML_ERR_OK:
- c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
- c_ctxt.disableSAX = 1
+ if context._c_ctxt.errNo == xmlerror.XML_ERR_OK:
+ context._c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
+ context._c_ctxt.disableSAX = 1
context._store_raised()
-cdef inline void _pushSaxEvent(xmlparser.xmlParserCtxt* c_ctxt,
+cdef inline void _pushSaxEvent(_IterparseContext context,
event, xmlNode* c_node):
- cdef _IterparseContext context
- context = <_IterparseContext>c_ctxt._private
try:
context.pushEvent(event, c_node)
except:
- if c_ctxt.errNo == xmlerror.XML_ERR_OK:
- c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
- c_ctxt.disableSAX = 1
+ if context._c_ctxt.errNo == xmlerror.XML_ERR_OK:
+ context._c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
+ context._c_ctxt.disableSAX = 1
context._store_raised()
cdef void _iterparseSaxStart(void* ctxt, char* localname, char* prefix,
char* URI, int nb_namespaces, char** namespaces,
int nb_attributes, int nb_defaulted,
- char** attributes):
+ char** attributes) with gil:
cdef xmlparser.xmlParserCtxt* c_ctxt
+ cdef _IterparseContext context
c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
- (<_IterparseContext>c_ctxt._private)._origSaxStart(
+ context = <_IterparseContext>c_ctxt._private
+ context._origSaxStart(
ctxt, localname, prefix, URI,
nb_namespaces, namespaces,
nb_attributes, nb_defaulted, attributes)
- _pushSaxStartEvent(c_ctxt, c_ctxt.node)
+ _pushSaxStartEvent(context, c_ctxt.node)
-cdef void _iterparseSaxEnd(void* ctxt, char* localname, char* prefix, char* URI):
+cdef void _iterparseSaxEnd(void* ctxt, char* localname, char* prefix, char* URI) with gil:
cdef xmlparser.xmlParserCtxt* c_ctxt
+ cdef _IterparseContext context
c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
- _pushSaxEndEvent(c_ctxt, c_ctxt.node)
- (<_IterparseContext>c_ctxt._private)._origSaxEnd(ctxt, localname, prefix, URI)
+ context = <_IterparseContext>c_ctxt._private
+ _pushSaxEndEvent(context, c_ctxt.node)
+ context._origSaxEnd(ctxt, localname, prefix, URI)
-cdef void _iterparseSaxStartNoNs(void* ctxt, char* name, char** attributes):
+cdef void _iterparseSaxStartNoNs(void* ctxt, char* name, char** attributes) with gil:
cdef xmlparser.xmlParserCtxt* c_ctxt
+ cdef _IterparseContext context
c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
- (<_IterparseContext>c_ctxt._private)._origSaxStartNoNs(ctxt, name, attributes)
- _pushSaxStartEvent(c_ctxt, c_ctxt.node)
+ context = <_IterparseContext>c_ctxt._private
+ context._origSaxStartNoNs(ctxt, name, attributes)
+ _pushSaxStartEvent(context, c_ctxt.node)
-cdef void _iterparseSaxEndNoNs(void* ctxt, char* name):
+cdef void _iterparseSaxEndNoNs(void* ctxt, char* name) with gil:
cdef xmlparser.xmlParserCtxt* c_ctxt
+ cdef _IterparseContext context
c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
- _pushSaxEndEvent(c_ctxt, c_ctxt.node)
- (<_IterparseContext>c_ctxt._private)._origSaxEndNoNs(ctxt, name)
+ context = <_IterparseContext>c_ctxt._private
+ _pushSaxEndEvent(context, c_ctxt.node)
+ context._origSaxEndNoNs(ctxt, name)
-cdef void _iterparseSaxComment(void* ctxt, char* text):
+cdef void _iterparseSaxComment(void* ctxt, char* text) with gil:
cdef xmlNode* c_node
cdef xmlparser.xmlParserCtxt* c_ctxt
+ cdef _IterparseContext context
c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
- (<_IterparseContext>c_ctxt._private)._origSaxComment(ctxt, text)
+ context = <_IterparseContext>c_ctxt._private
+ context._origSaxComment(ctxt, text)
c_node = _iterparseFindLastNode(c_ctxt)
if c_node is not NULL:
- _pushSaxEvent(c_ctxt, "comment", c_node)
+ _pushSaxEvent(context, "comment", c_node)
-cdef void _iterparseSaxPI(void* ctxt, char* target, char* data):
+cdef void _iterparseSaxPI(void* ctxt, char* target, char* data) with gil:
cdef xmlNode* c_node
cdef xmlparser.xmlParserCtxt* c_ctxt
+ cdef _IterparseContext context
c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
- (<_IterparseContext>c_ctxt._private)._origSaxPI(ctxt, target, data)
+ context = <_IterparseContext>c_ctxt._private
+ context._origSaxPI(ctxt, target, data)
c_node = _iterparseFindLastNode(c_ctxt)
if c_node is not NULL:
- _pushSaxEvent(c_ctxt, "pi", c_node)
+ _pushSaxEvent(context, "pi", c_node)
cdef inline xmlNode* _iterparseFindLastNode(xmlparser.xmlParserCtxt* c_ctxt):
# this mimics what libxml2 creates for comments/PIs
@@ -342,10 +347,12 @@
- encoding - override the document encoding
- schema - an XMLSchema to validate against
"""
- cdef object _source
- cdef object _events
cdef object _tag
+ cdef object _events
cdef readonly object root
+ cdef object _source
+ cdef int (*_parse_chunk)(xmlparser.xmlParserCtxt* ctxt,
+ char* chunk, int size, int terminate)
def __init__(self, source, events=("end",), *, tag=None,
attribute_defaults=False, dtd_validation=False,
load_dtd=False, no_network=True, remove_blank_text=False,
@@ -394,6 +401,11 @@
remove_comments, remove_pis, strip_cdata,
None, filename, encoding)
+ if self._for_html:
+ self._parse_chunk = htmlparser.htmlParseChunk
+ else:
+ self._parse_chunk = xmlparser.xmlParseChunk
+
context = <_IterparseContext>self._getPushParserContext()
__GLOBAL_PARSER_CONTEXT.initParserDict(context._c_ctxt)
context.prepare()
@@ -422,41 +434,39 @@
def __next__(self):
cdef _IterparseContext context
cdef xmlparser.xmlParserCtxt* pctxt
+ cdef char* c_data
+ cdef Py_ssize_t c_data_len
cdef int error
if self._source is None:
raise StopIteration
- context = <_IterparseContext>self._getPushParserContext()
+ context = <_IterparseContext>self._push_parser_context
if python.PyList_GET_SIZE(context._events) > context._event_index:
item = python.PyList_GET_ITEM(context._events, context._event_index)
python.Py_INCREF(item) # 'borrowed reference' from PyList_GET_ITEM
- context._event_index = context._event_index + 1
+ context._event_index += 1
return item
del context._events[:]
pctxt = context._c_ctxt
error = 0
- while python.PyList_GET_SIZE(context._events) == 0 and error == 0:
+ while python.PyList_GET_SIZE(context._events) == 0:
data = self._source.read(__ITERPARSE_CHUNK_SIZE)
if not python.PyString_Check(data):
self._source = None
raise TypeError, "reading file objects must return plain strings"
- elif data:
- if self._for_html:
- error = htmlparser.htmlParseChunk(
- pctxt, _cstr(data), python.PyString_GET_SIZE(data), 0)
- else:
- error = xmlparser.xmlParseChunk(
- pctxt, _cstr(data), python.PyString_GET_SIZE(data), 0)
+ c_data_len = python.PyString_GET_SIZE(data)
+ if c_data_len == 0:
+ c_data = NULL
else:
- if self._for_html:
- error = htmlparser.htmlParseChunk(pctxt, NULL, 0, 1)
- else:
- error = xmlparser.xmlParseChunk(pctxt, NULL, 0, 1)
- self._source = None
+ c_data = _cstr(data)
+ with nogil:
+ error = self._parse_chunk(
+ pctxt, c_data, c_data_len, (c_data_len == 0))
+ if error or c_data_len == 0:
break
- if error != 0 or (context._validator is not None and
- not context._validator.isvalid()):
+ if error or (context._validator is not None and
+ not context._validator.isvalid()):
self._source = None
del context._events[:]
context._assureDocGetsFreed()
More information about the lxml-checkins mailing list