[Lxml-checkins] r48970 - in lxml/trunk: . src/lxml
scoder at codespeak.net
scoder at codespeak.net
Fri Nov 23 09:27:51 CET 2007
Author: scoder
Date: Fri Nov 23 09:27:51 2007
New Revision: 48970
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/lxml.etree.pyx
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/parsertarget.pxi
lxml/trunk/src/lxml/xmlparser.pxd
Log:
new SAX parser framework + TreeBuilder class implementation
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri Nov 23 09:27:51 2007
@@ -8,6 +8,8 @@
Features added
--------------
+* ElementTree compatible TreeBuilder class.
+
* Use default prefixes for some common XML namespaces
* ``lxml.html.clean.Cleaner`` now allows for a ``host_whitelist``, and
@@ -27,7 +29,7 @@
Bugs fixed
----------
-* Well hidden free-while-in-use crash bug in ObjectPath
+* Target parser failed to report comments.
* In the ``lxml.html`` ``iter_links`` method, links in ``<object>``
tags weren't recognized. (Note: plugin-specific link parameters
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Nov 23 09:27:51 2007
@@ -31,6 +31,9 @@
cdef object ITER_EMPTY
ITER_EMPTY = iter(())
+cdef object EMPTY_READ_ONLY_DICT
+EMPTY_READ_ONLY_DICT = python.PyDictProxy_New({})
+
# the rules
# any libxml C argument/variable is prefixed with c_
# any non-public function/class is prefixed with an underscore
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Fri Nov 23 09:27:51 2007
@@ -376,7 +376,8 @@
############################################################
cdef class _ParserContext(_ResolverContext)
-cdef class _TargetParserContext(_ParserContext)
+cdef class _SaxParserContext(_ParserContext)
+cdef class _TargetParserContext(_SaxParserContext)
cdef class _ParserContext(_ResolverContext):
cdef _ErrorLog _error_log
@@ -577,39 +578,33 @@
cdef xmlparser.xmlParserCtxt* pctxt
if self._parser_context is None:
self._parser_context = self._createContext(self._target)
-
pctxt = self._newParserCtxt()
if pctxt is NULL:
python.PyErr_NoMemory()
-
+ _initParserContext(self._parser_context, self._resolvers, pctxt)
if self._remove_comments:
pctxt.sax.comment = NULL
if self._remove_pis:
pctxt.sax.processingInstruction = NULL
# hard switch-off for CDATA nodes => makes them plain text
pctxt.sax.cdataBlock = NULL
-
- _initParserContext(self._parser_context, self._resolvers, pctxt)
return self._parser_context
cdef _ParserContext _getPushParserContext(self):
cdef xmlparser.xmlParserCtxt* pctxt
if self._push_parser_context is None:
self._push_parser_context = self._createContext(self._target)
-
pctxt = self._newPushParserCtxt()
if pctxt is NULL:
python.PyErr_NoMemory()
-
+ _initParserContext(
+ self._push_parser_context, self._resolvers, pctxt)
if self._remove_comments:
pctxt.sax.comment = NULL
if self._remove_pis:
pctxt.sax.processingInstruction = NULL
# hard switch-off for CDATA nodes => makes them plain text
pctxt.sax.cdataBlock = NULL
-
- _initParserContext(
- self._push_parser_context, self._resolvers, pctxt)
return self._push_parser_context
cdef _ParserContext _createContext(self, target):
@@ -992,7 +987,422 @@
if c_data is not NULL and buffer_len > 0:
return htmlparser.htmlParseChunk(c_ctxt, c_data, buffer_len, 0)
return 0
-
+
+
+############################################################
+## SAX event handler
+############################################################
+
+ctypedef enum _SaxParserEvents:
+ SAX_EVENT_START = 1
+ SAX_EVENT_END = 2
+ SAX_EVENT_DATA = 4
+ SAX_EVENT_DOCTYPE = 8
+ SAX_EVENT_PI = 16
+ SAX_EVENT_COMMENT = 32
+
+cdef class _SaxParserTarget:
+ cdef int _sax_event_filter
+ cdef int _sax_event_propagate
+ cdef _handleSaxStart(self, tag, attrib, nsmap):
+ return None
+ cdef _handleSaxEnd(self, tag):
+ return None
+ cdef int _handleSaxData(self, data) except -1:
+ return 0
+ cdef int _handleSaxDoctype(self, root_tag, public_id, system_id) except -1:
+ return 0
+ cdef _handleSaxPi(self, target, data):
+ return None
+ cdef _handleSaxComment(self, comment):
+ return None
+
+cdef class _SaxParserContext(_ParserContext):
+ """This class maps SAX2 events to method calls.
+ """
+ cdef _SaxParserTarget _target
+ cdef xmlparser.startElementNsSAX2Func _origSaxStart
+ cdef xmlparser.endElementNsSAX2Func _origSaxEnd
+ cdef xmlparser.startElementSAXFunc _origSaxStartNoNs
+ cdef xmlparser.endElementSAXFunc _origSaxEndNoNs
+ cdef xmlparser.charactersSAXFunc _origSaxData
+ cdef xmlparser.internalSubsetSAXFunc _origSaxDoctype
+ cdef xmlparser.commentSAXFunc _origSaxComment
+ cdef xmlparser.processingInstructionSAXFunc _origSaxPi
+
+ cdef void _setSaxParserTarget(self, _SaxParserTarget target):
+ self._target = target
+
+ cdef void _initParserContext(self, xmlparser.xmlParserCtxt* c_ctxt):
+ "wrap original SAX2 callbacks"
+ cdef xmlparser.xmlSAXHandler* sax
+ _ParserContext._initParserContext(self, c_ctxt)
+ sax = c_ctxt.sax
+ if self._target._sax_event_propagate & SAX_EVENT_START:
+ # propagate => keep orig callback
+ self._origSaxStart = sax.startElementNs
+ self._origSaxStartNoNs = sax.startElement
+ else:
+ # otherwise: never call orig callback
+ self._origSaxStart = sax.startElementNs = NULL
+ self._origSaxStartNoNs = sax.startElement = NULL
+ if self._target._sax_event_filter & SAX_EVENT_START:
+ # intercept => overwrite orig callback
+ if sax.initialized == xmlparser.XML_SAX2_MAGIC:
+ sax.startElementNs = _handleSaxStart
+ sax.startElement = _handleSaxStartNoNs
+
+ if self._target._sax_event_propagate & SAX_EVENT_END:
+ self._origSaxEnd = sax.endElementNs
+ self._origSaxEndNoNs = sax.endElement
+ else:
+ self._origSaxEnd = sax.endElementNs = NULL
+ self._origSaxEndNoNs = sax.endElement = NULL
+ if self._target._sax_event_filter & SAX_EVENT_END:
+ if sax.initialized == xmlparser.XML_SAX2_MAGIC:
+ sax.endElementNs = _handleSaxEnd
+ sax.endElement = _handleSaxEndNoNs
+
+ if self._target._sax_event_propagate & SAX_EVENT_DATA:
+ self._origSaxData = sax.characters
+ else:
+ self._origSaxData = sax.characters = NULL
+ if self._target._sax_event_filter & SAX_EVENT_DATA:
+ sax.characters = _handleSaxData
+
+ if self._target._sax_event_propagate & SAX_EVENT_DOCTYPE:
+ self._origSaxDoctype = sax.internalSubset
+ else:
+ self._origSaxDoctype = sax.internalSubset = NULL
+ if self._target._sax_event_filter & SAX_EVENT_DOCTYPE:
+ sax.internalSubset = _handleSaxDoctype
+
+ if self._target._sax_event_propagate & SAX_EVENT_PI:
+ self._origSaxPi = sax.processingInstruction
+ else:
+ self._origSaxPi = sax.processingInstruction = NULL
+ if self._target._sax_event_filter & SAX_EVENT_PI:
+ sax.processingInstruction = _handleSaxPI
+
+ if self._target._sax_event_propagate & SAX_EVENT_COMMENT:
+ self._origSaxComment = sax.comment
+ else:
+ self._origSaxComment = sax.comment = NULL
+ if self._target._sax_event_filter & SAX_EVENT_COMMENT:
+ sax.comment = _handleSaxComment
+
+ cdef void _handleSaxException(self, xmlparser.xmlParserCtxt* c_ctxt):
+ self._store_raised()
+ if c_ctxt.errNo == xmlerror.XML_ERR_OK:
+ c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
+ c_ctxt.disableSAX = 1
+
+cdef void _handleSaxStart(void* ctxt, char* c_localname, char* c_prefix,
+ char* c_namespace, int c_nb_namespaces,
+ char** c_namespaces,
+ int c_nb_attributes, int c_nb_defaulted,
+ char** c_attributes) with gil:
+ cdef _SaxParserContext context
+ cdef xmlparser.xmlParserCtxt* c_ctxt
+ cdef _Element element
+ cdef int i
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ if context._origSaxStart is not NULL:
+ context._origSaxStart(c_ctxt, c_localname, c_prefix, c_namespace,
+ c_nb_namespaces, c_namespaces, c_nb_attributes,
+ c_nb_defaulted, c_attributes)
+ try:
+ tag = _namespacedNameFromNsName(c_namespace, c_localname)
+ if c_nb_defaulted > 0:
+ # only add default attributes if we asked for them
+ if c_ctxt.loadsubset & xmlparser.XML_COMPLETE_ATTRS == 0:
+ c_nb_attributes = c_nb_attributes - c_nb_defaulted
+ if c_nb_attributes == 0:
+ attrib = EMPTY_READ_ONLY_DICT
+ else:
+ attrib = {}
+ for i from 0 <= i < c_nb_attributes:
+ name = _namespacedNameFromNsName(
+ c_attributes[2], c_attributes[0])
+ if c_attributes[3] is NULL:
+ value = ""
+ else:
+ value = python.PyUnicode_DecodeUTF8(
+ c_attributes[3], c_attributes[4] - c_attributes[3],
+ "strict")
+ python.PyDict_SetItem(attrib, name, value)
+ c_attributes = c_attributes + 5
+ if c_nb_namespaces == 0:
+ nsmap = EMPTY_READ_ONLY_DICT
+ else:
+ nsmap = {}
+ for i from 0 <= i < c_nb_namespaces:
+ if c_namespaces[0] is NULL:
+ prefix = None
+ else:
+ prefix = funicode(c_namespaces[0])
+ python.PyDict_SetItem(
+ nsmap, prefix, funicode(c_namespaces[1]))
+ c_namespaces = c_namespaces + 2
+ element = context._target._handleSaxStart(tag, attrib, nsmap)
+ if element is not None and c_ctxt.input is not NULL:
+ if c_ctxt.input.line < 65535:
+ element._c_node.line = <short>c_ctxt.input.line
+ else:
+ element._c_node.line = 65535
+ except:
+ context._handleSaxException(c_ctxt)
+
+cdef void _handleSaxStartNoNs(void* ctxt, char* c_name,
+ char** c_attributes) with gil:
+ cdef _SaxParserContext context
+ cdef xmlparser.xmlParserCtxt* c_ctxt
+ cdef _Element element
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ if context._origSaxStartNoNs is not NULL:
+ context._origSaxStartNoNs(c_ctxt, c_name, c_attributes)
+ try:
+ tag = funicode(c_name)
+ if c_attributes is NULL:
+ attrib = EMPTY_READ_ONLY_DICT
+ else:
+ attrib = {}
+ while c_attributes[0] is not NULL:
+ name = funicode(c_attributes[0])
+ if c_attributes[1] is NULL:
+ value = ""
+ else:
+ value = funicode(c_attributes[1])
+ c_attributes = c_attributes + 2
+ python.PyDict_SetItem(attrib, name, value)
+ element = context._target._handleSaxStart(
+ tag, attrib, EMPTY_READ_ONLY_DICT)
+ if element is not None and c_ctxt.input is not NULL:
+ if c_ctxt.input.line < 65535:
+ element._c_node.line = <short>c_ctxt.input.line
+ else:
+ element._c_node.line = 65535
+ except:
+ context._handleSaxException(c_ctxt)
+
+cdef void _handleSaxEnd(void* ctxt, char* c_localname, char* c_prefix,
+ char* c_namespace) with gil:
+ cdef _SaxParserContext context
+ cdef xmlparser.xmlParserCtxt* c_ctxt
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ if context._origSaxEnd is not NULL:
+ context._origSaxEnd(c_ctxt, c_localname, c_prefix, c_namespace)
+ try:
+ tag = _namespacedNameFromNsName(c_namespace, c_localname)
+ context._target._handleSaxEnd(tag)
+ except:
+ context._handleSaxException(c_ctxt)
+
+cdef void _handleSaxEndNoNs(void* ctxt, char* c_name) with gil:
+ cdef _SaxParserContext context
+ cdef xmlparser.xmlParserCtxt* c_ctxt
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ if context._origSaxEndNoNs is not NULL:
+ context._origSaxEndNoNs(c_ctxt, c_name)
+ try:
+ context._target._handleSaxEnd(funicode(c_name))
+ except:
+ context._handleSaxException(c_ctxt)
+
+cdef void _handleSaxData(void* ctxt, char* c_data, int data_len) with gil:
+ cdef _SaxParserContext context
+ cdef xmlparser.xmlParserCtxt* c_ctxt
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ if context._origSaxData is not NULL:
+ context._origSaxData(c_ctxt, c_data, data_len)
+ try:
+ context._target._handleSaxData(
+ python.PyUnicode_DecodeUTF8(c_data, data_len, NULL))
+ except:
+ context._handleSaxException(c_ctxt)
+
+cdef void _handleSaxDoctype(void* ctxt, char* c_name, char* c_public,
+ char* c_system) with gil:
+ cdef _SaxParserContext context
+ cdef xmlparser.xmlParserCtxt* c_ctxt
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ if context._origSaxDoctype is not NULL:
+ context._origSaxDoctype(c_ctxt, c_name, c_public, c_system)
+ try:
+ if c_public is not NULL:
+ public_id = funicode(c_public)
+ if c_system is not NULL:
+ system_id = funicode(c_system)
+ context._target._handleSaxDoctype(
+ funicode(c_name), public_id, system_id)
+ except:
+ context._handleSaxException(c_ctxt)
+
+cdef void _handleSaxPI(void* ctxt, char* c_target, char* c_data) with gil:
+ cdef _SaxParserContext context
+ cdef xmlparser.xmlParserCtxt* c_ctxt
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ if context._origSaxPi is not NULL:
+ context._origSaxPi(c_ctxt, c_target, c_data)
+ try:
+ if c_data is not NULL:
+ data = funicode(c_data)
+ context._target._handleSaxPi(funicode(c_target), data)
+ except:
+ context._handleSaxException(c_ctxt)
+
+cdef void _handleSaxComment(void* ctxt, char* c_data) with gil:
+ cdef _SaxParserContext context
+ cdef xmlparser.xmlParserCtxt* c_ctxt
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ if context._origSaxComment is not NULL:
+ context._origSaxComment(c_ctxt, c_data)
+ try:
+ context._target._handleSaxComment(funicode(c_data))
+ except:
+ context._handleSaxException(c_ctxt)
+
+
+############################################################
+## ET compatible XML tree builder
+############################################################
+
+cdef class TreeBuilder(_SaxParserTarget):
+ cdef _BaseParser _parser
+ cdef object _factory
+ cdef object _data
+ cdef object _element_stack
+ cdef object _element_stack_pop
+ cdef _Element _last
+ cdef bint _in_tail
+
+ def __init__(self, *, element_factory=None, parser=None):
+ self._sax_event_filter = \
+ SAX_EVENT_START | SAX_EVENT_END | SAX_EVENT_DATA | \
+ SAX_EVENT_PI | SAX_EVENT_COMMENT
+ self._data = [] # data collector
+ self._element_stack = [] # element stack
+ self._element_stack_pop = self._element_stack.pop
+ self._last = None # last element
+ self._in_tail = 0 # true if we're after an end tag
+ self._factory = element_factory
+ self._parser = parser
+
+ cdef int _flush(self) except -1:
+ if python.PyList_GET_SIZE(self._data) > 0:
+ if self._last is not None:
+ text = "".join(self._data)
+ if self._in_tail:
+ assert self._last.tail is None, "internal error (tail)"
+ self._last.tail = text
+ else:
+ assert self._last.text is None, "internal error (text)"
+ self._last.text = text
+ del self._data[:]
+ return 0
+
+ # Python level event handlers
+
+ def close(self):
+ """Flushes the builder buffers, and returns the toplevel document
+ element.
+ """
+ assert python.PyList_GET_SIZE(self._element_stack) == 0, "missing end tags"
+ assert self._last is not None, "missing toplevel element"
+ return self._last
+
+ def data(self, data):
+ """Adds text to the current element. The value should be either an
+ 8-bit string containing ASCII text, or a Unicode string.
+ """
+ self._handleSaxData(data)
+
+ def start(self, tag, attrs, nsmap=None):
+ "Opens a new element."
+ if nsmap is None:
+ nsmap = EMPTY_READ_ONLY_DICT
+ self._handleSaxStart(tag, attrs, nsmap)
+
+ def end(self, tag):
+ "Closes the current element."
+ element = self._handleSaxEnd(tag)
+ assert self._last.tag == tag,\
+ "end tag mismatch (expected %s, got %s)" % (
+ self._last.tag, tag)
+ return element
+
+ def pi(self, target, data):
+ return self._handleSaxPi(target, data)
+
+ def comment(self, comment):
+ return self._handleSaxComment(comment)
+
+ # internal SAX event handlers
+
+ cdef _handleSaxStart(self, tag, attrib, nsmap):
+ self._flush()
+ if self._factory is not None:
+ self._last = self._factory(tag, attrib)
+ if python.PyList_GET_SIZE(self._element_stack) > 0:
+ _appendChild(self._element_stack[-1], self._last)
+ elif python.PyList_GET_SIZE(self._element_stack) > 0:
+ self._last = _makeSubElement(
+ self._element_stack[-1], tag, None, None, attrib, nsmap, None)
+ else:
+ self._last = _makeElement(
+ tag, NULL, None, self._parser, None, None, attrib, nsmap, None)
+ python.PyList_Append(self._element_stack, self._last)
+ self._in_tail = 0
+ return self._last
+
+ cdef _handleSaxEnd(self, tag):
+ self._flush()
+ self._last = self._element_stack_pop()
+ self._in_tail = 1
+ return self._last
+
+ cdef int _handleSaxData(self, data) except -1:
+ python.PyList_Append(self._data, data)
+
+ cdef _handleSaxPi(self, target, data):
+ self._flush()
+ self._last = ProcessingInstruction(target, data)
+ if python.PyList_GET_SIZE(self._element_stack) > 0:
+ _appendChild(self._element_stack[-1], self._last)
+ self._in_tail = 1
+ return self._last
+
+ cdef _handleSaxComment(self, comment):
+ self._flush()
+ self._last = Comment(comment)
+ if python.PyList_GET_SIZE(self._element_stack) > 0:
+ _appendChild(self._element_stack[-1], self._last)
+ self._in_tail = 1
+ return self._last
############################################################
## XML parser
Modified: lxml/trunk/src/lxml/parsertarget.pxi
==============================================================================
--- lxml/trunk/src/lxml/parsertarget.pxi (original)
+++ lxml/trunk/src/lxml/parsertarget.pxi Fri Nov 23 09:27:51 2007
@@ -1,5 +1,8 @@
# Parser target context (ET target interface)
+cdef object inspect_getargspec
+from inspect import getargspec as inspect_getargspec
+
class _TargetParserResult(Exception):
# Admittedly, this is somewhat ugly, but it's the easiest way
# to push the Python level parser result through the parser
@@ -7,191 +10,110 @@
def __init__(self, result):
self.result = result
-cdef class _TargetParserContext(_ParserContext):
- """This class maps SAX2 events to the ET parser target interface.
- """
- cdef object _target
+cdef class _PythonSaxParserTarget(_SaxParserTarget):
cdef object _target_start
cdef object _target_end
cdef object _target_data
cdef object _target_doctype
cdef object _target_pi
cdef object _target_comment
+ cdef bint _start_takes_nsmap
- cdef void _setTarget(self, target):
- self._target = target
-
- cdef _ParserContext _copy(self):
- cdef _TargetParserContext context
- context = _ParserContext._copy(self)
- context._setTarget(self._target)
- return context
-
- cdef void _initParserContext(self, xmlparser.xmlParserCtxt* c_ctxt):
- "wrap original SAX2 callbacks"
- cdef xmlparser.xmlSAXHandler* sax
- _ParserContext._initParserContext(self, c_ctxt)
- sax = c_ctxt.sax
- cstd.memset(sax, 0, sizeof(xmlparser.xmlSAXHandler))
+ def __init__(self, target):
+ cdef int event_filter
+ event_filter = 0
+ self._start_takes_nsmap = 0
try:
- self._target_start = self._target.start
+ self._target_start = target.start
if self._target_start is not None:
- sax.startElementNs = _targetSaxStart
+ event_filter = event_filter | SAX_EVENT_START
except AttributeError:
pass
+ else:
+ try:
+ arguments = inspect_getargspec(self._target_start)
+ if len(arguments[0]) > 3 or arguments[1] is not None:
+ self._start_takes_nsmap = 1
+ except TypeError:
+ pass
try:
- self._target_end = self._target.end
+ self._target_end = target.end
if self._target_end is not None:
- sax.endElementNs = _targetSaxEnd
+ event_filter = event_filter | SAX_EVENT_END
except AttributeError:
pass
try:
- self._target_data = self._target.data
+ self._target_data = target.data
if self._target_data is not None:
- sax.characters = _targetSaxData
+ event_filter = event_filter | SAX_EVENT_DATA
except AttributeError:
pass
try:
- self._target_doctype = self._target.doctype
+ self._target_doctype = target.doctype
if self._target_doctype is not None:
- sax.internalSubset = _targetSaxDoctype
+ event_filter = event_filter | SAX_EVENT_DOCTYPE
except AttributeError:
pass
try:
- self._target_pi = self._target.pi
+ self._target_pi = target.pi
if self._target_pi is not None:
- sax.processingInstruction = _targetSaxPI
+ event_filter = event_filter | SAX_EVENT_PI
except AttributeError:
pass
try:
- self._target_comment = self._target.comment
+ self._target_comment = target.comment
if self._target_comment is not None:
- sax.startElementNs = _targetSaxStart
+ event_filter = event_filter | SAX_EVENT_COMMENT
except AttributeError:
pass
+ self._sax_event_filter = event_filter
+
+ cdef _handleSaxStart(self, tag, attrib, nsmap):
+ if self._start_takes_nsmap:
+ return self._target_start(tag, attrib, nsmap)
+ else:
+ return self._target_start(tag, attrib)
+
+ cdef _handleSaxEnd(self, tag):
+ return self._target_end(tag)
+
+ cdef int _handleSaxData(self, data) except -1:
+ self._target_data(data)
+
+ cdef int _handleSaxDoctype(self, root_tag, public_id, system_id) except -1:
+ self._target_doctype(root_tag, public_id, system_id)
- sax.initialized = xmlparser.XML_SAX2_MAGIC
+ cdef _handleSaxPi(self, target, data):
+ return self._target_pi(target, data)
+
+ cdef _handleSaxComment(self, comment):
+ return self._target_comment(comment)
+
+
+cdef class _TargetParserContext(_SaxParserContext):
+ """This class maps SAX2 events to the ET parser target interface.
+ """
+ cdef object _python_target
+ cdef int _setTarget(self, target) except -1:
+ self._python_target = target
+ if not isinstance(target, _SaxParserTarget) or \
+ hasattr(target, '__dict__'):
+ target = _PythonSaxParserTarget(target)
+ self._setSaxParserTarget(target)
+ return 0
+
+ cdef _ParserContext _copy(self):
+ cdef _TargetParserContext context
+ context = _ParserContext._copy(self)
+ context._setTarget(self._python_target)
+ return context
cdef object _handleParseResult(self, _BaseParser parser, xmlDoc* result,
filename):
self._raise_if_stored()
- return self._target.close()
+ return self._python_target.close()
cdef xmlDoc* _handleParseResultDoc(self, _BaseParser parser,
xmlDoc* result, filename) except NULL:
self._raise_if_stored()
- raise _TargetParserResult(self._target.close())
-
-
-cdef void _targetSaxStart(void* ctxt, char* c_localname, char* c_prefix,
- char* c_namespace, int c_nb_namespaces,
- char** c_namespaces,
- int c_nb_attributes, int c_nb_defaulted,
- char** c_attributes) with gil:
- cdef _TargetParserContext context
- cdef xmlparser.xmlParserCtxt* c_ctxt
- cdef int i
- c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
- if c_ctxt._private is NULL:
- return
- context = <_TargetParserContext>c_ctxt._private
- try:
- tag = _namespacedNameFromNsName(c_namespace, c_localname)
- if c_nb_defaulted > 0:
- # only add default attributes if we asked for them
- if c_ctxt.loadsubset & xmlparser.XML_COMPLETE_ATTRS == 0:
- c_nb_attributes = c_nb_attributes - c_nb_defaulted
- attrib = {}
- for i from 0 <= i < c_nb_attributes:
- name = _namespacedNameFromNsName(
- c_attributes[2], c_attributes[0])
- if c_attributes[3] is NULL:
- value = ""
- else:
- value = python.PyUnicode_DecodeUTF8(
- c_attributes[3], c_attributes[4] - c_attributes[3],
- "strict")
- python.PyDict_SetItem(attrib, name, value)
- c_attributes = c_attributes + 5
- context._target_start(tag, attrib)
- except:
- _handleSaxTargetException(context, c_ctxt)
-
-cdef void _targetSaxEnd(void* ctxt, char* c_localname, char* c_prefix,
- char* c_namespace) with gil:
- cdef _TargetParserContext context
- cdef xmlparser.xmlParserCtxt* c_ctxt
- c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
- if c_ctxt._private is NULL:
- return
- context = <_TargetParserContext>c_ctxt._private
- try:
- tag = _namespacedNameFromNsName(c_namespace, c_localname)
- context._target_end(tag)
- except:
- _handleSaxTargetException(context, c_ctxt)
-
-cdef void _targetSaxData(void* ctxt, char* c_data, int data_len) with gil:
- cdef _TargetParserContext context
- cdef xmlparser.xmlParserCtxt* c_ctxt
- c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
- if c_ctxt._private is NULL:
- return
- context = <_TargetParserContext>c_ctxt._private
- try:
- context._target_data(
- python.PyUnicode_DecodeUTF8(c_data, data_len, NULL))
- except:
- _handleSaxTargetException(context, c_ctxt)
-
-cdef void _targetSaxDoctype(void* ctxt, char* c_name, char* c_public,
- char* c_system) with gil:
- cdef _TargetParserContext context
- cdef xmlparser.xmlParserCtxt* c_ctxt
- c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
- if c_ctxt._private is NULL:
- return
- context = <_TargetParserContext>c_ctxt._private
- try:
- if c_public is not NULL:
- public_id = funicode(c_public)
- if c_system is not NULL:
- system_id = funicode(c_system)
- context._target_doctype(
- funicode(c_name), public_id, system_id)
- except:
- _handleSaxTargetException(context, c_ctxt)
-
-cdef void _targetSaxPI(void* ctxt, char* c_target, char* c_data) with gil:
- cdef _TargetParserContext context
- cdef xmlparser.xmlParserCtxt* c_ctxt
- c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
- if c_ctxt._private is NULL:
- return
- context = <_TargetParserContext>c_ctxt._private
- try:
- if c_data is not NULL:
- data = funicode(c_data)
- context._target_pi(funicode(c_target), data)
- except:
- _handleSaxTargetException(context, c_ctxt)
-
-cdef void _targetSaxComment(void* ctxt, char* c_data, int data_len) with gil:
- cdef _TargetParserContext context
- cdef xmlparser.xmlParserCtxt* c_ctxt
- c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
- if c_ctxt._private is NULL:
- return
- context = <_TargetParserContext>c_ctxt._private
- try:
- context._target_comment(
- python.PyUnicode_DecodeUTF8(c_data, data_len, NULL))
- except:
- _handleSaxTargetException(context, c_ctxt)
-
-cdef void _handleSaxTargetException(_TargetParserContext context,
- xmlparser.xmlParserCtxt* c_ctxt):
- context._store_raised()
- if c_ctxt.errNo == xmlerror.XML_ERR_OK:
- c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
- c_ctxt.disableSAX = 1
+ raise _TargetParserResult(self._python_target.close())
Modified: lxml/trunk/src/lxml/xmlparser.pxd
==============================================================================
--- lxml/trunk/src/lxml/xmlparser.pxd (original)
+++ lxml/trunk/src/lxml/xmlparser.pxd Fri Nov 23 09:27:51 2007
@@ -43,7 +43,9 @@
cdef int XML_SAX2_MAGIC
cdef extern from "libxml/tree.h":
- ctypedef struct xmlParserInput
+ ctypedef struct xmlParserInput:
+ int line
+
ctypedef struct xmlParserInputBuffer:
void* context
xmlInputReadCallback readcallback
@@ -94,7 +96,8 @@
bint html
bint progressive
int charset
-
+ xmlParserInput* input
+
ctypedef enum xmlParserOption:
XML_PARSE_RECOVER = 1 # recover on errors
XML_PARSE_NOENT = 2 # substitute entities
More information about the lxml-checkins
mailing list