[Lxml-checkins] r48970 - in lxml/trunk: . src/lxml

scoder at codespeak.net scoder at codespeak.net
Fri Nov 23 09:27:51 CET 2007


Author: scoder
Date: Fri Nov 23 09:27:51 2007
New Revision: 48970

Modified:
   lxml/trunk/CHANGES.txt
   lxml/trunk/src/lxml/lxml.etree.pyx
   lxml/trunk/src/lxml/parser.pxi
   lxml/trunk/src/lxml/parsertarget.pxi
   lxml/trunk/src/lxml/xmlparser.pxd
Log:
new SAX parser framework + TreeBuilder class implementation

Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt	(original)
+++ lxml/trunk/CHANGES.txt	Fri Nov 23 09:27:51 2007
@@ -8,6 +8,8 @@
 Features added
 --------------
 
+* ElementTree compatible TreeBuilder class.
+
 * Use default prefixes for some common XML namespaces
 
 * ``lxml.html.clean.Cleaner`` now allows for a ``host_whitelist``, and
@@ -27,7 +29,7 @@
 Bugs fixed
 ----------
 
-* Well hidden free-while-in-use crash bug in ObjectPath
+* Target parser failed to report comments.
 
 * In the ``lxml.html`` ``iter_links`` method, links in ``<object>``
   tags weren't recognized.  (Note: plugin-specific link parameters

Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx	(original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx	Fri Nov 23 09:27:51 2007
@@ -31,6 +31,9 @@
 cdef object ITER_EMPTY
 ITER_EMPTY = iter(())
 
+cdef object EMPTY_READ_ONLY_DICT
+EMPTY_READ_ONLY_DICT = python.PyDictProxy_New({})
+
 # the rules
 # any libxml C argument/variable is prefixed with c_
 # any non-public function/class is prefixed with an underscore

Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi	(original)
+++ lxml/trunk/src/lxml/parser.pxi	Fri Nov 23 09:27:51 2007
@@ -376,7 +376,8 @@
 ############################################################
 
 cdef class _ParserContext(_ResolverContext)
-cdef class _TargetParserContext(_ParserContext)
+cdef class _SaxParserContext(_ParserContext)
+cdef class _TargetParserContext(_SaxParserContext)
 
 cdef class _ParserContext(_ResolverContext):
     cdef _ErrorLog _error_log
@@ -577,39 +578,33 @@
         cdef xmlparser.xmlParserCtxt* pctxt
         if self._parser_context is None:
             self._parser_context = self._createContext(self._target)
-
             pctxt = self._newParserCtxt()
             if pctxt is NULL:
                 python.PyErr_NoMemory()
-
+            _initParserContext(self._parser_context, self._resolvers, pctxt)
             if self._remove_comments:
                 pctxt.sax.comment = NULL
             if self._remove_pis:
                 pctxt.sax.processingInstruction = NULL
             # hard switch-off for CDATA nodes => makes them plain text
             pctxt.sax.cdataBlock = NULL
-
-            _initParserContext(self._parser_context, self._resolvers, pctxt)
         return self._parser_context
 
     cdef _ParserContext _getPushParserContext(self):
         cdef xmlparser.xmlParserCtxt* pctxt
         if self._push_parser_context is None:
             self._push_parser_context = self._createContext(self._target)
-
             pctxt = self._newPushParserCtxt()
             if pctxt is NULL:
                 python.PyErr_NoMemory()
-
+            _initParserContext(
+                self._push_parser_context, self._resolvers, pctxt)
             if self._remove_comments:
                 pctxt.sax.comment = NULL
             if self._remove_pis:
                 pctxt.sax.processingInstruction = NULL
             # hard switch-off for CDATA nodes => makes them plain text
             pctxt.sax.cdataBlock = NULL
-
-            _initParserContext(
-                self._push_parser_context, self._resolvers, pctxt)
         return self._push_parser_context
 
     cdef _ParserContext _createContext(self, target):
@@ -992,7 +987,422 @@
     if c_data is not NULL and buffer_len > 0:
         return htmlparser.htmlParseChunk(c_ctxt, c_data, buffer_len, 0)
     return 0
-        
+
+
+############################################################
+## SAX event handler
+############################################################
+
+ctypedef enum _SaxParserEvents:
+    SAX_EVENT_START   =  1
+    SAX_EVENT_END     =  2
+    SAX_EVENT_DATA    =  4
+    SAX_EVENT_DOCTYPE =  8
+    SAX_EVENT_PI      = 16
+    SAX_EVENT_COMMENT = 32
+
+cdef class _SaxParserTarget:
+    cdef int _sax_event_filter
+    cdef int _sax_event_propagate
+    cdef _handleSaxStart(self, tag, attrib, nsmap):
+        return None
+    cdef _handleSaxEnd(self, tag):
+        return None
+    cdef int _handleSaxData(self, data) except -1:
+        return 0
+    cdef int _handleSaxDoctype(self, root_tag, public_id, system_id) except -1:
+        return 0
+    cdef _handleSaxPi(self, target, data):
+        return None
+    cdef _handleSaxComment(self, comment):
+        return None
+
+cdef class _SaxParserContext(_ParserContext):
+    """This class maps SAX2 events to method calls.
+    """
+    cdef _SaxParserTarget _target
+    cdef xmlparser.startElementNsSAX2Func _origSaxStart
+    cdef xmlparser.endElementNsSAX2Func   _origSaxEnd
+    cdef xmlparser.startElementSAXFunc    _origSaxStartNoNs
+    cdef xmlparser.endElementSAXFunc      _origSaxEndNoNs
+    cdef xmlparser.charactersSAXFunc      _origSaxData
+    cdef xmlparser.internalSubsetSAXFunc  _origSaxDoctype
+    cdef xmlparser.commentSAXFunc         _origSaxComment
+    cdef xmlparser.processingInstructionSAXFunc    _origSaxPi
+
+    cdef void _setSaxParserTarget(self, _SaxParserTarget target):
+        self._target = target
+
+    cdef void _initParserContext(self, xmlparser.xmlParserCtxt* c_ctxt):
+        "wrap original SAX2 callbacks"
+        cdef xmlparser.xmlSAXHandler* sax
+        _ParserContext._initParserContext(self, c_ctxt)
+        sax = c_ctxt.sax
+        if self._target._sax_event_propagate & SAX_EVENT_START:
+            # propagate => keep orig callback
+            self._origSaxStart = sax.startElementNs
+            self._origSaxStartNoNs = sax.startElement
+        else:
+            # otherwise: never call orig callback
+            self._origSaxStart = sax.startElementNs = NULL
+            self._origSaxStartNoNs = sax.startElement = NULL
+        if self._target._sax_event_filter & SAX_EVENT_START:
+            # intercept => overwrite orig callback
+            if sax.initialized == xmlparser.XML_SAX2_MAGIC:
+                sax.startElementNs = _handleSaxStart
+            sax.startElement = _handleSaxStartNoNs
+
+        if self._target._sax_event_propagate & SAX_EVENT_END:
+            self._origSaxEnd = sax.endElementNs
+            self._origSaxEndNoNs = sax.endElement
+        else:
+            self._origSaxEnd = sax.endElementNs = NULL
+            self._origSaxEndNoNs = sax.endElement = NULL
+        if self._target._sax_event_filter & SAX_EVENT_END:
+            if sax.initialized == xmlparser.XML_SAX2_MAGIC:
+                sax.endElementNs = _handleSaxEnd
+            sax.endElement = _handleSaxEndNoNs
+
+        if self._target._sax_event_propagate & SAX_EVENT_DATA:
+            self._origSaxData = sax.characters
+        else:
+            self._origSaxData = sax.characters = NULL
+        if self._target._sax_event_filter & SAX_EVENT_DATA:
+            sax.characters = _handleSaxData
+
+        if self._target._sax_event_propagate & SAX_EVENT_DOCTYPE:
+            self._origSaxDoctype = sax.internalSubset
+        else:
+            self._origSaxDoctype = sax.internalSubset = NULL
+        if self._target._sax_event_filter & SAX_EVENT_DOCTYPE:
+            sax.internalSubset = _handleSaxDoctype
+
+        if self._target._sax_event_propagate & SAX_EVENT_PI:
+            self._origSaxPi = sax.processingInstruction
+        else:
+            self._origSaxPi = sax.processingInstruction = NULL
+        if self._target._sax_event_filter & SAX_EVENT_PI:
+            sax.processingInstruction = _handleSaxPI
+
+        if self._target._sax_event_propagate & SAX_EVENT_COMMENT:
+            self._origSaxComment = sax.comment
+        else:
+            self._origSaxComment = sax.comment = NULL
+        if self._target._sax_event_filter & SAX_EVENT_COMMENT:
+            sax.comment = _handleSaxComment
+
+    cdef void _handleSaxException(self, xmlparser.xmlParserCtxt* c_ctxt):
+        self._store_raised()
+        if c_ctxt.errNo == xmlerror.XML_ERR_OK:
+            c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
+        c_ctxt.disableSAX = 1
+
+cdef void _handleSaxStart(void* ctxt, char* c_localname, char* c_prefix,
+                          char* c_namespace, int c_nb_namespaces,
+                          char** c_namespaces,
+                          int c_nb_attributes, int c_nb_defaulted,
+                          char** c_attributes) with gil:
+    cdef _SaxParserContext context
+    cdef xmlparser.xmlParserCtxt* c_ctxt
+    cdef _Element element
+    cdef int i
+    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+    if c_ctxt._private is NULL:
+        return
+    context = <_SaxParserContext>c_ctxt._private
+    if context._origSaxStart is not NULL:
+        context._origSaxStart(c_ctxt, c_localname, c_prefix, c_namespace,
+                              c_nb_namespaces, c_namespaces, c_nb_attributes,
+                              c_nb_defaulted, c_attributes)
+    try:
+        tag = _namespacedNameFromNsName(c_namespace, c_localname)
+        if c_nb_defaulted > 0:
+            # only add default attributes if we asked for them
+            if c_ctxt.loadsubset & xmlparser.XML_COMPLETE_ATTRS == 0:
+                c_nb_attributes = c_nb_attributes - c_nb_defaulted
+        if c_nb_attributes == 0:
+            attrib = EMPTY_READ_ONLY_DICT
+        else:
+            attrib = {}
+            for i from 0 <= i < c_nb_attributes:
+                name = _namespacedNameFromNsName(
+                    c_attributes[2], c_attributes[0])
+                if c_attributes[3] is NULL:
+                    value = ""
+                else:
+                    value = python.PyUnicode_DecodeUTF8(
+                        c_attributes[3], c_attributes[4] - c_attributes[3],
+                        "strict")
+                python.PyDict_SetItem(attrib, name, value)
+                c_attributes = c_attributes + 5
+        if c_nb_namespaces == 0:
+            nsmap = EMPTY_READ_ONLY_DICT
+        else:
+            nsmap = {}
+            for i from 0 <= i < c_nb_namespaces:
+                if c_namespaces[0] is NULL:
+                    prefix = None
+                else:
+                    prefix = funicode(c_namespaces[0])
+                python.PyDict_SetItem(
+                    nsmap, prefix, funicode(c_namespaces[1]))
+                c_namespaces = c_namespaces + 2
+        element = context._target._handleSaxStart(tag, attrib, nsmap)
+        if element is not None and c_ctxt.input is not NULL:
+            if c_ctxt.input.line < 65535:
+                element._c_node.line = <short>c_ctxt.input.line
+            else:
+                element._c_node.line = 65535
+    except:
+        context._handleSaxException(c_ctxt)
+
+cdef void _handleSaxStartNoNs(void* ctxt, char* c_name,
+                              char** c_attributes) with gil:
+    cdef _SaxParserContext context
+    cdef xmlparser.xmlParserCtxt* c_ctxt
+    cdef _Element element
+    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+    if c_ctxt._private is NULL:
+        return
+    context = <_SaxParserContext>c_ctxt._private
+    if context._origSaxStartNoNs is not NULL:
+        context._origSaxStartNoNs(c_ctxt, c_name, c_attributes)
+    try:
+        tag = funicode(c_name)
+        if c_attributes is NULL:
+            attrib = EMPTY_READ_ONLY_DICT
+        else:
+            attrib = {}
+            while c_attributes[0] is not NULL:
+                name = funicode(c_attributes[0])
+                if c_attributes[1] is NULL:
+                    value = ""
+                else:
+                    value = funicode(c_attributes[1])
+                c_attributes = c_attributes + 2
+                python.PyDict_SetItem(attrib, name, value)
+        element = context._target._handleSaxStart(
+            tag, attrib, EMPTY_READ_ONLY_DICT)
+        if element is not None and c_ctxt.input is not NULL:
+            if c_ctxt.input.line < 65535:
+                element._c_node.line = <short>c_ctxt.input.line
+            else:
+                element._c_node.line = 65535
+    except:
+        context._handleSaxException(c_ctxt)
+
+cdef void _handleSaxEnd(void* ctxt, char* c_localname, char* c_prefix,
+                        char* c_namespace) with gil:
+    cdef _SaxParserContext context
+    cdef xmlparser.xmlParserCtxt* c_ctxt
+    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+    if c_ctxt._private is NULL:
+        return
+    context = <_SaxParserContext>c_ctxt._private
+    if context._origSaxEnd is not NULL:
+        context._origSaxEnd(c_ctxt, c_localname, c_prefix, c_namespace)
+    try:
+        tag = _namespacedNameFromNsName(c_namespace, c_localname)
+        context._target._handleSaxEnd(tag)
+    except:
+        context._handleSaxException(c_ctxt)
+
+cdef void _handleSaxEndNoNs(void* ctxt, char* c_name) with gil:
+    cdef _SaxParserContext context
+    cdef xmlparser.xmlParserCtxt* c_ctxt
+    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+    if c_ctxt._private is NULL:
+        return
+    context = <_SaxParserContext>c_ctxt._private
+    if context._origSaxEndNoNs is not NULL:
+        context._origSaxEndNoNs(c_ctxt, c_name)
+    try:
+        context._target._handleSaxEnd(funicode(c_name))
+    except:
+        context._handleSaxException(c_ctxt)
+
+cdef void _handleSaxData(void* ctxt, char* c_data, int data_len) with gil:
+    cdef _SaxParserContext context
+    cdef xmlparser.xmlParserCtxt* c_ctxt
+    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+    if c_ctxt._private is NULL:
+        return
+    context = <_SaxParserContext>c_ctxt._private
+    if context._origSaxData is not NULL:
+        context._origSaxData(c_ctxt, c_data, data_len)
+    try:
+        context._target._handleSaxData(
+            python.PyUnicode_DecodeUTF8(c_data, data_len, NULL))
+    except:
+        context._handleSaxException(c_ctxt)
+
+cdef void _handleSaxDoctype(void* ctxt, char* c_name, char* c_public,
+                            char* c_system) with gil:
+    cdef _SaxParserContext context
+    cdef xmlparser.xmlParserCtxt* c_ctxt
+    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+    if c_ctxt._private is NULL:
+        return
+    context = <_SaxParserContext>c_ctxt._private
+    if context._origSaxDoctype is not NULL:
+        context._origSaxDoctype(c_ctxt, c_name, c_public, c_system)
+    try:
+        if c_public is not NULL:
+            public_id = funicode(c_public)
+        if c_system is not NULL:
+            system_id = funicode(c_system)
+        context._target._handleSaxDoctype(
+            funicode(c_name), public_id, system_id)
+    except:
+        context._handleSaxException(c_ctxt)
+
+cdef void _handleSaxPI(void* ctxt, char* c_target, char* c_data) with gil:
+    cdef _SaxParserContext context
+    cdef xmlparser.xmlParserCtxt* c_ctxt
+    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+    if c_ctxt._private is NULL:
+        return
+    context = <_SaxParserContext>c_ctxt._private
+    if context._origSaxPi is not NULL:
+        context._origSaxPi(c_ctxt, c_target, c_data)
+    try:
+        if c_data is not NULL:
+            data = funicode(c_data)
+        context._target._handleSaxPi(funicode(c_target), data)
+    except:
+        context._handleSaxException(c_ctxt)
+
+cdef void _handleSaxComment(void* ctxt, char* c_data) with gil:
+    cdef _SaxParserContext context
+    cdef xmlparser.xmlParserCtxt* c_ctxt
+    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+    if c_ctxt._private is NULL:
+        return
+    context = <_SaxParserContext>c_ctxt._private
+    if context._origSaxComment is not NULL:
+        context._origSaxComment(c_ctxt, c_data)
+    try:
+        context._target._handleSaxComment(funicode(c_data))
+    except:
+        context._handleSaxException(c_ctxt)
+
+
+############################################################
+## ET compatible XML tree builder
+############################################################
+
+cdef class TreeBuilder(_SaxParserTarget):
+    cdef _BaseParser _parser
+    cdef object _factory
+    cdef object _data
+    cdef object _element_stack
+    cdef object _element_stack_pop
+    cdef _Element _last
+    cdef bint _in_tail
+
+    def __init__(self, *, element_factory=None, parser=None):
+        self._sax_event_filter = \
+            SAX_EVENT_START | SAX_EVENT_END | SAX_EVENT_DATA | \
+            SAX_EVENT_PI | SAX_EVENT_COMMENT
+        self._data = [] # data collector
+        self._element_stack = [] # element stack
+        self._element_stack_pop = self._element_stack.pop
+        self._last = None # last element
+        self._in_tail = 0 # true if we're after an end tag
+        self._factory = element_factory
+        self._parser = parser
+
+    cdef int _flush(self) except -1:
+        if python.PyList_GET_SIZE(self._data) > 0:
+            if self._last is not None:
+                text = "".join(self._data)
+                if self._in_tail:
+                    assert self._last.tail is None, "internal error (tail)"
+                    self._last.tail = text
+                else:
+                    assert self._last.text is None, "internal error (text)"
+                    self._last.text = text
+            del self._data[:]
+        return 0
+
+    # Python level event handlers
+
+    def close(self):
+        """Flushes the builder buffers, and returns the toplevel document
+        element.
+        """
+        assert python.PyList_GET_SIZE(self._element_stack) == 0, "missing end tags"
+        assert self._last is not None, "missing toplevel element"
+        return self._last
+
+    def data(self, data):
+        """Adds text to the current element.  The value should be either an
+        8-bit string containing ASCII text, or a Unicode string.
+        """
+        self._handleSaxData(data)
+
+    def start(self, tag, attrs, nsmap=None):
+        "Opens a new element."
+        if nsmap is None:
+            nsmap = EMPTY_READ_ONLY_DICT
+        self._handleSaxStart(tag, attrs, nsmap)
+
+    def end(self, tag):
+        "Closes the current element."
+        element = self._handleSaxEnd(tag)
+        assert self._last.tag == tag,\
+               "end tag mismatch (expected %s, got %s)" % (
+                   self._last.tag, tag)
+        return element
+
+    def pi(self, target, data):
+        return self._handleSaxPi(target, data)
+
+    def comment(self, comment):
+        return self._handleSaxComment(comment)
+
+    # internal SAX event handlers
+
+    cdef _handleSaxStart(self, tag, attrib, nsmap):
+        self._flush()
+        if self._factory is not None:
+            self._last = self._factory(tag, attrib)
+            if python.PyList_GET_SIZE(self._element_stack) > 0:
+                _appendChild(self._element_stack[-1], self._last)
+        elif python.PyList_GET_SIZE(self._element_stack) > 0:
+            self._last = _makeSubElement(
+                self._element_stack[-1], tag, None, None, attrib, nsmap, None)
+        else:
+            self._last = _makeElement(
+                tag, NULL, None, self._parser, None, None, attrib, nsmap, None)
+        python.PyList_Append(self._element_stack, self._last)
+        self._in_tail = 0
+        return self._last
+
+    cdef _handleSaxEnd(self, tag):
+        self._flush()
+        self._last = self._element_stack_pop()
+        self._in_tail = 1
+        return self._last
+
+    cdef int _handleSaxData(self, data) except -1:
+        python.PyList_Append(self._data, data)
+
+    cdef _handleSaxPi(self, target, data):
+        self._flush()
+        self._last = ProcessingInstruction(target, data)
+        if python.PyList_GET_SIZE(self._element_stack) > 0:
+            _appendChild(self._element_stack[-1], self._last)
+        self._in_tail = 1
+        return self._last
+
+    cdef _handleSaxComment(self, comment):
+        self._flush()
+        self._last = Comment(comment)
+        if python.PyList_GET_SIZE(self._element_stack) > 0:
+            _appendChild(self._element_stack[-1], self._last)
+        self._in_tail = 1
+        return self._last
 
 ############################################################
 ## XML parser

Modified: lxml/trunk/src/lxml/parsertarget.pxi
==============================================================================
--- lxml/trunk/src/lxml/parsertarget.pxi	(original)
+++ lxml/trunk/src/lxml/parsertarget.pxi	Fri Nov 23 09:27:51 2007
@@ -1,5 +1,8 @@
 # Parser target context (ET target interface)
 
+cdef object inspect_getargspec
+from inspect import getargspec as inspect_getargspec
+
 class _TargetParserResult(Exception):
     # Admittedly, this is somewhat ugly, but it's the easiest way
     # to push the Python level parser result through the parser
@@ -7,191 +10,110 @@
     def __init__(self, result):
         self.result = result
 
-cdef class _TargetParserContext(_ParserContext):
-    """This class maps SAX2 events to the ET parser target interface.
-    """
-    cdef object _target
+cdef class _PythonSaxParserTarget(_SaxParserTarget):
     cdef object _target_start
     cdef object _target_end
     cdef object _target_data
     cdef object _target_doctype
     cdef object _target_pi
     cdef object _target_comment
+    cdef bint _start_takes_nsmap
 
-    cdef void _setTarget(self, target):
-        self._target = target
-
-    cdef _ParserContext _copy(self):
-        cdef _TargetParserContext context
-        context = _ParserContext._copy(self)
-        context._setTarget(self._target)
-        return context
-
-    cdef void _initParserContext(self, xmlparser.xmlParserCtxt* c_ctxt):
-        "wrap original SAX2 callbacks"
-        cdef xmlparser.xmlSAXHandler* sax
-        _ParserContext._initParserContext(self, c_ctxt)
-        sax = c_ctxt.sax
-        cstd.memset(sax, 0, sizeof(xmlparser.xmlSAXHandler))
+    def __init__(self, target):
+        cdef int event_filter
+        event_filter = 0
+        self._start_takes_nsmap = 0
         try:
-            self._target_start = self._target.start
+            self._target_start = target.start
             if self._target_start is not None:
-                sax.startElementNs = _targetSaxStart
+                event_filter = event_filter | SAX_EVENT_START
         except AttributeError:
             pass
+        else:
+            try:
+                arguments = inspect_getargspec(self._target_start)
+                if len(arguments[0]) > 3 or arguments[1] is not None:
+                    self._start_takes_nsmap = 1
+            except TypeError:
+                pass
         try:
-            self._target_end = self._target.end
+            self._target_end = target.end
             if self._target_end is not None:
-                sax.endElementNs = _targetSaxEnd
+                event_filter = event_filter | SAX_EVENT_END
         except AttributeError:
             pass
         try:
-            self._target_data = self._target.data
+            self._target_data = target.data
             if self._target_data is not None:
-                sax.characters = _targetSaxData
+                event_filter = event_filter | SAX_EVENT_DATA
         except AttributeError:
             pass
         try:
-            self._target_doctype = self._target.doctype
+            self._target_doctype = target.doctype
             if self._target_doctype is not None:
-                sax.internalSubset = _targetSaxDoctype
+                event_filter = event_filter | SAX_EVENT_DOCTYPE
         except AttributeError:
             pass
         try:
-            self._target_pi = self._target.pi
+            self._target_pi = target.pi
             if self._target_pi is not None:
-                sax.processingInstruction = _targetSaxPI
+                event_filter = event_filter | SAX_EVENT_PI
         except AttributeError:
             pass
         try:
-            self._target_comment = self._target.comment
+            self._target_comment = target.comment
             if self._target_comment is not None:
-                sax.startElementNs = _targetSaxStart
+                event_filter = event_filter | SAX_EVENT_COMMENT
         except AttributeError:
             pass
+        self._sax_event_filter = event_filter
+
+    cdef _handleSaxStart(self, tag, attrib, nsmap):
+        if self._start_takes_nsmap:
+            return self._target_start(tag, attrib, nsmap)
+        else:
+            return self._target_start(tag, attrib)
+
+    cdef _handleSaxEnd(self, tag):
+        return self._target_end(tag)
+
+    cdef int _handleSaxData(self, data) except -1:
+        self._target_data(data)
+
+    cdef int _handleSaxDoctype(self, root_tag, public_id, system_id) except -1:
+        self._target_doctype(root_tag, public_id, system_id)
 
-        sax.initialized = xmlparser.XML_SAX2_MAGIC
+    cdef _handleSaxPi(self, target, data):
+        return self._target_pi(target, data)
+
+    cdef _handleSaxComment(self, comment):
+        return self._target_comment(comment)
+
+
+cdef class _TargetParserContext(_SaxParserContext):
+    """This class maps SAX2 events to the ET parser target interface.
+    """
+    cdef object _python_target
+    cdef int _setTarget(self, target) except -1:
+        self._python_target = target
+        if not isinstance(target, _SaxParserTarget) or \
+                hasattr(target, '__dict__'):
+            target = _PythonSaxParserTarget(target)
+        self._setSaxParserTarget(target)
+        return 0
+
+    cdef _ParserContext _copy(self):
+        cdef _TargetParserContext context
+        context = _ParserContext._copy(self)
+        context._setTarget(self._python_target)
+        return context
 
     cdef object _handleParseResult(self, _BaseParser parser, xmlDoc* result,
                                    filename):
         self._raise_if_stored()
-        return self._target.close()
+        return self._python_target.close()
 
     cdef xmlDoc* _handleParseResultDoc(self, _BaseParser parser,
                                        xmlDoc* result, filename) except NULL:
         self._raise_if_stored()
-        raise _TargetParserResult(self._target.close())
-
-
-cdef void _targetSaxStart(void* ctxt, char* c_localname, char* c_prefix,
-                          char* c_namespace, int c_nb_namespaces,
-                          char** c_namespaces,
-                          int c_nb_attributes, int c_nb_defaulted,
-                          char** c_attributes) with gil:
-    cdef _TargetParserContext context
-    cdef xmlparser.xmlParserCtxt* c_ctxt
-    cdef int i
-    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
-    if c_ctxt._private is NULL:
-        return
-    context = <_TargetParserContext>c_ctxt._private
-    try:
-        tag = _namespacedNameFromNsName(c_namespace, c_localname)
-        if c_nb_defaulted > 0:
-            # only add default attributes if we asked for them
-            if c_ctxt.loadsubset & xmlparser.XML_COMPLETE_ATTRS == 0:
-                c_nb_attributes = c_nb_attributes - c_nb_defaulted
-        attrib = {}
-        for i from 0 <= i < c_nb_attributes:
-            name = _namespacedNameFromNsName(
-                c_attributes[2], c_attributes[0])
-            if c_attributes[3] is NULL:
-                value = ""
-            else:
-                value = python.PyUnicode_DecodeUTF8(
-                    c_attributes[3], c_attributes[4] - c_attributes[3],
-                    "strict")
-            python.PyDict_SetItem(attrib, name, value)
-            c_attributes = c_attributes + 5
-        context._target_start(tag, attrib)
-    except:
-        _handleSaxTargetException(context, c_ctxt)
-
-cdef void _targetSaxEnd(void* ctxt, char* c_localname, char* c_prefix,
-                        char* c_namespace) with gil:
-    cdef _TargetParserContext context
-    cdef xmlparser.xmlParserCtxt* c_ctxt
-    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
-    if c_ctxt._private is NULL:
-        return
-    context = <_TargetParserContext>c_ctxt._private
-    try:
-        tag = _namespacedNameFromNsName(c_namespace, c_localname)
-        context._target_end(tag)
-    except:
-        _handleSaxTargetException(context, c_ctxt)
-
-cdef void _targetSaxData(void* ctxt, char* c_data, int data_len) with gil:
-    cdef _TargetParserContext context
-    cdef xmlparser.xmlParserCtxt* c_ctxt
-    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
-    if c_ctxt._private is NULL:
-        return
-    context = <_TargetParserContext>c_ctxt._private
-    try:
-        context._target_data(
-            python.PyUnicode_DecodeUTF8(c_data, data_len, NULL))
-    except:
-        _handleSaxTargetException(context, c_ctxt)
-
-cdef void _targetSaxDoctype(void* ctxt, char* c_name, char* c_public,
-                       char* c_system) with gil:
-    cdef _TargetParserContext context
-    cdef xmlparser.xmlParserCtxt* c_ctxt
-    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
-    if c_ctxt._private is NULL:
-        return
-    context = <_TargetParserContext>c_ctxt._private
-    try:
-        if c_public is not NULL:
-            public_id = funicode(c_public)
-        if c_system is not NULL:
-            system_id = funicode(c_system)
-        context._target_doctype(
-            funicode(c_name), public_id, system_id)
-    except:
-        _handleSaxTargetException(context, c_ctxt)
-
-cdef void _targetSaxPI(void* ctxt, char* c_target, char* c_data) with gil:
-    cdef _TargetParserContext context
-    cdef xmlparser.xmlParserCtxt* c_ctxt
-    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
-    if c_ctxt._private is NULL:
-        return
-    context = <_TargetParserContext>c_ctxt._private
-    try:
-        if c_data is not NULL:
-            data = funicode(c_data)
-        context._target_pi(funicode(c_target), data)
-    except:
-        _handleSaxTargetException(context, c_ctxt)
-
-cdef void _targetSaxComment(void* ctxt, char* c_data, int data_len) with gil:
-    cdef _TargetParserContext context
-    cdef xmlparser.xmlParserCtxt* c_ctxt
-    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
-    if c_ctxt._private is NULL:
-        return
-    context = <_TargetParserContext>c_ctxt._private
-    try:
-        context._target_comment(
-            python.PyUnicode_DecodeUTF8(c_data, data_len, NULL))
-    except:
-        _handleSaxTargetException(context, c_ctxt)
-
-cdef void _handleSaxTargetException(_TargetParserContext context,
-                                    xmlparser.xmlParserCtxt* c_ctxt):
-    context._store_raised()
-    if c_ctxt.errNo == xmlerror.XML_ERR_OK:
-        c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
-    c_ctxt.disableSAX = 1
+        raise _TargetParserResult(self._python_target.close())

Modified: lxml/trunk/src/lxml/xmlparser.pxd
==============================================================================
--- lxml/trunk/src/lxml/xmlparser.pxd	(original)
+++ lxml/trunk/src/lxml/xmlparser.pxd	Fri Nov 23 09:27:51 2007
@@ -43,7 +43,9 @@
     cdef int XML_SAX2_MAGIC
 
 cdef extern from "libxml/tree.h":
-    ctypedef struct xmlParserInput
+    ctypedef struct xmlParserInput:
+        int line
+
     ctypedef struct xmlParserInputBuffer:
         void* context
         xmlInputReadCallback  readcallback
@@ -94,7 +96,8 @@
         bint html
         bint progressive
         int charset
-        
+        xmlParserInput* input
+
     ctypedef enum xmlParserOption:
         XML_PARSE_RECOVER = 1 # recover on errors
         XML_PARSE_NOENT = 2 # substitute entities


More information about the lxml-checkins mailing list