[lxml-dev] schema validation and resolvers
Michael Ballbach
ballbach at rten.net
Wed Jun 25 01:04:40 CEST 2008
On Tue, Jun 24, 2008 at 09:36:37AM +0200, Stefan Behnel wrote:
> I think that's the way to go. And yes, it must be a stack (i.e. a
> list) in this case. I'd just push the right parser (not only its
> context) as a "current thread parser" in cases where we know that
> libxml2 will not provide us with a context (XInclude is another case,
> happy to see that resolved at the same time). Then we can fall back to
> using the top-most parser if the _private pointer is NULL.
How does this look? (Unit tests follow the patch)
Index: src/lxml/xinclude.pxi
===================================================================
--- src/lxml/xinclude.pxi (revision 56012)
+++ src/lxml/xinclude.pxi (working copy)
@@ -33,12 +33,14 @@
# i.e. as a sibling, which does not conflict with traversal.
cdef int result
self._error_log.connect()
+ __GLOBAL_PARSER_CONTEXT.pushImpliedContext(node._doc._parser)
with nogil:
if node._doc._parser is not None:
result = xinclude.xmlXIncludeProcessTreeFlags(
node._c_node, node._doc._parser._parse_options)
else:
result = xinclude.xmlXIncludeProcessTree(node._c_node)
+ __GLOBAL_PARSER_CONTEXT.popImpliedContext()
self._error_log.disconnect()
if result == -1:
Index: src/lxml/xmlschema.pxi
===================================================================
--- src/lxml/xmlschema.pxi (revision 56012)
+++ src/lxml/xmlschema.pxi (working copy)
@@ -65,7 +65,13 @@
raise XMLSchemaParseError, u"No tree or file given"
if parser_ctxt is not NULL:
+ # calling xmlSchemaParse on a schema with imports or includes will
+ # cause libxml2 to create an internal context for parsing, so push
+ # an implied context to route resolve requests to the document's parser
+ __GLOBAL_PARSER_CONTEXT.pushImpliedContext(doc._parser)
self._c_schema = xmlschema.xmlSchemaParse(parser_ctxt)
+ __GLOBAL_PARSER_CONTEXT.popImpliedContext()
+
if _LIBXML_VERSION_INT >= 20624:
xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt)
Index: src/lxml/parser.pxi
===================================================================
--- src/lxml/parser.pxi (revision 56012)
+++ src/lxml/parser.pxi (working copy)
@@ -42,6 +42,11 @@
cdef tree.xmlDict* _c_dict
cdef _BaseParser _default_parser
+ cdef object _implied_parser_contexts
+
+ def __init__(self):
+ # list used as a stack of implied parsers: pushImpliedContext() appends,
+ # popImpliedContext() pops, findImpliedContext() reads the last entry
+ self._implied_parser_contexts = []
+
def __dealloc__(self):
if self._c_dict is not NULL:
xmlparser.xmlDictFree(self._c_dict)
@@ -131,6 +136,38 @@
# otherwise we'd free data that's in use => segfault
self.initThreadDictRef(&result.dict)
+ cdef xmlparser.xmlParserCtxt *findImpliedContext(self) with gil:
+ u"""Return the C parser context of the current implied parser.
+
+ This is used when the _local_resolver function is called with a context
+ that was generated from within libxml2 - which happens when parsing
+ schema and xinclude external references.
+
+ The context of the most recently pushed implied parser is returned.
+ If the stack is empty, or its top entry is None, the context of the
+ default parser found via _findThreadParserContext() is returned instead.
+ """
+
+ cdef _ParserDictionaryContext thread_context
+ cdef _BaseParser implied_parser
+ cdef Py_ssize_t count
+
+ # see if we have a current implied parser
+ count = python.PyList_GET_SIZE(self._implied_parser_contexts)
+ if count != 0:
+ # top of the stack == last list item
+ implied_parser = python.PyList_GET_ITEM(self._implied_parser_contexts, count - 1)
+ python.Py_INCREF(implied_parser) # borrowed reference
+ if implied_parser is not None:
+ return implied_parser._getParserContext()._c_ctxt
+
+ # we don't, so use the thread's default parser context
+ # NOTE(review): the stack is stored on self; if _findThreadParserContext()
+ # returns per-thread objects (as its name suggests), confirm that the
+ # stack does not need to be per-thread as well
+ # NOTE(review): assumes _default_parser is already set at this point - TODO confirm
+ thread_context = __GLOBAL_PARSER_CONTEXT._findThreadParserContext()
+ return thread_context._default_parser._getParserContext()._c_ctxt
+
+ cdef void pushImpliedContext(self, context) with gil:
+     u"""Push a new implied parser onto the stack.
+
+     'context' must be a _BaseParser instance or None.  None is pushed as
+     well; findImpliedContext() then falls back to the default parser, just
+     as it does for an empty stack.  Callers pair every push with a
+     popImpliedContext() call.
+     """
+     # NOTE(review): this function is declared 'cdef void'; confirm how the
+     # TypeError raised below reaches the caller
+     if context is not None and not isinstance(context, _BaseParser):
+         # the check is against _BaseParser, so the message names that type
+         raise TypeError, u"implied contexts must be _BaseParser objects"
+     python.PyList_Append(self._implied_parser_contexts, context)
+
+ cdef void popImpliedContext(self) with gil:
+ u"Pop and discard the current implied context object."
+ # the popped parser is not returned: the function is declared void
+ self._implied_parser_contexts.pop()
+
cdef _ParserDictionaryContext __GLOBAL_PARSER_CONTEXT
__GLOBAL_PARSER_CONTEXT = _ParserDictionaryContext()
__GLOBAL_PARSER_CONTEXT.initMainParserContext()
@@ -399,7 +436,13 @@
# when we declare a Python object, Pyrex will INCREF(None) !
cdef xmlparser.xmlParserInput* c_input
cdef int error
+
+ # if there is no _ParserDictionaryContext associated with the xmlParserCtxt
+ # passed, check to see if the thread state object has an implied context.
if c_context._private is NULL:
+ c_context = __GLOBAL_PARSER_CONTEXT.findImpliedContext()
+
+ if c_context._private is NULL:
if __DEFAULT_ENTITY_LOADER is NULL:
return NULL
return __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context)
Unit tests:
Index: src/lxml/tests/test_etree.py
===================================================================
--- src/lxml/tests/test_etree.py (revision 56012)
+++ src/lxml/tests/test_etree.py (working copy)
@@ -2342,6 +2342,28 @@
'a',
tree.getroot()[1].tag)
+ def test_xinclude_resolver(self):
+ """Test that xinclude references can be processed by a resolver."""
+
+ class res(etree.Resolver):
+ def __init__(self, text):
+ self.text = text
+ self.called = False
+
+ def resolve(self, url, Id, context):
+ self.called = True
+ return self.resolve_string(self.text, context)
+
+ include_text = open(fileInTestDir('test.xml')).read()
+ parser = etree.XMLParser()
+ res_instance = res(include_text)
+ parser.resolvers.add(res_instance)
+ tree = etree.parse(fileInTestDir('include/test_xinclude.xml'), parser = parser)
+ self.include(tree)
+
+ # make sure the resolver was used
+ self.assert_(res_instance.called)
+
class ETreeXIncludeTestCase(XIncludeTestCase):
def include(self, tree):
tree.xinclude()
Index: src/lxml/tests/test_xmlschema.py
===================================================================
--- src/lxml/tests/test_xmlschema.py (revision 56012)
+++ src/lxml/tests/test_xmlschema.py (working copy)
@@ -152,7 +152,108 @@
self.assert_(tree_valid.xmlschema(schema))
self.assert_(not tree_invalid.xmlschema(schema))
-
+ #
+ # schema + resolvers tests&data:
+ #
+
+ resolver_schema_int = BytesIO("""\
+<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
+ xmlns:etype="http://codespeak.net/lxml/test/external"
+ targetNamespace="http://codespeak.net/lxml/test/internal">
+ <xsd:import namespace="http://codespeak.net/lxml/test/external" schemaLocation="XXX.xsd" />
+ <xsd:element name="a" type="etype:AType"/>
+</xsd:schema>""")
+
+ resolver_schema_int2 = BytesIO("""\
+<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
+ xmlns:etype="http://codespeak.net/lxml/test/external"
+ targetNamespace="http://codespeak.net/lxml/test/internal">
+ <xsd:import namespace="http://codespeak.net/lxml/test/external" schemaLocation="YYY.xsd" />
+ <xsd:element name="a" type="etype:AType"/>
+</xsd:schema>""")
+
+ resolver_schema_ext = """\
+<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
+ targetNamespace="http://codespeak.net/lxml/test/external">
+ <xsd:complexType name="AType">
+ <xsd:sequence><xsd:element name="b" type="xsd:string" minOccurs="0" maxOccurs="unbounded" /></xsd:sequence>
+ </xsd:complexType>
+</xsd:schema>"""
+
+ class simple_resolver(etree.Resolver):
+ # resolver that asserts the requested URL is 'XXX.xsd' and answers with
+ # the schema text given at construction time
+ def __init__(self, schema):
+ self.schema = schema
+
+ def resolve(self, url, Id, context):
+ assert(url == 'XXX.xsd')
+ return self.resolve_string(self.schema, context)
+
+ def test_xmlschema_resolvers(self):
+ """Test that resolvers work with schema."""
+
+ parser = etree.XMLParser()
+ parser.resolvers.add(self.simple_resolver(self.resolver_schema_ext))
+ # resolver_schema_int imports 'XXX.xsd'; simple_resolver answers that
+ # request with resolver_schema_ext
+ schema_doc = etree.parse(self.resolver_schema_int, parser = parser)
+ # NOTE(review): nothing is asserted here; presumably the test relies on
+ # XMLSchema() raising when the import cannot be resolved - confirm
+ schema = etree.XMLSchema(schema_doc)
+
+ def test_xmlschema_resolvers_root(self):
+ """Test that the default resolver will get called if there's no
+ specific parser resolver."""
+
+ root_resolver = self.simple_resolver(self.resolver_schema_ext)
+ etree.get_default_parser().resolvers.add(root_resolver)
+ schema_doc = etree.parse(self.resolver_schema_int)
+ schema = etree.XMLSchema(schema_doc)
+ etree.get_default_parser().resolvers.remove(root_resolver)
+
+ def test_xmlschema_resolvers_noroot(self):
+ """Test that the default resolver will not get called when a more
+ specific resolver is registered."""
+
+ class res_root(etree.Resolver):
+ def resolve(self, url, Id, context):
+ assert(False)
+ return None
+
+ root_resolver = res_root()
+ etree.get_default_parser().resolvers.add(root_resolver)
+ parser = etree.XMLParser()
+ parser.resolvers.add(self.simple_resolver(self.resolver_schema_ext))
+ schema_doc = etree.parse(self.resolver_schema_int, parser = parser)
+ schema = etree.XMLSchema(schema_doc)
+ etree.get_default_parser().resolvers.remove(root_resolver)
+
+ def test_xmlschema_nested_resolvers(self):
+ """Test that resolvers work in a nested fashion."""
+
+ class res_nested(etree.Resolver):
+ def __init__(self, ext_schema):
+ self.ext_schema = ext_schema
+
+ def resolve(self, url, Id, context):
+ assert(url == 'YYY.xsd')
+ return self.resolve_string(self.ext_schema, context)
+
+ class res(etree.Resolver):
+ def __init__(self, ext_schema_1, ext_schema_2):
+ self.ext_schema_1 = ext_schema_1
+ self.ext_schema_2 = ext_schema_2
+
+ def resolve(self, url, Id, context):
+ assert(url == 'XXX.xsd')
+
+ new_parser = etree.XMLParser()
+ new_parser.resolvers.add(res_nested(self.ext_schema_2))
+ new_schema_doc = etree.parse(self.ext_schema_1, parser = new_parser)
+ new_schema = etree.XMLSchema(new_schema_doc)
+
+ return self.resolve_string(ETreeXMLSchemaTestCase.resolver_schema_ext, context)
+
+ parser = etree.XMLParser()
+ parser.resolvers.add(res(self.resolver_schema_int2, self.resolver_schema_ext))
+ schema_doc = etree.parse(self.resolver_schema_int, parser = parser)
+ schema = etree.XMLSchema(schema_doc)
+
def test_suite():
suite = unittest.TestSuite()
suite.addTests([unittest.makeSuite(ETreeXMLSchemaTestCase)])
--
Michael Ballbach, N0ZTQ
ballbach at rten.net -- PGP KeyID: 0xA05D5555
http://www.rten.net/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: not available
Url : http://codespeak.net/pipermail/lxml-dev/attachments/20080624/b0633e38/attachment.pgp
More information about the lxml-dev
mailing list