[Lxml-checkins] r41601 - in lxml/trunk: . benchmark doc src/lxml
scoder at codespeak.net
scoder at codespeak.net
Thu Mar 29 09:52:17 CEST 2007
Author: scoder
Date: Thu Mar 29 09:52:15 2007
New Revision: 41601
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/benchmark/bench_xpath.py
lxml/trunk/doc/xpathxslt.txt
lxml/trunk/src/lxml/extensions.pxi
lxml/trunk/src/lxml/nsclasses.pxi
lxml/trunk/src/lxml/xpath.pxi
lxml/trunk/src/lxml/xslt.pxd
lxml/trunk/src/lxml/xslt.pxi
Log:
merged extension_refactoring branch
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Thu Mar 29 09:52:15 2007
@@ -2,12 +2,14 @@
lxml changelog
==============
-under development
+Under Development
=================
Features added
--------------
+* EXSLT RegExp support in standard XPath (not only XSLT)
+
* ``lxml.pyclasslookup`` module that can access the entire tree in read-only
mode to help determine a suitable Element class
@@ -19,10 +21,12 @@
Bugs fixed
----------
+* Thread safety in XPath evaluators
+
Other changes
-------------
-* major rewrite of internal extension function setup
+* major refactoring in XPath/XSLT extension function code
1.3beta (2007-02-27)
Modified: lxml/trunk/benchmark/bench_xpath.py
==============================================================================
--- lxml/trunk/benchmark/bench_xpath.py (original)
+++ lxml/trunk/benchmark/bench_xpath.py Thu Mar 29 09:52:15 2007
@@ -35,31 +35,32 @@
@onlylib('lxe')
def bench_xpath_old_extensions(self, root):
- def return_child(_, element):
- if element:
- return element[0]
+ def return_child(_, elements):
+ if elements:
+ return elements[0][0]
else:
return ()
- extensions = {(None, 'child') : return_child}
- xpath = self.etree.XPath("child(.)", extensions=extensions)
+ extensions = {("test", "child") : return_child}
+ xpath = self.etree.XPath("t:child(.)", namespaces={"test":"t"},
+ extensions=extensions)
for child in root:
xpath(child)
@onlylib('lxe')
def bench_xpath_extensions(self, root):
- def return_child(_, element):
- if element:
- return element[0]
+ def return_child(_, elements):
+ if elements:
+ return elements[0][0]
else:
return ()
- self.etree.FunctionNamespace("test")["t"] = return_child
+ self.etree.FunctionNamespace("testns")["t"] = return_child
try:
- xpath = self.etree.XPath("test:t(.)", {"test":"test"})
+ xpath = self.etree.XPath("test:t(.)", {"test":"testns"})
for child in root:
xpath(child)
finally:
- del self.etree.FunctionNamespace("test")["t"]
+ del self.etree.FunctionNamespace("testns")["t"]
if __name__ == '__main__':
benchbase.main(XPathBenchMark)
Modified: lxml/trunk/doc/xpathxslt.txt
==============================================================================
--- lxml/trunk/doc/xpathxslt.txt (original)
+++ lxml/trunk/doc/xpathxslt.txt Thu Mar 29 09:52:15 2007
@@ -72,11 +72,12 @@
>>> f = StringIO('''\
... <a:foo xmlns:a="http://codespeak.net/ns/test1"
- ... xmlns:b="http://codespeak.net/ns/test2">
+ ... xmlns:b="http://codespeak.net/ns/test2">
... <b:bar>Text</b:bar>
... </a:foo>
... ''')
>>> doc = etree.parse(f)
+
>>> r = doc.xpath('/t:foo/b:bar', {'t': 'http://codespeak.net/ns/test1',
... 'b': 'http://codespeak.net/ns/test2'})
>>> len(r)
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Thu Mar 29 09:52:15 2007
@@ -1,4 +1,4 @@
-# supports for extension functions in XPath and XSLT
+# support for extension functions in XPath and XSLT
class XPathError(LxmlError):
pass
@@ -9,6 +9,11 @@
class XPathResultError(XPathError):
pass
+# forward declarations
+
+ctypedef int _register_function(void* ctxt, name_utf, ns_uri_utf)
+cdef class _ExsltRegExp
+
################################################################################
# Base class for XSLT and XPath evaluation contexts: functions, namespaces, ...
@@ -17,6 +22,7 @@
cdef _Document _doc
cdef object _extensions
cdef object _namespaces
+ cdef object _global_namespaces
cdef object _utf_refs
cdef object _function_cache
cdef object _function_cache_ns
@@ -25,18 +31,18 @@
cdef _TempStore _temp_refs
cdef _ExceptionContext _exc
- def __init__(self, namespaces, extensions):
- self._xpathCtxt = NULL
+ def __init__(self, namespaces, extensions, enable_regexp):
+ cdef _ExsltRegExp _regexp
self._utf_refs = {}
+ self._global_namespaces = []
self._function_cache = {}
self._function_cache_ns = {}
- self._called_function = None
if extensions is not None:
# convert extensions to UTF-8
if python.PyDict_Check(extensions):
extensions = (extensions,)
- # format: [ {(ns,name):function} ] -> {(ns_utf,name_utf):function}
+ # format: [ {(ns, name):function} ] -> {(ns_utf, name_utf):function}
new_extensions = {}
for extension in extensions:
for (ns_uri, name), function in extension.items():
@@ -49,17 +55,38 @@
new_extensions, (ns_utf, name_utf), function)
extensions = new_extensions or None
+ if namespaces is not None:
+ if python.PyDict_Check(namespaces):
+ namespaces = namespaces.items()
+ if namespaces:
+ ns = []
+ for prefix, ns_uri in namespaces:
+ if prefix is None:
+ raise TypeError, \
+ "empty namespace prefix is not supported in XPath"
+ if ns_uri is None:
+ raise TypeError, \
+ "setting default namespace is not supported in XPath"
+ prefix_utf = self._to_utf(prefix)
+ ns_uri_utf = self._to_utf(ns_uri)
+ python.PyList_Append(ns, (prefix_utf, ns_uri_utf))
+ namespaces = ns
+
self._doc = None
self._exc = _ExceptionContext()
self._extensions = extensions
self._namespaces = namespaces
self._temp_refs = _TempStore()
+ if enable_regexp:
+ _regexp = _ExsltRegExp()
+ _regexp._register_in_context(self)
+
cdef _copy(self):
cdef _BaseContext context
if self._namespaces is not None:
- namespaces = python.PyDict_Copy(self._namespaces)
- context = self.__class__(namespaces, None)
+ namespaces = self._namespaces[:]
+ context = self.__class__(namespaces, None, False)
if self._extensions is not None:
context._extensions = python.PyDict_Copy(self._extensions)
return context
@@ -83,41 +110,147 @@
cdef _register_context(self, _Document doc):
self._doc = doc
self._exc.clear()
- python.PyDict_Clear(self._function_cache)
- python.PyDict_Clear(self._function_cache_ns)
- namespaces = self._namespaces
- if namespaces is not None:
- self.registerNamespaces(namespaces)
- cdef _unregister_context(self):
- xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt)
- self._free_context()
-
- cdef _free_context(self):
+ cdef _cleanup_context(self):
+ #xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt)
+ #self.unregisterGlobalNamespaces()
python.PyDict_Clear(self._utf_refs)
self._doc = None
+
+ cdef _release_context(self):
if self._xpathCtxt is not NULL:
self._xpathCtxt.userData = NULL
self._xpathCtxt = NULL
# namespaces (internal UTF-8 methods with leading '_')
- cdef addNamespace(self, prefix, uri):
+ cdef addNamespace(self, prefix, ns_uri):
+ if prefix is None:
+ raise TypeError, "empty prefix is not supported in XPath"
+ prefix_utf = self._to_utf(prefix)
+ ns_uri_utf = self._to_utf(ns_uri)
+ new_item = (prefix_utf, ns_uri_utf)
if self._namespaces is None:
- self._namespaces = {}
- python.PyDict_SetItem(self._namespaces, prefix, uri)
+ self._namespaces = [new_item]
+ else:
+ namespaces = []
+ for item in self._namespaces:
+ if item[0] == prefix_utf:
+ item = new_item
+ new_item = None
+ python.PyList_Append(namespaces, item)
+ if new_item is not None:
+ python.PyList_Append(namespaces, new_item)
+ self._namespaces = namespaces
+ if self._xpathCtxt is not NULL:
+ xpath.xmlXPathRegisterNs(
+ self._xpathCtxt, _cstr(prefix_utf), _cstr(ns_uri_utf))
- cdef registerNamespaces(self, namespaces):
- for prefix, uri in namespaces.items():
- self.registerNamespace(prefix, uri)
-
cdef registerNamespace(self, prefix, ns_uri):
+ if prefix is None:
+ raise TypeError, "empty prefix is not supported in XPath"
prefix_utf = self._to_utf(prefix)
ns_uri_utf = self._to_utf(ns_uri)
- xpath.xmlXPathRegisterNs(self._xpathCtxt, prefix_utf, ns_uri_utf)
+ python.PyList_Append(self._global_namespaces, prefix_utf)
+ xpath.xmlXPathRegisterNs(self._xpathCtxt,
+ _cstr(prefix_utf), _cstr(ns_uri_utf))
+
+ cdef registerLocalNamespaces(self):
+ if self._namespaces is None:
+ return
+ for prefix_utf, ns_uri_utf in self._namespaces:
+ xpath.xmlXPathRegisterNs(
+ self._xpathCtxt, _cstr(prefix_utf), _cstr(ns_uri_utf))
+
+ cdef registerGlobalNamespaces(self):
+ ns_prefixes = _find_all_extension_prefixes()
+ if python.PyList_GET_SIZE(ns_prefixes) > 0:
+ for prefix_utf, ns_uri_utf in ns_prefixes:
+ python.PyList_Append(self._global_namespaces, prefix_utf)
+ xpath.xmlXPathRegisterNs(
+ self._xpathCtxt, _cstr(prefix_utf), _cstr(ns_uri_utf))
+
+ cdef unregisterGlobalNamespaces(self):
+ if python.PyList_GET_SIZE(self._global_namespaces) > 0:
+ for prefix_utf in self._global_namespaces:
+ xpath.xmlXPathRegisterNs(self._xpathCtxt,
+ _cstr(prefix_utf), NULL)
+ del self._global_namespaces[:]
+
+ cdef void _unregisterNamespace(self, prefix_utf):
+ xpath.xmlXPathRegisterNs(self._xpathCtxt,
+ _cstr(prefix_utf), NULL)
# extension functions
+ cdef void _addLocalExtensionFunction(self, ns_utf, name_utf, function):
+ if self._extensions is None:
+ self._extensions = {}
+ python.PyDict_SetItem(self._extensions, (ns_utf, name_utf), function)
+
+ cdef void registerGlobalFunctions(self, void* ctxt,
+ _register_function reg_func):
+ cdef python.PyObject* dict_result
+ for ns_utf, ns_functions in _iter_ns_extension_functions():
+ if ns_utf is None:
+ d = self._function_cache
+ else:
+ dict_result = python.PyDict_GetItem(
+ self._function_cache_ns, ns_utf)
+ if dict_result is NULL:
+ d = {}
+ python.PyDict_SetItem(
+ self._function_cache_ns, ns_utf, d)
+ else:
+ d = <object>dict_result
+ for name_utf, function in ns_functions.iteritems():
+ python.PyDict_SetItem(d, name_utf, function)
+ reg_func(ctxt, name_utf, ns_utf)
+
+ cdef void registerLocalFunctions(self, void* ctxt,
+ _register_function reg_func):
+ cdef python.PyObject* dict_result
+ if self._extensions is None:
+ return # done
+ last_ns = None
+ d = self._function_cache
+ for (ns_utf, name_utf), function in self._extensions.iteritems():
+ if ns_utf is not last_ns:
+ last_ns = ns_utf
+ if ns_utf is None:
+ d = self._function_cache
+ else:
+ dict_result = python.PyDict_GetItem(
+ self._function_cache_ns, ns_utf)
+ if dict_result is NULL:
+ d = {}
+ python.PyDict_SetItem(self._function_cache_ns,
+ ns_utf, d)
+ else:
+ d = <object>dict_result
+ python.PyDict_SetItem(d, name_utf, function)
+ reg_func(ctxt, name_utf, ns_utf)
+
+ cdef unregisterAllFunctions(self, void* ctxt,
+ _register_function unreg_func):
+ for name_utf in self._function_cache:
+ unreg_func(ctxt, name_utf, None)
+ for ns_utf, functions in self._function_cache_ns.iteritems():
+ for name_utf in functions:
+ unreg_func(ctxt, name_utf, ns_utf)
+
+ cdef unregisterGlobalFunctions(self, void* ctxt,
+ _register_function unreg_func):
+ for name_utf in self._function_cache:
+ if self._extensions is None or \
+ (None, name_utf) not in self._extensions:
+ unreg_func(ctxt, name_utf, None)
+ for ns_utf, functions in self._function_cache_ns.iteritems():
+ for name_utf in functions:
+ if self._extensions is None or \
+ (ns_utf, name_utf) not in self._extensions:
+ unreg_func(ctxt, name_utf, ns_utf)
+
cdef _find_cached_function(self, char* c_ns_uri, char* c_name):
"""Lookup an extension function in the cache and return it.
@@ -137,7 +270,7 @@
return <object>dict_result
return None
- cdef int _prepare_function_call(self, char* c_ns_uri, char* c_name):
+ cdef int __prepare_function_call(self, char* c_ns_uri, char* c_name):
"""Find an extension function and store it in 'self._called_function'.
This is absolutely performance-critical for XPath/XSLT!
@@ -233,19 +366,87 @@
################################################################################
-# helper functions
+# EXSLT regexp implementation
-cdef xpath.xmlXPathFunction _function_check(void* ctxt,
- char* c_name, char* c_ns_uri):
- "Module level lookup function for XPath/XSLT functions"
- cdef xpath.xmlXPathFunction c_func
- cdef _BaseContext context
- context = <_BaseContext>ctxt
- if context._prepare_function_call(c_ns_uri, c_name):
- c_func = _call_prepared_function
- else:
- c_func = NULL
- return c_func
+cdef class _ExsltRegExp:
+ cdef object _compile_map
+ def __init__(self):
+ self._compile_map = {}
+
+ cdef _make_string(self, value):
+ if _isString(value):
+ return value
+ else:
+ raise TypeError, "Invalid argument type %s" % type(value)
+
+ cdef _compile(self, rexp, ignore_case):
+ cdef python.PyObject* c_result
+ rexp = self._make_string(rexp)
+ key = (rexp, ignore_case)
+ c_result = python.PyDict_GetItem(self._compile_map, key)
+ if c_result is not NULL:
+ return <object>c_result
+ py_flags = re.UNICODE
+ if ignore_case:
+ py_flags = py_flags | re.IGNORECASE
+ rexp_compiled = re.compile(rexp, py_flags)
+ python.PyDict_SetItem(self._compile_map, key, rexp_compiled)
+ return rexp_compiled
+
+ def test(self, ctxt, s, rexp, flags=''):
+ flags = self._make_string(flags)
+ s = self._make_string(s)
+ rexpc = self._compile(rexp, 'i' in flags)
+ if rexpc.search(s) is None:
+ return False
+ else:
+ return True
+
+ def match(self, ctxt, s, rexp, flags=''):
+ flags = self._make_string(flags)
+ s = self._make_string(s)
+ rexpc = self._compile(rexp, 'i' in flags)
+ if 'g' in flags:
+ results = rexpc.findall(s)
+ if not results:
+ return ()
+ else:
+ result = rexpc.search(s)
+ if not result:
+ return ()
+ results = [ result.group() ]
+ results.extend( result.groups('') )
+ result_list = []
+ root = Element('matches')
+ join_groups = ''.join
+ for s_match in results:
+ if python.PyTuple_CheckExact(s_match):
+ s_match = join_groups(s_match)
+ elem = SubElement(root, 'match')
+ elem.text = s_match
+ python.PyList_Append(result_list, elem)
+ return result_list
+
+ def replace(self, ctxt, s, rexp, flags, replacement):
+ replacement = self._make_string(replacement)
+ flags = self._make_string(flags)
+ s = self._make_string(s)
+ rexpc = self._compile(rexp, 'i' in flags)
+ if 'g' in flags:
+ count = 0
+ else:
+ count = 1
+ return rexpc.sub(replacement, s, count)
+
+ cdef _register_in_context(self, _BaseContext context):
+ ns = "http://exslt.org/regular-expressions"
+ context._addLocalExtensionFunction(ns, "test", self.test)
+ context._addLocalExtensionFunction(ns, "match", self.match)
+ context._addLocalExtensionFunction(ns, "replace", self.replace)
+
+
+################################################################################
+# helper functions
cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL:
cdef xpath.xmlNodeSet* resultSet
@@ -405,22 +606,6 @@
fref = "{%s}%s" % (rctxt.functionURI, rctxt.function)
else:
fref = rctxt.function
- xpath.xmlXPathErr(ctxt, xpath.XML_XPATH_UNKNOWN_FUNC_ERROR)
+ xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR)
exception = XPathFunctionError("XPath function '%s' not found" % fref)
context._exc._store_exception(exception)
-
-# call the function that was stored in 'context._called_function'
-
-cdef void _call_prepared_function(xpath.xmlXPathParserContext* ctxt, int nargs):
- cdef python.PyGILState_STATE gil_state
- gil_state = python.PyGILState_Ensure()
- _call_prepared_python_function(ctxt, nargs)
- python.PyGILState_Release(gil_state)
-
-cdef void _call_prepared_python_function(xpath.xmlXPathParserContext* ctxt,
- int nargs):
- cdef xpath.xmlXPathContext* rctxt
- cdef _BaseContext context
- rctxt = ctxt.context
- context = <_BaseContext>(rctxt.userData)
- _extension_function_call(context, context._called_function, ctxt, nargs)
Modified: lxml/trunk/src/lxml/nsclasses.pxi
==============================================================================
--- lxml/trunk/src/lxml/nsclasses.pxi (original)
+++ lxml/trunk/src/lxml/nsclasses.pxi Thu Mar 29 09:52:15 2007
@@ -75,6 +75,11 @@
name = _utf8(name)
return self._get(name)
+ def __delitem__(self, name):
+ if name is not None:
+ name = _utf8(name)
+ python.PyDict_DelItem(self._entries, name)
+
cdef object _get(self, object name):
cdef python.PyObject* dict_result
dict_result = python.PyDict_GetItem(self._entries, name)
@@ -99,7 +104,7 @@
return self._entries.iteritems()
def clear(self):
- self._entries.clear()
+ python.PyDict_Clear(self._entries)
cdef class _ClassNamespaceRegistry(_NamespaceRegistry):
"Dictionary-like registry for namespace implementation classes"
@@ -130,32 +135,39 @@
cdef class _XPathFunctionNamespaceRegistry(_FunctionNamespaceRegistry):
cdef object _prefix
cdef object _prefix_utf
+
property prefix:
"Namespace prefix for extension functions."
def __del__(self):
self._prefix = None # no prefix configured
+ self._prefix_utf = None
def __get__(self):
- return self._prefix
+ if self._prefix is None:
+ return ''
+ else:
+ return self._prefix
def __set__(self, prefix):
+ if prefix == '':
+ prefix = None # empty prefix
if prefix is None:
- prefix = '' # empty prefix
- self._prefix_utf = _utf8(prefix)
+ self._prefix_utf = None
+ else:
+ self._prefix_utf = _utf8(prefix)
self._prefix = prefix
cdef object _find_all_extension_prefixes():
"Internal lookup function to find all function prefixes for XSLT/XPath."
cdef _XPathFunctionNamespaceRegistry registry
- ns_prefixes = {}
- for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.iteritems():
+ ns_prefixes = []
+ for registry in __FUNCTION_NAMESPACE_REGISTRIES.itervalues():
if registry._prefix_utf is not None:
- ns_prefixes[registry._prefix_utf] = ns_utf
+ if registry._ns_uri_utf is not None:
+ python.PyList_Append(
+ ns_prefixes, (registry._prefix_utf, registry._ns_uri_utf))
return ns_prefixes
-cdef object _iter_extension_function_names():
- l = []
- for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.iteritems():
- python.PyList_Append(l, (ns_utf, registry))
- return l
+cdef object _iter_ns_extension_functions():
+ return __FUNCTION_NAMESPACE_REGISTRIES.iteritems()
cdef object _find_extension(ns_uri_utf, name_utf):
cdef python.PyObject* dict_result
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Thu Mar 29 09:52:15 2007
@@ -9,38 +9,58 @@
################################################################################
# XPath
+cdef int _register_xpath_function(void* ctxt, name_utf, ns_utf):
+ if ns_utf is None:
+ return xpath.xmlXPathRegisterFunc(
+ <xpath.xmlXPathContext*>ctxt, _cstr(name_utf),
+ _xpath_function_call)
+ else:
+ return xpath.xmlXPathRegisterFuncNS(
+ <xpath.xmlXPathContext*>ctxt, _cstr(name_utf), _cstr(ns_utf),
+ _xpath_function_call)
+
+cdef int _unregister_xpath_function(void* ctxt, name_utf, ns_utf):
+ if ns_utf is None:
+ return xpath.xmlXPathRegisterFunc(
+ <xpath.xmlXPathContext*>ctxt, _cstr(name_utf), NULL)
+ else:
+ return xpath.xmlXPathRegisterFuncNS(
+ <xpath.xmlXPathContext*>ctxt, _cstr(name_utf), _cstr(ns_utf), NULL)
+
+
cdef class _XPathContext(_BaseContext):
cdef object _variables
- def __init__(self, namespaces, extensions, variables):
+ def __init__(self, namespaces, extensions, enable_regexp, variables):
self._variables = variables
- _BaseContext.__init__(self, namespaces, extensions)
-
- cdef register_context(self, xpath.xmlXPathContext* xpathCtxt, _Document doc):
+ _BaseContext.__init__(self, namespaces, extensions, enable_regexp)
+
+ cdef set_context(self, xpath.xmlXPathContext* xpathCtxt):
self._set_xpath_context(xpathCtxt)
- ns_prefixes = _find_all_extension_prefixes()
- if ns_prefixes:
- self.registerNamespaces(ns_prefixes)
+ self._setupDict(xpathCtxt)
+ self.registerLocalNamespaces()
+ self.registerLocalFunctions(xpathCtxt, _register_xpath_function)
+
+ cdef register_context(self, _Document doc):
self._register_context(doc)
+ self.registerGlobalNamespaces()
+ self.registerGlobalFunctions(self._xpathCtxt, _register_xpath_function)
if self._variables is not None:
self.registerVariables(self._variables)
- xpath.xmlXPathRegisterFuncLookup(
- self._xpathCtxt, _function_check, <python.PyObject*>self)
cdef unregister_context(self):
- cdef xpath.xmlXPathContext* xpathCtxt
- xpathCtxt = self._xpathCtxt
- if xpathCtxt is NULL:
- return
- xpath.xmlXPathRegisteredVariablesCleanup(xpathCtxt)
- self._unregister_context()
+ self.unregisterGlobalFunctions(
+ self._xpathCtxt, _unregister_xpath_function)
+ self.unregisterGlobalNamespaces()
+ xpath.xmlXPathRegisteredVariablesCleanup(self._xpathCtxt)
+ self._cleanup_context()
- def registerVariables(self, variable_dict):
+ cdef registerVariables(self, variable_dict):
for name, value in variable_dict.items():
name_utf = self._to_utf(name)
xpath.xmlXPathRegisterVariable(
self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value))
- def registerVariable(self, name, value):
+ cdef registerVariable(self, name, value):
name_utf = self._to_utf(name)
xpath.xmlXPathRegisterVariable(
self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value))
@@ -49,20 +69,26 @@
xpath.xmlXPathRegisterVariable(
self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value))
-cdef void _setupDict(xpath.xmlXPathContext* xpathCtxt):
- __GLOBAL_PARSER_CONTEXT.initXPathParserDict(xpathCtxt)
+ cdef void _setupDict(self, xpath.xmlXPathContext* xpathCtxt):
+ __GLOBAL_PARSER_CONTEXT.initXPathParserDict(xpathCtxt)
cdef class _XPathEvaluatorBase:
cdef xpath.xmlXPathContext* _xpathCtxt
cdef _XPathContext _context
+ cdef python.PyThread_type_lock _eval_lock
- def __init__(self, namespaces, extensions, variables=None):
- self._context = _XPathContext(namespaces, extensions, variables)
+ def __init__(self, namespaces, extensions, enable_regexp):
+ self._context = _XPathContext(namespaces, extensions,
+ enable_regexp, None)
def __dealloc__(self):
if self._xpathCtxt is not NULL:
xpath.xmlXPathFreeContext(self._xpathCtxt)
+ cdef set_context(self, xpath.xmlXPathContext* xpathCtxt):
+ self._xpathCtxt = xpathCtxt
+ self._context.set_context(xpathCtxt)
+
def evaluate(self, _eval_arg, **_variables):
"""Evaluate an XPath expression.
@@ -84,6 +110,22 @@
c = path[0]
return c == c'/'
+ cdef int _lock(self) except -1:
+ cdef python.PyThreadState* state
+ cdef int result
+ if config.ENABLE_THREADING and self._eval_lock != NULL:
+ state = python.PyEval_SaveThread()
+ result = python.PyThread_acquire_lock(
+ self._eval_lock, python.WAIT_LOCK)
+ python.PyEval_RestoreThread(state)
+ if result == 0:
+ raise ParserError, "parser locking failed"
+ return 0
+
+ cdef void _unlock(self):
+ if config.ENABLE_THREADING and self._eval_lock != NULL:
+ python.PyThread_release_lock(self._eval_lock)
+
cdef _raise_parse_error(self):
if self._xpathCtxt is not NULL and \
self._xpathCtxt.lastError.message is not NULL:
@@ -119,21 +161,23 @@
Absolute XPath expressions (starting with '/') will be evaluated against
the ElementTree as returned by getroottree().
- XPath evaluators must not be shared between threads.
+ Additional namespace declarations can be passed with the 'namespaces'
+ keyword argument. EXSLT regular expression support can be disabled with
+ the 'regexp' boolean keyword (defaults to True).
"""
cdef _Element _element
- def __init__(self, _Element element not None, namespaces=None, extensions=None):
+ def __init__(self, _Element element not None, namespaces=None,
+ extensions=None, regexp=True):
cdef xpath.xmlXPathContext* xpathCtxt
cdef int ns_register_status
cdef _Document doc
+ self._element = element
doc = element._doc
+ _XPathEvaluatorBase.__init__(self, namespaces, extensions, regexp)
xpathCtxt = xpath.xmlXPathNewContext(doc._c_doc)
- self._xpathCtxt = xpathCtxt
if xpathCtxt is NULL:
raise XPathContextError, "Unable to create new XPath context"
- _setupDict(xpathCtxt)
- self._element = element
- _XPathEvaluatorBase.__init__(self, namespaces, extensions)
+ self.set_context(xpathCtxt)
def registerNamespace(self, prefix, uri):
"""Register a namespace with the XPath context.
@@ -155,33 +199,41 @@
Absolute XPath expressions (starting with '/') will be evaluated
against the ElementTree as returned by getroottree().
"""
- cdef xpath.xmlXPathContext* xpathCtxt
+ cdef python.PyThreadState* state
cdef xpath.xmlXPathObject* xpathObj
cdef _Document doc
cdef char* c_path
path = _utf8(_path)
- xpathCtxt = self._xpathCtxt
- xpathCtxt.node = self._element._c_node
doc = self._element._doc
- self._context.register_context(xpathCtxt, doc)
+ self._lock()
+ self._xpathCtxt.node = self._element._c_node
try:
+ self._context.register_context(doc)
self._context.registerVariables(_variables)
- xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt)
+ state = python.PyEval_SaveThread()
+ xpathObj = xpath.xmlXPathEvalExpression(
+ _cstr(path), self._xpathCtxt)
+ python.PyEval_RestoreThread(state)
+ result = self._handle_result(xpathObj, doc)
finally:
self._context.unregister_context()
+ self._unlock()
- return self._handle_result(xpathObj, doc)
+ return result
cdef class XPathDocumentEvaluator(XPathElementEvaluator):
"""Create an XPath evaluator for an ElementTree.
- XPath evaluators must not be shared between threads.
+ Additional namespace declarations can be passed with the 'namespaces'
+ keyword argument. EXSLT regular expression support can be disabled with
+ the 'regexp' boolean keyword (defaults to True).
"""
- def __init__(self, _ElementTree etree not None, namespaces=None, extensions=None):
+ def __init__(self, _ElementTree etree not None, namespaces=None,
+ extensions=None, regexp=True):
XPathElementEvaluator.__init__(
- self, etree._context_node, namespaces, extensions)
+ self, etree._context_node, namespaces, extensions, regexp)
def __call__(self, _path, **_variables):
"""Evaluate an XPath expression on the document.
@@ -189,67 +241,81 @@
Variables may be provided as keyword arguments. Note that namespaces
are currently not supported for variables.
"""
- cdef xpath.xmlXPathContext* xpathCtxt
+ cdef python.PyThreadState* state
cdef xpath.xmlXPathObject* xpathObj
cdef xmlDoc* c_doc
cdef _Document doc
path = _utf8(_path)
- xpathCtxt = self._xpathCtxt
doc = self._element._doc
- self._context.register_context(xpathCtxt, doc)
- c_doc = _fakeRootDoc(doc._c_doc, self._element._c_node)
+ self._lock()
try:
- self._context.registerVariables(_variables)
- xpathCtxt.doc = c_doc
- xpathCtxt.node = tree.xmlDocGetRootElement(c_doc)
- xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt)
+ self._context.register_context(doc)
+ c_doc = _fakeRootDoc(doc._c_doc, self._element._c_node)
+ try:
+ self._context.registerVariables(_variables)
+ state = python.PyEval_SaveThread()
+ self._xpathCtxt.doc = c_doc
+ self._xpathCtxt.node = tree.xmlDocGetRootElement(c_doc)
+ xpathObj = xpath.xmlXPathEvalExpression(
+ _cstr(path), self._xpathCtxt)
+ python.PyEval_RestoreThread(state)
+ result = self._handle_result(xpathObj, doc)
+ finally:
+ _destroyFakeDoc(doc._c_doc, c_doc)
+ self._context.unregister_context()
finally:
- _destroyFakeDoc(doc._c_doc, c_doc)
- self._context.unregister_context()
+ self._unlock()
- return self._handle_result(xpathObj, doc)
+ return result
-def XPathEvaluator(etree_or_element, namespaces=None, extensions=None):
+def XPathEvaluator(etree_or_element, namespaces=None, extensions=None,
+ regexp=True):
"""Creates an XPath evaluator for an ElementTree or an Element.
The resulting object can be called with an XPath expression as argument
and XPath variables provided as keyword arguments.
- XPath evaluators must not be shared between threads.
+ Additional namespace declarations can be passed with the 'namespaces'
+ keyword argument. EXSLT regular expression support can be disabled with
+ the 'regexp' boolean keyword (defaults to True).
"""
if isinstance(etree_or_element, _ElementTree):
- return XPathDocumentEvaluator(etree_or_element, namespaces, extensions)
+ return XPathDocumentEvaluator(etree_or_element, namespaces,
+ extensions, regexp)
else:
- return XPathElementEvaluator(etree_or_element, namespaces, extensions)
+ return XPathElementEvaluator(etree_or_element, namespaces,
+ extensions, regexp)
cdef class XPath(_XPathEvaluatorBase):
"""A compiled XPath expression that can be called on Elements and
ElementTrees.
- Besides the XPath expression, you can pass namespace mappings and
- extensions to the constructor through the keyword arguments ``namespaces``
- and ``extensions``.
+ Besides the XPath expression, you can pass prefix-namespace mappings and
+ extension functions to the constructor through the keyword arguments
+ ``namespaces`` and ``extensions``. EXSLT regular expression support can
+ be disabled with the 'regexp' boolean keyword (defaults to True).
"""
cdef xpath.xmlXPathCompExpr* _xpath
cdef readonly object path
- def __init__(self, path, namespaces=None, extensions=None):
- _XPathEvaluatorBase.__init__(self, namespaces, extensions)
- self._xpath = NULL
+ def __init__(self, path, namespaces=None, extensions=None, regexp=True):
+ cdef xpath.xmlXPathContext* xpathCtxt
+ _XPathEvaluatorBase.__init__(self, namespaces, extensions, regexp)
self.path = path
path = _utf8(path)
- self._xpathCtxt = xpath.xmlXPathNewContext(NULL)
- _setupDict(self._xpathCtxt)
- self._xpath = xpath.xmlXPathCtxtCompile(self._xpathCtxt, _cstr(path))
+ xpathCtxt = xpath.xmlXPathNewContext(NULL)
+ if xpathCtxt is NULL:
+ raise XPathContextError, "Unable to create new XPath context"
+ self.set_context(xpathCtxt)
+ self._xpath = xpath.xmlXPathCtxtCompile(xpathCtxt, _cstr(path))
if self._xpath is NULL:
self._raise_parse_error()
def __call__(self, _etree_or_element, **_variables):
cdef python.PyThreadState* state
- cdef xpath.xmlXPathContext* xpathCtxt
cdef xpath.xmlXPathObject* xpathObj
cdef _Document document
cdef _Element element
@@ -258,20 +324,22 @@
document = _documentOrRaise(_etree_or_element)
element = _rootNodeOrRaise(_etree_or_element)
- xpathCtxt = self._xpathCtxt
- xpathCtxt.doc = document._c_doc
- xpathCtxt.node = element._c_node
+ self._lock()
+ self._xpathCtxt.doc = document._c_doc
+ self._xpathCtxt.node = element._c_node
- context = self._context
- context.register_context(xpathCtxt, document)
try:
- context.registerVariables(_variables)
+ self._context.register_context(document)
+ self._context.registerVariables(_variables)
state = python.PyEval_SaveThread()
- xpathObj = xpath.xmlXPathCompiledEval(self._xpath, xpathCtxt)
+ xpathObj = xpath.xmlXPathCompiledEval(
+ self._xpath, self._xpathCtxt)
python.PyEval_RestoreThread(state)
+ result = self._handle_result(xpathObj, document)
finally:
- context.unregister_context()
- return self._handle_result(xpathObj, document)
+ self._context.unregister_context()
+ self._unlock()
+ return result
def __dealloc__(self):
if self._xpath is not NULL:
Modified: lxml/trunk/src/lxml/xslt.pxd
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxd (original)
+++ lxml/trunk/src/lxml/xslt.pxd Thu Mar 29 09:52:15 2007
@@ -35,6 +35,8 @@
xmlXPathFunction function)
cdef int xsltUnregisterExtModuleFunction(char* name, char* URI)
cdef xmlXPathFunction xsltExtModuleFunctionLookup(char* name, char* URI)
+ cdef int xsltRegisterExtPrefix(xsltStylesheet* style,
+ char* prefix, char* URI)
cdef extern from "libxslt/documents.h":
ctypedef enum xsltLoadType:
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Thu Mar 29 09:52:15 2007
@@ -193,75 +193,48 @@
################################################################################
# XSLT
+cdef int _register_xslt_function(void* ctxt, name_utf, ns_utf):
+ if ns_utf is None:
+ return 0
+ return xslt.xsltRegisterExtFunction(
+ <xslt.xsltTransformContext*>ctxt, _cstr(name_utf), _cstr(ns_utf),
+ _xpath_function_call)
+
+cdef int _unregister_xslt_function(void* ctxt, name_utf, ns_utf):
+ if ns_utf is None:
+ return 0
+ return xslt.xsltRegisterExtFunction(
+ <xslt.xsltTransformContext*>ctxt, _cstr(name_utf), _cstr(ns_utf),
+ NULL)
+
+
cdef class _XSLTContext(_BaseContext):
cdef xslt.xsltTransformContext* _xsltCtxt
- def __init__(self, namespaces, extensions):
+ def __init__(self, namespaces, extensions, enable_regexp):
self._xsltCtxt = NULL
- if extensions and None in extensions:
- raise XSLTExtensionError, "extensions must not have empty namespaces"
- _BaseContext.__init__(self, namespaces, extensions)
+ if extensions is not None:
+ for ns, prefix in extensions:
+ if ns is None:
+ raise XSLTExtensionError, \
+ "extensions must not have empty namespaces"
+ _BaseContext.__init__(self, namespaces, extensions, enable_regexp)
cdef register_context(self, xslt.xsltTransformContext* xsltCtxt,
_Document doc):
self._xsltCtxt = xsltCtxt
self._set_xpath_context(xsltCtxt.xpathCtxt)
self._register_context(doc)
- xsltCtxt.xpathCtxt.userData = <void*>self
- self._registerExtensionFunctions()
+ self.registerLocalFunctions(xsltCtxt, _register_xslt_function)
+ self.registerGlobalFunctions(xsltCtxt, _register_xslt_function)
cdef free_context(self):
- cdef xslt.xsltTransformContext* xsltCtxt
- xsltCtxt = self._xsltCtxt
- if xsltCtxt is NULL:
- return
- self._free_context()
- self._xsltCtxt = NULL
- xslt.xsltFreeTransformContext(xsltCtxt)
+ self._cleanup_context()
+ self._release_context()
+ if self._xsltCtxt is not NULL:
+ xslt.xsltFreeTransformContext(self._xsltCtxt)
+ self._xsltCtxt = NULL
self._release_temp_refs()
- cdef void _addLocalExtensionFunction(self, ns_utf, name_utf, function):
- if self._extensions is None:
- self._extensions = {}
- python.PyDict_SetItem(self._extensions, (ns_utf, name_utf), function)
-
- cdef void _registerExtensionFunctions(self):
- cdef python.PyObject* dict_result
- for ns_utf, functions in _iter_extension_function_names():
- if ns_utf is None:
- continue
- dict_result = python.PyDict_GetItem(self._function_cache_ns, ns_utf)
- if dict_result is NULL:
- d = {}
- python.PyDict_SetItem(self._function_cache_ns, ns_utf, d)
- else:
- d = <object>dict_result
- for name_utf, function in functions.iteritems():
- python.PyDict_SetItem(d, name_utf, function)
- xslt.xsltRegisterExtFunction(
- self._xsltCtxt, _cstr(name_utf), _cstr(ns_utf),
- _xpath_function_call)
- if self._extensions is None:
- return # done
- last_ns = None
- for (ns_utf, name_utf), function in self._extensions.iteritems():
- if ns_utf is None:
- raise ValueError, \
- "extensions must have non empty namespaces"
- elif ns_utf is not last_ns:
- last_ns = ns_utf
- dict_result = python.PyDict_GetItem(
- self._function_cache_ns, ns_utf)
- if dict_result is NULL:
- d = {}
- python.PyDict_SetItem(self._function_cache_ns, ns_utf, d)
- else:
- d = <object>dict_result
- python.PyDict_SetItem(d, name_utf, function)
- xslt.xsltRegisterExtFunction(
- self._xsltCtxt, _cstr(name_utf), _cstr(ns_utf),
- _xpath_function_call)
-
-cdef class _ExsltRegExp # forward declaration
cdef class XSLT:
"""Turn a document into an XSLT object.
@@ -279,16 +252,17 @@
cdef xslt.xsltStylesheet* _c_style
cdef _XSLTResolverContext _xslt_resolver_context
cdef XSLTAccessControl _access_control
- cdef _ExsltRegExp _regexp
cdef _ErrorLog _error_log
- def __init__(self, xslt_input, extensions=None, regexp=True, access_control=None):
+ def __init__(self, xslt_input, extensions=None, regexp=True,
+ access_control=None):
cdef python.PyThreadState* state
cdef xslt.xsltStylesheet* c_style
cdef xmlDoc* c_doc
cdef xmlDoc* fake_c_doc
cdef _Document doc
cdef _Element root_node
+ cdef _ExsltRegExp _regexp
doc = _documentOrRaise(xslt_input)
root_node = _rootNodeOrRaise(xslt_input)
@@ -327,13 +301,7 @@
c_doc._private = NULL # no longer used!
self._c_style = c_style
- self._context = _XSLTContext(None, extensions)
- if regexp:
- self._regexp = _ExsltRegExp()
- self._regexp._register_in_context(self._context)
- else:
- self._regexp = None
- # XXX is it worthwile to use xsltPrecomputeStylesheet here?
+ self._context = _XSLTContext(None, extensions, regexp)
def __dealloc__(self):
if self._xslt_resolver_context is not None and \
@@ -346,20 +314,24 @@
def __get__(self):
return self._error_log.copy()
+ def apply(self, _input, profile_run=False, **_kw):
+ return self(_input, profile_run, **_kw)
+
+ def tostring(self, _ElementTree result_tree):
+ """Save result doc to string based on stylesheet output method.
+ """
+ return str(result_tree)
+
def __call__(self, _input, profile_run=False, **_kw):
- cdef python.PyThreadState* state
cdef _XSLTContext context
cdef _Document input_doc
cdef _Element root_node
cdef _Document result_doc
cdef _Document profile_doc
cdef xmlDoc* c_profile_doc
- cdef _XSLTResolverContext resolver_context
cdef xslt.xsltTransformContext* transform_ctxt
cdef xmlDoc* c_result
cdef xmlDoc* c_doc
- cdef char** params
- cdef Py_ssize_t i, kw_count
if not _checkThreadDict(self._c_style.doc.dict):
raise RuntimeError, "stylesheet is not usable in this thread"
@@ -367,9 +339,6 @@
input_doc = _documentOrRaise(_input)
root_node = _rootNodeOrRaise(_input)
- resolver_context = _XSLTResolverContext(input_doc._parser)
- resolver_context._c_style_doc = self._xslt_resolver_context._c_style_doc
-
c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node)
transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc)
@@ -379,28 +348,82 @@
initTransformDict(transform_ctxt)
- self._error_log.connect()
+ if profile_run:
+ transform_ctxt.profile = 1
+
+ try:
+ self._error_log.connect()
+ context = self._context._copy()
+ context.register_context(transform_ctxt, input_doc)
+
+ c_result = self._run_transform(
+ input_doc, c_doc, _kw, context, transform_ctxt)
+
+ if transform_ctxt.profile:
+ c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt)
+ if c_profile_doc is not NULL:
+ profile_doc = _documentFactory(
+ c_profile_doc, input_doc._parser)
+ finally:
+ if context is not None:
+ context.free_context()
+ _destroyFakeDoc(input_doc._c_doc, c_doc)
+ self._error_log.disconnect()
+
+ try:
+ if self._xslt_resolver_context._has_raised():
+ if c_result is not NULL:
+ tree.xmlFreeDoc(c_result)
+ self._xslt_resolver_context._raise_if_stored()
+
+ if c_result is NULL:
+ error = self._error_log.last_error
+ if error is not None and error.message:
+ if error.line >= 0:
+ message = "%s, line %d" % (error.message, error.line)
+ else:
+ message = error.message
+ elif error.line >= 0:
+ message = "Error applying stylesheet, line %d" % error.line
+ else:
+ message = "Error applying stylesheet"
+ raise XSLTApplyError, message
+ finally:
+ self._xslt_resolver_context.clear()
+
+ result_doc = _documentFactory(c_result, input_doc._parser)
+ return _xsltResultTreeFactory(result_doc, self, profile_doc)
+
+ cdef xmlDoc* _run_transform(self, _Document input_doc, xmlDoc* c_input_doc,
+ parameters, _XSLTContext context,
+ xslt.xsltTransformContext* transform_ctxt):
+ cdef python.PyThreadState* state
+ cdef _XSLTResolverContext resolver_context
+ cdef xmlDoc* c_result
+ cdef char** params
+ cdef Py_ssize_t i, parameter_count
+
+ resolver_context = _XSLTResolverContext(input_doc._parser)
+ resolver_context._c_style_doc = self._xslt_resolver_context._c_style_doc
+
xslt.xsltSetTransformErrorFunc(transform_ctxt, <void*>self._error_log,
_receiveXSLTError)
if self._access_control is not None:
self._access_control._register_in_context(transform_ctxt)
- if profile_run:
- transform_ctxt.profile = 1
-
transform_ctxt._private = <python.PyObject*>self._xslt_resolver_context
- kw_count = python.PyDict_Size(_kw)
- if kw_count > 0:
+ parameter_count = python.PyDict_Size(parameters)
+ if parameter_count > 0:
# allocate space for parameters
# * 2 as we want an entry for both key and value,
# and + 1 as array is NULL terminated
params = <char**>python.PyMem_Malloc(
- sizeof(char*) * (kw_count * 2 + 1))
+ sizeof(char*) * (parameter_count * 2 + 1))
i = 0
keep_ref = []
- for key, value in _kw.iteritems():
+ for key, value in parameters.iteritems():
k = _utf8(key)
python.PyList_Append(keep_ref, k)
v = _utf8(value)
@@ -413,59 +436,16 @@
else:
params = NULL
- context = self._context._copy()
- context.register_context(transform_ctxt, input_doc)
-
state = python.PyEval_SaveThread()
- c_result = xslt.xsltApplyStylesheetUser(self._c_style, c_doc, params,
- NULL, NULL, transform_ctxt)
+ c_result = xslt.xsltApplyStylesheetUser(
+ self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt)
python.PyEval_RestoreThread(state)
if params is not NULL:
# deallocate space for parameters
python.PyMem_Free(params)
- keep_ref = None
-
- if transform_ctxt.profile:
- c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt)
- if c_profile_doc is not NULL:
- profile_doc = _documentFactory(c_profile_doc, input_doc._parser)
-
- context.free_context()
- _destroyFakeDoc(input_doc._c_doc, c_doc)
- self._error_log.disconnect()
- try:
- if self._xslt_resolver_context._has_raised():
- if c_result is not NULL:
- tree.xmlFreeDoc(c_result)
- self._xslt_resolver_context._raise_if_stored()
-
- if c_result is NULL:
- error = self._error_log.last_error
- if error is not None and error.message:
- if error.line >= 0:
- message = "%s, line %d" % (error.message, error.line)
- else:
- message = error.message
- elif error.line >= 0:
- message = "Error applying stylesheet, line %d" % error.line
- else:
- message = "Error applying stylesheet"
- raise XSLTApplyError, message
- finally:
- self._xslt_resolver_context.clear()
-
- result_doc = _documentFactory(c_result, input_doc._parser)
- return _xsltResultTreeFactory(result_doc, self, profile_doc)
-
- def apply(self, _input, profile_run=False, **_kw):
- return self(_input, profile_run, **_kw)
-
- def tostring(self, _ElementTree result_tree):
- """Save result doc to string based on stylesheet output method.
- """
- return str(result_tree)
+ return c_result
cdef class _XSLTResultTree(_ElementTree):
cdef XSLT _xslt
@@ -542,17 +522,6 @@
# enable EXSLT support for XSLT
xslt.exsltRegisterAll()
-# extension function lookup for XSLT
-cdef xpath.xmlXPathFunction _xslt_function_check(void* ctxt,
- char* c_name, char* c_ns_uri):
- "Find XSLT extension function from set of XPath and XSLT functions"
- cdef xpath.xmlXPathFunction result
- result = _function_check(ctxt, c_name, c_ns_uri)
- if result is NULL:
- return xslt.xsltExtModuleFunctionLookup(c_name, c_ns_uri)
- else:
- return result
-
cdef void initTransformDict(xslt.xsltTransformContext* transform_ctxt):
__GLOBAL_PARSER_CONTEXT.initThreadDictRef(&transform_ctxt.dict)
@@ -649,82 +618,3 @@
if attr == key:
return value
return default
-
-################################################################################
-# EXSLT regexp implementation
-
-cdef class _ExsltRegExp:
- cdef object _compile_map
- def __init__(self):
- self._compile_map = {}
-
- cdef _make_string(self, value):
- if _isString(value):
- return value
- else:
- raise TypeError, "Invalid argument type %s" % type(value)
-
- cdef _compile(self, rexp, ignore_case):
- cdef python.PyObject* c_result
- rexp = self._make_string(rexp)
- key = (rexp, ignore_case)
- c_result = python.PyDict_GetItem(self._compile_map, key)
- if c_result is not NULL:
- return <object>c_result
- py_flags = re.UNICODE
- if ignore_case:
- py_flags = py_flags | re.IGNORECASE
- rexp_compiled = re.compile(rexp, py_flags)
- python.PyDict_SetItem(self._compile_map, key, rexp_compiled)
- return rexp_compiled
-
- def test(self, ctxt, s, rexp, flags=''):
- flags = self._make_string(flags)
- s = self._make_string(s)
- rexpc = self._compile(rexp, 'i' in flags)
- if rexpc.search(s) is None:
- return False
- else:
- return True
-
- def match(self, ctxt, s, rexp, flags=''):
- flags = self._make_string(flags)
- s = self._make_string(s)
- rexpc = self._compile(rexp, 'i' in flags)
- if 'g' in flags:
- results = rexpc.findall(s)
- if not results:
- return ()
- else:
- result = rexpc.search(s)
- if not result:
- return ()
- results = [ result.group() ]
- results.extend( result.groups('') )
- result_list = []
- root = Element('matches')
- join_groups = ''.join
- for s_match in results:
- if python.PyTuple_CheckExact(s_match):
- s_match = join_groups(s_match)
- elem = SubElement(root, 'match')
- elem.text = s_match
- python.PyList_Append(result_list, elem)
- return result_list
-
- def replace(self, ctxt, s, rexp, flags, replacement):
- replacement = self._make_string(replacement)
- flags = self._make_string(flags)
- s = self._make_string(s)
- rexpc = self._compile(rexp, 'i' in flags)
- if 'g' in flags:
- count = 0
- else:
- count = 1
- return rexpc.sub(replacement, s, count)
-
- cdef _register_in_context(self, _XSLTContext context):
- ns = "http://exslt.org/regular-expressions"
- context._addLocalExtensionFunction(ns, "test", self.test)
- context._addLocalExtensionFunction(ns, "match", self.match)
- context._addLocalExtensionFunction(ns, "replace", self.replace)
More information about the lxml-checkins
mailing list