[wwwsearch-commits] r33194 - wwwsearch/ClientForm/trunk
jjlee at codespeak.net
jjlee at codespeak.net
Wed Oct 11 22:59:11 CEST 2006
Author: jjlee
Date: Wed Oct 11 22:59:07 2006
New Revision: 33194
Modified:
wwwsearch/ClientForm/trunk/ClientForm.py
Log:
Fix entity reference / character reference handling for Python 2.5
Modified: wwwsearch/ClientForm/trunk/ClientForm.py
==============================================================================
--- wwwsearch/ClientForm/trunk/ClientForm.py (original)
+++ wwwsearch/ClientForm/trunk/ClientForm.py Wed Oct 11 22:59:07 2006
@@ -795,14 +795,30 @@
import sgmllib
# monkeypatch to fix http://www.python.org/sf/803422 :-(
sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]")
+
class _AbstractSgmllibParser(_AbstractFormParser):
+
def do_option(self, attrs):
_AbstractFormParser._start_option(self, attrs)
- def unescape_attr_if_required(self, name):
- return self.unescape_attr(name)
- def unescape_attrs_if_required(self, attrs):
- return self.unescape_attrs(attrs)
+ if sys.version_info[:2] >= (2,5):
+ # we override this attr to decode hex charrefs
+ entity_or_charref = re.compile(
+ '&(?:([a-zA-Z][-.a-zA-Z0-9]*)|#(x?[0-9a-fA-F]+))(;?)')
+ def convert_entityref(self, name):
+ return unescape("&%s;" % name, self._entitydefs, self._encoding)
+ def convert_charref(self, name):
+ return unescape_charref("%s" % name, self._encoding)
+ def unescape_attr_if_required(self, name):
+ return name # sgmllib already did it
+ def unescape_attrs_if_required(self, attrs):
+ return attrs # ditto
+ else:
+ def unescape_attr_if_required(self, name):
+ return self.unescape_attr(name)
+ def unescape_attrs_if_required(self, attrs):
+ return self.unescape_attrs(attrs)
+
class FormParser(_AbstractSgmllibParser, sgmllib.SGMLParser):
"""Good for tolerance of incorrect HTML, bad for XHTML."""
More information about the wwwsearch-commits mailing list