[wwwsearch-commits] r33194 - wwwsearch/ClientForm/trunk

jjlee at codespeak.net jjlee at codespeak.net
Wed Oct 11 22:59:11 CEST 2006


Author: jjlee
Date: Wed Oct 11 22:59:07 2006
New Revision: 33194

Modified:
   wwwsearch/ClientForm/trunk/ClientForm.py
Log:
Fix entity reference / character reference handling for Python 2.5

Modified: wwwsearch/ClientForm/trunk/ClientForm.py
==============================================================================
--- wwwsearch/ClientForm/trunk/ClientForm.py	(original)
+++ wwwsearch/ClientForm/trunk/ClientForm.py	Wed Oct 11 22:59:07 2006
@@ -795,14 +795,30 @@
 import sgmllib
 # monkeypatch to fix http://www.python.org/sf/803422 :-(
 sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]")
+
 class _AbstractSgmllibParser(_AbstractFormParser):
+
     def do_option(self, attrs):
         _AbstractFormParser._start_option(self, attrs)
 
-    def unescape_attr_if_required(self, name):
-        return self.unescape_attr(name)
-    def unescape_attrs_if_required(self, attrs):
-        return self.unescape_attrs(attrs)
+    if sys.version_info[:2] >= (2,5):
+        # we override this attr to decode hex charrefs
+        entity_or_charref = re.compile(
+            '&(?:([a-zA-Z][-.a-zA-Z0-9]*)|#(x?[0-9a-fA-F]+))(;?)')
+        def convert_entityref(self, name):
+            return unescape("&%s;" % name, self._entitydefs, self._encoding)
+        def convert_charref(self, name):
+            return unescape_charref("%s" % name, self._encoding)
+        def unescape_attr_if_required(self, name):
+            return name  # sgmllib already did it
+        def unescape_attrs_if_required(self, attrs):
+            return attrs  # ditto
+    else:
+        def unescape_attr_if_required(self, name):
+            return self.unescape_attr(name)
+        def unescape_attrs_if_required(self, attrs):
+            return self.unescape_attrs(attrs)
+
 
 class FormParser(_AbstractSgmllibParser, sgmllib.SGMLParser):
     """Good for tolerance of incorrect HTML, bad for XHTML."""


More information about the wwwsearch-commits mailing list