[kupu-checkins] r36780 - kupu/trunk/kupu/plone
duncan at codespeak.net
duncan at codespeak.net
Mon Jan 15 16:35:45 CET 2007
Author: duncan
Date: Mon Jan 15 16:35:42 2007
New Revision: 36780
Modified:
kupu/trunk/kupu/plone/html2captioned.py
Log:
Make entity decoding a bit more robust.
Modified: kupu/trunk/kupu/plone/html2captioned.py
==============================================================================
--- kupu/trunk/kupu/plone/html2captioned.py (original)
+++ kupu/trunk/kupu/plone/html2captioned.py Mon Jan 15 16:35:42 2007
@@ -17,6 +17,8 @@
from urllib import unquote_plus, quote_plus
from Acquisition import aq_base
from htmlentitydefs import name2codepoint
+name2codepoint = name2codepoint.copy()
+name2codepoint['apos']=ord("'")
__revision__ = '$Id$'
@@ -622,11 +624,14 @@
EntityPattern = re.compile('&(?:#(\d+)|([a-zA-Z]+));')
def decodeEntities(s, encoding='utf-8'):
def unescape(match):
- code = match.group(1)
+ code = match.group(1)
if code:
return unichr(int(code, 10))
else:
code = match.group(2)
- return unichr(name2codepoint[code])
+ if code:
+ return unichr(int(code, 16))
+ else:
+ return unichr(name2codepoint[match.group(3)])
return EntityPattern.sub(unescape, s)
More information about the kupu-checkins
mailing list