[z3-checkins] r40545 - z3/deliverance/zdeliverance
ianb at codespeak.net
ianb at codespeak.net
Thu Mar 15 17:00:13 CET 2007
Author: ianb
Date: Thu Mar 15 17:00:08 2007
New Revision: 40545
Modified:
z3/deliverance/zdeliverance/traversal.py
Log:
Fixed encoding issues
Modified: z3/deliverance/zdeliverance/traversal.py
==============================================================================
--- z3/deliverance/zdeliverance/traversal.py (original)
+++ z3/deliverance/zdeliverance/traversal.py Thu Mar 15 17:00:08 2007
@@ -9,6 +9,7 @@
from deliverance import htmlserialize
from lxml import etree
import urllib
+import re
class DeliveranceRule(SimpleItem, PropertyManager):
""" An object which invokes a deliverance rule when its container
@@ -17,12 +18,14 @@
meta_type = 'Deliverance Rule'
theme_uri = ''
rule = ''
+ transform_ports = ()
id = 'deliverance_rule'
_properties = (
{'id':'title', 'type':'string', 'mode':'w'},
{'id':'theme_uri', 'type':'string', 'mode':'w', 'label':'Theme URI'},
{'id':'rule', 'type':'text', 'mode':'w', 'label':'Rule'},
+ {'id':'transform_ports', 'type':'lines', 'mode':'w', 'label':'Transform Ports'},
)
manage_options = PropertyManager.manage_options
@@ -38,12 +41,16 @@
def __call__(self, container, request):
response = request.RESPONSE
+ port = request.SERVER_PORT
+ if self.transform_ports and port not in self.transform_ports:
+ return
+ if not self.rule:
+ # Hasn't been initialized
+ return
orig_setBody = response.setBody
def setBody(*arg, **kw):
orig_setBody(*arg, **kw)
- # Should also stop if deliverance.theme doesn't match up to self
- if (not response.headers.get('content-type', '').startswith('text/html')
- or not self.rule):
+ if not response.headers.get('content-type', '').startswith('text/html'):
return response
body = self.transform_body(response)
return orig_setBody(body)
@@ -52,6 +59,16 @@
def transform_body(self, response):
interp = self.make_renderer()
body = response.body
+ content_type = response.headers['content-type']
+ match = self._meta_charset_re.search(body)
+ if match:
+ body = body.decode(match.group(1), 'ignore')
+ else:
+ match = self._charset_re.search(content_type)
+ if match:
+ body = body.decode(match.group(1), 'ignore')
+ content_type = content_type.split(';')[0] + '; charset=UTF-8'
+ response.headers['content-type'] = content_type
transformed = interp.render(etree.HTML(body))
return htmlserialize.tostring(transformed)
@@ -84,13 +101,25 @@
else:
return text
+ _charset_re = re.compile(r'charset=([a-z0-9_-]+)', re.I)
+ _meta_charset_re = re.compile(r'<meta[^>]*charset=([a-z0-9_-]+).*?>', re.I)
+
def get_resource(self, href):
if href.startswith('data:'):
return href[5:]
# @@: This is a really bad implementation
f = urllib.urlopen(href)
c = f.read()
- f.close()
+ match = self._meta_charset_re.search(c)
+ if match:
+ c = c.decode(match.group(1), 'ignore')
+ else:
+ content_type = f.info().get('Content-Type')
+ f.close()
+ if content_type:
+ match = self._charset_re.search(content_type)
+ if match:
+ c = c.decode(match.group(1), 'ignore')
return c
def manage_addDeliveranceRule(self, REQUEST=None):
More information about the z3-checkins mailing list