[z3-checkins] r40545 - z3/deliverance/zdeliverance

ianb at codespeak.net ianb at codespeak.net
Thu Mar 15 17:00:13 CET 2007


Author: ianb
Date: Thu Mar 15 17:00:08 2007
New Revision: 40545

Modified:
   z3/deliverance/zdeliverance/traversal.py
Log:
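Fixed encoding issues: response bodies and fetched resources are now
decoded using the charset declared in a <meta> tag or, failing that, in
the Content-Type header, and transformed responses are labelled
charset=UTF-8. Also added a transform_ports property to restrict which
server ports are transformed, and skip the transform when no rule is set.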

Modified: z3/deliverance/zdeliverance/traversal.py
==============================================================================
--- z3/deliverance/zdeliverance/traversal.py	(original)
+++ z3/deliverance/zdeliverance/traversal.py	Thu Mar 15 17:00:08 2007
@@ -9,6 +9,7 @@
 from deliverance import htmlserialize
 from lxml import etree
 import urllib
+import re
 
 class DeliveranceRule(SimpleItem, PropertyManager):
     """ An object which invokes a deliverance rule when its container
@@ -17,12 +18,14 @@
     meta_type = 'Deliverance Rule'
     theme_uri = ''
     rule = ''
+    transform_ports = ()
     id = 'deliverance_rule'
 
     _properties = (
         {'id':'title', 'type':'string', 'mode':'w'},
         {'id':'theme_uri', 'type':'string', 'mode':'w', 'label':'Theme URI'},
         {'id':'rule', 'type':'text', 'mode':'w', 'label':'Rule'},
+        {'id':'transform_ports', 'type':'lines', 'mode':'w', 'label':'Transform Ports'},
         )
     manage_options = PropertyManager.manage_options
     
@@ -38,12 +41,16 @@
 
     def __call__(self, container, request):
         response = request.RESPONSE
+        port = request.SERVER_PORT
+        if self.transform_ports and port not in self.transform_ports:
+            return
+        if not self.rule:
+            # Hasn't been initialized
+            return
         orig_setBody = response.setBody
         def setBody(*arg, **kw):
             orig_setBody(*arg, **kw)
-            # Should also stop if deliverance.theme doesn't match up to self
-            if (not response.headers.get('content-type', '').startswith('text/html')
-                or not self.rule):
+            if not response.headers.get('content-type', '').startswith('text/html'):
                 return response
             body = self.transform_body(response)
             return orig_setBody(body)
@@ -52,6 +59,16 @@
     def transform_body(self, response):
         interp = self.make_renderer()
         body = response.body
+        content_type = response.headers['content-type']
+        match = self._meta_charset_re.search(body)
+        if match:
+            body = body.decode(match.group(1), 'ignore')
+        else:
+            match = self._charset_re.search(content_type)
+            if match:
+                body = body.decode(match.group(1), 'ignore')
+        content_type = content_type.split(';')[0] + '; charset=UTF-8'
+        response.headers['content-type'] = content_type
         transformed = interp.render(etree.HTML(body))
         return htmlserialize.tostring(transformed)
 
@@ -84,13 +101,25 @@
             else:
                 return text
 
+    _charset_re = re.compile(r'charset=([a-z0-9_-]+)', re.I)
+    _meta_charset_re = re.compile(r'<meta[^>]*charset=([a-z0-9_-]+).*?>', re.I)
+
     def get_resource(self, href):
         if href.startswith('data:'):
             return href[5:]
         # @@: This is a really bad implementation
         f = urllib.urlopen(href)
         c = f.read()
-        f.close()
+        match = self._meta_charset_re.search(c)
+        if match:
+            c = c.decode(match.group(1), 'ignore')
+        else:
+            content_type = f.info().get('Content-Type')
+            f.close()
+            if content_type:
+                match = self._charset_re.search(content_type)
+                if match:
+                    c = c.decode(match.group(1), 'ignore')
         return c
             
 def manage_addDeliveranceRule(self, REQUEST=None):


More information about the z3-checkins mailing list