[z3-checkins] r37818 - z3/deliverance/branches/cache_aware/deliverance

ltucker at codespeak.net ltucker at codespeak.net
Fri Feb 2 20:17:32 CET 2007


Author: ltucker
Date: Fri Feb  2 20:17:28 2007
New Revision: 37818

Added:
   z3/deliverance/branches/cache_aware/deliverance/cache_fixture.py
Modified:
   z3/deliverance/branches/cache_aware/deliverance/cache_utils.py
   z3/deliverance/branches/cache_aware/deliverance/resource_fetcher.py
   z3/deliverance/branches/cache_aware/deliverance/test_wsgi.py
   z3/deliverance/branches/cache_aware/deliverance/wsgimiddleware.py
Log:
wsgi testing & fixes

Added: z3/deliverance/branches/cache_aware/deliverance/cache_fixture.py
==============================================================================
--- (empty file)
+++ z3/deliverance/branches/cache_aware/deliverance/cache_fixture.py	Fri Feb  2 20:17:28 2007
@@ -0,0 +1,78 @@
+
+from paste.response import replace_header
+from paste.httpheaders import IF_MODIFIED_SINCE, EXPIRES, CONTENT_LENGTH, ETAG
+
+class CacheFixtureResponseInfo(object):
+    """
+    body and optional cache metadata served for one fixture url
+    """
+    def __init__(self, data, mod_time=None, etag=None,
+                 cache_control=None, expires=None):
+        self.data = data
+        self.mod_time, self.etag = mod_time, etag
+        self.cache_control, self.expires = cache_control, expires
+
+class CacheFixtureApp(object):
+    """
+    a crummy WSGI app that serves dummy content for
+    explicitly mapped urls and answers 304 when the
+    request carries a matching cache validator.
+
+    responds with a 404 for any url not mapped in.
+    """
+    def __init__(self):
+        # path -> response info (data, mod_time, etag, ...)
+        self.responses = {}
+
+    def map_url(self, path, response_info):
+        """serve response_info for requests to path"""
+        self.responses[path] = response_info
+
+    def get_response_info(self, path):
+        """return the info mapped to path, or None"""
+        return self.responses.get(path)
+
+    def __call__(self, environ, start_response):
+        """WSGI entry point: 404, 304 or 200 with the data"""
+        # PATH_INFO may legally be absent when it is empty
+        info = self.get_response_info(environ.get('PATH_INFO', ''))
+        if info is None:
+            start_response('404 Not Found', [('content-length','0')])
+            return []
+
+        headers = self.calc_headers(info)
+
+        if info.mod_time is not None and 'HTTP_IF_MODIFIED_SINCE' in environ:
+            req_time = IF_MODIFIED_SINCE.parse(environ['HTTP_IF_MODIFIED_SINCE'])
+            # unchanged as of the requested date, boundary included
+            if req_time >= info.mod_time:
+                return self._not_modified(headers, start_response)
+
+        if info.etag is not None and 'HTTP_IF_NONE_MATCH' in environ:
+            # XXX this expects only one etag, but it could be more than one
+            if info.etag == environ['HTTP_IF_NONE_MATCH']:
+                return self._not_modified(headers, start_response)
+
+        headers.append(('content-length', str(len(info.data))))
+        headers.append(('content-type', 'text/html'))
+        start_response('200 OK', headers)
+        return [info.data]
+
+    def _not_modified(self, headers, start_response):
+        """send an empty 304 Not Modified response"""
+        replace_header(headers, 'content-length', '0')
+        start_response('304 Not Modified', headers)
+        return []
+
+    def calc_headers(self, response_info):
+        """build the cache related headers for response_info"""
+        headers = []
+        if response_info.etag is not None:
+            replace_header(headers, 'etag', response_info.etag)
+        if response_info.cache_control is not None:
+            # headers is a plain list: append, lists have no add()
+            headers.append(('cache-control', response_info.cache_control))
+        if response_info.expires is not None:
+            # keyword only: a positional arg is taken as the header value
+            EXPIRES.update(headers, time=response_info.expires)
+        return headers

Modified: z3/deliverance/branches/cache_aware/deliverance/cache_utils.py
==============================================================================
--- z3/deliverance/branches/cache_aware/deliverance/cache_utils.py	(original)
+++ z3/deliverance/branches/cache_aware/deliverance/cache_utils.py	Fri Feb  2 20:17:28 2007
@@ -9,11 +9,11 @@
 utilities for fusing cache related HTTP headers from 
 multiple sources 
 
-XXX there is probably a good amount of work in here 
-that Paste could simplify 
+XXX 
+there is probably a good amount of work in here that Paste could simplify 
+tests that depend on set ordering 
 
 TODO: 
-handle expires 
 handle last-modified
 """
 
@@ -471,6 +471,8 @@
 
     newcc[directive] = str(min)
 
+
+
 def flatten_directive_map(d): 
     """ 
     flattens a map of directive -> fieldnames 

Modified: z3/deliverance/branches/cache_aware/deliverance/resource_fetcher.py
==============================================================================
--- z3/deliverance/branches/cache_aware/deliverance/resource_fetcher.py	(original)
+++ z3/deliverance/branches/cache_aware/deliverance/resource_fetcher.py	Fri Feb  2 20:17:28 2007
@@ -49,17 +49,13 @@
 	    self.environ['HTTP_ACCEPT_ENCODING'] = '' 
 
     def wsgi_get(self): 
-        print "Internal Resource get: %s" % self.uri
         if 'paste.recursive.include' in self.environ: 
-            print "Doing paste.recursive.include"
             # Try to do the redirect this way...
             includer = self.environ['paste.recursive.include']
             res = includer(self.uri, self.environ)
             return (res.status, res.headers, res.body)
         else: 
-            print "Doing intercept"
             status, headers, body = intercept_output(self.environ, self.app)
-            print "  => %s" % status 
             return (status, headers, body)
 
 
@@ -115,7 +111,6 @@
         #        self.environ['SERVER_PORT'] = '80'
 
     def wsgi_get(self): 
-        print "External Resource get: %s" % self.uri
         proxy_app = TransparentProxy() 
         return intercept_output(self.environ, proxy_app)
 

Modified: z3/deliverance/branches/cache_aware/deliverance/test_wsgi.py
==============================================================================
--- z3/deliverance/branches/cache_aware/deliverance/test_wsgi.py	(original)
+++ z3/deliverance/branches/cache_aware/deliverance/test_wsgi.py	Fri Feb  2 20:17:28 2007
@@ -3,9 +3,14 @@
 from lxml import etree
 from paste.fixture import TestApp
 from paste.urlparser import StaticURLParser
+from paste.response import header_value
 from deliverance.wsgimiddleware import DeliveranceMiddleware
 from formencode.doctest_xml_compare import xml_compare
 from deliverance.htmlserialize import tostring
+from deliverance.cache_fixture import CacheFixtureResponseInfo, CacheFixtureApp
+from deliverance import cache_utils
+from time import time as now
+from rfc822 import formatdate
 
 static_data = os.path.join(os.path.dirname(__file__), 'test-data', 'static')
 tasktracker_data = os.path.join(os.path.dirname(__file__), 'test-data', 'tasktracker')
@@ -25,6 +30,8 @@
 url_app = StaticURLParser(url_data)
 aggregate_app = StaticURLParser(aggregate_data)
 
+
+
 def html_string_compare(astr, bstr):
     """
     compare to strings containing html based on html 
@@ -148,8 +155,118 @@
     res2 = app.get('/expected.html?notheme')
     html_string_compare(res.body, res2.body)
 
+def do_cache(renderer_type, name): 
+    """
+    check etag and If-Modified-Since handling of the middleware
+    over a CacheFixtureApp; the name argument is not used here
+    """
+    theme_data = """ 
+        <html>
+          <head><title>theme</title></head>
+          <body><div id="replaceme"></div></body>
+        </html>
+    """
+    rule_data = """ 
+        <rules xmlns="http://www.plone.org/deliverance">
+          <replace theme="//*[@id='replaceme']" content="//*[@id='content']" />
+        </rules>
+    """
+    
+    content_data = """
+         <html><head></head><body><div id="content">foo</div></body></html>
+    """
+
+    expected_data = """
+        <html>
+          <head>
+            <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+            <title>theme</title></head>
+          <body><div id="content">foo</div></body>
+        </html>
+    """
+
+    theme_info = CacheFixtureResponseInfo(theme_data)
+    rule_info = CacheFixtureResponseInfo(rule_data)
+    content_info = CacheFixtureResponseInfo(content_data)
+    expected_info = CacheFixtureResponseInfo(expected_data)
+
+    capp = CacheFixtureApp()
+    capp.map_url('/theme.html',theme_info)
+    capp.map_url('/rules.xml',rule_info)
+    capp.map_url('/content.html',content_info)
+    capp.map_url('/expected.html',expected_info)
+
+    wsgi_app = DeliveranceMiddleware(capp, '/theme.html', '/rules.xml', 
+                                     renderer_type)
+
+    # check that everything works straight up 
+    app = TestApp(wsgi_app)
+    res = app.get('/content.html')
+    res2 = app.get('/expected.html?notheme')
+    html_string_compare(res.body, res2.body)
+
+    # set some etags on the fixture 
+    theme_info.etag = "theme_etag"
+    rule_info.etag = "rule_etag"
+    content_info.etag = "content_etag"
+
+
+    # grab the page and make sure an etag comes back 
+    res = app.get('/content.html')
+    composite_etag = header_value(res.headers, 'etag')
+    assert(composite_etag is not None and len(composite_etag) > 0)
+
+    # check that deliverance gives 304 when the composite etag is given
+    res = app.get('/content.html', headers={'If-None-Match': composite_etag})
+    status = res.status
+    assert(status == 304)
+
+    theme_info.etag = 'something_else'
+    # check that deliverance rebuilds when one of the etags changes 
+    res = app.get('/content.html', headers={'If-None-Match': composite_etag})
+    status = res.status
+    # make sure the response etag changed 
+    assert(header_value(res.headers, 'etag') != composite_etag)
+    assert(status == 200)
+
+    # clear etags 
+    theme_info.etag = None
+    rule_info.etag = None
+    content_info.etag = None
+
+    # make sure there is no more etag 
+    res = app.get('/content.html')
+    composite_etag = header_value(res.headers, 'etag')
+    assert(composite_etag is None or len(composite_etag) == 0)
+
+    # test modification dates 
+    then = now() 
+    theme_info.mod_time = then - 10 
+    rule_info.mod_time = then - 20 
+    content_info.mod_time = then - 30 
+    # no If-Modified-Since sent: the page is built normally
+    res = app.get('/content.html')
+    status = res.status
+    assert(status == 200)
+    # date newer than every resource: nothing was modified
+    res = app.get('/content.html', 
+                  headers={'If-Modified-Since': formatdate(then)})
+    status = res.status
+    assert(status == 304)
+    # date older than every resource: everything was modified
+    res = app.get('/content.html', 
+                  headers={'If-Modified-Since': formatdate(then-60)})
+    status = res.status
+    assert(status == 200)
+    # date between the rule and theme times: only the theme is newer
+    res = app.get('/content.html', 
+                  headers={'If-Modified-Since': formatdate(then-15)})
+    status = res.status
+    assert(status == 200)
+
+
 RENDERER_TYPES = ['py', 'xslt']
-TEST_FUNCS = [ do_url, do_basic, do_text, do_tasktracker, do_xinclude, do_with_spaces, do_nycsr, do_necoro, do_guidesearch, do_ajax, do_aggregate ] 
+TEST_FUNCS = [ do_url, do_basic, do_text, do_tasktracker, do_xinclude, do_with_spaces, do_nycsr, do_necoro, do_guidesearch, do_ajax, do_aggregate, do_cache ] 
 def test_all():
     for renderer_type in RENDERER_TYPES:
         for test_func in TEST_FUNCS: 

Modified: z3/deliverance/branches/cache_aware/deliverance/wsgimiddleware.py
==============================================================================
--- z3/deliverance/branches/cache_aware/deliverance/wsgimiddleware.py	(original)
+++ z3/deliverance/branches/cache_aware/deliverance/wsgimiddleware.py	Fri Feb  2 20:17:28 2007
@@ -50,10 +50,6 @@
         self.app = app
         self.theme_uri = theme_uri
         self.rule_uri = rule_uri
-        self._renderer = None
-        self._cache_time = datetime.datetime.now()
-        self._timeout = datetime.timedelta(0,10)
-        self._lock = threading.Lock()
 
         if renderer == 'py':
             import interpreter
@@ -67,18 +63,7 @@
             self._rendererType = renderer
 
     def get_renderer(self, environ):
-        """
-        retrieve the deliverance Renderer representing the transformation this 
-        middlware represents. Renderer may change according to caching rules. 
-        """
-        try:
-            self._lock.acquire()
-            if not self._renderer or self.cache_expired():
-                self._renderer = self.create_renderer(environ)
-                self._cache_time = datetime.datetime.now()
-            return self._renderer
-        finally:
-            self._lock.release()
+        return self.create_renderer(environ)
 
     def create_renderer(self, environ):
         """
@@ -127,13 +112,6 @@
             rule_uri=self.rule_uri,
             reference_resolver=reference_resolver)
 
-        
-    def cache_expired(self):
-        """
-        returns true if the stored Renderer should be refreshed 
-        """
-        return self._cache_time + self._timeout < datetime.datetime.now()
-
     def rule(self, environ):
         """
         retrieves the data referred to by the rule_uri passed to the 
@@ -193,8 +171,6 @@
                 return body
 
             # perform actual themeing 
-            print "Doing themeing" 
-
             body = self.filter_body(environ, body)
 
             replace_header(headers, 'content-length', str(len(body)))
@@ -240,11 +216,15 @@
 
 
     def rebuild_check(self, environ, start_response): 
-        print "===== rebuild check ====="
         # perform the request for content  
 
         content_url = construct_url(environ)
 
+        etag_map = {}
+        if 'HTTP_IF_NONE_MATCH' in environ: 
+            etag_map = cache_utils.parse_merged_etag(environ['HTTP_IF_NONE_MATCH'])
+            environ['HTTP_IF_NONE_MATCH'] = etag_map.get(content_url,None)
+
         status, headers, body = intercept_output(environ, self.app,
                                                  self.should_intercept,
                                                  start_response)            
@@ -252,11 +232,9 @@
 
         if status is None: 
             # should_intercept says this isn't HTML, we're done
-            print "ignore non-html: %s" % construct_url(environ)
             return (None, None, body)
 
         if self.should_ignore_url(content_url): 
-            print "ignore blacklisted url: %s" % construct_url(environ)
             start_response(status, headers)
             return (None, None, [body])
 
@@ -265,11 +243,8 @@
         
         # it was modified or an error, give it back for themeing 
         if not status.startswith('304'): 
-            print "Content %s modified, continue..." % content_url 
-
             # if it's not a full HTML page, skip it 
             if not self.hasHTMLTag(body): 
-                print "ignore non-html-tagged: %s" % construct_url(environ)
                 start_response(status, headers)
                 return (None, None, [body])
 
@@ -278,11 +253,10 @@
             
         # got 304 Not Modified for content, check other resources 
         rules = etree.XML(self.rule(environ))
-        resources = self.get_resource_uris(rules)
-        if self.any_modified(environ, resources): 
+        resources = self.get_resource_uris(rules)        
+        if self.any_modified(environ, resources, etag_map): 
             # something changed, 
             # get the content explicitly and give it back 
-            print "explicitly requesting %s" % construct_url(environ)
             if 'HTTP_IF_MODIFIED_SINCE' in environ: 
                 environ['HTTP_IF_MODIFIED_SINCE'] = ''
             if 'HTTP_IF_NONE_MATCH' in environ: 
@@ -293,8 +267,6 @@
 
             if not self.hasHTMLTag(body): 
                 # XXX yarg, we didn't care about it!
-                print "ARG ignore non-html: status: %s, %s" % (status, construct_url(environ))
-                #print "Environ: " , environ , " Headers: ", headers 
                 start_response(status, headers)
                 return (None, None, [body])
 
@@ -302,7 +274,6 @@
             return (status, headers, body)
 
         # nothing was modified, give back a 304 
-        print "giving back 304: %s" % construct_url(environ)
         cache_utils.merge_cache_headers(environ, 
                                         environ[DELIVERANCE_CACHE], 
                                         headers)
@@ -310,7 +281,7 @@
 
         return (None,None,[])
         
-    def any_modified(self, environ, resources): 
+    def any_modified(self, environ, resources, etag_map): 
         """
         returns a tuple containing a boolean and map of uris to HTTP response headers.  
         the first value represents whether any resource in resources has been 
@@ -319,16 +290,10 @@
         second element of the tuple. 
         """
 
-        print "====== rebuild check ======"
         moddate = None
-        etag_map = {}
 
         if 'HTTP_IF_MODIFIED_SINCE' in environ: 
-            print "using modification date: %s" % environ['HTTP_IF_MODIFIED_SINCE']
             moddate = environ['HTTP_IF_MODIFIED_SINCE']            
-        if 'HTTP_IF_NONE_MATCH' in environ: 
-            print "using composite etag: %s" % environ['HTTP_IF_NONE_MATCH']
-            etag_map = cache_utils.parse_merged_etag(environ['HTTP_IF_NONE_MATCH'])
             
         for uri in resources:
             if (self.check_modification(environ, uri, 
@@ -348,10 +313,8 @@
         if uri in environ[DELIVERANCE_CACHE]: 
             response = environ[DELIVERANCE_CACHE][uri]
             if response[0].startswith('200'): 
-                print "using previously fetched content for %s" % uri 
                 return response[2]
 
-        print "fetching resource from scratch: %s" % uri 
         fetcher = self.get_fetcher(environ, uri)
         
         # eliminate validation headers, we want the content 
@@ -418,7 +381,6 @@
 
         """
 
-        print "[!] Checking modification for: [%s] w/ [%s,%s]" % (uri, httpdate_since, etag)
 
         fetcher = self.get_fetcher(environ, uri)
         
@@ -437,10 +399,6 @@
         status, headers, body = fetcher.wsgi_get()
         environ[DELIVERANCE_CACHE][uri] = (status, headers, body)
 
-        print "status was: [%s]" % status
-        if not (status.startswith('200') or status.startswith('304')): 
-            print "status(%s), environ => %s, headers => %s" % (status, fetcher.environ, headers)
-
         if status.startswith('304'): # Not Modified 
             return False 
 


More information about the z3-checkins mailing list