[wwwsearch-commits] r21667 - in wwwsearch/ClientCookie/trunk: ClientCookie test

jjlee at codespeak.net jjlee at codespeak.net
Tue Jan 3 02:38:07 CET 2006


Author: jjlee
Date: Tue Jan  3 02:38:06 2006
New Revision: 21667

Modified:
   wwwsearch/ClientCookie/trunk/ClientCookie/_Util.py
   wwwsearch/ClientCookie/trunk/test/test_misc.py
Log:
Allow copying seek_wrapper, with copies having independent seek positions

Modified: wwwsearch/ClientCookie/trunk/ClientCookie/_Util.py
==============================================================================
--- wwwsearch/ClientCookie/trunk/ClientCookie/_Util.py	(original)
+++ wwwsearch/ClientCookie/trunk/ClientCookie/_Util.py	Tue Jan  3 02:38:06 2006
@@ -15,7 +15,7 @@
 
 import re, string, time
 from types import TupleType
-from StringIO import StringIO
+from cStringIO import StringIO
 
 try:
     from exceptions import StopIteration
@@ -399,23 +399,27 @@
     particular file object.
 
     """
-    # General strategy is to check that cache is full enough, then delegate
-    # everything to the cache (self._cache, which is a StringIO.StringIO
-    # instance.  Seems to be some cStringIO.StringIO problem on 1.5.2 -- I
-    # get a StringOobject, with no readlines method.
+    # General strategy is to check that cache is full enough, then delegate to
+    # the cache (self.__cache, which is a cStringIO.StringIO instance).  A seek
+    # position (self.__pos) is maintained independently of the cache, in order
+    # that a single cache may be shared between multiple seek_wrapper objects.
+    # Copying with the copy module (copy.copy) shares the cache in this way.
 
-    # XXX Does this work sensibly in the face of exceptions raised by .read()
-    # / .readline()??
+    # XXX Does this class work sensibly in the face of exceptions raised by
+    # .read() / .readline() / .readlines() / .seek()??
 
     def __init__(self, wrapped):
         self.wrapped = wrapped
         self.__have_readline = hasattr(self.wrapped, "readline")
         self.__cache = StringIO()
+        self.__pos = 0  # seek position
 
     def invariant(self):
+        return True  # XXX anything better??
+        # this was the old condition:
         # The end of the cache is always at the same place as the end of the
         # wrapped file.
-        return self.wrapped.tell() == len(self.__cache.getvalue())
+        #return self.wrapped.tell() == len(self.__cache.getvalue())
 
     def __getattr__(self, name):
         wrapped = self.__dict__.get("wrapped")
@@ -425,59 +429,78 @@
 
     def seek(self, offset, whence=0):
         assert whence in [0,1,2]
-        # make sure we have read all data up to the point we are seeking to
-        pos = self.__cache.tell()
 
         # how much data, if any, do we need to read?
         if whence == 2:  # 2: relative to end of *wrapped* file
+            if offset < 0: raise ValueError("negative seek offset")
             # since we don't know yet where the end of that file is, we must
             # read everything
             to_read = None
         else:
             if whence == 0:  # 0: absolute
-                want = offset - pos
+                if offset < 0: raise ValueError("negative seek offset")
+                dest = offset
             else:  # 1: relative to current position
-                want = offset
+                pos = self.__pos
+                if pos < offset:
+                    raise ValueError("seek to before start of file")
+                dest = pos + offset
             end = len(self.__cache.getvalue())
-            available = end - pos
-            if want <= available:
+            to_read = dest - end
+            if to_read < 0:
                 to_read = 0
-            else:
-                to_read = want - available
 
         if to_read != 0:
             self.__cache.seek(0, 2)
             if to_read is None:
+                assert whence == 2
                 self.__cache.write(self.wrapped.read())
+                self.__pos = self.__cache.tell() - offset
             else:
                 self.__cache.write(self.wrapped.read(to_read))
-            self.__cache.seek(pos)
-
-        return self.__cache.seek(offset, whence)
+                # Don't raise an exception even if we've seek()ed past the end
+                # of .wrapped, since fseek() doesn't complain in that case.
+                # Also like fseek(), pretend we have seek()ed past the end,
+                # i.e. not:
+                #self.__pos = self.__cache.tell()
+                # but rather:
+                self.__pos = dest
+        else:
+            self.__pos = dest
 
     def tell(self):
-        return self.__cache.tell()
+        return self.__pos
+
+    def __copy__(self):
+        cpy = self.__class__(self.wrapped)
+        cpy.__cache = self.__cache
+        return cpy
 
     def read(self, size=-1):
-        pos = self.__cache.tell()
+        pos = self.__pos
         end = len(self.__cache.getvalue())
         available = end - pos
 
         # enough data already cached?
         if size <= available and size != -1:
+            self.__cache.seek(pos)
+            self.__pos = pos+size
             return self.__cache.read(size)
 
         # no, so read sufficient data from wrapped file and cache it
-        to_read = size - available
-        assert to_read > 0 or size == -1
         self.__cache.seek(0, 2)
         if size == -1:
             self.__cache.write(self.wrapped.read())
         else:
+            to_read = size - available
+            assert to_read > 0
             self.__cache.write(self.wrapped.read(to_read))
         self.__cache.seek(pos)
 
-        return self.__cache.read(size)
+        data = self.__cache.read(size)
+        self.__pos = self.__cache.tell()
+        assert self.__pos == pos + len(data)
+        return data
 
     def readline(self, size=-1):
         if not self.__have_readline:
@@ -485,7 +508,7 @@
 
         # line we're about to read might not be complete in the cache, so
         # read another line first
-        pos = self.__cache.tell()
+        pos = self.__pos
         self.__cache.seek(0, 2)
         self.__cache.write(self.wrapped.readline())
         self.__cache.seek(pos)
@@ -493,20 +516,20 @@
         data = self.__cache.readline()
         if size != -1:
             r = data[:size]
-            self.__cache.seek(pos+size)
+            self.__pos = pos+size
         else:
             r = data
+            self.__pos = pos+len(data)
         return r
 
     def readlines(self, sizehint=-1):
-        pos = self.__cache.tell()
+        pos = self.__pos
         self.__cache.seek(0, 2)
         self.__cache.write(self.wrapped.read())
         self.__cache.seek(pos)
-        try:
-            return self.__cache.readlines(sizehint)
-        except TypeError:  # 1.5.2 hack
-            return self.__cache.readlines()
+        data = self.__cache.readlines(sizehint)
+        self.__pos = self.__cache.tell()
+        return data
 
     def __iter__(self): return self
     def next(self):

Modified: wwwsearch/ClientCookie/trunk/test/test_misc.py
==============================================================================
--- wwwsearch/ClientCookie/trunk/test/test_misc.py	(original)
+++ wwwsearch/ClientCookie/trunk/test/test_misc.py	Tue Jan  3 02:38:06 2006
@@ -26,6 +26,7 @@
 
 class SeekableTests(TestCase):
     def testSeekable(self):
+        import copy
         try:
             from exceptions import StopIteration
         except ImportError:
@@ -87,7 +88,7 @@
         sfh = seek_wrapper(fh)
         sfh.read(25)
         sfh.seek(-1, 1)
-        assert sfh.readlines() == ["s over the lazy\n"]+text_lines[2:]
+        self.assertEqual(sfh.readlines(), ["s over the lazy\n"]+text_lines[2:])
         nr_logs = len(fh.log)
         sfh.seek(0)
         assert sfh.readlines() == text_lines
@@ -113,6 +114,18 @@
         self.assert_(sfh.invariant())
         sfh.seek(0, 2)
         self.assert_(sfh.invariant())
+        sfh.seek(0)
+        self.assertEqual(sfh.read(), text)
+
+        # copies have independent seek positions
+        fh = TestUnSeekable(text)
+        sfh = seek_wrapper(fh)
+        sfh2 = copy.copy(sfh)
+        sfh.read(10)
+        self.assertEqual(sfh2.read(10), text[:10])
+        sfh2.seek(5)
+        self.assertEqual(sfh.read(10), text[10:20])
+        self.assertEqual(sfh2.read(10), text[5:15])
 
 
 if __name__ == "__main__":


More information about the wwwsearch-commits mailing list