[pypy-svn] r47084 - in pypy/dist/pypy: module/zlib module/zlib/test rlib rlib/test
arigo at codespeak.net
arigo at codespeak.net
Tue Oct 2 10:31:00 CEST 2007
Author: arigo
Date: Tue Oct 2 10:30:59 2007
New Revision: 47084
Modified:
pypy/dist/pypy/module/zlib/interp_zlib.py
pypy/dist/pypy/module/zlib/test/test_zlib.py
pypy/dist/pypy/rlib/rzlib.py
pypy/dist/pypy/rlib/test/test_rzlib.py
Log:
This mostly finishes the zlib module.
Modified: pypy/dist/pypy/module/zlib/interp_zlib.py
==============================================================================
--- pypy/dist/pypy/module/zlib/interp_zlib.py (original)
+++ pypy/dist/pypy/module/zlib/interp_zlib.py Tue Oct 2 10:30:59 2007
@@ -1,6 +1,7 @@
+import sys
from pypy.interpreter.gateway import ObjSpace, W_Root, interp2app
from pypy.interpreter.baseobjspace import Wrappable
-from pypy.interpreter.typedef import TypeDef
+from pypy.interpreter.typedef import TypeDef, interp_attrproperty
from pypy.interpreter.error import OperationError
from pypy.rlib.rarithmetic import intmask
@@ -61,8 +62,7 @@
try:
stream = rzlib.deflateInit(level)
except ValueError:
- raise OperationError(space.w_ValueError,
- space.wrap("Invalid initialization option"))
+ raise zlib_error(space, "Bad compression level")
try:
result = rzlib.compress(stream, string, rzlib.Z_FINISH)
finally:
@@ -78,16 +78,15 @@
decompress(string[, wbits[, bufsize]]) -- Return decompressed string.
Optional arg wbits is the window buffer size. Optional arg bufsize is
- the initial output buffer size.
+ only for compatibility with CPython and is ignored.
"""
try:
try:
stream = rzlib.inflateInit(wbits)
except ValueError:
- raise OperationError(space.w_ValueError,
- space.wrap("Invalid initialization option"))
+ raise zlib_error(space, "Bad window buffer size")
try:
- result = rzlib.decompress(stream, string, rzlib.Z_FINISH)
+ result, _, _ = rzlib.decompress(stream, string, rzlib.Z_FINISH)
finally:
rzlib.inflateEnd(stream)
except rzlib.RZlibError, e:
@@ -114,6 +113,7 @@
except ValueError:
raise OperationError(space.w_ValueError,
space.wrap("Invalid initialization option"))
+ self.lock = space.allocate_lock()
def __del__(self):
"""Automatically free the resources used by the stream."""
@@ -132,7 +132,12 @@
Call the flush() method to clear these buffers.
"""
try:
- result = rzlib.compress(self.stream, data)
+ lock = self.lock
+ lock.acquire(True)
+ try:
+ result = rzlib.compress(self.stream, data)
+ finally:
+ lock.release()
except rzlib.RZlibError, e:
raise zlib_error(self.space, e.msg)
return self.space.wrap(result)
@@ -152,7 +157,12 @@
compressed.
"""
try:
- result = rzlib.compress(self.stream, '', mode)
+ lock = self.lock
+ lock.acquire(True)
+ try:
+ result = rzlib.compress(self.stream, '', mode)
+ finally:
+ lock.release()
except rzlib.RZlibError, e:
raise zlib_error(self.space, e.msg)
return self.space.wrap(result)
@@ -198,6 +208,8 @@
inflateInit2.
"""
self.space = space
+ self.unused_data = ''
+ self.unconsumed_tail = ''
try:
self.stream = rzlib.inflateInit(wbits)
except rzlib.RZlibError, e:
@@ -205,6 +217,7 @@
except ValueError:
raise OperationError(space.w_ValueError,
space.wrap("Invalid initialization option"))
+ self.lock = space.allocate_lock()
def __del__(self):
"""Automatically free the resources used by the stream."""
@@ -222,14 +235,33 @@
no longer than max_length. Unconsumed input data will be stored in the
unconsumed_tail attribute.
"""
- if max_length != 0: # XXX
- raise OperationError(self.space.w_NotImplementedError,
- self.space.wrap("max_length != 0"))
+ if max_length == 0:
+ max_length = sys.maxint
+ elif max_length < 0:
+ raise OperationError(self.space.w_ValueError,
+ self.space.wrap("max_length must be "
+ "greater than zero"))
try:
- result = rzlib.decompress(self.stream, data)
+ lock = self.lock
+ lock.acquire(True)
+ try:
+ result = rzlib.decompress(self.stream, data,
+ max_length = max_length)
+ finally:
+ lock.release()
except rzlib.RZlibError, e:
raise zlib_error(self.space, e.msg)
- return self.space.wrap(result)
+
+ string, finished, unused_len = result
+ unused_start = len(data) - unused_len
+ assert unused_start >= 0
+ tail = data[unused_start:]
+ if finished:
+ self.unconsumed_tail = ''
+ self.unused_data = tail
+ else:
+ self.unconsumed_tail = tail
+ return self.space.wrap(string)
decompress.unwrap_spec = ['self', str, int]
@@ -264,6 +296,8 @@
__new__ = interp2app(Decompress___new__),
decompress = interp2app(Decompress.decompress),
flush = interp2app(Decompress.flush),
+ unused_data = interp_attrproperty('unused_data', Decompress),
+ unconsumed_tail = interp_attrproperty('unconsumed_tail', Decompress),
__doc__ = """decompressobj([wbits]) -- Return a decompressor object.
Optional arg wbits is the window buffer size.
Modified: pypy/dist/pypy/module/zlib/test/test_zlib.py
==============================================================================
--- pypy/dist/pypy/module/zlib/test/test_zlib.py (original)
+++ pypy/dist/pypy/module/zlib/test/test_zlib.py Tue Oct 2 10:30:59 2007
@@ -133,5 +133,45 @@
def test_decompress_invalid_input(self):
+ """
+ Try to feed garbage to zlib.decompress().
+ """
raises(self.zlib.error, self.zlib.decompress, self.compressed[:-2])
raises(self.zlib.error, self.zlib.decompress, 'foobar')
+
+
+ def test_unused_data(self):
+ """
+ Try to feed too much data to a decompressobj's decompress() method.
+ It should show up in the unused_data attribute.
+ """
+ d = self.zlib.decompressobj()
+ s = d.decompress(self.compressed + 'extrastuff')
+ assert s == self.expanded
+ assert d.unused_data == 'extrastuff'
+ # try again with several decompression steps
+ d = self.zlib.decompressobj()
+ s1 = d.decompress(self.compressed[:10])
+ assert d.unused_data == ''
+ s2 = d.decompress(self.compressed[10:-3])
+ assert d.unused_data == ''
+ s3 = d.decompress(self.compressed[-3:] + 'spam' * 100)
+ assert d.unused_data == 'spam' * 100
+ assert s1 + s2 + s3 == self.expanded
+ s4 = d.decompress('egg' * 50)
+ assert d.unused_data == 'egg' * 50
+ assert s4 == ''
+
+
+ def test_max_length(self):
+ """
+ Test the max_length argument of the decompress() method
+ and the corresponding unconsumed_tail attribute.
+ """
+ d = self.zlib.decompressobj()
+ data = self.compressed
+ for i in range(0, 100, 10):
+ s1 = d.decompress(data, 10)
+ assert s1 == self.expanded[i:i+10]
+ data = d.unconsumed_tail
+ assert not data
Modified: pypy/dist/pypy/rlib/rzlib.py
==============================================================================
--- pypy/dist/pypy/rlib/rzlib.py (original)
+++ pypy/dist/pypy/rlib/rzlib.py Tue Oct 2 10:30:59 2007
@@ -1,3 +1,4 @@
+import sys
from pypy.rpython.lltypesystem import rffi, lltype
from pypy.rpython.tool import rffi_platform
@@ -263,15 +264,24 @@
"""
# Warning, reentrant calls to the zlib with a given stream can cause it
# to crash. The caller of pypy.rlib.rzlib should use locks if needed.
- return _operate(stream, data, flush, False, _deflate, "while compressing")
-
-
-def decompress(stream, data, flush=Z_SYNC_FLUSH):
- """
- Feed more data into an inflate stream. Returns a string containing
- (a part of) the decompressed data. If flush != Z_NO_FLUSH, this also
- flushes the output data; see zlib.h or the documentation of the
- zlib module for the possible values of 'flush'.
+ data, _, avail_in = _operate(stream, data, flush, sys.maxint, _deflate,
+ "while compressing")
+ assert not avail_in, "not all input consumed by deflate"
+ return data
+
+
+def decompress(stream, data, flush=Z_SYNC_FLUSH, max_length=sys.maxint):
+ """
+ Feed more data into an inflate stream. Returns a tuple (string,
+ finished, unused_data_length). The string contains (a part of) the
+ decompressed data. If flush != Z_NO_FLUSH, this also flushes the
+ output data; see zlib.h or the documentation of the zlib module for
+ the possible values of 'flush'.
+
+ The 'string' is never longer than 'max_length'. The
+ 'unused_data_length' is the number of unprocessed input characters,
+ either because they are after the end of the compressed stream or
+ because processing them would cause the 'max_length' to be exceeded.
"""
# Warning, reentrant calls to the zlib with a given stream can cause it
# to crash. The caller of pypy.rlib.rzlib should use locks if needed.
@@ -284,11 +294,18 @@
should_finish = True
else:
should_finish = False
- return _operate(stream, data, flush, should_finish, _inflate,
- "while decompressing")
+ result = _operate(stream, data, flush, max_length, _inflate,
+ "while decompressing")
+ if should_finish:
+ # detect incomplete input in the Z_FINISH case
+ finished = result[1]
+ if not finished:
+ raise RZlibError("the input compressed stream of data is "
+ "incomplete")
+ return result
-def _operate(stream, data, flush, should_finish, cfunc, while_doing):
+def _operate(stream, data, flush, max_length, cfunc, while_doing):
"""Common code for compress() and decompress().
"""
# Prepare the input buffer for the stream
@@ -312,12 +329,19 @@
while True:
stream.c_next_out = rffi.cast(Bytefp, outbuf)
- rffi.setintfield(stream, 'c_avail_out', OUTPUT_BUFFER_SIZE)
+ bufsize = OUTPUT_BUFFER_SIZE
+ if max_length < bufsize:
+ if max_length <= 0:
+ err = Z_OK
+ break
+ bufsize = max_length
+ max_length -= bufsize
+ rffi.setintfield(stream, 'c_avail_out', bufsize)
err = cfunc(stream, flush)
if err == Z_OK or err == Z_STREAM_END:
# accumulate data into 'result'
avail_out = rffi.cast(lltype.Signed, stream.c_avail_out)
- for i in xrange(OUTPUT_BUFFER_SIZE - avail_out):
+ for i in xrange(bufsize - avail_out):
result.append(outbuf[i])
# if the output buffer is full, there might be more data
# so we need to try again. Otherwise, we're done.
@@ -335,7 +359,7 @@
# the output buffer was full but there wasn't more
# output when we tried again, so it is not an error
# condition.
- if avail_out == OUTPUT_BUFFER_SIZE:
+ if avail_out == bufsize:
break
# fallback case: report this error
@@ -347,10 +371,8 @@
lltype.free(inbuf, flavor='raw')
# When decompressing, if the compressed stream of data was truncated,
- # then the zlib simply returns Z_OK and waits for more. Let's detect
- # this situation and complain.
- if should_finish and err != Z_STREAM_END:
- raise RZlibError("the input compressed stream of data is not complete")
-
- assert not stream.c_avail_in, "not all input consumed by deflate/inflate"
- return ''.join(result)
+ # then the zlib simply returns Z_OK and waits for more. If it is
+ # complete it returns Z_STREAM_END.
+ return (''.join(result),
+ err == Z_STREAM_END,
+ rffi.cast(lltype.Signed, stream.c_avail_in))
Modified: pypy/dist/pypy/rlib/test/test_rzlib.py
==============================================================================
--- pypy/dist/pypy/rlib/test/test_rzlib.py (original)
+++ pypy/dist/pypy/rlib/test/test_rzlib.py Tue Oct 2 10:30:59 2007
@@ -121,10 +121,14 @@
should allow us to decompress bytes.
"""
stream = rzlib.inflateInit()
- bytes = rzlib.decompress(stream, compressed)
- bytes += rzlib.decompress(stream, "", rzlib.Z_FINISH)
+ bytes1, finished1, unused1 = rzlib.decompress(stream, compressed)
+ bytes2, finished2, unused2 = rzlib.decompress(stream, "", rzlib.Z_FINISH)
rzlib.inflateEnd(stream)
- assert bytes == expanded
+ assert bytes1 + bytes2 == expanded
+ assert finished1 is True
+ assert finished2 is True
+ assert unused1 == 0
+ assert unused2 == 0
def test_decompression_lots_of_data():
@@ -135,9 +139,12 @@
compressed = zlib.compress(expanded)
print len(compressed), '=>', len(expanded)
stream = rzlib.inflateInit()
- bytes = rzlib.decompress(stream, compressed, rzlib.Z_FINISH)
+ bytes, finished, unused = rzlib.decompress(stream, compressed,
+ rzlib.Z_FINISH)
rzlib.inflateEnd(stream)
assert bytes == expanded
+ assert finished is True
+ assert unused == 0
def test_decompression_truncated_input():
@@ -149,11 +156,75 @@
compressed = zlib.compress(expanded)
print len(compressed), '=>', len(expanded)
stream = rzlib.inflateInit()
- data = rzlib.decompress(stream, compressed[:1000])
+ data, finished1, unused1 = rzlib.decompress(stream, compressed[:1000])
assert expanded.startswith(data)
- data += rzlib.decompress(stream, compressed[1000:2000])
+ assert finished1 is False
+ assert unused1 == 0
+ data2, finished2, unused2 = rzlib.decompress(stream, compressed[1000:2000])
+ data += data2
+ assert finished2 is False
+ assert unused2 == 0
assert expanded.startswith(data)
py.test.raises(rzlib.RZlibError,
rzlib.decompress, stream, compressed[2000:-500],
rzlib.Z_FINISH)
rzlib.inflateEnd(stream)
+
+
+def test_decompression_too_much_input():
+ """
+ Check the case where we feed extra data to decompress().
+ """
+ stream = rzlib.inflateInit()
+ data1, finished1, unused1 = rzlib.decompress(stream, compressed[:-5])
+ assert finished1 is False
+ assert unused1 == 0
+ data2, finished2, unused2 = rzlib.decompress(stream,
+ compressed[-5:] + 'garbage')
+ assert finished2 is True
+ assert unused2 == len('garbage')
+ assert data1 + data2 == expanded
+ data3, finished3, unused3 = rzlib.decompress(stream, 'more_garbage')
+ assert finished3 is True
+ assert unused3 == len('more_garbage')
+ assert data3 == ''
+
+
+def test_decompress_max_length():
+ """
+ Test the max_length argument of decompress().
+ """
+ stream = rzlib.inflateInit()
+ data1, finished1, unused1 = rzlib.decompress(stream, compressed,
+ max_length = 17)
+ assert data1 == expanded[:17]
+ assert finished1 is False
+ assert unused1 > 0
+ data2, finished2, unused2 = rzlib.decompress(stream, compressed[-unused1:])
+ assert data2 == expanded[17:]
+ assert finished2 is True
+ assert unused2 == 0
+
+
+def test_cornercases():
+ """
+ Test degenerate arguments.
+ """
+ stream = rzlib.deflateInit()
+ bytes = rzlib.compress(stream, "")
+ bytes += rzlib.compress(stream, "")
+ bytes += rzlib.compress(stream, "", rzlib.Z_FINISH)
+ assert zlib.decompress(bytes) == ""
+
+ stream = rzlib.inflateInit()
+ data, finished, unused = rzlib.decompress(stream, "")
+ assert data == ""
+ assert finished is False
+ assert unused == 0
+ buf = compressed
+ for i in range(10):
+ data, finished, unused = rzlib.decompress(stream, buf, max_length=0)
+ assert data == ""
+ assert finished is False
+ assert unused > 0
+ buf = buf[-unused:]
More information about the pypy-svn
mailing list