#! /usr/bin/env python
"""Stream entries out of a zip archive using only the local file headers.

Unlike ``zipfile.ZipFile`` this never seeks to the central directory, so it
works on non-seekable inputs; reading stops at the first central-directory
record.  Entries written with a trailing data descriptor (sizes stored
after the data) are not supported.

The code runs unchanged on Python 2.6+ and Python 3.
"""

import zipfile
import zlib
import binascii
import struct


class FileTruncatedError(IOError):
    """Raised when the input ends in the middle of an entry.

    The single argument is the name of the entry being read, or ``None``
    when the truncation happened before the name was known.
    """


BadZipfile = zipfile.BadZipfile


def _name_to_text(name):
    """Return an entry name as a native ``str`` for messages and paths.

    On Python 2 the name already is a ``str`` and is returned untouched.
    On Python 3 the raw bytes are decoded as UTF-8, falling back to cp437
    (the legacy zip encoding, which can decode any byte sequence).
    """
    if isinstance(name, str):
        return name
    try:
        return name.decode('utf-8')
    except UnicodeDecodeError:
        return name.decode('cp437')


def read_one_entry(fp):
    """Read the next entry from *fp*, positioned at a local file header.

    Returns a ``(name, data)`` tuple holding the entry name exactly as
    stored in the archive (a byte string) and the decompressed contents,
    or ``None`` when the central directory is reached, i.e. there are no
    more entries.

    Raises ``FileTruncatedError`` if *fp* runs out of data mid-entry and
    ``BadZipfile`` on a bad magic number, an unsupported compression
    method, or a CRC-32 mismatch.
    """
    fname = None

    def _read(size):
        # Closure over ``fname`` so a truncation error names the entry
        # once its name has been read.
        data = fp.read(size)
        if len(data) < size:
            raise FileTruncatedError(fname)
        return data

    magic = _read(4)
    if magic == zipfile.stringCentralDir:
        return None
    if magic != zipfile.stringFileHeader:
        raise BadZipfile("Bad magic number %r for file header" % magic)

    # The magic number is part of the fixed-size header; read the rest.
    rest_size = struct.calcsize(zipfile.structFileHeader) - len(magic)
    fheader = struct.unpack(zipfile.structFileHeader,
                            magic + _read(rest_size))

    fname = _read(fheader[zipfile._FH_FILENAME_LENGTH])
    _read(fheader[zipfile._FH_EXTRA_FIELD_LENGTH])  # extra field: skipped
    payload = _read(fheader[zipfile._FH_COMPRESSED_SIZE])

    method = fheader[zipfile._FH_COMPRESSION_METHOD]
    if method == zipfile.ZIP_STORED:
        pass
    elif method == zipfile.ZIP_DEFLATED:
        # Zip members are raw deflate streams (no zlib header): wbits=-15.
        inflater = zlib.decompressobj(-15)
        payload = inflater.decompress(payload)
        # need to feed in unused pad byte so that zlib won't choke
        tail = inflater.decompress(b'Z') + inflater.flush()
        if tail:
            payload = payload + tail
    else:
        raise BadZipfile("Unsupported compression method %d for file %s"
                         % (method, _name_to_text(fname)))

    # crc32() may return a negative value on Python 2, while the header
    # field is unpacked as unsigned; mask so both sides are comparable.
    crc = binascii.crc32(payload) & 0xffffffff
    if crc != fheader[zipfile._FH_CRC]:
        raise BadZipfile("Bad CRC-32 for file %s" % _name_to_text(fname))
    return fname, payload


def enum_zip_file(file):
    """Yield ``(name, data)`` for every entry of a zip archive, in order.

    *file* is either an object with a ``read`` method, which is used as is
    and left open, or a path, which is opened in binary mode and closed
    when iteration finishes, fails, or the generator is discarded.

    Propagates the exceptions raised by ``read_one_entry``.
    """
    if hasattr(file, 'read'):
        fp = file
        close = False
    else:
        fp = open(file, 'rb')
        close = True
    try:
        while True:
            entry = read_one_entry(fp)
            if entry is None:
                break
            yield entry
            # Drop our reference so the previous entry's data can be
            # freed before the next one is read.
            del entry
    finally:
        if close:
            fp.close()


def _main():
    """List the archive named on the command line and extract its files.

    Entries with no data (directories and empty files) are listed but not
    written.  Entries whose name is absolute or contains a ``..``
    component are refused so an archive cannot write outside the current
    directory.
    """
    import sys
    import os

    if len(sys.argv) != 2:
        sys.stderr.write('usage: %s ZIPFILE\n' % (sys.argv[0],))
        sys.exit(2)
    try:
        for raw_name, data in enum_zip_file(sys.argv[1]):
            name = _name_to_text(raw_name)
            print('%10d %s' % (len(data), name))
            if not data:
                continue
            components = name.replace('\\', '/').split('/')
            if (os.path.isabs(name) or name.startswith('/')
                    or '..' in components):
                sys.stderr.write('skipping unsafe path: %s\n' % (name,))
                continue
            parent = os.path.dirname(name)
            if parent and not os.path.isdir(parent):
                os.makedirs(parent)
            with open(name, 'wb') as out:
                out.write(data)
    except (IOError, OSError, BadZipfile) as e:
        sys.stderr.write('%s\n' % (e,))
        sys.exit(1)


if __name__ == '__main__':
    _main()