"""A lazily-read pickle-like file format for functional (immutable) objects. Use case: a single-file repository storing some data, with new data getting added to the file and no data getting mutated at any time. Reading and appending data is all done lazily. The advantage over using pickle.load/pickle.dump is that at start-up you don't have to wait until all data has been read and decoded in memory, and at shut-down you don't have to create a complete modified copy of the file. The assumption we make here is that all objects are NEVER going to be mutated after they are stored or loaded. The only mutation interface is setroot(x), which (if not already done) stores the object 'x' and all objects it references and then remembers 'x' as the object to be returned by the next call to getroot(). """ import os, struct, new class LazyObj(object): def __init__(self, fodb, pos): dct = getdict(self) dct['fodb'] = fodb dct['pos'] = pos def __getattribute__(self, name): dct = getdict(self) fodb = dct['fodb'] pos = dct['pos'] cls, newdct = fodb._load_object_desc(pos) object.__setattr__(self, '__class__', cls) object.__setattr__(self, '__dict__', newdct) return getattr(self, name) def __setattr__(self, name, value): raise AttributeError("only read-only attributes") def __delattr__(self, name): raise AttributeError("only read-only attributes") def getdict(x): return object.__getattribute__(x, '__dict__') class FODB(object): SIGNATURE = 'FODB0.1\n' HDR = "!q" HDREND = len(SIGNATURE) + struct.calcsize(HDR) def __init__(self, filename, model): self.filename = filename if isinstance(model, dict): m = new.module('FODB-model') m.__dict__.update(model) model = m self.model = model self._root = None self._f = None self._modified = False self._stringcache = {} self._stringposcache = {} self._objectcache = {} self._objectposcache = {} if os.path.exists(filename): self._load_initial_file() def close(self): if self._f is not None: self._f.close() self._f = None def getroot(self): return self._root def _modify(self): if not self._modified: if os.path.exists(self.filename): f = file(self.filename, 'r+b') else: f = file(self.filename, 'w+b') f.seek(0, 2) if f.tell() < self.HDREND: f.seek(0) f.write(self.SIGNATURE) f.write('\x00' * (self.HDREND - len(self.SIGNATURE))) if self._f is not None: self._f.close() self._f = f self._modified = True return self._f def setroot(self, newroot): if newroot is None: rootpos = 0 else: rootpos = self._putobj(newroot) f = self._modify() f.seek(len(self.SIGNATURE)) f.write(struct.pack(self.HDR, rootpos)) self._root = newroot def _load_initial_file(self): f = file(self.filename, 'rb') hdr = f.read(self.HDREND) if not hdr.startswith(self.SIGNATURE): raise ValueError("bad signature") rootpos, = struct.unpack(self.HDR, hdr[len(self.SIGNATURE):]) self._f = f if rootpos == 0: self._root = None else: self._root = self._getobj(rootpos) def _getobj(self, pos): try: return self._objectcache[pos] except KeyError: if pos < self.HDREND: raise ValueError("invalid object position") obj = LazyObj(self, pos) self._objectposcache[obj] = pos self._objectcache[pos] = obj return obj def _load_object_desc(self, pos): f = self._f f.seek(pos) clsname = load1(f, self) if type(clsname) is not str: raise ValueError("bad object format") cls = getattr(self.model, clsname) newdct = load1(f, self) if type(newdct) is not dict: raise ValueError("bad object format") return cls, newdct def _putobj(self, object): done = {} pending = [object] while pending: obj = pending.pop() if obj not in self._objectposcache: if obj not in done: pending.append(obj) list_references(obj.__dict__, pending) done[obj] = True else: cls = obj.__class__ if cls is not getattr(self.model, cls.__name__, None): raise TypeError("cannot find class '%s' in model" % ( cls.__name__,)) f = self._modify() f.seek(0, 2) origin = f.tell() store_str(f, self, cls.__name__) store_dict(f, self, obj.__dict__) self._objectposcache[obj] = origin self._objectcache[origin] = obj return self._objectposcache[object] def open(filename, model): return FODB(filename, model) # ____________________________________________________________ def load1(f, fodb): return LOADERS[f.read(1)](f, fodb) LOADERS = {} def load_posint(f, fodb=None): c = ord(f.read(1)) x = c & 0x7F while c >= 0x80: c = ord(f.read(1)) x = (x << 7) | (c & 0x7F) return x LOADERS['i'] = load_posint def load_posint7(f, c): x = c & 0x7F while c >= 0x80: c = ord(f.read(1)) x = (x << 7) | (c & 0x7F) return x for i in range(32): LOADERS[chr(i)] = lambda f, fodb, i=i: i LOADERS['N'] = lambda f, fodb: None LOADERS['0'] = lambda f, fodb: False LOADERS['1'] = lambda f, fodb: True LOADERS['-'] = lambda f, fodb: -load_posint(f) LOADERS['f'] = lambda f, fodb: struct.unpack("!f", f.read(4))[0] LOADERS['d'] = lambda f, fodb: struct.unpack("!d", f.read(8))[0] def load_string(f, fodb): p = f.tell() length = load_posint(f) s = f.read(length) fodb._stringcache[p] = s fodb._stringposcache[s] = p return s LOADERS['S'] = load_string def load_strref(f, fodb, c): p = load_posint7(f, c) try: return fodb._stringcache[p] except KeyError: p1 = f.tell() f.seek(p) s = load_string(f, fodb) f.seek(p1) return s for i in range(128, 256): LOADERS[chr(i)] = lambda f, fodb, c=i: load_strref(f, fodb, c) def load_list(f, fodb): length = load_posint(f) return [load1(f, fodb) for i in range(length)] LOADERS['L'] = load_list def load_tuple(f, fodb): return tuple(load_list(f, fodb)) LOADERS['T'] = load_tuple def load_dict(f, fodb): d = {} length = load_posint(f) for i in range(length): key = load1(f, fodb) value = load1(f, fodb) d[key] = value return d LOADERS['D'] = load_dict def load_objref(f, fodb): pos = load_posint(f) return fodb._getobj(pos) LOADERS['O'] = load_objref # ____________________________________________________________ def store1(f, fodb, x): STORERS.get(type(x), store_object)(f, fodb, x) STORERS = { type(None): lambda f, fodb, x: f.write('N'), bool: lambda f, fodb, x: f.write('01'[x]), } def store1_posint(f, x, flag=0): if x >= 0x80: store1_posint(f, x >> 7, 0x80) x &= 0x7F f.write(chr(x | flag)) def store1_posint7(f, x): if x >= 0x80: store1_posint(f, x) else: f.write(chr(0x80) + chr(x)) def store_int(f, fodb, x): if x < 0: f.write('-') store1_posint(f, -x) elif x < 32: f.write(chr(x)) else: f.write('i') store1_posint(f, x) STORERS[int] = store_int STORERS[long] = store_int def store_str(f, fodb, x): try: ref = fodb._stringposcache[x] except KeyError: f.write('S') pos = f.tell() fodb._stringposcache[x] = pos fodb._stringcache[pos] = x store1_posint(f, len(x)) f.write(x) else: store1_posint7(f, ref) STORERS[str] = store_str def store_list(f, fodb, x): f.write('L') store1_posint(f, len(x)) for item in x: store1(f, fodb, item) STORERS[list] = store_list def store_tuple(f, fodb, x): f.write('T') store1_posint(f, len(x)) for item in x: store1(f, fodb, item) STORERS[tuple] = store_tuple def store_dict(f, fodb, x): f.write('D') items = x.items() store1_posint(f, len(items)) for key, value in items: store1(f, fodb, key) store1(f, fodb, value) STORERS[dict] = store_dict def store_float(f, fodb, x): try: buf = struct.pack("!f", x) except OverflowError: pass else: if struct.unpack("!f", buf)[0] == x: # no precision lost f.write('f' + buf) return buf = struct.pack("!d", x) f.write('d' + buf) STORERS[float] = store_float def store_object(f, fodb, x): ref = fodb._objectposcache[x] f.write('O') store1_posint(f, ref) # ____________________________________________________________ ATOMIC_TYPES = dict.fromkeys([type(None), int, bool, float, str]) def list_references(x, result): tp = type(x) if tp in ATOMIC_TYPES: pass elif tp is dict: list_references(x.items(), result) elif tp in (list, tuple): for y in x: list_references(y, result) else: result.append(x)