"""A disk-based balanced tree (an AVL tree, more precisely). Works like a dictionary whose keys are any marshallable Python objects. The values must be strictly typed, though, as in hashtable.py. The keys are ordered, so that this data structure supports searching for which key is immediately before or after another, enumerating the keys in order, etc. """ import sys, os, struct import marshal if sys.version >= (2, 4): mdump = lambda x, f: marshal.dump(x, f, 0) else: mdump = marshal.dump mload = marshal.load deeperside = 0x00000001 linkmask = ~deeperside baselinksize = struct.calcsize("!ii") class ValueTree(object): SIG = ~0xa9ab def __init__(self, filename, valuefmt): self.filename = filename self.linkfmt = "!ii" + valuefmt self.linksize = struct.calcsize(self.linkfmt) def open_file(self): try: self.f = open(self.filename, 'r+b') except (IOError, OSError): if os.path.exists(self.filename): raise self.rootpos = 0 self.f = open(self.filename, 'w+b') self.f.write(struct.pack("!ii", self.SIG, self.rootpos)) else: header = self.f.read(baselinksize) sig, self.rootpos = struct.unpack("!ii", header) assert sig == self.SIG def close(self): if 'f' in self.__dict__: f = self.f self.f = None f.close() def __getattr__(self, attr): if attr in ('f', 'rootpos'): self.open_file() return getattr(self, attr) else: raise AttributeError("%s object has no attribute %r" % ( self.__class__.__name__, attr)) def lookup(self, key): f = self.f p = self.rootpos linksize = self.linksize path = [] while p: f.seek(p, 0) linkinfo = struct.unpack(self.linkfmt, f.read(linksize)) key1 = mload(f) diff = cmp(key, key1) path.append((p, linkinfo, diff, key1)) if not diff: return path, True # found p = linkinfo[diff > 0] & linkmask # enter the left/right subtree return path, False def __getitem__(self, key): path, found = self.lookup(key) if found: p, linkinfo, diff, key1 = path[-1] return linkinfo[2:] else: raise KeyError(key) def __contains__(self, key): path, found = self.lookup(key) return found def iteritems(self, reverse=False): """Iterates over the (key, values) in order.""" return iter_vt(self._read_node_item, self.rootpos, [], reverse) def iteritemsfrom(self, startkey, included=True, reverse=False): """Iterates over the (key, values) in order, starting with 'startkey' (or whatever key would be just after if 'startkey' is not in the tree or if included=False).""" path, found = self.lookup(startkey) startp = 0 if path: if found and not included: p, linkinfo, diff, key1 = path.pop() startp = linkinfo[not reverse] & linkmask if reverse: drop = -1 else: drop = 1 path = [((key1, linkinfo[2:]), linkinfo[not reverse] & linkmask) for p, linkinfo, diff, key1 in path if diff != drop] return iter_vt(self._read_node_item, startp, path, reverse) def __setitem__(self, key, values): f = self.f path, found = self.lookup(key) if found: # key already exists, just replace value p, linkinfo, diff, key1 = path[-1] f.seek(p, 0) f.write(struct.pack(self.linkfmt, linkinfo[0], linkinfo[1], *values)) else: # make a new node f.seek(0, 2) p = f.tell() prefix = '\x00' * ((-p) & 3) # some padding for alignment f.write(prefix + struct.pack(self.linkfmt, 0, 0, *values)) mdump(key, f) childp = p + len(prefix) childlinkinfo = [0, 0] balanced = False # link it into the tree while path: p, linkinfo, diff, key1 = path.pop() linkinfo = [linkinfo[0], linkinfo[1]] childwise = diff > 0 if balanced: # write the final node where it belongs, if needed, # and stop link = childp | (linkinfo[childwise] & deeperside) linkinfo[childwise] = link f.seek(p, 0) f.write(struct.pack("!ii", *linkinfo)) break if linkinfo[childwise] & deeperside: # hard case: a tree rotation is required # http://sky.fit.qut.edu.au/~maire/avl/System/AVLTree.html t1 = childlinkinfo[childwise] if t1 & deeperside: # a single L or R rotation does the trick # (cases LL or RR at the url above) t2 = childlinkinfo[not childwise] # not deeper linkinfo[childwise] = t2 childlinkinfo[not childwise] = p childlinkinfo[childwise] = t1 & linkmask f.seek(p, 0) f.write(struct.pack("!ii", *linkinfo)) p = childp linkinfo = childlinkinfo childlinkinfo = childchildlinkinfo else: wp = childlinkinfo[not childwise] # deeper assert wp & deeperside wp &= linkmask wlinkinfo = childchildlinkinfo t2 = wlinkinfo[childwise] t3 = wlinkinfo[not childwise] if t2 & deeperside: t2 &= linkmask linkinfo[not childwise] |= deeperside elif t3 & deeperside: t3 &= linkmask childlinkinfo[childwise] |= deeperside linkinfo[childwise] = t3 childlinkinfo[not childwise] = t2 wlinkinfo[childwise] = childp wlinkinfo[not childwise] = p f.seek(childp, 0) f.write(struct.pack("!ii", *childlinkinfo)) f.seek(p, 0) f.write(struct.pack("!ii", *linkinfo)) p = wp linkinfo = wlinkinfo #childlinkinfo = childlinkinfo balanced = True elif linkinfo[not childwise] & deeperside: # the tree is now balanced linkinfo[not childwise] &= linkmask linkinfo[childwise] = childp f.seek(p, 0) f.write(struct.pack("!ii", *linkinfo)) break # done else: # still unbalanced, go on linkinfo[childwise] = childp | deeperside f.seek(p, 0) f.write(struct.pack("!ii", *linkinfo)) childp = p childchildlinkinfo = childlinkinfo childlinkinfo = linkinfo else: # update the absolute root self.rootpos = childp f.seek(0, 0) f.write(struct.pack("!ii", self.SIG, self.rootpos)) def locate_key(self, key): "Return the node position with the given key." path, found = self.lookup(key) if found: p, linkinfo, diff, key1 = path[-1] return p else: raise KeyError(key) def load_node(self, nodepos): "Return the (key, values) stored in the node at the given position." f = self.f f.seek(nodepos, 0) linkinfo = struct.unpack(self.linkfmt, f.read(self.linksize)) key = mload(f) return key, linkinfo[2:] def _read_node_item(self, p): f = self.f f.seek(p, 0) linkinfo = struct.unpack(self.linkfmt, f.read(self.linksize)) key = mload(f) item = key, linkinfo[2:] links = linkinfo[0] & linkmask, linkinfo[1] & linkmask return item, links def iter_vt(read_node, p, pending, reverse=False): while True: while p: result, links = read_node(p) p = links[reverse] q = links[not reverse] pending.append((result, q)) if not pending: break result, p = pending.pop() yield result