import os, marshal, sys, fcntl from hashtable import ChecksumHashTable class IndexDB(object): KEYLEN = 20 SIG = ">>IndexDB" def __init__(self, indices_dir): self.indices_dir = indices_dir self._load() def __getitem__(self, key): assert len(key) == self.KEYLEN version, = self.hashtable[key] filename = self.filelist[version][0] return filename def get(self, key, default=None): try: return self[key] except KeyError: return default def _load(self): fn_hashtable = os.path.join(self.indices_dir, "cache-hashtable") fn_filelist = os.path.join(self.indices_dir, "cache-filelist") if os.path.exists(fn_filelist) or os.path.exists(fn_hashtable): f = open(fn_filelist, 'rb') filelist = marshal.load(f) f.close() else: filelist = {} hashtable = ChecksumHashTable(fn_hashtable, self.KEYLEN, "i") hashtable.outdated = self._outdated_bucket self.hashtable = hashtable self.filelist = filelist self.fn_hashtable = fn_hashtable self.fn_filelist = fn_filelist def _outdated_bucket(self, bucket, index=None): return self.hashtable.bucketvalues(bucket)[0] not in self.filelist def clear(self): self.hashtable.close() del self.hashtable del self.filelist os.unlink(self.fn_hashtable) os.unlink(self.fn_filelist) self._load() def update(self): fd = self.hashtable.fileno() fcntl.flock(fd, fcntl.LOCK_EX) try: self._update() finally: fcntl.flock(fd, fcntl.LOCK_UN) def _update(self): # collect actual directory contents {filename: timestamp} modified = False dircontent = {} for filename in os.listdir(self.indices_dir): if filename.endswith('.sha'): filepath = os.path.join(self.indices_dir, filename) st = os.stat(filepath) dircontent[filename[:-4]] = st.st_mtime # build newfilelist from the filelist entries that are still up-to-date newfilelist = {} for version, (filename, timestamp) in self.filelist.items(): if dircontent.get(filename) == timestamp: newfilelist[version] = (filename, timestamp) del dircontent[filename] else: modified = True # parse and record all new/modified dircontent entries if dircontent: modified = True if self.filelist: lastversion = max(self.filelist) if lastversion + len(dircontent) >= sys.maxint: # overflow! self.clear() return self._update() else: lastversion = 0 dircontent = [(timestamp, filename) for filename, timestamp in dircontent.items()] dircontent.sort() # older files first for timestamp, filename in dircontent: lastversion += 1 version = lastversion filepath = os.path.join(self.indices_dir, filename + '.sha') info = filename, timestamp self.filelist[version] = info newfilelist[version] = info self._parse(filepath, version) # replace the filelist - in future calls, self._outdated_bucket() # will consider as outdated the entries whose version is not in the # newfilelist if modified: tmp = self.fn_filelist + '~' f = open(tmp, 'wb') marshal.dump(newfilelist, f, 0) f.close() os.rename(tmp, self.fn_filelist) self.filelist = newfilelist def _parse(self, filepath, version): print >> sys.stderr, 'Reading %s [%d]...' % (filepath, version) keylen = self.KEYLEN hashtable = self.hashtable values = (version,) try: f = open(filepath, 'rb') f.seek(-len(self.SIG), 2) endpos = f.tell() if endpos % keylen != 0: raise IOError("invalid file size") sig = f.read(len(self.SIG)) if sig != self.SIG: raise IOError("invalid signature") f.seek(0) blobsize = keylen * 512 while True: blob = f.read(blobsize) if not blob: break for j in range(0, len(blob), keylen): hashtable[blob[j:j+keylen]] = values f.close() except (IOError, OSError), e: print >> sys.stderr, " FAILED: %s" % (e, ) if __name__ == '__main__': db = IndexDB('./indices') db.update() err = 0 for key in sys.argv[1:]: try: print db[key] except KeyError: print >> sys.stderr, "%r no found" % (key,) err = 1 if err: sys.exit(err)