"""Print the distinct #pypy IRC topics that stayed set long enough.

The script opens an SSH gateway to the host named by SSH, scans the log
files under REMOTE_PATH there for "Topic changed on #pypy" lines, and
prints every topic text that remained in place for at least
MIN_ALIVE_TIME seconds.  Each text is printed at most once; the
comparison ignores case.
"""

import py
import datetime

SSH = 'wyvern'
REMOTE_PATH = '/var/www/localhost/htdocs/irc-logs/pypy'

# Log file names must end in YYYYMMDD; the three groups are year, month, day.
R_BASENAME = r'.*(2\d\d\d)(\d\d)(\d\d)$'

# Groups: hour, minute, and whatever follows the "(irc-logs: http://...)"
# part of the topic.  Raw strings keep the backslashes literal without
# relying on Python passing unknown escape sequences through unchanged.
R_TOPIC_CHANGE = (r'\[(\d+):(\d+)\] Topic changed on #pypy by [^:]+: '
                  r'[^(]+\(irc-logs: http://[^)]+\)(.*)')

MIN_ALIVE_TIME = 30*60     # half an hour

# The source below runs on the remote side.  It receives the directory and
# the two patterns over the channel, then sends back one
# (year, month, day, hour, minute, text) tuple per matching log line, with
# the files visited in sorted name order.  The code is kept at column 0 so
# that it is valid Python whether or not the gateway dedents it.
channel = py.execnet.SshGateway(SSH).remote_exec("""
import os, re
path = channel.receive()
r_basename = re.compile(channel.receive())
r_topic_change = re.compile(channel.receive())
files = os.listdir(path)
files.sort()
for basename in files:
    match1 = r_basename.match(basename)
    if match1:
        f = open(os.path.join(path, basename))
        lines = f.readlines()
        f.close()
        for line in lines:
            match = r_topic_change.match(line)
            if match:
                channel.send(match1.groups() + match.groups())
""")
channel.send(REMOTE_PATH)
channel.send(R_BASENAME)
channel.send(R_TOPIC_CHANGE)

# Parallel lists: starttime[i] is the moment texts[i] became the topic.
starttime = []
texts = []
for year, month, day, hour, minute, text in channel:
    # Drop surrounding whitespace and any leading '|' separators.
    text = text.strip().lstrip('|').strip()
    if text:
        starttime.append(datetime.datetime(int(year), int(month), int(day),
                                           int(hour), int(minute)))
        texts.append(text)

# A topic ends when the next recorded one starts; the last one is treated
# as still running now.
endtime = starttime[1:] + [datetime.datetime.now()]

min_alive = datetime.timedelta(seconds=MIN_ALIVE_TIME)
seen = set()    # upper-cased texts that were already printed
for start, end, text in zip(starttime, endtime, texts):
    if end - start >= min_alive:
        key = text.upper()
        if key not in seen:
            seen.add(key)
            # Parenthesised single argument: same output under the Python 2
            # print statement and the Python 3 print function.
            print(text)