#! /usr/bin/env python
"""Extract garbage-collection statistics from a GC debug log into CSV files.

Usage: <script> LOGFILE

Two files are written next to LOGFILE:

* ``LOGFILE.gccollect.csv`` -- one ``stamp,used_before,used_after`` row per
  "Full collection" summary found in the log.  ``used_*`` is the sum of the
  "in ArenaCollection" and "raw_malloced" byte counts of that summary.
* ``LOGFILE.tasks.csv`` -- one ``stamp,1,task_name`` row per
  ``{gc-collect-task ... gc-collect-task}`` section.

``stamp`` is the hexadecimal value found in square brackets in front of the
closing marker of each section, converted to decimal (presumably a
timestamp -- confirm against the log producer).

The code deliberately sticks to constructs that run unchanged on both
Python 2 and Python 3.
"""
import re
import sys

# One fragment per line of the "Full collection" summary box.  Fragments are
# joined with ``\s*`` and column padding is matched with ``\s+`` so that the
# pattern works regardless of how much whitespace separates the pieces
# (including none at all between lines once newlines have been stripped).
# The lengths of the dash rulers are decorative, so any length is accepted.
_COLLECT_FRAGMENTS = (
    r'-+ Full collection -+',
    r'\|\s+used before collection:',
    r'\|\s+in ArenaCollection:\s+(\d+)\s+bytes',
    r'\|\s+raw_malloced:\s+(\d+)\s+bytes',
    r'\|\s+used after collection:',
    r'\|\s+in ArenaCollection:\s+(\d+)\s+bytes',
    r'\|\s+raw_malloced:\s+(\d+)\s+bytes',
    r'\|\s+number of major collects:\s+(\d+)',
    r'`-+',
    r'\[([0-9a-f]+)\]\s+gc-collect\}',
)
COLLECT_RE = re.compile(r'\s*'.join(_COLLECT_FRAGMENTS))

# Same construction for a ``{gc-collect-task ... gc-collect-task}`` section.
_TASK_FRAGMENTS = (
    r'\{gc-collect-task',
    r'starting\s+([\w-]+)',
    r'\[([0-9a-f]+)\]\s+gc-collect-task\}',
)
TASK_RE = re.compile(r'\s*'.join(_TASK_FRAGMENTS))


def parse_collections(data):
    """Return ``[(stamp, used_before, used_after), ...]`` found in *data*.

    *data* is the log text (with or without its newlines).  All three tuple
    members are ints; ``used_before`` / ``used_after`` add up the
    ArenaCollection and raw_malloced byte counts.  The "number of major
    collects" value is matched but not reported.
    """
    return [
        (int(stamp, 16),
         int(arena_before) + int(raw_before),
         int(arena_after) + int(raw_after))
        for (arena_before, raw_before, arena_after, raw_after,
             _major_collects, stamp) in COLLECT_RE.findall(data)
    ]


def parse_tasks(data):
    """Return ``[(stamp, task_name), ...]`` for every task section in *data*.

    ``stamp`` is the bracketed hex value before the closing marker, as an
    int; ``task_name`` is the word following ``starting``.
    """
    return [(int(stamp, 16), name) for name, stamp in TASK_RE.findall(data)]


def main(argv=None):
    """Convert the log named by ``argv[1]`` into the two CSV files.

    *argv* defaults to ``sys.argv``.  Returns the process exit status:
    0 on success, 2 when the log file argument is missing.  Errors while
    opening or reading/writing files propagate as the usual I/O exceptions.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write('usage: %s LOGFILE\n' % (argv[0] if argv else 'prog'))
        return 2
    filename = argv[1]

    with open(filename) as log:
        data = log.read()
    # Kept from the original script: matching is done on the log with all
    # newlines removed, so a section is always one contiguous run of text.
    data = data.replace('\n', '')

    with open(filename + '.gccollect.csv', 'w') as out:
        for row in parse_collections(data):
            out.write('%d,%d,%d\n' % row)

    with open(filename + '.tasks.csv', 'w') as out:
        for stamp, name in parse_tasks(data):
            # The constant middle column is preserved from the original
            # output format.
            out.write('%d,1,%s\n' % (stamp, name))
    return 0


if __name__ == '__main__':
    sys.exit(main())