[pypy-svn] r50284 - pypy/branch/asmgcroot/pypy/translator/c
arigo at codespeak.net
arigo at codespeak.net
Thu Jan 3 11:26:09 CET 2008
Author: arigo
Date: Thu Jan 3 11:26:03 2008
New Revision: 50284
Modified:
pypy/branch/asmgcroot/pypy/translator/c/trackgcroot.py
Log:
In-progress.
Modified: pypy/branch/asmgcroot/pypy/translator/c/trackgcroot.py
==============================================================================
--- pypy/branch/asmgcroot/pypy/translator/c/trackgcroot.py (original)
+++ pypy/branch/asmgcroot/pypy/translator/c/trackgcroot.py Thu Jan 3 11:26:03 2008
@@ -20,18 +20,16 @@
r_localvar = re.compile(LOCALVAR)
r_localvar_esp = re.compile(r"(\d*)[(]%esp[)]")
-# for sanity-checking, %esp should only appear as a way to access locals,
-# i.e. inside parenthesis, except if explicitly recognized otherwise
-r_esp_outside_paren = re.compile(r"(.+[)])?[^(]*[%]esp")
-
class GcRootTracker(object):
def __init__(self, verbose=0):
self.gcmaptable = []
self.verbose = verbose
+ self.seen_main = False
def dump(self, output):
+ assert self.seen_main
shapes = {}
print >> output, '\t.data'
print >> output, '\t.align\t4'
@@ -80,8 +78,6 @@
def process_function(self, lines, newfile, entrypoint, filename):
tracker = FunctionGcRootTracker(lines)
- if tracker.funcname == entrypoint:
- tracker.can_use_frame_pointer = True
if self.verbose:
print >> sys.stderr, '[trackgcroot:%s] %s' % (filename,
tracker.funcname)
@@ -89,13 +85,9 @@
if self.verbose > 1:
for label, state in table:
print >> sys.stderr, label, '\t', state
- if tracker.can_use_frame_pointer:
- # XXX for now we have no logic to track the gc roots of
- # functions using %ebp
- for label, state in table:
- assert len(state) == 1, (
- "XXX for now the entry point should not have any gc roots")
if tracker.funcname == entrypoint:
+ self.seen_main = True
+ xxx
table = [(label, (-1,)) for label, _ in table]
# ^^^ we set the framesize of the entry point to -1 as a marker
# (the code in asmgcroot.py actually takes any odd-valued number
@@ -113,28 +105,30 @@
assert self.funcname == match.group(1)
assert self.funcname == match.group(2)
self.lines = lines
- self.inconsistent_state = {}
- self.can_use_frame_pointer = False # unless changed by caller
def computegcmaptable(self, verbose=0):
self.findlabels()
self.parse_instructions()
- if not self.enumerate_call_insns():
- return []
- self.makeprevmap()
- self.findframesize()
- self.fixlocalvars()
- self.trackgcroots()
- self.extend_calls_with_labels()
- if verbose > 2:
- self.dump()
+ try:
+ if not self.list_call_insns():
+ return []
+ self.findprevinsns()
+ self.findframesize()
+ self.fixlocalvars()
+ self.trackgcroots()
+ self.extend_calls_with_labels()
+ finally:
+ if verbose > 2:
+ self.dump()
return self.gettable()
def gettable(self):
"Returns a list [(label_after_call, (framesize, gcroot0, gcroot1,..))]"
table = []
- for i, insn in self.enumerate_call_insns():
- info = [self.framesize[i]]
+ for insn in self.list_call_insns():
+ if not hasattr(insn, 'framesize'):
+ continue # calls that never end up reaching a RET
+ info = [insn.framesize]
# the first gcroots are always the ones corresponding to
# the callee-saved registers
for reg in CALLEE_SAVE_REGISTERS:
@@ -154,25 +148,20 @@
return table
def findlabels(self):
- self.labels = {} # {name: line number}
- for i, line in enumerate(self.lines):
+ self.labels = {} # {name: Label()}
+ for line in self.lines:
match = r_label.match(line)
if match:
label = match.group(1)
assert label not in self.labels, "duplicate label"
- self.labels[label] = i
+ self.labels[label] = Label(label)
def parse_instructions(self):
- self.jumpsto = {} # {label: [list-of-source-node-indices]}
- for label in self.labels:
- self.jumpsto[label] = []
self.insns = [InsnFunctionStart()]
- self.line2nodeindex = {0: 0}
in_APP = False
- for lin in range(1, len(self.lines)):
- self.currentlineno = lin
+ for lineno, line in enumerate(self.lines):
+ self.currentlineno = lineno
insn = []
- line = self.lines[lin]
match = r_insn.match(line)
if match:
if not in_APP:
@@ -188,7 +177,10 @@
in_APP = True
elif line == '#NO_APP\n':
in_APP = False
- self.line2nodeindex.setdefault(lin, len(self.insns))
+ else:
+ match = r_label.match(line)
+ if match:
+ insn = self.labels[match.group(1)]
if isinstance(insn, list):
self.insns.extend(insn)
else:
@@ -206,111 +198,107 @@
setattr(FunctionGcRootTracker, 'visit_' + opname, visit_nop)
return self.visit_nop
- def makeprevmap(self):
- # builds the prevmap, which only accounts for jumps. Each insn node
- # has an implicit previous node, which is (obviously) the previous
- # one in self.insns -- unless the previous one is an InsnStop.
- self.prevmap = {} # {node-index: [list-of-previous-node-indices]}
- for label, sourceindices in self.jumpsto.items():
- line = self.labels[label]
- while line not in self.line2nodeindex:
- assert line < len(self.lines), (
- "no Insn found after label %r" % (label,))
- line += 1
- self.prevmap[self.line2nodeindex[line]] = sourceindices
-
- def enumerate_call_insns(self):
- return [(i, insn) for (i, insn) in enumerate(self.insns)
- if isinstance(insn, InsnCall)]
+ def findprevinsns(self):
+ # builds the previous_insns of each Insn. For Labels, all jumps
+ # to them are already registered; all that is left to do is to
+ # make each Insn point to the Insn just before it.
+ for i in range(len(self.insns)-1):
+ previnsn = self.insns[i]
+ nextinsn = self.insns[i+1]
+ try:
+ lst = nextinsn.previous_insns
+ except AttributeError:
+ lst = nextinsn.previous_insns = []
+ if not isinstance(previnsn, InsnStop):
+ lst.append(previnsn)
+
+ def list_call_insns(self):
+ return [insn for insn in self.insns if isinstance(insn, InsnCall)]
def findframesize(self):
- self.framesize = {0: 0}
+ self.insns[0].framesize = 0
- def walker(i, insn, size_delta):
- check = deltas.setdefault(i, size_delta)
+ def walker(insn, size_delta):
+ check = deltas.setdefault(insn, size_delta)
assert check == size_delta, (
- "inconsistent frame size at instruction %d: %s" % (i, insn))
+ "inconsistent frame size at instruction %s" % (insn,))
if isinstance(insn, InsnStackAdjust):
size_delta -= insn.delta
- if i not in self.framesize:
+ if not hasattr(insn, 'framesize'):
yield size_delta # continue walking backwards
- for i, insn in enumerate(self.insns):
+ for insn in self.insns:
if insn.requestgcroots():
deltas = {}
- self.walk_instructions_backwards(walker, i, 0)
+ self.walk_instructions_backwards(walker, insn, 0)
size_at_insn = []
- for n in deltas:
- if n in self.framesize:
- size_at_insn.append(self.framesize[n] + deltas[n])
+ for insn1, delta1 in deltas.items():
+ if hasattr(insn1, 'framesize'):
+ size_at_insn.append(insn1.framesize + delta1)
assert len(size_at_insn) > 0, (
"cannot reach the start of the function??")
size_at_insn = size_at_insn[0]
- for n in deltas:
- size_at_n = size_at_insn - deltas[n]
- check = self.framesize.setdefault(n, size_at_n)
- assert check == size_at_n, (
- "inconsistent frame size at instruction %d: %s" % (
- n, self.insns[n]))
+ for insn1, delta1 in deltas.items():
+ size_at_insn1 = size_at_insn - delta1
+ if hasattr(insn1, 'framesize'):
+ assert insn1.framesize == size_at_insn1, (
+ "inconsistent frame size at instruction %s" %
+ (insn1,))
+ else:
+ insn1.framesize = size_at_insn1
def fixlocalvars(self):
- for i, insn in enumerate(self.insns):
- if i in self.framesize:
+ for insn in self.insns:
+ if hasattr(insn, 'framesize'):
for name in insn._locals_:
localvar = getattr(insn, name)
match = r_localvar_esp.match(localvar)
if match:
ofs_from_esp = int(match.group(1) or '0')
- localvar = ofs_from_esp - self.framesize[i]
+ localvar = ofs_from_esp - insn.framesize
assert localvar != 0 # that's the return address
setattr(insn, name, localvar)
def trackgcroots(self):
- def walker(i, insn, loc):
+ def walker(insn, loc):
source = insn.source_of(loc, tag)
if source is somenewvalue:
pass # done
else:
yield source
- for i, insn in enumerate(self.insns):
+ for insn in self.insns:
for loc, tag in insn.requestgcroots().items():
- self.walk_instructions_backwards(walker, i, loc)
+ self.walk_instructions_backwards(walker, insn, loc)
def dump(self):
- for n, insn in enumerate(self.insns):
- try:
- size = self.framesize[n]
- except (AttributeError, KeyError):
- size = '?'
+ for insn in self.insns:
+ size = getattr(insn, 'framesize', '?')
print '%4s %s' % (size, insn)
- def walk_instructions_backwards(self, walker, initial_i, initial_state):
+ def walk_instructions_backwards(self, walker, initial_insn, initial_state):
pending = []
seen = {}
- def schedule(i, state):
- assert 0 <= i < len(self.insns)
- key = i, state
+ def schedule(insn, state):
+ key = insn, state
if key not in seen:
seen[key] = True
pending.append(key)
- schedule(initial_i, initial_state)
+ schedule(initial_insn, initial_state)
while pending:
- i, state = pending.pop()
- for prevstate in walker(i, self.insns[i], state):
- if not isinstance(self.insns[i - 1], InsnStop):
- schedule(i - 1, prevstate)
- if i in self.prevmap:
- for previndex in self.prevmap[i]:
- schedule(previndex, prevstate)
+ insn, state = pending.pop()
+ for prevstate in walker(insn, state):
+ for previnsn in insn.previous_insns:
+ schedule(previnsn, prevstate)
def extend_calls_with_labels(self):
# walk backwards, because inserting the global labels in self.lines
# is going to invalidate the lineno of all the InsnCall objects
# after the current one.
- for i, call in self.enumerate_call_insns()[::-1]:
- self.create_global_label(call)
+ for call in self.list_call_insns()[::-1]:
+ if hasattr(call, 'framesize'):
+ self.create_global_label(call)
def create_global_label(self, call):
# we need a globally-declared label just after the call.
@@ -349,8 +337,15 @@
return []
IGNORE_OPS_WITH_PREFIXES = dict.fromkeys([
- 'cmp', 'test', 'set', 'sahf',
- 'f', # floating-point operations
+ 'cmp', 'test', 'set', 'sahf', 'cltd', 'cld', 'std',
+ 'rep', 'movs',
+ # floating-point operations cannot produce GC pointers
+ 'f',
+ # arithmetic operations should not produce GC pointers
+ 'inc', 'dec', 'not', 'neg', 'or', 'and',
+ 'shl', 'shr', 'sal', 'sar', 'mul', 'imul', 'div', 'idiv',
+ # zero-extending moves should not produce GC pointers
+ 'movz',
])
visit_movb = visit_nop
@@ -359,16 +354,8 @@
visit_addw = visit_nop
visit_subb = visit_nop
visit_subw = visit_nop
- visit_incb = visit_nop
- visit_incw = visit_nop
- visit_decb = visit_nop
- visit_decw = visit_nop
visit_xorb = visit_nop
visit_xorw = visit_nop
- visit_orb = visit_nop
- visit_orw = visit_nop
- visit_andb = visit_nop
- visit_andw = visit_nop
def visit_addl(self, line, sign=+1):
match = r_binaryinsn.match(line)
@@ -380,7 +367,7 @@
elif r_localvar.match(target):
return InsnSetLocal(target)
else:
- raise UnrecognizedOperation(line)
+ return []
def visit_subl(self, line):
return self.visit_addl(line, sign=-1)
@@ -391,35 +378,50 @@
if r_localvar.match(target):
return InsnSetLocal(target)
else:
- raise UnrecognizedOperation(line)
+ return []
- visit_incl = unary_insn
- visit_decl = unary_insn
+## visit_incl = unary_insn
+## visit_decl = unary_insn
+## visit_notl = unary_insn
+## visit_negl = unary_insn
def binary_insn(self, line):
match = r_binaryinsn.match(line)
+ if not match:
+ raise UnrecognizedOperation(line)
target = match.group(2)
if r_localvar.match(target):
return InsnSetLocal(target)
else:
raise UnrecognizedOperation(line)
- visit_xorl = binary_insn
- visit_orl = binary_insn
- visit_andl = binary_insn
- visit_movzbl = binary_insn
- visit_movzwl = binary_insn
+ visit_xorl = binary_insn # used in "xor reg, reg" to create a NULL GC ptr
+## visit_orl = binary_insn
+## visit_andl = binary_insn
+## visit_movzbl = binary_insn
+## visit_movzwl = binary_insn
visit_leal = binary_insn
- visit_imull = binary_insn
+
+ def unary_or_binary_insn(self, line):
+ if r_binaryinsn.match(line):
+ return self.binary_insn(line)
+ else:
+ return self.unary_insn(line)
+
+## visit_shll = unary_or_binary_insn
+## visit_shrl = unary_or_binary_insn
+## visit_sall = unary_or_binary_insn
+## visit_sarl = unary_or_binary_insn
+## visit_imull = unary_or_binary_insn
def insns_for_copy(self, source, target):
- if r_localvar.match(target):
+ if source == '%esp' or target == '%esp':
+ raise UnrecognizedOperation('%s -> %s' % (source, target))
+ elif r_localvar.match(target):
if r_localvar.match(source):
return [InsnCopyLocal(source, target)]
else:
return [InsnSetLocal(target)]
- elif target == '%esp':
- raise UnrecognizedOperation
else:
return []
@@ -472,8 +474,7 @@
return InsnStop()
def register_jump_to(self, label):
- currentpos = len(self.insns)
- self.jumpsto[label].append(currentpos)
+ self.labels[label].previous_insns.append(self.insns[-1])
def conditional_jump(self, line):
match = r_jump.match(line)
@@ -506,7 +507,8 @@
target = match.group(1)
if target in FUNCTIONS_NOT_RETURNING:
return InsnStop()
- return InsnCall(self.currentlineno)
+ return [InsnCall(self.currentlineno),
+ InsnSetLocal('%eax')] # the result is there
def visit_pypygetframeaddress(self, line):
xxx
@@ -543,7 +545,14 @@
def source_of(self, localvar, tag):
return localvar
+class Label(Insn):
+ _args_ = ['label']
+ def __init__(self, label):
+ self.label = label
+ self.previous_insns = [] # all insns that jump (or fallthrough) here
+
class InsnFunctionStart(Insn):
+ previous_insns = ()
def __init__(self):
self.arguments = {}
for reg in CALLEE_SAVE_REGISTERS:
@@ -638,7 +647,12 @@
if __name__ == '__main__':
- tracker = GcRootTracker(verbose=sys.maxint)
+ if sys.argv and sys.argv[1] == '-v':
+ del sys.argv[1]
+ verbose = sys.maxint
+ else:
+ verbose = 1
+ tracker = GcRootTracker(verbose=verbose)
for fn in sys.argv[1:]:
tmpfn = fn + '.TMP'
f = open(fn, 'r')
More information about the pypy-svn
mailing list