diff --git a/static2/disasm.py b/static2/disasm.py index 5a1d224a..830a3cb1 100644 --- a/static2/disasm.py +++ b/static2/disasm.py @@ -3,6 +3,7 @@ from capstone import * class Destination(object): + none = 0 cjump = 1 jump = 2 call = 3 @@ -32,65 +33,78 @@ class disasm(object): try: self.i = self.md.disasm(self.raw, self.address).next() self.decoded = True + self.regs_read = self.i.regs_read self.regs_write = self.i.regs_write + + self.dtype = Destination.none + if self.i.mnemonic == "call": + self.dtype = Destination.call + elif self.i.mnemonic == "jmp": + self.dtype = Destination.jump + #TODO: what about not x86? + elif x86.X86_GRP_JUMP in self.i.groups: + self.dtype = Destination.cjump + + #if capstone can't decode it, we're screwed except StopIteration: self.decoded = False + def __str__(self): if self.decoded: return "%s\t%s"%(self.i.mnemonic,self.i.op_str) return "" def is_jump(self): - #TODO: what about not x86? - if self.decoded: - return x86.X86_GRP_JUMP in self.i.groups - return False + if not self.decoded: + return False + return self.dtype in [Destination.jump,Destination.cjump] def is_ret(self): - if self.decoded: - return self.i.mnemonic == "ret" - return False + if not self.decoded: + return False + return self.i.mnemonic == "ret" #TODO: what about iret? 
and RET isn't in the apt version of capstone return x86.X86_GRP_RET in self.i.groups def is_call(self): - if self.decoded: - return self.i.mnemonic == "call" - return False + if not self.decoded: + return False + return self.dtype == Destination.call def is_ending(self): - if self.decoded: - '''is this something which should end a basic block''' - return self.is_jump() or self.is_ret() - return False + '''is this something which should end a basic block''' + if not self.decoded: + return False + return self.is_jump() or self.is_ret() + + def code_follows(self): + '''should the data after this instruction be treated as code + note that is_ending is different, as conditional jumps still have + code that follows''' + if not self.decoded: + return False + #code follows UNLESS we are a return or an unconditional jump + return not (self.is_ret() or self.dtype == Destination.jump) def size(self): return self.i.size if self.decoded else 0 def dests(self): - if self.decoded and not self.is_ret(): - dl = [] - - - - if self.is_jump() or self.is_call(): - if (self.i.operands[0].value.reg) and (self.i.operands[0].value.mem.disp == 0): - if self.i.mnemonic == "jmp": - dtype = Destination.jump - else: - #the next instruction after this one - dl.append((self.address+self.size(),Destination.implicit)) - if self.i.mnemonic == "call": - dtype = Destination.call - else: - dtype = Destination.cjump - dl.append((self.i.operands[0].value.imm,dtype)) #the target of the jump/call + if not self.decoded or self.is_ret(): + return [] - else: - dl.append((self.address+self.size(),Destination.implicit)) - return dl - else: - return [(self.address+self.size(),Destination.implicit)] - return [] \ No newline at end of file + dl = [] + + if self.code_follows(): + #this piece of code leads implicitly to the next instruction + dl.append((self.address+self.size(),Destination.implicit)) + + + if self.is_jump() or self.is_call(): + #if we take a PTR and not a MEM 
operands) + if (self.i.operands[0].value.reg) and (self.i.operands[0].value.mem.disp == 0): + dl.append((self.i.operands[0].value.imm,self.dtype)) #the target of the jump/call + + return dl \ No newline at end of file diff --git a/static2/static2.py b/static2/static2.py index 1cc7d225..16d6c3e0 100755 --- a/static2/static2.py +++ b/static2/static2.py @@ -77,7 +77,7 @@ class Tags: # will only support radare2 for now # mostly tags, except for names and functions class Static: - def __init__(self, path): + def __init__(self, path, debug=False): self.tags = {} self.path = path @@ -95,6 +95,8 @@ class Static: # run the elf loader loader.load_binary(self, path) + self.debug = debug + # this should be replaced with a def set_name(self, address, name): if name not in self.rnames: @@ -176,9 +178,14 @@ class Static: self[address]['instruction'] = d self[address]['len'] = d.size() for (c,flag) in d.dests(): + #if we aren't just the next instruction, we have an explicit xref if c != address + d.size(): self[c]['crefs'].append(address) block_starts.add(c) + #if we come after a jump and are an implicit xref, we are the start + #of a new block + elif d.is_jump(): + block_starts.add(c) return d.dests() # recursive descent pass @@ -206,16 +213,21 @@ class Static: i = self[address]['instruction'] blocks.append((b, address)) - for b in blocks: - print hex(b[0]), hex(b[1]), self[b[0]]['crefs'], self[b[1]]['instruction'].dests() - for a in range(b[0], b[1]+1): - if self[a]['instruction'] != None: - print " ",hex(a),self[a]['instruction'] + #print out basic blocks in simple disassembly view + if self.debug: + for b in sorted(blocks,key=lambda b:b[0]): + print " ------- %s [%s] -------"%(hex(b[0])," ".join(map(hex,self[b[0]]['crefs']))) + for a in range(b[0], b[1]+1): + if self[a]['instruction'] != None: + print " ",hex(a),self[a]['instruction'] + + print " ------- %s [%s] -------"%(hex(b[1])," ".join( map(lambda x:hex(x[0]),self[b[1]]['instruction'].dests()) )) + print # *** STATIC TEST 
STUFF *** if __name__ == "__main__": - static = Static(sys.argv[1]) + static = Static(sys.argv[1],debug=True) print "arch:",static['arch'] # find main