Merge pull request #63 from ivg/integration-with-bap

Add BAP as backend.
This commit is contained in:
Ivan Gotovchits 2015-01-29 09:57:53 -05:00
commit 590f80f7a9
6 changed files with 170 additions and 129 deletions

View File

@ -1,116 +0,0 @@
#!/bin/bash -e
# Bootstrap script for a from-source BAP toolchain.
# Runs with -e, so the first failing command aborts the whole script.
# Every component below is guarded by "if [ ! -d <dir> ]": a component whose
# checkout directory already exists is skipped entirely (it is neither
# re-cloned nor rebuilt).
# this script is woefully incomplete
# because I already had tons of stuff on this VM
# update before release

# --- pip: download get-pip.py into /tmp and run it as root ---
pushd .
cd /tmp
wget https://bootstrap.pypa.io/get-pip.py
sudo python get-pip.py
popd

# --- system packages and OCaml libraries ---
# NOTE(review): apt-get is called without -y here, so it presumably waits for
# interactive confirmation - confirm before using this unattended.
sudo apt-get install libgmp-dev m4 ocaml-interp libpqxx-3.1 libpqxx3-dev glogg python-dev postgresql-server-dev-9.3 g++-4.8 gcc-4.8 ocaml-findlib opam camlp4-extra cmake
opam init
opam install piqi zarith core_kernel

# Everything else is cloned and built under ./bap (relative to the directory
# the script was started from).
mkdir -p bap
cd bap

# --- capnproto: build, run its checks, install system-wide ---
if [ ! -d capnproto ]; then
pushd .
git clone https://github.com/kentonv/capnproto.git
cd capnproto/c++
./setup-autotools.sh
autoreconf -i && ./configure && make -j6 check && sudo make install
popd
fi

# Python bindings for capnproto; cython is upgraded first.
sudo pip install -U cython
sudo pip install pycapnp

# --- bap-types ---
if [ ! -d bap-types ]; then
pushd .
git clone https://github.com/BinaryAnalysisPlatform/bap-types.git
cd bap-types
./configure
# one make job per "processor" line in /proc/cpuinfo
make -j $(grep processor < /proc/cpuinfo | wc -l)
make install
popd
fi

# --- llvm sources: pinned commit plus a local patch ---
if [ ! -d llvm ]; then
pushd .
#wget http://ftp.de.debian.org/debian/pool/main/l/llvm-toolchain-snapshot/llvm-toolchain-snapshot_3.6~svn215195.orig.tar.bz2
#tar xvf llvm-toolchain-snapshot_3.6~svn215195.orig.tar.bz2
#mv llvm-toolchain-snapshot_3.6~svn215195 llvm
#cd llvm
git clone https://github.com/llvm-mirror/llvm.git
cd llvm
git checkout 0914f63cc3ce62b6872e2760dd325829b52d8396
# The patch path is relative to bap/llvm, i.e. it resolves to
# extra/llvmpatch/ next to the bap directory. -f keeps patch from prompting.
patch -f -p1 < ../../extra/llvmpatch/c-disasm-mcinst
popd
fi

# --- llvm build: out-of-tree in llvm-build, installed as root ---
if [ ! -d llvm-build ]; then
pushd .
mkdir llvm-build
cd llvm-build
../llvm/configure --enable-optimized --disable-assertions
make -j $(grep processor < /proc/cpuinfo | wc -l)
# clobber the system llvm
sudo make install
popd
fi

# --- llvm-mc ---
if [ ! -d llvm-mc ]; then
pushd .
git clone https://github.com/BinaryAnalysisPlatform/llvm-mc.git
cd llvm-mc
./configure
make -j $(grep processor < /proc/cpuinfo | wc -l)
make install
popd
fi

# --- bap-lifter ---
if [ ! -d bap-lifter ]; then
pushd .
git clone https://github.com/BinaryAnalysisPlatform/bap-lifter.git
cd bap-lifter
./configure
make -j $(grep processor < /proc/cpuinfo | wc -l)
make install
popd
fi

# below this line is broken
# --- holmes: cmake build with g++-4.8, not installed ---
if [ ! -d holmes ]; then
pushd .
git clone https://github.com/BinaryAnalysisPlatform/holmes.git
cd holmes
mkdir build && cd build
cmake -DCMAKE_CXX_COMPILER=g++-4.8 ..
make
popd
fi

# --- bap-container: cmake build with g++-4.8, not installed ---
# Unlike the sections above there is no pushd/popd here, so when this branch
# runs the script finishes inside bap-container/build.
if [ ! -d bap-container ]; then
git clone https://github.com/BinaryAnalysisPlatform/bap-container.git
cd bap-container
mkdir build && cd build
cmake -DCMAKE_CXX_COMPILER=g++-4.8 ..
make
fi
# installed at bap/bap-lifter/toil.native

View File

@ -9,6 +9,7 @@ if [[ "$unamestr" == 'Linux' ]]; then
# build for building qiradb and stuff for flask like gevent
if [ $(which apt-get) ]; then
echo "installing apt packages"
sudo apt-get update -qq
sudo apt-get -y install build-essential python-dev python-pip debootstrap libjpeg-dev zlib1g-dev unzip wget graphviz
# only python package we install globally
@ -35,7 +36,7 @@ fi
echo "installing pip packages"
virtualenv venv
source venv/bin/activate
$PIP install --upgrade -r requirements.txt
$PIP install --upgrade -r requirements.txt
# build capstone if we don't have it
if [ $(python -c "import capstone; exit(69 if (capstone.cs_version() == capstone.version_bind() and capstone.cs_version()[0] == 3) else 0)"; echo $?) == 69 ]; then
@ -44,6 +45,25 @@ else
./capstone_build.sh
fi
# Optional BAP backend. Skipped when a ./bap directory already exists or when
# the caller sets BAP=disable in the environment.
if [ -d bap -o "x$BAP" = "xdisable" ]; then
    echo "Skipping BAP"
else
    echo "Installing BAP"
    # Make opam answer yes to every question, log verbosely and build with
    # four parallel jobs.
    export OPAMYES=1
    export OPAMVERBOSE=1
    export OPAMJOBS=4
    echo 'yes' | sudo add-apt-repository ppa:avsm/ocaml42+opam12
    sudo apt-get update -qq
    sudo apt-get install -qq ocaml ocaml-native-compilers camlp4-extra opam
    # -y keeps this step unattended like the rest of the installer; without
    # it apt-get stops and waits for a confirmation.
    sudo apt-get install -y libgmp-dev llvm-3.4-dev time
    opam init
    opam install bap
    # Fetch the Python bindings over https: GitHub no longer serves the
    # unauthenticated git:// protocol.
    $PIP install --upgrade git+https://github.com/BinaryAnalysisPlatform/bap.git
fi
echo "making symlink"
sudo ln -sf $(pwd)/qira /usr/local/bin/qira
@ -52,5 +72,4 @@ echo " Thanks for installing QIRA"
echo " Check out README for more info"
echo " Or just dive in with 'qira /bin/ls'"
echo " And point Chrome to localhost:3002"
echo " ~geohot"
echo " ~geohot"

View File

@ -16,10 +16,10 @@ sys.path.append(BASEDIR)
# capstone is now a requirement
WITH_CAPSTONE = True
WITH_BAP = True
# turn this off for now on releases
WITH_STATIC = False
STATIC_ENGINE = "builtin"
WEBSOCKET_DEBUG = False

View File

@ -9,6 +9,7 @@ if [ "$1" == "distrib" ] ; then
cd ../../
fi
eval `opam config env`
source venv/bin/activate
nosetests
@ -22,4 +23,3 @@ sleep 2
phantomjs qira_tests/load_page.js
kill $QIRA_PID

View File

@ -15,9 +15,6 @@ def analyze_functions(static):
# runs the recursive descent parser at address
# how to deal with block groupings?
def make_function_at(static, address, recurse = True):
if static['arch'] != "i386" and static['arch'] != "x86-64":
print "*** static only works with x86(_64), someone should fix it"
return
if static[address]['function'] != None:
# already function
return
@ -86,4 +83,3 @@ def make_function_at(static, address, recurse = True):
for f in function_starts:
if static[f]['function'] == None:
make_function_at(static, f)

View File

@ -1,5 +1,12 @@
from capstone import *
import capstone # for some unexported (yet) symbols in Capstone 3.0
import qira_config
if qira_config.WITH_BAP:
import bap
from bap import adt, arm, asm, bil
from bap.adt import Visitor, visit
from binascii import hexlify
__all__ = ["Tags", "Function", "Block", "Instruction", "DESTTYPE","ABITYPE"]
@ -10,10 +17,146 @@ class DESTTYPE(object):
call = 3
implicit = 4
# Instruction class
class Instruction(object):
    """Factory that hands out a backend-specific instruction object.

    Instantiating this class never yields an ``Instruction``: ``__new__``
    returns a ``BapInsn`` when the BAP backend is enabled and manages to
    decode the bytes, and a ``CsInsn`` in every other case.
    """

    def __new__(cls, *args, **kwargs):
        """Build the instruction, preferring BAP and falling back to Capstone.

        All arguments are forwarded untouched to the chosen backend class.
        """
        if qira_config.WITH_BAP:
            try:
                return BapInsn(*args, **kwargs)
            except Exception as exn:
                # Best effort: any BAP failure is reported and then the
                # Capstone path below takes over.
                print("bap failed %s %s" % (type(exn).__name__, exn))
        return CsInsn(*args, **kwargs)
class BapInsn(object):
    """One instruction disassembled (and, when available, lifted) by BAP.

    Attributes set by ``__init__``:
      insn        -- first instruction object returned by ``bap.disasm``
      regs_read   -- names collected by ``accesses`` as reads
      regs_write  -- names collected by ``accesses`` as writes
      jumps       -- ``(Jmp, DESTTYPE)`` pairs collected by ``jumps``
      dtype       -- DESTTYPE.call / cjump / jump, or None for anything else
    """

    def __init__(self, raw, address, arch):
        """Disassemble the first instruction found in ``raw``.

        raw     -- bytes to decode
        address -- address the bytes are located at
        arch    -- architecture name; 'arm' is passed to BAP as 'armv7'

        Raises ValueError when ``raw`` is empty or BAP returns no instruction.
        """
        if len(raw) == 0:
            raise ValueError("Empty memory at {0:#x}".format(address))
        arch = 'armv7' if arch == 'arm' else arch
        # NOTE(review): stop_conditions=[asm.Valid()] presumably makes BAP stop
        # after the first valid instruction - confirm against the bap bindings.
        insns = list(bap.disasm(raw,
                                addr=address,
                                arch=arch,
                                stop_conditions=[asm.Valid()]))
        if len(insns) == 0:
            raise ValueError("Invalid instruction for {1} at {2:#x}[{3}]:\n{0}".
                             format(hexlify(raw), arch, address, len(raw)))
        self.insn = insns[0]

        # NOTE(review): both helpers receive insn.bil even though the code
        # further down allows it to be None - confirm they tolerate None.
        self.regs_read, self.regs_write = accesses(self.insn.bil)
        self.jumps = jumps(self.insn.bil)

        # Calls win over conditional branches, which win over plain jumps.
        self.dtype = None
        if self.is_call():
            self.dtype = DESTTYPE.call
        elif self.is_conditional():
            self.dtype = DESTTYPE.cjump
        elif self.is_jump():
            self.dtype = DESTTYPE.jump

        dests = []

        # Fall-through successor: the address right after this instruction.
        if self.code_follows():
            dests.append((self.insn.addr + self.insn.size,
                          DESTTYPE.implicit))

        if self.insn.bil is not None:
            # With BIL available, keep only jumps whose target is a constant.
            for (jmp, dtype) in self.jumps:
                if isinstance(jmp.arg, bil.Int):
                    dests.append((jmp.arg.value, dtype))
        elif self.is_jump() or self.is_call():
            # Without BIL, fall back to the first operand and treat an
            # immediate as an offset from ``address``.
            dst = self.insn.operands[0]
            if isinstance(dst, asm.Imm):
                dests.append((dst.arg + address, self.dtype))

        # A return reports no destinations at all.
        if self.is_ret():
            self._dests = []
        else:
            self._dests = dests

    def __str__(self):
        """Return the assembly text BAP produced for the instruction."""
        return self.insn.asm

    def is_jump(self):
        """Tell whether the instruction transfers control.

        Uses the Branch kind when there is no BIL; otherwise the instruction
        is a jump iff at least one Jmp was collected from its BIL.
        """
        if self.insn.bil is None:
            return self.insn.has_kind(asm.Branch)
        else:
            # "!=" replaces the deprecated "<>" spelling.
            return len(self.jumps) != 0

    def is_ret(self):
        """Tell whether BAP tags the instruction with the Return kind."""
        return self.insn.has_kind(asm.Return)

    def is_call(self):
        """Tell whether BAP tags the instruction with the Call kind."""
        return self.insn.has_kind(asm.Call)

    def is_ending(self):
        """Tell whether BAP tags the instruction with the Terminator kind."""
        return self.insn.has_kind(asm.Terminator)

    def is_conditional(self):
        """Tell whether BAP tags the instruction as a conditional branch."""
        return self.insn.has_kind(asm.Conditional_branch)

    def is_unconditional(self):
        """Tell whether BAP tags the instruction as an unconditional branch."""
        return self.insn.has_kind(asm.Unconditional_branch)

    def code_follows(self):
        """Tell whether execution can continue at the next address.

        False for returns and unconditional branches, True otherwise.
        """
        return not (self.is_ret() or self.is_unconditional())

    def size(self):
        """Return the instruction size as reported by BAP."""
        return self.insn.size

    def dests(self):
        """Return the ``(address, DESTTYPE)`` list computed in ``__init__``."""
        return self._dests
def exists(cont, f):
    """Return True if ``f(x)`` is truthy for at least one ``x`` in ``cont``.

    Stops at the first match; an empty ``cont`` gives False.
    """
    # any() short-circuits exactly like the previous "pull one item from a
    # filtering generator" approach, without relying on the Python-2-only
    # generator .next() method.
    return any(f(x) for x in cont)
if qira_config.WITH_BAP:
    class Jmp_visitor(Visitor):
        """Collects every Jmp it visits together with a DESTTYPE tag."""

        def __init__(self):
            self.jumps = []
            self.in_condition = False

        def visit_If(self, exp):
            # Mark both branches as conditional, then restore whatever the
            # flag was so nested Ifs unwind correctly.
            previous = self.in_condition
            self.in_condition = True
            self.run(exp.true)
            self.run(exp.false)
            self.in_condition = previous

        def visit_Jmp(self, exp):
            # A jump seen while inside an If is conditional, otherwise plain.
            if self.in_condition:
                kind = DESTTYPE.cjump
            else:
                kind = DESTTYPE.jump
            self.jumps.append((exp, kind))

    class Access_visitor(Visitor):
        """Records the names of variables that are written and read."""

        def __init__(self):
            self.writes = []
            self.reads = []

        def visit_Move(self, stmt):
            # Only the right-hand side is traversed, so the assigned variable
            # is recorded as a write and not as a read.
            target = stmt.var
            self.writes.append(target.name)
            self.run(stmt.expr)

        def visit_Var(self, var):
            self.reads.append(var.name)

    def jumps(bil):
        """Return the (Jmp, DESTTYPE) pairs found in `bil`."""
        collector = visit(Jmp_visitor(), bil)
        return collector.jumps

    def accesses(bil):
        """Return a (reads, writes) pair of variable-name lists for `bil`."""
        collector = visit(Access_visitor(), bil)
        return (collector.reads, collector.writes)
# Instruction class
class CsInsn(object):
"""one disassembled instruction"""
def __init__(self, raw, address, arch="i386"):
def __init__(self, raw, address, arch):
self.raw = raw
self.address = address
if arch == "i386":
@ -34,7 +177,6 @@ class Instruction(object):
try:
self.i = self.md.disasm(self.raw, self.address).next()
self.decoded = True
self.regs_read = self.i.regs_read
self.regs_write = self.i.regs_write
@ -116,6 +258,7 @@ class Instruction(object):
return dl
class ABITYPE(object):
UNKNOWN = ([],None)
X86_CDECL = ([],'EAX')
@ -202,4 +345,3 @@ class Tags:
# name can change by adding underscores
val = self.static.set_name(self.address, val)
self.backing[tag] = val