272 lines
10 KiB
Python
272 lines
10 KiB
Python
#!/usr/bin/env python3
|
|
# Unicorn Engine
|
|
# By Dang Hoang Vu, 2013
|
|
from __future__ import print_function
|
|
import sys, re, os
|
|
|
|
# Directory containing the C headers to scan; the path is relative, so it is
# resolved against the current working directory when the script runs.
INCL_DIR = os.path.join('..', 'include', 'unicorn')

# Header files to process, in this order. Every name listed here is also used
# as a key into each binding's entry of the settings table.
include = [
    'arm.h',
    'arm64.h',
    'mips.h',
    'x86.h',
    'sparc.h',
    'm68k.h',
    'ppc.h',
    'riscv.h',
    's390x.h',
    'unicorn.h',
]
|
|
|
|
# prefixes for constant filenames of all archs - case sensitive
# (bindings whose generated files use lowercase names)
_LOWERCASE_PREFIXES = {
    'arm.h': 'arm',
    'arm64.h': 'arm64',
    'mips.h': 'mips',
    'x86.h': 'x86',
    'sparc.h': 'sparc',
    'm68k.h': 'm68k',
    'ppc.h': 'ppc',
    'riscv.h': 'riscv',
    's390x.h': 's390x',
    'unicorn.h': 'unicorn',
}

# prefixes for constant filenames of all archs - case sensitive
# (bindings whose generated files use capitalized names)
_CAPITALIZED_PREFIXES = {
    'arm.h': 'Arm',
    'arm64.h': 'Arm64',
    'mips.h': 'Mips',
    'x86.h': 'X86',
    'sparc.h': 'Sparc',
    'm68k.h': 'M68k',
    'ppc.h': 'Ppc',
    'riscv.h': 'Riscv',
    's390x.h': 'S390x',
    'unicorn.h': 'Unicorn',
}

# The dotnet binding uses the capitalized names, except that the constants of
# unicorn.h go to "Common".
_DOTNET_PREFIXES = dict(_CAPITALIZED_PREFIXES)
_DOTNET_PREFIXES['unicorn.h'] = 'Common'


def _binding_template(header, footer, line_format, out_file, prefixes, comment_open):
    """Assemble the settings dict of one binding.

    header       -- text written first; formatted with the file prefix
    footer       -- text written last
    line_format  -- format for one constant; receives (name, value)
    out_file     -- output path pattern; formatted with the file prefix
    prefixes     -- mapping from header file name to file prefix
    comment_open -- text put in front of a generated comment line

    'comment_close' is the empty string for every binding.
    """
    settings = {
        'header': header,
        'footer': footer,
        'line_format': line_format,
        'out_file': out_file,
    }
    settings.update(prefixes)
    settings['comment_open'] = comment_open
    settings['comment_close'] = ''
    return settings


# Per-binding output settings, keyed by binding name.
template = {
    'python': _binding_template(
        header="# For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT [%s_const.py]\n",
        footer="",
        line_format='UC_%s = %s\n',
        out_file='./python/unicorn/%s_const.py',
        prefixes=_LOWERCASE_PREFIXES,
        comment_open='#',
    ),
    'ruby': _binding_template(
        header="# For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT [%s_const.rb]\n\nmodule UnicornEngine\n",
        footer="end",
        line_format='\tUC_%s = %s\n',
        out_file='./ruby/unicorn_gem/lib/unicorn_engine/%s_const.rb',
        prefixes=_LOWERCASE_PREFIXES,
        comment_open='#',
    ),
    'go': _binding_template(
        header="package unicorn\n// For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT [%s_const.go]\nconst (\n",
        footer=")",
        line_format='\t%s = %s\n',
        out_file='./go/unicorn/%s_const.go',
        prefixes=_LOWERCASE_PREFIXES,
        comment_open='//',
    ),
    'java': _binding_template(
        header="// For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT\n\npackage unicorn;\n\npublic interface %sConst {\n",
        footer="\n}\n",
        line_format=' public static final int UC_%s = %s;\n',
        out_file='./java/unicorn/%sConst.java',
        prefixes=_CAPITALIZED_PREFIXES,
        comment_open='//',
    ),
    'dotnet': _binding_template(
        header="// For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT\n\nnamespace UnicornManaged.Const\n\nopen System\n\n[<AutoOpen>]\nmodule %s =\n",
        footer="\n",
        line_format=' let UC_%s = %s\n',
        out_file=os.path.join('dotnet', 'UnicornManaged', 'Const', '%s.fs'),
        prefixes=_DOTNET_PREFIXES,
        comment_open=' //',
    ),
    'pascal': _binding_template(
        header="// For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT\n\nunit %sConst;\n\ninterface\n\nconst",
        footer="\nimplementation\nend.",
        line_format=' UC_%s = %s;\n',
        out_file=os.path.join('pascal', 'unicorn', '%sConst.pas'),
        prefixes=_CAPITALIZED_PREFIXES,
        comment_open='//',
    ),
}
|
|
|
|
# Header lines that start with this marker are copied into the generated
# files as comments, with the marker itself removed.
MARKUP = '//>'
|
|
|
|
def _warn_unconvertible(fields, line):
    """Print a warning for a constant definition that cannot be converted."""
    print("WARNING: Unable to convert %s" % fields)
    print(" Line =", line)


def _read_continuation(lines, lno):
    """Collect the value of a backslash-continued ``#define``.

    Reading starts at the line after index ``lno`` and stops at the first
    line that does not end in a backslash.

    Returns ``(rhs, consumed)``: ``rhs`` is the joined value text and
    ``consumed`` the number of following lines that were read. ``rhs`` is
    ``None`` when the file ends before the continuation is terminated.
    """
    pieces = []
    consumed = 0
    for following in lines[lno + 1:]:
        consumed += 1
        stripped = following.strip()
        parts = re.split(r'\s+', stripped)
        if parts[-1] != "\\":
            pieces.append(stripped)
            return "".join(pieces), consumed
        # still continued: keep the text without the trailing backslash
        pieces.append("".join(parts[:-1]))
    return None, consumed


def _evaluate_rhs(rhs, previous):
    """Reduce the text of a constant's value to a number string.

    ``previous`` maps the names of constants converted so far to their
    values. Raises ``KeyError`` when the value is a single name that is not
    in ``previous``.
    """
    # NOTE(security): the text passed to eval() comes straight from the
    # header files, so this must only ever be run on trusted headers.
    # evaluate bitshifts in constants e.g. "UC_X86 = 1 << 1"
    match = re.match(r'(?P<rhs>\s*\d+\s*<<\s*\d+\s*)', rhs)
    if match:
        rhs = str(eval(match.group(1)))
    else:
        # evaluate references to other constants e.g. "UC_ARM_REG_X = UC_ARM_REG_SP"
        match = re.match(r'^([^\d]\w+)$', rhs)
        if match:
            rhs = previous[match.group(1)]

    if not rhs.isdigit():
        # general expression: substitute the known constants, then evaluate
        for name, value in previous.items():
            rhs = re.sub(r'\b%s\b' % name, value, rhs)
        rhs = str(eval(rhs))
    return rhs


def _convert_header(lines, prefix, templ, outfile):
    """Write the constants found in ``lines`` to ``outfile``.

    lines   -- lines of one C header
    prefix  -- file prefix of the header; only names starting with
               "UC_" + prefix.upper() or with "UC_CPU" are converted
    templ   -- settings of the binding being generated
    outfile -- binary file object receiving the UTF-8 encoded output
    """
    previous = {}        # constant name -> value string, for later references
    count = 0            # value given to the next constant that has no '='
    skip = 0             # following lines already consumed as part of a value
    in_comment = False   # inside a /* ... */ comment spanning several lines
    wanted = ("UC_" + prefix.upper(), "UC_CPU")

    for lno, line in enumerate(lines):
        if "/*" in line:
            in_comment = True
        if "*/" in line:
            in_comment = False
        if in_comment:
            continue
        if skip > 0:
            # Due to clang-format, values may come up in the next line
            skip -= 1
            continue
        line = line.strip()

        if line.startswith(MARKUP):  # markup for comments
            comment = "\n%s%s%s\n" % (templ['comment_open'],
                                      line.replace(MARKUP, ''),
                                      templ['comment_close'])
            outfile.write(comment.encode("utf-8"))
            continue

        if line == '' or line.startswith('//'):
            continue

        chunks = line.split(',')
        if len(chunks) >= 2 and chunks[0] != "#define" and not chunks[0].startswith("UC_"):
            continue

        for chunk in chunks:
            chunk = chunk.strip()
            if not chunk or chunk.startswith('//'):
                continue
            fields = re.split(r'\s+', chunk)

            # parse #define UC_TARGET (num): rewrite it as "UC_TARGET = (num)"
            if fields[0] == '#define' and len(fields) >= 3:
                fields.pop(0)
                fields.insert(1, '=')

            if not fields[0].startswith(wanted):
                continue

            if len(fields) > 1 and fields[1] not in ('//', '='):
                _warn_unconvertible(fields, line)
                continue
            elif len(fields) > 1 and fields[1] == '=':
                # Like:
                # UC_A =
                #       (1 << 2)
                # #define UC_B \
                #              (UC_A | UC_C)
                # Let's search the next line
                if len(fields) == 2:
                    if lno == len(lines) - 1:
                        _warn_unconvertible(fields, line)
                        continue
                    skip += 1
                    rhs = lines[lno + 1].strip().split(",")[0]
                elif fields[-1] == "\\":
                    rhs, consumed = _read_continuation(lines, lno)
                    skip += consumed
                    if rhs is None:
                        # the file ended in the middle of the definition
                        _warn_unconvertible(fields, line)
                        continue
                else:
                    rhs = ''.join(fields[2:])
            else:
                # no explicit value: one more than the previous constant
                rhs = str(count)

            lhs = fields[0].strip()
            rhs = _evaluate_rhs(rhs, previous)

            lhs_strip = re.sub(r'^UC_', '', lhs)
            count = int(rhs) + 1
            if count == 1:
                # a constant with value 0 is preceded by an empty line
                outfile.write("\n".encode("utf-8"))

            outfile.write((templ['line_format'] % (lhs_strip, rhs)).encode("utf-8"))
            previous[lhs] = str(rhs)


def gen(lang):
    """Generate the constant files of the binding ``lang``.

    For every header named in ``include`` one output file is written. Its
    path, header, footer and line layout are taken from ``template[lang]``;
    its body holds the ``UC_*`` constants parsed from the header found in
    ``INCL_DIR``. The output file is closed even when conversion fails.

    A definition that cannot be converted is reported on standard output
    and left out of the generated file.

    Raises ``KeyError`` if ``lang`` is not a key of ``template``.
    """
    templ = template[lang]
    for target in include:
        prefix = templ[target]
        # open as binary prevents windows newlines
        with open(templ['out_file'] % (prefix), 'wb') as outfile:
            outfile.write((templ['header'] % (prefix)).encode("utf-8"))
            if target == 'unicorn.h':
                # with an empty prefix every "UC_" name in this header matches
                prefix = ''
            with open(os.path.join(INCL_DIR, target)) as header:
                lines = header.readlines()

            _convert_header(lines, prefix, templ, outfile)
            outfile.write((templ['footer']).encode("utf-8"))
|
|
|
|
def main():
    """Generate constants for the binding given as first command-line argument.

    The argument "all" generates every binding in ``template``, announcing
    each one on standard output. Any other value must be a key of
    ``template``; otherwise ``RuntimeError`` is raised.
    """
    requested = sys.argv[1]

    if requested != "all":
        if requested not in template:
            raise RuntimeError("Unsupported binding %s" % requested)
        gen(requested)
        return

    for binding in template.keys():
        print("Generating constants for {}".format(binding))
        gen(binding)
|
|
|
|
if __name__ == "__main__":
    # The binding name (or "all") must be given as the first argument;
    # without it, show the usage text and exit with status 1.
    if len(sys.argv) >= 2:
        main()
    else:
        print("Usage:", sys.argv[0], " <python>")
        print("Supported: {}".format(["all"] + list(template.keys())))
        sys.exit(1)
|