micropython/py/makeqstrdata.py
Damien George c3bd9415cc py: Make qstr hash size configurable, defaults to 2 bytes.
This patch makes configurable, via MICROPY_QSTR_BYTES_IN_HASH, the
number of bytes used for a qstr hash.  It was originally fixed at 2
bytes, and now defaults to 2 bytes.  Setting it to 1 byte will save
ROM and RAM at a small expense of hash collisions.
2015-07-20 11:03:13 +00:00

109 lines
3.9 KiB
Python

"""
Process raw qstr file and output qstr data with length, hash and data bytes.
This script works with Python 2.6, 2.7, 3.3 and 3.4.
"""
from __future__ import print_function
import re
import sys
# codepoint2name is different in Python 2 to Python 3
import platform
if platform.python_version_tuple()[0] == '2':
from htmlentitydefs import codepoint2name
elif platform.python_version_tuple()[0] == '3':
from html.entities import codepoint2name
codepoint2name[ord('-')] = 'hyphen';
# add some custom names to map characters that aren't in HTML
codepoint2name[ord(' ')] = 'space'
codepoint2name[ord('\'')] = 'squot'
codepoint2name[ord(',')] = 'comma'
codepoint2name[ord('.')] = 'dot'
codepoint2name[ord(':')] = 'colon'
codepoint2name[ord('/')] = 'slash'
codepoint2name[ord('%')] = 'percent'
codepoint2name[ord('#')] = 'hash'
codepoint2name[ord('(')] = 'paren_open'
codepoint2name[ord(')')] = 'paren_close'
codepoint2name[ord('[')] = 'bracket_open'
codepoint2name[ord(']')] = 'bracket_close'
codepoint2name[ord('{')] = 'brace_open'
codepoint2name[ord('}')] = 'brace_close'
codepoint2name[ord('*')] = 'star'
codepoint2name[ord('!')] = 'bang'
codepoint2name[ord('\\')] = 'backslash'
# this must match the equivalent function in qstr.c
def compute_hash(qstr, bytes_hash):
hash = 5381
for char in qstr:
hash = (hash * 33) ^ ord(char)
# Make sure that valid hash is never zero, zero means "hash not computed"
return (hash & ((1 << (8 * bytes_hash)) - 1)) or 1
def do_work(infiles):
# read the qstrs in from the input files
qcfgs = {}
qstrs = {}
for infile in infiles:
with open(infile, 'rt') as f:
for line in f:
line = line.strip()
# is this a config line?
match = re.match(r'^QCFG\((.+), (.+)\)', line)
if match:
value = match.group(2)
if value[0] == '(' and value[-1] == ')':
# strip parenthesis from config value
value = value[1:-1]
qcfgs[match.group(1)] = value
continue
# is this a QSTR line?
match = re.match(r'^Q\((.*)\)$', line)
if not match:
continue
# get the qstr value
qstr = match.group(1)
ident = re.sub(r'[^A-Za-z0-9_]', lambda s: "_" + codepoint2name[ord(s.group(0))] + "_", qstr)
# don't add duplicates
if ident in qstrs:
continue
# add the qstr to the list, with order number to retain original order in file
qstrs[ident] = (len(qstrs), ident, qstr)
# get config variables
cfg_bytes_len = int(qcfgs['BYTES_IN_LEN'])
cfg_bytes_hash = int(qcfgs['BYTES_IN_HASH'])
cfg_max_len = 1 << (8 * cfg_bytes_len)
# print out the starte of the generated C header file
print('// This file was automatically generated by makeqstrdata.py')
print('')
# add NULL qstr with no hash or data
print('QDEF(MP_QSTR_NULL, (const byte*)"%s%s" "")' % ('\\x00' * cfg_bytes_hash, '\\x00' * cfg_bytes_len))
# go through each qstr and print it out
for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
qhash = compute_hash(qstr, cfg_bytes_hash)
# Calculate len of str, taking escapes into account
qlen = len(qstr.replace("\\\\", "-").replace("\\", ""))
qdata = qstr.replace('"', '\\"')
if qlen >= cfg_max_len:
print('qstr is too long:', qstr)
assert False
qlen_str = ('\\x%02x' * cfg_bytes_len) % tuple(((qlen >> (8 * i)) & 0xff) for i in range(cfg_bytes_len))
qhash_str = ('\\x%02x' * cfg_bytes_hash) % tuple(((qhash >> (8 * i)) & 0xff) for i in range(cfg_bytes_hash))
print('QDEF(MP_QSTR_%s, (const byte*)"%s%s" "%s")' % (ident, qhash_str, qlen_str, qdata))
if __name__ == "__main__":
do_work(sys.argv[1:])