diff options
Diffstat (limited to 'py/makeqstrdata.py')
-rw-r--r-- | py/makeqstrdata.py | 72 |
1 files changed, 58 insertions, 14 deletions
diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py
index e8adb0cbb..c00ec1eb2 100644
--- a/py/makeqstrdata.py
+++ b/py/makeqstrdata.py
@@ -9,11 +9,15 @@ from __future__ import print_function
 import re
 import sys
 
-# codepoint2name is different in Python 2 to Python 3
+# Python 2/3 compatibility:
+# - iterating through bytes is different
+# - codepoint2name lives in a different module
 import platform
 if platform.python_version_tuple()[0] == '2':
+    ord_bytes = ord
     from htmlentitydefs import codepoint2name
 elif platform.python_version_tuple()[0] == '3':
+    ord_bytes = lambda x:x
     from html.entities import codepoint2name
 codepoint2name[ord('-')] = 'hyphen';
 
@@ -23,6 +27,7 @@ codepoint2name[ord('\'')] = 'squot'
 codepoint2name[ord(',')] = 'comma'
 codepoint2name[ord('.')] = 'dot'
 codepoint2name[ord(':')] = 'colon'
+codepoint2name[ord(';')] = 'semicolon'
 codepoint2name[ord('/')] = 'slash'
 codepoint2name[ord('%')] = 'percent'
 codepoint2name[ord('#')] = 'hash'
@@ -36,6 +41,13 @@ codepoint2name[ord('*')] = 'star'
 codepoint2name[ord('!')] = 'bang'
 codepoint2name[ord('\\')] = 'backslash'
 codepoint2name[ord('+')] = 'plus'
+codepoint2name[ord('$')] = 'dollar'
+codepoint2name[ord('=')] = 'equals'
+codepoint2name[ord('?')] = 'question'
+codepoint2name[ord('@')] = 'at_sign'
+codepoint2name[ord('^')] = 'caret'
+codepoint2name[ord('|')] = 'pipe'
+codepoint2name[ord('~')] = 'tilde'
 
 # this must match the equivalent function in qstr.c
 def compute_hash(qstr, bytes_hash):
@@ -45,7 +57,17 @@ def compute_hash(qstr, bytes_hash):
     # Make sure that valid hash is never zero, zero means "hash not computed"
     return (hash & ((1 << (8 * bytes_hash)) - 1)) or 1
 
-def do_work(infiles):
+def qstr_escape(qst):
+    def esc_char(m):
+        c = ord(m.group(0))
+        try:
+            name = codepoint2name[c]
+        except KeyError:
+            name = '0x%02x' % c
+        return "_" + name + '_'
+    return re.sub(r'[^A-Za-z0-9_]', esc_char, qst)
+
+def parse_input_headers(infiles):
     # read the qstrs in from the input files
     qcfgs = {}
     qstrs = {}
@@ -71,7 +93,13 @@ def do_work(infiles):
 
                 # get the qstr value
                 qstr = match.group(1)
-                ident = re.sub(r'[^A-Za-z0-9_]', lambda s: "_" + codepoint2name[ord(s.group(0))] + "_", qstr)
+
+                # special case to specify control characters
+                if qstr == '\\n':
+                    qstr = '\n'
+
+                # work out the corresponding qstr name
+                ident = qstr_escape(qstr)
 
                 # don't add duplicates
                 if ident in qstrs:
@@ -84,10 +112,30 @@ def do_work(infiles):
         sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n")
         sys.exit(1)
 
+    return qcfgs, qstrs
+
+def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr):
+    qhash = compute_hash(qstr, cfg_bytes_hash)
+    if all(32 <= ord(c) <= 126 and c != '\\' for c in qstr):
+        # qstr is all printable ASCII so render it as-is (for easier debugging)
+        qlen = len(qstr)
+        qdata = qstr
+    else:
+        # qstr contains non-printable codes so render entire thing as hex pairs
+        qbytes = qstr.encode('utf8')
+        qlen = len(qbytes)
+        qdata = ''.join(('\\x%02x' % ord_bytes(b)) for b in qbytes)
+    if qlen >= (1 << (8 * cfg_bytes_len)):
+        print('qstr is too long:', qstr)
+        assert False
+    qlen_str = ('\\x%02x' * cfg_bytes_len) % tuple(((qlen >> (8 * i)) & 0xff) for i in range(cfg_bytes_len))
+    qhash_str = ('\\x%02x' * cfg_bytes_hash) % tuple(((qhash >> (8 * i)) & 0xff) for i in range(cfg_bytes_hash))
+    return '(const byte*)"%s%s" "%s"' % (qhash_str, qlen_str, qdata)
+
+def print_qstr_data(qcfgs, qstrs):
     # get config variables
     cfg_bytes_len = int(qcfgs['BYTES_IN_LEN'])
     cfg_bytes_hash = int(qcfgs['BYTES_IN_HASH'])
-    cfg_max_len = 1 << (8 * cfg_bytes_len)
 
     # print out the starter of the generated C header file
     print('// This file was automatically generated by makeqstrdata.py')
@@ -98,16 +146,12 @@ def do_work(infiles):
 
     # go through each qstr and print it out
     for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
-        qhash = compute_hash(qstr, cfg_bytes_hash)
-        # Calculate len of str, taking escapes into account
-        qlen = len(qstr.replace("\\\\", "-").replace("\\", ""))
-        qdata = qstr.replace('"', '\\"')
-        if qlen >= cfg_max_len:
-            print('qstr is too long:', qstr)
-            assert False
-        qlen_str = ('\\x%02x' * cfg_bytes_len) % tuple(((qlen >> (8 * i)) & 0xff) for i in range(cfg_bytes_len))
-        qhash_str = ('\\x%02x' * cfg_bytes_hash) % tuple(((qhash >> (8 * i)) & 0xff) for i in range(cfg_bytes_hash))
-        print('QDEF(MP_QSTR_%s, (const byte*)"%s%s" "%s")' % (ident, qhash_str, qlen_str, qdata))
+        qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
+        print('QDEF(MP_QSTR_%s, %s)' % (ident, qbytes))
+
+def do_work(infiles):
+    qcfgs, qstrs = parse_input_headers(infiles)
+    print_qstr_data(qcfgs, qstrs)
 
 if __name__ == "__main__":
     do_work(sys.argv[1:])