summary | refs | log | tree | commit | diff
path: root/py/makeqstrdata.py
diff options
context:
space:
mode:
Diffstat (limited to 'py/makeqstrdata.py')
-rw-r--r-- py/makeqstrdata.py | 72
1 file changed, 58 insertions, 14 deletions
diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py
index e8adb0cbb..c00ec1eb2 100644
--- a/py/makeqstrdata.py
+++ b/py/makeqstrdata.py
@@ -9,11 +9,15 @@ from __future__ import print_function
import re
import sys
-# codepoint2name is different in Python 2 to Python 3
+# Python 2/3 compatibility:
+# - iterating through bytes is different
+# - codepoint2name lives in a different module
import platform
if platform.python_version_tuple()[0] == '2':
+ ord_bytes = ord
from htmlentitydefs import codepoint2name
elif platform.python_version_tuple()[0] == '3':
+ ord_bytes = lambda x:x
from html.entities import codepoint2name
codepoint2name[ord('-')] = 'hyphen';
@@ -23,6 +27,7 @@ codepoint2name[ord('\'')] = 'squot'
codepoint2name[ord(',')] = 'comma'
codepoint2name[ord('.')] = 'dot'
codepoint2name[ord(':')] = 'colon'
+codepoint2name[ord(';')] = 'semicolon'
codepoint2name[ord('/')] = 'slash'
codepoint2name[ord('%')] = 'percent'
codepoint2name[ord('#')] = 'hash'
@@ -36,6 +41,13 @@ codepoint2name[ord('*')] = 'star'
codepoint2name[ord('!')] = 'bang'
codepoint2name[ord('\\')] = 'backslash'
codepoint2name[ord('+')] = 'plus'
+codepoint2name[ord('$')] = 'dollar'
+codepoint2name[ord('=')] = 'equals'
+codepoint2name[ord('?')] = 'question'
+codepoint2name[ord('@')] = 'at_sign'
+codepoint2name[ord('^')] = 'caret'
+codepoint2name[ord('|')] = 'pipe'
+codepoint2name[ord('~')] = 'tilde'
# this must match the equivalent function in qstr.c
def compute_hash(qstr, bytes_hash):
@@ -45,7 +57,17 @@ def compute_hash(qstr, bytes_hash):
# Make sure that valid hash is never zero, zero means "hash not computed"
return (hash & ((1 << (8 * bytes_hash)) - 1)) or 1
-def do_work(infiles):
+def qstr_escape(qst):
+    """Return qst with every character outside [A-Za-z0-9_] spelled out.
+
+    Each such character is replaced by "_<name>_", where <name> is its entry
+    in codepoint2name or, when it has no entry there, its code point
+    formatted as '0x%02x'.
+    """
+    def replacement(match):
+        codepoint = ord(match.group(0))
+        # fall back to the hex value for code points without a symbolic name
+        fallback = '0x%02x' % codepoint
+        return "_" + codepoint2name.get(codepoint, fallback) + '_'
+    return re.sub(r'[^A-Za-z0-9_]', replacement, qst)
+
+def parse_input_headers(infiles):
# read the qstrs in from the input files
qcfgs = {}
qstrs = {}
@@ -71,7 +93,13 @@ def do_work(infiles):
# get the qstr value
qstr = match.group(1)
- ident = re.sub(r'[^A-Za-z0-9_]', lambda s: "_" + codepoint2name[ord(s.group(0))] + "_", qstr)
+
+ # special case to specify control characters
+ if qstr == '\\n':
+ qstr = '\n'
+
+ # work out the corresponding qstr name
+ ident = qstr_escape(qstr)
# don't add duplicates
if ident in qstrs:
@@ -84,10 +112,30 @@ def do_work(infiles):
sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n")
sys.exit(1)
+ return qcfgs, qstrs
+
+def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr):
+    """Render one qstr as the C expression holding its hash, length and data.
+
+    The result has the form (const byte*)"<hash><len>" "<data>".  The hash
+    and the length are written as hex-escaped bytes, least significant byte
+    first; the data goes in a separate string literal.
+
+    cfg_bytes_len  -- number of bytes used to store the length
+    cfg_bytes_hash -- number of bytes used to store the hash
+    qstr           -- the string value to encode
+
+    Raises AssertionError if the length does not fit in cfg_bytes_len bytes.
+    """
+    qhash = compute_hash(qstr, cfg_bytes_hash)
+    if all(32 <= ord(c) <= 126 and c != '\\' and c != '"' for c in qstr):
+        # qstr is all printable ASCII and contains nothing that would need
+        # escaping inside a C string literal, so render it as-is (for easier
+        # debugging)
+        qlen = len(qstr)
+        qdata = qstr
+    else:
+        # qstr contains non-printable codes, a backslash or a double quote, so
+        # render entire thing as hex pairs; a raw '"' would otherwise end the
+        # C string literal early
+        qbytes = qstr.encode('utf8')
+        qlen = len(qbytes)
+        qdata = ''.join(('\\x%02x' % ord_bytes(b)) for b in qbytes)
+    if qlen >= (1 << (8 * cfg_bytes_len)):
+        print('qstr is too long:', qstr)
+        # raise explicitly: "assert False" is removed when Python runs with
+        # -O, which would let a truncated length through silently
+        raise AssertionError('qstr is too long')
+    qlen_str = ('\\x%02x' * cfg_bytes_len) % tuple(((qlen >> (8 * i)) & 0xff) for i in range(cfg_bytes_len))
+    qhash_str = ('\\x%02x' * cfg_bytes_hash) % tuple(((qhash >> (8 * i)) & 0xff) for i in range(cfg_bytes_hash))
+    return '(const byte*)"%s%s" "%s"' % (qhash_str, qlen_str, qdata)
+
+def print_qstr_data(qcfgs, qstrs):
# get config variables
cfg_bytes_len = int(qcfgs['BYTES_IN_LEN'])
cfg_bytes_hash = int(qcfgs['BYTES_IN_HASH'])
- cfg_max_len = 1 << (8 * cfg_bytes_len)
# print out the starter of the generated C header file
print('// This file was automatically generated by makeqstrdata.py')
@@ -98,16 +146,12 @@ def do_work(infiles):
# go through each qstr and print it out
for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
- qhash = compute_hash(qstr, cfg_bytes_hash)
- # Calculate len of str, taking escapes into account
- qlen = len(qstr.replace("\\\\", "-").replace("\\", ""))
- qdata = qstr.replace('"', '\\"')
- if qlen >= cfg_max_len:
- print('qstr is too long:', qstr)
- assert False
- qlen_str = ('\\x%02x' * cfg_bytes_len) % tuple(((qlen >> (8 * i)) & 0xff) for i in range(cfg_bytes_len))
- qhash_str = ('\\x%02x' * cfg_bytes_hash) % tuple(((qhash >> (8 * i)) & 0xff) for i in range(cfg_bytes_hash))
- print('QDEF(MP_QSTR_%s, (const byte*)"%s%s" "%s")' % (ident, qhash_str, qlen_str, qdata))
+ qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
+ print('QDEF(MP_QSTR_%s, %s)' % (ident, qbytes))
+
+def do_work(infiles):
+    """Parse the given input files and print the qstr data built from them."""
+    # parse_input_headers returns the (qcfgs, qstrs) pair print_qstr_data takes
+    print_qstr_data(*parse_input_headers(infiles))
if __name__ == "__main__":
do_work(sys.argv[1:])