| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
 | #!/usr/bin/env python3
"""
This is a middle-processor for MicroPython source files.  It takes the output
of the C preprocessor, has the option to change it, then feeds this into the
C compiler.
It currently has the ability to reorder static hash tables so they are actually
hashed, resulting in faster lookup times at runtime.
To use, configure the Python variables below, and add the following line to the
Makefile:
CFLAGS += -no-integrated-cpp -B$(shell pwd)/../tools
"""
import sys
import os
import re
################################################################################
# these are the configuration variables
# TODO somehow make them externally configurable
# this is the path to the true C compiler
cc1_path = '/usr/lib/gcc/x86_64-unknown-linux-gnu/5.3.0/cc1'
#cc1_path = '/usr/lib/gcc/arm-none-eabi/5.3.0/cc1'
# this must be the same as MICROPY_QSTR_BYTES_IN_HASH
bytes_in_qstr_hash = 2
# this must be 1 or more (can be a decimal)
# larger uses more code size but yields faster lookups
table_size_mult = 1
# these control output during processing
print_stats = True
print_debug = False
# end configuration variables
################################################################################
# precompile regexs
re_preproc_line = re.compile(r'# [0-9]+ ')
re_map_entry = re.compile(r'\{.+?\(MP_QSTR_([A-Za-z0-9_]+)\).+\},')
re_mp_obj_dict_t = re.compile(r'(?P<head>(static )?const mp_obj_dict_t (?P<id>[a-z0-9_]+) = \{ \.base = \{&mp_type_dict\}, \.map = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P<tail>, \.used = .+ };)$')
re_mp_map_t = re.compile(r'(?P<head>(static )?const mp_map_t (?P<id>[a-z0-9_]+) = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P<tail>, \.used = .+ };)$')
re_mp_rom_map_elem_t = re.compile(r'static const mp_rom_map_elem_t [a-z_0-9]+\[\] = {$')
# this must match the equivalent function in qstr.c
def compute_hash(qstr):
    hash = 5381
    for char in qstr:
        hash = (hash * 33) ^ ord(char)
    # Make sure that valid hash is never zero, zero means "hash not computed"
    return (hash & ((1 << (8 * bytes_in_qstr_hash)) - 1)) or 1
# this algo must match the equivalent in map.c
def hash_insert(map, key, value):
    hash = compute_hash(key)
    pos = hash % len(map)
    start_pos = pos
    if print_debug:
        print('  insert %s: start at %u/%u -- ' % (key, pos, len(map)), end='')
    while True:
        if map[pos] is None:
            # found empty slot, so key is not in table
            if print_debug:
                print('put at %u' % pos)
            map[pos] = (key, value)
            return
        else:
            # not yet found, keep searching
            if map[pos][0] == key:
                raise AssertionError("duplicate key '%s'" % (key,))
            pos = (pos + 1) % len(map)
            assert pos != start_pos
def hash_find(map, key):
    hash = compute_hash(key)
    pos = hash % len(map)
    start_pos = pos
    attempts = 0
    while True:
        attempts += 1
        if map[pos] is None:
            return attempts, None
        elif map[pos][0] == key:
            return attempts, map[pos][1]
        else:
            pos = (pos + 1) % len(map)
            if pos == start_pos:
                return attempts, None
def process_map_table(file, line, output):
    output.append(line)
    # consume all lines that are entries of the table and concat them
    # (we do it this way because there can be multiple entries on one line)
    table_contents = []
    while True:
        line = file.readline()
        if len(line) == 0:
            print('unexpected end of input')
            sys.exit(1)
        line = line.strip()
        if len(line) == 0:
            # empty line
            continue
        if re_preproc_line.match(line):
            # preprocessor line number comment
            continue
        if line == '};':
            # end of table (we assume it appears on a single line)
            break
        table_contents.append(line)
    # make combined string of entries
    entries_str = ''.join(table_contents)
    # split into individual entries
    entries = []
    while entries_str:
        # look for single entry, by matching nested braces
        match = None
        if entries_str[0] == '{':
            nested_braces = 0
            for i in range(len(entries_str)):
                if entries_str[i] == '{':
                    nested_braces += 1
                elif entries_str[i] == '}':
                    nested_braces -= 1
                    if nested_braces == 0:
                        match = re_map_entry.match(entries_str[:i + 2])
                        break
        if not match:
            print('unknown line in table:', entries_str)
            sys.exit(1)
        # extract single entry
        line = match.group(0)
        qstr = match.group(1)
        entries_str = entries_str[len(line):].lstrip()
        # add the qstr and the whole line to list of all entries
        entries.append((qstr, line))
    # sort entries so hash table construction is deterministic
    entries.sort()
    # create hash table
    map = [None] * int(len(entries) * table_size_mult)
    for qstr, line in entries:
        # We assume that qstr does not have any escape sequences in it.
        # This is reasonably safe, since keys in a module or class dict
        # should be standard identifiers.
        # TODO verify this and raise an error if escape sequence found
        hash_insert(map, qstr, line)
    # compute statistics
    total_attempts = 0
    for qstr, _ in entries:
        attempts, line = hash_find(map, qstr)
        assert line is not None
        if print_debug:
            print('  %s lookup took %u attempts' % (qstr, attempts))
        total_attempts += attempts
    if len(entries):
        stats = len(map), len(entries) / len(map), total_attempts / len(entries)
    else:
        stats = 0, 0, 0
    if print_debug:
        print('  table stats: size=%d, load=%.2f, avg_lookups=%.1f' % stats)
    # output hash table
    for row in map:
        if row is None:
            output.append('{ 0, 0 },\n')
        else:
            output.append(row[1] + '\n')
    output.append('};\n')
    # skip to next non-blank line
    while True:
        line = file.readline()
        if len(line) == 0:
            print('unexpected end of input')
            sys.exit(1)
        line = line.strip()
        if len(line) == 0:
            continue
        break
    # transform the is_ordered param from 1 to 0
    match = re_mp_obj_dict_t.match(line)
    if match is None:
        match = re_mp_map_t.match(line)
    if match is None:
        print('expecting mp_obj_dict_t or mp_map_t definition')
        print(output[0])
        print(line)
        sys.exit(1)
    line = match.group('head') + '0' + match.group('tail') + '\n'
    output.append(line)
    return (match.group('id'),) + stats
def process_file(filename):
    output = []
    file_changed = False
    with open(filename, 'rt') as f:
        while True:
            line = f.readline()
            if not line:
                break
            if re_mp_rom_map_elem_t.match(line):
                file_changed = True
                stats = process_map_table(f, line, output)
                if print_stats:
                    print('  [%s: size=%d, load=%.2f, avg_lookups=%.1f]' % stats)
            else:
                output.append(line)
    if file_changed:
        if print_debug:
            print('  modifying static maps in', output[0].strip())
        with open(filename, 'wt') as f:
            for line in output:
                f.write(line)
def main():
    # run actual C compiler
    # need to quote args that have special characters in them
    def quote(s):
        if s.find('<') != -1 or s.find('>') != -1:
            return "'" + s + "'"
        else:
            return s
    ret = os.system(cc1_path + ' ' + ' '.join(quote(s) for s in sys.argv[1:]))
    if ret != 0:
        ret = (ret & 0x7f) or 127 # make it in range 0-127, but non-zero
        sys.exit(ret)
    if sys.argv[1] == '-E':
        # CPP has been run, now do our processing stage
        for i, arg in enumerate(sys.argv):
            if arg == '-o':
                return process_file(sys.argv[i + 1])
        print('%s: could not find "-o" option' % (sys.argv[0],))
        sys.exit(1)
    elif sys.argv[1] == '-fpreprocessed':
        # compiler has been run, nothing more to do
        return
    else:
        # unknown processing stage
        print('%s: unknown first option "%s"' % (sys.argv[0], sys.argv[1]))
        sys.exit(1)
if __name__ == '__main__':
    main()
 |