diff options
author | Damien George <damien.p.george@gmail.com> | 2019-09-25 15:45:47 +1000 |
---|---|---|
committer | Damien George <damien.p.george@gmail.com> | 2019-10-01 12:26:22 +1000 |
commit | c8c0fd4ca39fbdcf9ca5f2fc99ca4d6b81a28b65 (patch) | |
tree | 0fcbf3a263817be928c0b7db847d94293c79cce5 /py | |
parent | b5ebfadbd615de42c43851f27a062bacd9147996 (diff) |
py: Rework and compress second part of bytecode prelude.
This patch compresses the second part of the bytecode prelude which
contains the source file name, function name, source-line-number mapping
and cell closure information. This part of the prelude now begins with a
single varible length unsigned integer which encodes 2 numbers, being the
byte-size of the following 2 sections in the header: the "source info
section" and the "closure section". After decoding this variable unsigned
integer it's possible to skip over one or both of these sections very
easily.
This scheme saves about 2 bytes for most functions compared to the original
format: one in the case that there are no closure cells, and one because
padding was eliminated.
Diffstat (limited to 'py')
-rw-r--r-- | py/bc.c | 14 | ||||
-rw-r--r-- | py/bc.h | 64 | ||||
-rw-r--r-- | py/emitbc.c | 43 | ||||
-rw-r--r-- | py/emitnative.c | 15 | ||||
-rw-r--r-- | py/objfun.c | 2 | ||||
-rw-r--r-- | py/persistentcode.c | 16 | ||||
-rw-r--r-- | py/profile.c | 17 | ||||
-rw-r--r-- | py/showbc.c | 25 | ||||
-rw-r--r-- | py/vm.c | 11 |
9 files changed, 126 insertions, 81 deletions
@@ -269,19 +269,25 @@ continue2:; } } - // get the ip and skip argument names + // read the size part of the prelude const byte *ip = code_state->ip; + MP_BC_PRELUDE_SIZE_DECODE(ip); // jump over code info (source file and line-number mapping) - ip += mp_decode_uint_value(ip); + ip += n_info; // bytecode prelude: initialise closed over variables - size_t local_num; - while ((local_num = *ip++) != 255) { + for (; n_cell; --n_cell) { + size_t local_num = *ip++; code_state->state[n_state - 1 - local_num] = mp_obj_new_cell(code_state->state[n_state - 1 - local_num]); } + #if !MICROPY_PERSISTENT_CODE + // so bytecode is aligned + ip = MP_ALIGN(ip, sizeof(mp_uint_t)); + #endif + // now that we skipped over the prelude, set the ip for the VM code_state->ip = ip; @@ -42,17 +42,25 @@ // K = n_kwonly_args number of keyword-only arguments this function takes // D = n_def_pos_args number of default positional arguments // -// code_info_size : var uint | code_info_size counts bytes in this chunk -// simple_name : var qstr | -// source_file : var qstr | -// <line number info> | -// <word alignment padding> | only needed if bytecode contains pointers +// prelude size : var uint +// contains two values interleaved bit-wise as: xIIIIIIC repeated +// x = extension another byte follows +// I = n_info number of bytes in source info section +// C = n_cells number of bytes/cells in closure section +// +// source info section: +// simple_name : var qstr +// source_file : var qstr +// <line number info> +// +// closure section: +// local_num0 : byte +// ... : byte +// local_numN : byte N = n_cells-1 +// +// <word alignment padding> only needed if bytecode contains pointers // -// local_num0 : byte | -// ... : byte | -// local_numN : byte | N = num_cells -// 255 : byte | end of list sentinel -// <bytecode> | +// <bytecode> // // // constant table layout: @@ -122,6 +130,41 @@ do { \ size_t n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args; \ MP_BC_PRELUDE_SIG_DECODE_INTO(ip, n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args) +#define MP_BC_PRELUDE_SIZE_ENCODE(I, C, out_byte, out_env) \ +do { \ + /* Encode bit-wise as: xIIIIIIC */ \ + uint8_t z = 0; \ + do { \ + z = (I & 0x3f) << 1 | (C & 1); \ + C >>= 1; \ + I >>= 6; \ + if (C | I) { \ + z |= 0x80; \ + } \ + out_byte(out_env, z); \ + } while (C | I); \ +} while (0) + +#define MP_BC_PRELUDE_SIZE_DECODE_INTO(ip, I, C) \ +do { \ + uint8_t z; \ + C = 0; \ + I = 0; \ + for (unsigned n = 0;; ++n) { \ + z = *(ip)++; \ + /* xIIIIIIC */ \ + C |= (z & 1) << n; \ + I |= ((z & 0x7e) >> 1) << (6 * n); \ + if (!(z & 0x80)) { \ + break; \ + } \ + } \ +} while (0) + +#define MP_BC_PRELUDE_SIZE_DECODE(ip) \ + size_t n_info, n_cell; \ + MP_BC_PRELUDE_SIZE_DECODE_INTO(ip, n_info, n_cell) + // Sentinel value for mp_code_state_t.exc_sp_idx #define MP_CODE_STATE_EXC_SP_IDX_SENTINEL ((uint16_t)-1) @@ -139,7 +182,6 @@ typedef struct _mp_bytecode_prelude_t { qstr qstr_block_name; qstr qstr_source_file; const byte *line_info; - const byte *locals; const byte *opcodes; } mp_bytecode_prelude_t; diff --git a/py/emitbc.c b/py/emitbc.c index 1aa219ca8..34f6362ff 100644 --- a/py/emitbc.c +++ b/py/emitbc.c @@ -64,6 +64,9 @@ struct _emit_t { size_t bytecode_size; byte *code_base; // stores both byte code and code info + size_t n_info; + size_t n_cell; + #if MICROPY_PERSISTENT_CODE uint16_t ct_cur_obj; uint16_t ct_num_obj; @@ -123,10 +126,6 @@ STATIC void emit_write_code_info_byte(emit_t* emit, byte val) { *emit_get_cur_to_write_code_info(emit, 1) = val; } -STATIC void emit_write_code_info_uint(emit_t* emit, mp_uint_t val) { - emit_write_uint(emit, emit_get_cur_to_write_code_info, val); -} - STATIC void emit_write_code_info_qstr(emit_t *emit, qstr qst) { #if MICROPY_PERSISTENT_CODE assert((qst >> 16) == 0); @@ -346,29 +345,17 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) { MP_BC_PRELUDE_SIG_ENCODE(n_state, n_exc_stack, scope, emit_write_code_info_byte, emit); } - // Write size of the rest of the code info. We don't know how big this - // variable uint will be on the MP_PASS_CODE_SIZE pass so we reserve 2 bytes - // for it and hope that is enough! TODO assert this or something. - if (pass == MP_PASS_EMIT) { - emit_write_code_info_uint(emit, emit->code_info_size - emit->code_info_offset); - } else { - emit_get_cur_to_write_code_info(emit, 2); + // Write number of cells and size of the source code info + if (pass >= MP_PASS_CODE_SIZE) { + MP_BC_PRELUDE_SIZE_ENCODE(emit->n_info, emit->n_cell, emit_write_code_info_byte, emit); } + emit->n_info = emit->code_info_offset; + // Write the name and source file of this function. emit_write_code_info_qstr(emit, scope->simple_name); emit_write_code_info_qstr(emit, scope->source_file); - // bytecode prelude: initialise closed over variables - for (int i = 0; i < scope->id_info_len; i++) { - id_info_t *id = &scope->id_info[i]; - if (id->kind == ID_INFO_KIND_CELL) { - assert(id->local_num < 255); - emit_write_bytecode_raw_byte(emit, id->local_num); // write the local which should be converted to a cell - } - } - emit_write_bytecode_raw_byte(emit, 255); // end of list sentinel - #if MICROPY_PERSISTENT_CODE emit->ct_cur_obj = 0; emit->ct_cur_raw_code = 0; @@ -414,6 +401,20 @@ void mp_emit_bc_end_pass(emit_t *emit) { emit_write_code_info_byte(emit, 0); // end of line number info + // Calculate size of source code info section + emit->n_info = emit->code_info_offset - emit->n_info; + + // Emit closure section of prelude + emit->n_cell = 0; + for (size_t i = 0; i < emit->scope->id_info_len; ++i) { + id_info_t *id = &emit->scope->id_info[i]; + if (id->kind == ID_INFO_KIND_CELL) { + assert(id->local_num <= 255); + emit_write_code_info_byte(emit, id->local_num); // write the local which should be converted to a cell + ++emit->n_cell; + } + } + #if MICROPY_PERSISTENT_CODE assert(emit->pass <= MP_PASS_STACK_SIZE || (emit->ct_num_obj == emit->ct_cur_obj)); emit->ct_num_obj = emit->ct_cur_obj; diff --git a/py/emitnative.c b/py/emitnative.c index d0e758f56..2c976606c 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -208,6 +208,7 @@ struct _emit_t { uint16_t code_state_start; uint16_t stack_start; int stack_size; + uint16_t n_cell; uint16_t const_table_cur_obj; uint16_t const_table_num_obj; @@ -587,9 +588,14 @@ STATIC void emit_native_end_pass(emit_t *emit) { size_t n_exc_stack = 0; // exc-stack not needed for native code MP_BC_PRELUDE_SIG_ENCODE(n_state, n_exc_stack, emit->scope, emit_native_write_code_info_byte, emit); - // write code info #if MICROPY_PERSISTENT_CODE - mp_asm_base_data(&emit->as->base, 1, 5); + size_t n_info = 4; + #else + size_t n_info = 1; + #endif + MP_BC_PRELUDE_SIZE_ENCODE(n_info, emit->n_cell, emit_native_write_code_info_byte, emit); + + #if MICROPY_PERSISTENT_CODE mp_asm_base_data(&emit->as->base, 1, emit->scope->simple_name); mp_asm_base_data(&emit->as->base, 1, emit->scope->simple_name >> 8); mp_asm_base_data(&emit->as->base, 1, emit->scope->source_file); @@ -599,14 +605,15 @@ STATIC void emit_native_end_pass(emit_t *emit) { #endif // bytecode prelude: initialise closed over variables + size_t cell_start = mp_asm_base_get_code_pos(&emit->as->base); for (int i = 0; i < emit->scope->id_info_len; i++) { id_info_t *id = &emit->scope->id_info[i]; if (id->kind == ID_INFO_KIND_CELL) { - assert(id->local_num < 255); + assert(id->local_num <= 255); mp_asm_base_data(&emit->as->base, 1, id->local_num); // write the local which should be converted to a cell } } - mp_asm_base_data(&emit->as->base, 1, 255); // end of list sentinel + emit->n_cell = mp_asm_base_get_code_pos(&emit->as->base) - cell_start; } ASM_END_PASS(emit->as); diff --git a/py/objfun.c b/py/objfun.c index 053fe46b7..7051f3476 100644 --- a/py/objfun.c +++ b/py/objfun.c @@ -139,7 +139,7 @@ const mp_obj_type_t mp_type_fun_builtin_var = { /* byte code functions */ qstr mp_obj_code_get_name(const byte *code_info) { - code_info = mp_decode_uint_skip(code_info); // skip code_info_size entry + MP_BC_PRELUDE_SIZE_DECODE(code_info); #if MICROPY_PERSISTENT_CODE return code_info[0] | (code_info[1] << 8); #else diff --git a/py/persistentcode.c b/py/persistentcode.c index 9776acb1e..2109d9379 100644 --- a/py/persistentcode.c +++ b/py/persistentcode.c @@ -167,11 +167,10 @@ STATIC void extract_prelude(const byte **ip, const byte **ip2, bytecode_prelude_ prelude->n_pos_args = n_pos_args; prelude->n_kwonly_args = n_kwonly_args; prelude->n_def_pos_args = n_def_pos_args; + MP_BC_PRELUDE_SIZE_DECODE(*ip); *ip2 = *ip; - prelude->code_info_size = mp_decode_uint(ip2); - *ip += prelude->code_info_size; - while (*(*ip)++ != 255) { - } + *ip += n_info; + *ip += n_cell; } #endif // MICROPY_PERSISTENT_CODE_LOAD || MICROPY_PERSISTENT_CODE_SAVE @@ -286,12 +285,9 @@ STATIC void load_prelude(mp_reader_t *reader, byte **ip, byte **ip2, bytecode_pr byte *ip_read = *ip; read_uint(reader, &ip_read); // read in n_state/etc (is effectively a var-uint) byte *ip_read_save = ip_read; - size_t code_info_size = read_uint(reader, &ip_read); // read in code_info_size - code_info_size -= ip_read - ip_read_save; // subtract bytes taken by code_info_size itself - read_bytes(reader, ip_read, code_info_size); // read remaining code info - ip_read += code_info_size; - while ((*ip_read++ = read_byte(reader)) != 255) { - } + read_uint(reader, &ip_read); // read in n_info/n_cell (is effectively a var-uint) + MP_BC_PRELUDE_SIZE_DECODE(ip_read_save); + read_bytes(reader, ip_read, n_info + n_cell); // read remaining code info // Entire prelude has been read into *ip, now decode and extract values from it extract_prelude((const byte**)ip, (const byte**)ip2, prelude); diff --git a/py/profile.c b/py/profile.c index 230bb7cc2..f16d4d701 100644 --- a/py/profile.c +++ b/py/profile.c @@ -34,7 +34,7 @@ STATIC uint mp_prof_bytecode_lineno(const mp_raw_code_t *rc, size_t bc) { const mp_bytecode_prelude_t *prelude = &rc->prelude; - return mp_bytecode_get_source_line(prelude->line_info, bc + prelude->opcodes - prelude->locals); + return mp_bytecode_get_source_line(prelude->line_info, bc); } void mp_prof_extract_prelude(const byte *bytecode, mp_bytecode_prelude_t *prelude) { @@ -48,22 +48,15 @@ void mp_prof_extract_prelude(const byte *bytecode, mp_bytecode_prelude_t *prelud prelude->n_kwonly_args = n_kwonly_args; prelude->n_def_pos_args = n_def_pos_args; - const byte *code_info = ip; - size_t code_info_size = mp_decode_uint(&ip); + MP_BC_PRELUDE_SIZE_DECODE(ip); + + prelude->line_info = ip + 4; + prelude->opcodes = ip + n_info + n_cell; qstr block_name = ip[0] | (ip[1] << 8); qstr source_file = ip[2] | (ip[3] << 8); - ip += 4; prelude->qstr_block_name = block_name; prelude->qstr_source_file = source_file; - - prelude->line_info = ip; - prelude->locals = code_info + code_info_size; - - ip = prelude->locals; - while (*ip++ != 255) { - } - prelude->opcodes = ip; } /******************************************************************************/ diff --git a/py/showbc.c b/py/showbc.c index 216f35266..d154511dc 100644 --- a/py/showbc.c +++ b/py/showbc.c @@ -85,10 +85,8 @@ void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const m // Decode prelude MP_BC_PRELUDE_SIG_DECODE(ip); - + MP_BC_PRELUDE_SIZE_DECODE(ip); const byte *code_info = ip; - mp_uint_t code_info_size = mp_decode_uint(&code_info); - ip += code_info_size; #if MICROPY_PERSISTENT_CODE qstr block_name = code_info[0] | (code_info[1] << 8); @@ -102,7 +100,9 @@ void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const m qstr_str(source_file), qstr_str(block_name), descr, mp_showbc_code_start, len); // raw bytecode dump - printf("Raw bytecode (code_info_size=" UINT_FMT ", bytecode_size=" UINT_FMT "):\n", code_info_size, len - code_info_size); + size_t prelude_size = ip - mp_showbc_code_start + n_info + n_cell; + printf("Raw bytecode (code_info_size=" UINT_FMT ", bytecode_size=" UINT_FMT "):\n", + prelude_size, len - prelude_size); for (mp_uint_t i = 0; i < len; i++) { if (i > 0 && i % 16 == 0) { printf("\n"); @@ -121,21 +121,18 @@ void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const m printf("(N_STATE %u)\n", (unsigned)n_state); printf("(N_EXC_STACK %u)\n", (unsigned)n_exc_stack); - // for printing line number info - const byte *bytecode_start = ip; + // skip over code_info + ip += n_info; // bytecode prelude: initialise closed over variables - { - uint local_num; - while ((local_num = *ip++) != 255) { - printf("(INIT_CELL %u)\n", local_num); - } - len -= ip - mp_showbc_code_start; + for (size_t i = 0; i < n_cell; ++i) { + uint local_num = *ip++; + printf("(INIT_CELL %u)\n", local_num); } // print out line number info { - mp_int_t bc = bytecode_start - ip; + mp_int_t bc = 0; mp_uint_t source_line = 1; printf(" bc=" INT_FMT " line=" UINT_FMT "\n", bc, source_line); for (const byte* ci = code_info; *ci;) { @@ -153,7 +150,7 @@ void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const m printf(" bc=" INT_FMT " line=" UINT_FMT "\n", bc, source_line); } } - mp_bytecode_print2(ip, len - 0, const_table); + mp_bytecode_print2(ip, len - prelude_size, const_table); } const byte *mp_bytecode_print_str(const byte *ip) { @@ -1441,10 +1441,13 @@ unwind_loop: && *code_state->ip != MP_BC_RAISE_LAST) { const byte *ip = code_state->fun_bc->bytecode; MP_BC_PRELUDE_SIG_DECODE(ip); - size_t bc = code_state->ip - ip; - size_t code_info_size = mp_decode_uint_value(ip); - ip = mp_decode_uint_skip(ip); // skip code_info_size - bc -= code_info_size; + MP_BC_PRELUDE_SIZE_DECODE(ip); + const byte *bytecode_start = ip + n_info + n_cell; + #if !MICROPY_PERSISTENT_CODE + // so bytecode is aligned + bytecode_start = MP_ALIGN(bytecode_start, sizeof(mp_uint_t)); + #endif + size_t bc = code_state->ip - bytecode_start; #if MICROPY_PERSISTENT_CODE qstr block_name = ip[0] | (ip[1] << 8); qstr source_file = ip[2] | (ip[3] << 8); |