diff options
Diffstat (limited to 'py')
| -rw-r--r-- | py/bc.c | 6 | ||||
| -rw-r--r-- | py/bc0.h | 34 | ||||
| -rw-r--r-- | py/compile.c | 12 | ||||
| -rw-r--r-- | py/emit.h | 6 | ||||
| -rw-r--r-- | py/emitbc.c | 139 | ||||
| -rw-r--r-- | py/emitnative.c | 4 | ||||
| -rw-r--r-- | py/showbc.c | 24 | ||||
| -rw-r--r-- | py/vm.c | 26 |
8 files changed, 174 insertions, 77 deletions
@@ -335,7 +335,11 @@ uint mp_opcode_format(const byte *ip, size_t *opcode_size, bool count_var_uint) } } } else if (f == MP_BC_FORMAT_OFFSET) { - ip += 2; + if ((*ip & 0x80) == 0) { + ip += 1; + } else { + ip += 2; + } } ip += extra_byte; } @@ -28,6 +28,18 @@ // MicroPython bytecode opcodes, grouped based on the format of the opcode +// All opcodes are encoded as a byte with an optional argument. Arguments are +// variable-length encoded so they can be as small as possible. The possible +// encodings for arguments are (ip[0] is the opcode): +// +// - unsigned relative bytecode offset: +// - if ip[1] high bit is clear then: arg = ip[1] +// - if ip[1] high bit is set then: arg = ip[1] & 0x7f | ip[2] << 7 +// +// - signed relative bytecode offset: +// - if ip[1] high bit is clear then: arg = ip[1] - 0x40 +// - if ip[1] high bit is set then: arg = (ip[1] & 0x7f | ip[2] << 7) - 0x4000 + #define MP_BC_MASK_FORMAT (0xf0) #define MP_BC_MASK_EXTRA_BYTE (0x9e) @@ -101,17 +113,17 @@ #define MP_BC_ROT_TWO (MP_BC_BASE_BYTE_O + 0x0a) #define MP_BC_ROT_THREE (MP_BC_BASE_BYTE_O + 0x0b) -#define MP_BC_JUMP (MP_BC_BASE_JUMP_E + 0x02) // rel byte code offset, 16-bit signed, in excess -#define MP_BC_POP_JUMP_IF_TRUE (MP_BC_BASE_JUMP_E + 0x03) // rel byte code offset, 16-bit signed, in excess -#define MP_BC_POP_JUMP_IF_FALSE (MP_BC_BASE_JUMP_E + 0x04) // rel byte code offset, 16-bit signed, in excess -#define MP_BC_JUMP_IF_TRUE_OR_POP (MP_BC_BASE_JUMP_E + 0x05) // rel byte code offset, 16-bit signed, in excess -#define MP_BC_JUMP_IF_FALSE_OR_POP (MP_BC_BASE_JUMP_E + 0x06) // rel byte code offset, 16-bit signed, in excess -#define MP_BC_UNWIND_JUMP (MP_BC_BASE_JUMP_E + 0x00) // rel byte code offset, 16-bit signed, in excess; then a byte -#define MP_BC_SETUP_WITH (MP_BC_BASE_JUMP_E + 0x07) // rel byte code offset, 16-bit unsigned -#define MP_BC_SETUP_EXCEPT (MP_BC_BASE_JUMP_E + 0x08) // rel byte code offset, 16-bit unsigned -#define MP_BC_SETUP_FINALLY (MP_BC_BASE_JUMP_E + 0x09) // rel byte code offset, 16-bit unsigned -#define MP_BC_POP_EXCEPT_JUMP (MP_BC_BASE_JUMP_E + 0x0a) // rel byte code offset, 16-bit unsigned -#define MP_BC_FOR_ITER (MP_BC_BASE_JUMP_E + 0x0b) // rel byte code offset, 16-bit unsigned +#define MP_BC_UNWIND_JUMP (MP_BC_BASE_JUMP_E + 0x00) // signed relative bytecode offset; then a byte +#define MP_BC_JUMP (MP_BC_BASE_JUMP_E + 0x02) // signed relative bytecode offset +#define MP_BC_POP_JUMP_IF_TRUE (MP_BC_BASE_JUMP_E + 0x03) // signed relative bytecode offset +#define MP_BC_POP_JUMP_IF_FALSE (MP_BC_BASE_JUMP_E + 0x04) // signed relative bytecode offset +#define MP_BC_JUMP_IF_TRUE_OR_POP (MP_BC_BASE_JUMP_E + 0x05) // signed relative bytecode offset +#define MP_BC_JUMP_IF_FALSE_OR_POP (MP_BC_BASE_JUMP_E + 0x06) // signed relative bytecode offset +#define MP_BC_SETUP_WITH (MP_BC_BASE_JUMP_E + 0x07) // unsigned relative bytecode offset +#define MP_BC_SETUP_EXCEPT (MP_BC_BASE_JUMP_E + 0x08) // unsigned relative bytecode offset +#define MP_BC_SETUP_FINALLY (MP_BC_BASE_JUMP_E + 0x09) // unsigned relative bytecode offset +#define MP_BC_POP_EXCEPT_JUMP (MP_BC_BASE_JUMP_E + 0x0a) // unsigned relative bytecode offset +#define MP_BC_FOR_ITER (MP_BC_BASE_JUMP_E + 0x0b) // unsigned relative bytecode offset #define MP_BC_WITH_CLEANUP (MP_BC_BASE_BYTE_O + 0x0c) #define MP_BC_END_FINALLY (MP_BC_BASE_BYTE_O + 0x0d) #define MP_BC_GET_ITER (MP_BC_BASE_BYTE_O + 0x0e) diff --git a/py/compile.c b/py/compile.c index eb7389ec5..d61dabb9a 100644 --- a/py/compile.c +++ b/py/compile.c @@ -219,7 +219,7 @@ STATIC void mp_emit_common_start_pass(mp_emit_common_t *emit, pass_kind_t pass) } else if (pass > MP_PASS_STACK_SIZE) { emit->ct_cur_obj = emit->ct_cur_obj_base; } - if (pass == MP_PASS_EMIT) { + if (pass == MP_PASS_CODE_SIZE) { if (emit->ct_cur_child == 0) { emit->children = NULL; } else { @@ -3020,7 +3020,7 @@ STATIC void check_for_doc_string(compiler_t *comp, mp_parse_node_t pn) { #endif } -STATIC void compile_scope(compiler_t *comp, scope_t *scope, pass_kind_t pass) { +STATIC bool compile_scope(compiler_t *comp, scope_t *scope, pass_kind_t pass) { comp->pass = pass; comp->scope_cur = scope; comp->next_label = 0; @@ -3187,10 +3187,12 @@ STATIC void compile_scope(compiler_t *comp, scope_t *scope, pass_kind_t pass) { EMIT(return_value); } - EMIT(end_pass); + bool pass_complete = EMIT(end_pass); // make sure we match all the exception levels assert(comp->cur_except_level == 0); + + return pass_complete; } #if MICROPY_EMIT_INLINE_ASM @@ -3600,8 +3602,10 @@ mp_compiled_module_t mp_compile_to_raw_code(mp_parse_tree_t *parse_tree, qstr so } // final pass: emit code + // the emitter can request multiple of these passes if (comp->compile_error == MP_OBJ_NULL) { - compile_scope(comp, s, MP_PASS_EMIT); + while (!compile_scope(comp, s, MP_PASS_EMIT)) { + } } } } @@ -43,7 +43,7 @@ typedef enum { MP_PASS_SCOPE = 1, // work out id's and their kind, and number of labels MP_PASS_STACK_SIZE = 2, // work out maximum stack size MP_PASS_CODE_SIZE = 3, // work out code size and label offsets - MP_PASS_EMIT = 4, // emit code + MP_PASS_EMIT = 4, // emit code (may be run multiple times if the emitter requests it) } pass_kind_t; #define MP_EMIT_STAR_FLAG_SINGLE (0x01) @@ -116,7 +116,7 @@ typedef struct _emit_method_table_t { #endif void (*start_pass)(emit_t *emit, pass_kind_t pass, scope_t *scope); - void (*end_pass)(emit_t *emit); + bool (*end_pass)(emit_t *emit); bool (*last_emit_was_return_value)(emit_t *emit); void (*adjust_stack_size)(emit_t *emit, mp_int_t delta); void (*set_source_line)(emit_t *emit, mp_uint_t line); @@ -233,7 +233,7 @@ void emit_native_xtensa_free(emit_t *emit); void emit_native_xtensawin_free(emit_t *emit); void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope); -void mp_emit_bc_end_pass(emit_t *emit); +bool mp_emit_bc_end_pass(emit_t *emit); bool mp_emit_bc_last_emit_was_return_value(emit_t *emit); void mp_emit_bc_adjust_stack_size(emit_t *emit, mp_int_t delta); void mp_emit_bc_set_source_line(emit_t *emit, mp_uint_t line); diff --git a/py/emitbc.c b/py/emitbc.c index c04701ca7..14a72e276 100644 --- a/py/emitbc.c +++ b/py/emitbc.c @@ -28,6 +28,7 @@ #include <stdint.h> #include <stdio.h> #include <string.h> +#include <unistd.h> #include <assert.h> #include "py/mpstate.h" @@ -55,8 +56,8 @@ struct _emit_t { mp_uint_t last_source_line_offset; mp_uint_t last_source_line; - mp_uint_t max_num_labels; - mp_uint_t *label_offsets; + size_t max_num_labels; + size_t *label_offsets; size_t code_info_offset; size_t code_info_size; @@ -76,11 +77,11 @@ emit_t *emit_bc_new(mp_emit_common_t *emit_common) { void emit_bc_set_max_num_labels(emit_t *emit, mp_uint_t max_num_labels) { emit->max_num_labels = max_num_labels; - emit->label_offsets = m_new(mp_uint_t, emit->max_num_labels); + emit->label_offsets = m_new(size_t, emit->max_num_labels); } void emit_bc_free(emit_t *emit) { - m_del(mp_uint_t, emit->label_offsets, emit->max_num_labels); + m_del(size_t, emit->label_offsets, emit->max_num_labels); m_del_obj(emit_t, emit); } @@ -213,34 +214,55 @@ STATIC void emit_write_bytecode_byte_child(emit_t *emit, int stack_adj, byte b, #endif } -// unsigned labels are relative to ip following this instruction, stored as 16 bits -STATIC void emit_write_bytecode_byte_unsigned_label(emit_t *emit, int stack_adj, byte b1, mp_uint_t label) { +// Emit a jump opcode to a destination label. +// The offset to the label is relative to the ip following this instruction. +// The offset is encoded as either 1 or 2 bytes, depending on how big it is. +// The encoding of this jump opcode can change size from one pass to the next, +// but it must only ever decrease in size on successive passes. +STATIC void emit_write_bytecode_byte_label(emit_t *emit, int stack_adj, byte b1, mp_uint_t label) { mp_emit_bc_adjust_stack_size(emit, stack_adj); - mp_uint_t bytecode_offset; - if (emit->pass < MP_PASS_EMIT) { - bytecode_offset = 0; - } else { - bytecode_offset = emit->label_offsets[label] - emit->bytecode_offset - 3; + + // Determine if the jump offset is signed or unsigned, based on the opcode. + const bool is_signed = b1 <= MP_BC_JUMP_IF_FALSE_OR_POP; + + // Default to a 2-byte encoding (the largest) with an unknown jump offset. + unsigned int jump_encoding_size = 1; + ssize_t bytecode_offset = 0; + + // Compute the jump size and offset only when code size is known. + if (emit->pass >= MP_PASS_CODE_SIZE) { + // The -2 accounts for this jump opcode taking 2 bytes (at least). + bytecode_offset = emit->label_offsets[label] - emit->bytecode_offset - 2; + + // Check if the bytecode_offset is small enough to use a 1-byte encoding. + if ((is_signed && -64 <= bytecode_offset && bytecode_offset <= 63) + || (!is_signed && (size_t)bytecode_offset <= 127)) { + // Use a 1-byte jump offset. + jump_encoding_size = 0; + } + + // Adjust the offset depending on the size of the encoding of the offset. + bytecode_offset -= jump_encoding_size; + + assert(is_signed || bytecode_offset >= 0); } - byte *c = emit_get_cur_to_write_bytecode(emit, 3); - c[0] = b1; - c[1] = bytecode_offset; - c[2] = bytecode_offset >> 8; -} -// signed labels are relative to ip following this instruction, stored as 16 bits, in excess -STATIC void emit_write_bytecode_byte_signed_label(emit_t *emit, int stack_adj, byte b1, mp_uint_t label) { - mp_emit_bc_adjust_stack_size(emit, stack_adj); - int bytecode_offset; - if (emit->pass < MP_PASS_EMIT) { - bytecode_offset = 0; + // Emit the opcode. + byte *c = emit_get_cur_to_write_bytecode(emit, 2 + jump_encoding_size); + c[0] = b1; + if (jump_encoding_size == 0) { + if (is_signed) { + bytecode_offset += 0x40; + } + assert(0 <= bytecode_offset && bytecode_offset <= 0x7f); + c[1] = bytecode_offset; } else { - bytecode_offset = emit->label_offsets[label] - emit->bytecode_offset - 3 + 0x8000; + if (is_signed) { + bytecode_offset += 0x4000; + } + c[1] = 0x80 | (bytecode_offset & 0x7f); + c[2] = bytecode_offset >> 7; } - byte *c = emit_get_cur_to_write_bytecode(emit, 3); - c[0] = b1; - c[1] = bytecode_offset; - c[2] = bytecode_offset >> 8; } void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) { @@ -250,12 +272,6 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) { emit->scope = scope; emit->last_source_line_offset = 0; emit->last_source_line = 1; - #ifndef NDEBUG - // With debugging enabled labels are checked for unique assignment - if (pass < MP_PASS_EMIT && emit->label_offsets != NULL) { - memset(emit->label_offsets, -1, emit->max_num_labels * sizeof(mp_uint_t)); - } - #endif emit->bytecode_offset = 0; emit->code_info_offset = 0; @@ -315,9 +331,9 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) { } } -void mp_emit_bc_end_pass(emit_t *emit) { +bool mp_emit_bc_end_pass(emit_t *emit) { if (emit->pass == MP_PASS_SCOPE) { - return; + return true; } // check stack is back to zero size @@ -344,6 +360,20 @@ void mp_emit_bc_end_pass(emit_t *emit) { emit->code_base = m_new0(byte, emit->code_info_size + emit->bytecode_size); } else if (emit->pass == MP_PASS_EMIT) { + // Code info and/or bytecode can shrink during this pass. + assert(emit->code_info_offset <= emit->code_info_size); + assert(emit->bytecode_offset <= emit->bytecode_size); + + if (emit->code_info_offset != emit->code_info_size + || emit->bytecode_offset != emit->bytecode_size) { + // Code info and/or bytecode changed size in this pass, so request the + // compiler to do another pass with these updated sizes. + emit->code_info_size = emit->code_info_offset; + emit->bytecode_size = emit->bytecode_offset; + return false; + } + + // Bytecode is finalised, assign it to the raw code object. mp_emit_glue_assign_bytecode(emit->scope->raw_code, emit->code_base, #if MICROPY_PERSISTENT_CODE_SAVE || MICROPY_DEBUG_PRINTERS emit->code_info_size + emit->bytecode_size, @@ -354,6 +384,8 @@ void mp_emit_bc_end_pass(emit_t *emit) { #endif emit->scope->scope_flags); } + + return true; } bool mp_emit_bc_last_emit_was_return_value(emit_t *emit) { @@ -396,15 +428,16 @@ void mp_emit_bc_label_assign(emit_t *emit, mp_uint_t l) { if (emit->pass == MP_PASS_SCOPE) { return; } + + // Label offsets can change from one pass to the next, but they must only + // decrease (ie code can only shrink). There will be multiple MP_PASS_EMIT + // stages until the labels no longer change, which is when the code size + // stays constant after a MP_PASS_EMIT. assert(l < emit->max_num_labels); - if (emit->pass < MP_PASS_EMIT) { - // assign label offset - assert(emit->label_offsets[l] == (mp_uint_t)-1); - emit->label_offsets[l] = emit->bytecode_offset; - } else { - // ensure label offset has not changed from MP_PASS_CODE_SIZE to MP_PASS_EMIT - assert(emit->label_offsets[l] == emit->bytecode_offset); - } + assert(emit->pass == MP_PASS_STACK_SIZE || emit->bytecode_offset <= emit->label_offsets[l]); + + // Assign label offset. + emit->label_offsets[l] = emit->bytecode_offset; } void mp_emit_bc_import(emit_t *emit, qstr qst, int kind) { @@ -552,22 +585,22 @@ void mp_emit_bc_rot_three(emit_t *emit) { } void mp_emit_bc_jump(emit_t *emit, mp_uint_t label) { - emit_write_bytecode_byte_signed_label(emit, 0, MP_BC_JUMP, label); + emit_write_bytecode_byte_label(emit, 0, MP_BC_JUMP, label); } void mp_emit_bc_pop_jump_if(emit_t *emit, bool cond, mp_uint_t label) { if (cond) { - emit_write_bytecode_byte_signed_label(emit, -1, MP_BC_POP_JUMP_IF_TRUE, label); + emit_write_bytecode_byte_label(emit, -1, MP_BC_POP_JUMP_IF_TRUE, label); } else { - emit_write_bytecode_byte_signed_label(emit, -1, MP_BC_POP_JUMP_IF_FALSE, label); + emit_write_bytecode_byte_label(emit, -1, MP_BC_POP_JUMP_IF_FALSE, label); } } void mp_emit_bc_jump_if_or_pop(emit_t *emit, bool cond, mp_uint_t label) { if (cond) { - emit_write_bytecode_byte_signed_label(emit, -1, MP_BC_JUMP_IF_TRUE_OR_POP, label); + emit_write_bytecode_byte_label(emit, -1, MP_BC_JUMP_IF_TRUE_OR_POP, label); } else { - emit_write_bytecode_byte_signed_label(emit, -1, MP_BC_JUMP_IF_FALSE_OR_POP, label); + emit_write_bytecode_byte_label(emit, -1, MP_BC_JUMP_IF_FALSE_OR_POP, label); } } @@ -581,9 +614,9 @@ void mp_emit_bc_unwind_jump(emit_t *emit, mp_uint_t label, mp_uint_t except_dept emit_write_bytecode_raw_byte(emit, MP_BC_POP_TOP); } } - emit_write_bytecode_byte_signed_label(emit, 0, MP_BC_JUMP, label & ~MP_EMIT_BREAK_FROM_FOR); + emit_write_bytecode_byte_label(emit, 0, MP_BC_JUMP, label & ~MP_EMIT_BREAK_FROM_FOR); } else { - emit_write_bytecode_byte_signed_label(emit, 0, MP_BC_UNWIND_JUMP, label & ~MP_EMIT_BREAK_FROM_FOR); + emit_write_bytecode_byte_label(emit, 0, MP_BC_UNWIND_JUMP, label & ~MP_EMIT_BREAK_FROM_FOR); emit_write_bytecode_raw_byte(emit, ((label & MP_EMIT_BREAK_FROM_FOR) ? 0x80 : 0) | except_depth); } } @@ -595,7 +628,7 @@ void mp_emit_bc_setup_block(emit_t *emit, mp_uint_t label, int kind) { // The SETUP_WITH opcode pops ctx_mgr from the top of the stack // and then pushes 3 entries: __exit__, ctx_mgr, as_value. int stack_adj = kind == MP_EMIT_SETUP_BLOCK_WITH ? 2 : 0; - emit_write_bytecode_byte_unsigned_label(emit, stack_adj, MP_BC_SETUP_WITH + kind, label); + emit_write_bytecode_byte_label(emit, stack_adj, MP_BC_SETUP_WITH + kind, label); } void mp_emit_bc_with_cleanup(emit_t *emit, mp_uint_t label) { @@ -617,7 +650,7 @@ void mp_emit_bc_get_iter(emit_t *emit, bool use_stack) { } void mp_emit_bc_for_iter(emit_t *emit, mp_uint_t label) { - emit_write_bytecode_byte_unsigned_label(emit, 1, MP_BC_FOR_ITER, label); + emit_write_bytecode_byte_label(emit, 1, MP_BC_FOR_ITER, label); } void mp_emit_bc_for_iter_end(emit_t *emit) { @@ -626,7 +659,7 @@ void mp_emit_bc_for_iter_end(emit_t *emit) { void mp_emit_bc_pop_except_jump(emit_t *emit, mp_uint_t label, bool within_exc_handler) { (void)within_exc_handler; - emit_write_bytecode_byte_unsigned_label(emit, 0, MP_BC_POP_EXCEPT_JUMP, label); + emit_write_bytecode_byte_label(emit, 0, MP_BC_POP_EXCEPT_JUMP, label); } void mp_emit_bc_unary_op(emit_t *emit, mp_unary_op_t op) { diff --git a/py/emitnative.c b/py/emitnative.c index ca34e89f6..bddd66142 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -652,7 +652,7 @@ static inline void emit_native_write_code_info_qstr(emit_t *emit, qstr qst) { mp_encode_uint(&emit->as->base, mp_asm_base_get_cur_to_write_bytes, mp_emit_common_use_qstr(emit->emit_common, qst)); } -STATIC void emit_native_end_pass(emit_t *emit) { +STATIC bool emit_native_end_pass(emit_t *emit) { emit_native_global_exc_exit(emit); if (!emit->do_viper_types) { @@ -736,6 +736,8 @@ STATIC void emit_native_end_pass(emit_t *emit) { #endif emit->scope->scope_flags, 0, 0); } + + return true; } STATIC bool emit_native_last_emit_was_return_value(emit_t *emit) { diff --git a/py/showbc.c b/py/showbc.c index 8430739d8..178fa451a 100644 --- a/py/showbc.c +++ b/py/showbc.c @@ -38,8 +38,28 @@ unum = (unum << 7) + (*ip & 0x7f); \ } while ((*ip++ & 0x80) != 0); \ } -#define DECODE_ULABEL do { unum = (ip[0] | (ip[1] << 8)); ip += 2; } while (0) -#define DECODE_SLABEL do { unum = (ip[0] | (ip[1] << 8)) - 0x8000; ip += 2; } while (0) + +#define DECODE_ULABEL \ + do { \ + if (ip[0] & 0x80) { \ + unum = ((ip[0] & 0x7f) | (ip[1] << 7)); \ + ip += 2; \ + } else { \ + unum = ip[0]; \ + ip += 1; \ + } \ + } while (0) + +#define DECODE_SLABEL \ + do { \ + if (ip[0] & 0x80) { \ + unum = ((ip[0] & 0x7f) | (ip[1] << 7)) - 0x4000; \ + ip += 2; \ + } else { \ + unum = ip[0] - 0x40; \ + ip += 1; \ + } \ + } while (0) #if MICROPY_EMIT_BYTECODE_USES_QSTR_TABLE @@ -61,8 +61,30 @@ do { \ unum = (unum << 7) + (*ip & 0x7f); \ } while ((*ip++ & 0x80) != 0) -#define DECODE_ULABEL size_t ulab = (ip[0] | (ip[1] << 8)); ip += 2 -#define DECODE_SLABEL size_t slab = (ip[0] | (ip[1] << 8)) - 0x8000; ip += 2 + +#define DECODE_ULABEL \ + size_t ulab; \ + do { \ + if (ip[0] & 0x80) { \ + ulab = ((ip[0] & 0x7f) | (ip[1] << 7)); \ + ip += 2; \ + } else { \ + ulab = ip[0]; \ + ip += 1; \ + } \ + } while (0) + +#define DECODE_SLABEL \ + size_t slab; \ + do { \ + if (ip[0] & 0x80) { \ + slab = ((ip[0] & 0x7f) | (ip[1] << 7)) - 0x4000; \ + ip += 2; \ + } else { \ + slab = ip[0] - 0x40; \ + ip += 1; \ + } \ + } while (0) #if MICROPY_EMIT_BYTECODE_USES_QSTR_TABLE |
