diff options
| author | Alessandro Gatti <a.gatti@frob.it> | 2025-11-04 09:46:16 +0100 |
|---|---|---|
| committer | Damien George <damien@micropython.org> | 2025-12-19 14:26:01 +1100 |
| commit | f0895f0ea0d3daea42161bc29c3ff8abeb101e57 (patch) | |
| tree | 954b84054f4a378367da2ad9fc700770e0ca6752 | |
| parent | e67d4a2a777f09215cdda928ec259696b86fcb81 (diff) | |
py/emitnative: Optimise register clearing.
This commit introduces a new generic ASM API function to clear a
register (i.e. clearing all of the register's bits).
The native emitter used to perform a XOR operation to clear a given
register, but different platforms have more optimised methods to achieve
the same result, taking up less space - either for the generated code or
for the code generator itself.
Arm, RV32, X86, and X64 already had an optimised generator and
generated optimised code. The code generator when built for Thumb takes
less space generating a constant immediate move rather than a XOR
operation, even though both operations would distill down to a single
narrow opcode. On Xtensa the situation is almost the same as Thumb,
with the exception that a constant immediate move would take one byte
less than a XOR operation.
Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
| -rw-r--r-- | py/asmarm.h | 2 | ||||
| -rw-r--r-- | py/asmrv32.h | 2 | ||||
| -rw-r--r-- | py/asmthumb.h | 2 | ||||
| -rw-r--r-- | py/asmx64.h | 2 | ||||
| -rw-r--r-- | py/asmx86.h | 2 | ||||
| -rw-r--r-- | py/asmxtensa.h | 2 | ||||
| -rw-r--r-- | py/emitnative.c | 22 | ||||
| -rw-r--r-- | py/emitndebug.c | 3 |
8 files changed, 22 insertions, 15 deletions
diff --git a/py/asmarm.h b/py/asmarm.h index 405457d44..5ae952ee8 100644 --- a/py/asmarm.h +++ b/py/asmarm.h @@ -230,6 +230,8 @@ void asm_arm_bx_reg(asm_arm_t *as, uint reg_src); #define ASM_STORE16_REG_REG_REG(as, reg_val, reg_base, reg_index) asm_arm_strh_reg_reg_reg((as), (reg_val), (reg_base), (reg_index)) #define ASM_STORE32_REG_REG_REG(as, reg_val, reg_base, reg_index) asm_arm_str_reg_reg_reg((as), (reg_val), (reg_base), (reg_index)) +#define ASM_CLR_REG(as, reg_dest) asm_arm_eor_reg_reg_reg((as), (reg_dest), (reg_dest), (reg_dest)) + #endif // GENERIC_ASM_API #endif // MICROPY_INCLUDED_PY_ASMARM_H diff --git a/py/asmrv32.h b/py/asmrv32.h index 6f709daa1..1100d0980 100644 --- a/py/asmrv32.h +++ b/py/asmrv32.h @@ -804,7 +804,7 @@ void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t source, mp_ #define ASM_STORE32_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_store_reg_reg_offset(state, rd, rs, offset, 2) #define ASM_SUB_REG_REG(state, rd, rs) asm_rv32_opcode_sub(state, rd, rd, rs) #define ASM_XOR_REG_REG(state, rd, rs) asm_rv32_emit_optimised_xor(state, rd, rs) -#define ASM_CLR_REG(state, rd) +#define ASM_CLR_REG(state, rd) asm_rv32_emit_optimised_xor(state, rd, rd) #define ASM_LOAD8_REG_REG_REG(state, rd, rs1, rs2) asm_rv32_emit_load_reg_reg_reg(state, rd, rs1, rs2, 0) #define ASM_LOAD16_REG_REG_REG(state, rd, rs1, rs2) asm_rv32_emit_load_reg_reg_reg(state, rd, rs1, rs2, 1) #define ASM_LOAD32_REG_REG_REG(state, rd, rs1, rs2) asm_rv32_emit_load_reg_reg_reg(state, rd, rs1, rs2, 2) diff --git a/py/asmthumb.h b/py/asmthumb.h index 5edf6573e..88f4e399b 100644 --- a/py/asmthumb.h +++ b/py/asmthumb.h @@ -485,6 +485,8 @@ void asm_thumb_b_rel12(asm_thumb_t *as, int rel); asm_thumb_str_rlo_rlo_rlo((as), (reg_val), (reg_base), (reg_index)); \ } while (0) +#define ASM_CLR_REG(as, reg_dest) asm_thumb_mov_rlo_i8((as), (reg_dest), 0) + #endif // GENERIC_ASM_API #endif // MICROPY_INCLUDED_PY_ASMTHUMB_H diff --git a/py/asmx64.h b/py/asmx64.h index 
d80c5dcc1..efc3027b1 100644 --- a/py/asmx64.h +++ b/py/asmx64.h @@ -221,6 +221,8 @@ void asm_x64_call_ind(asm_x64_t *as, size_t fun_id, int temp_r32); #define ASM_STORE32_REG_REG(as, reg_src, reg_base) ASM_STORE32_REG_REG_OFFSET((as), (reg_src), (reg_base), 0) #define ASM_STORE32_REG_REG_OFFSET(as, reg_src, reg_base, dword_offset) asm_x64_mov_r32_to_mem32((as), (reg_src), (reg_base), 4 * (dword_offset)) +#define ASM_CLR_REG(as, reg_dest) asm_x64_xor_r64_r64((as), (reg_dest), (reg_dest)) + #endif // GENERIC_ASM_API #endif // MICROPY_INCLUDED_PY_ASMX64_H diff --git a/py/asmx86.h b/py/asmx86.h index d2e078ad5..80a67794d 100644 --- a/py/asmx86.h +++ b/py/asmx86.h @@ -216,6 +216,8 @@ void asm_x86_call_ind(asm_x86_t *as, size_t fun_id, mp_uint_t n_args, int temp_r #define ASM_STORE32_REG_REG(as, reg_src, reg_base) ASM_STORE32_REG_REG_OFFSET((as), (reg_src), (reg_base), 0) #define ASM_STORE32_REG_REG_OFFSET(as, reg_src, reg_base, dword_offset) asm_x86_mov_r32_to_mem32((as), (reg_src), (reg_base), 4 * (dword_offset)) +#define ASM_CLR_REG(as, reg_dest) asm_x86_xor_r32_r32((as), (reg_dest), (reg_dest)) + #endif // GENERIC_ASM_API #endif // MICROPY_INCLUDED_PY_ASMX86_H diff --git a/py/asmxtensa.h b/py/asmxtensa.h index 559b3cacd..15f8b4d92 100644 --- a/py/asmxtensa.h +++ b/py/asmxtensa.h @@ -464,6 +464,8 @@ void asm_xtensa_l32r(asm_xtensa_t *as, mp_uint_t reg, mp_uint_t label); asm_xtensa_op_s32i_n((as), (reg_val), (reg_base), 0); \ } while (0) +#define ASM_CLR_REG(as, reg_dest) asm_xtensa_op_movi_n((as), (reg_dest), 0) + #endif // GENERIC_ASM_API #endif // MICROPY_INCLUDED_PY_ASMXTENSA_H diff --git a/py/emitnative.c b/py/emitnative.c index a33ec01ec..6cf01dcab 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -282,17 +282,13 @@ struct _emit_t { ASM_T *as; }; -#ifndef REG_ZERO -#define REG_ZERO REG_TEMP0 -#define ASM_CLR_REG(state, rd) ASM_XOR_REG_REG(state, rd, rd) -#endif - -#if N_RV32 +#ifdef REG_ZERO #define ASM_MOV_LOCAL_MP_OBJ_NULL(as, local_num, reg_temp) \ 
ASM_MOV_LOCAL_REG(as, local_num, REG_ZERO) #else +#define REG_ZERO REG_TEMP0 #define ASM_MOV_LOCAL_MP_OBJ_NULL(as, local_num, reg_temp) \ - ASM_MOV_REG_IMM(as, reg_temp, (mp_uint_t)MP_OBJ_NULL); \ + ASM_CLR_REG(as, reg_temp); \ ASM_MOV_LOCAL_REG(as, local_num, reg_temp) #endif @@ -1145,7 +1141,7 @@ static void emit_native_leave_exc_stack(emit_t *emit, bool start_of_handler) { // Optimisation: PC is already cleared by global exc handler return; } - ASM_XOR_REG_REG(emit->as, REG_RET, REG_RET); + ASM_CLR_REG(emit->as, REG_RET); } else { // Found new active handler, get its PC ASM_MOV_REG_PCREL(emit->as, REG_RET, e->label); @@ -1242,8 +1238,7 @@ static void emit_native_global_exc_entry(emit_t *emit) { ASM_JUMP_IF_REG_ZERO(emit->as, REG_RET, start_label, true); } else { // Clear the unwind state - ASM_CLR_REG(emit->as, REG_ZERO); - ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_UNWIND(emit), REG_ZERO); + ASM_MOV_LOCAL_MP_OBJ_NULL(emit->as, LOCAL_IDX_EXC_HANDLER_UNWIND(emit), REG_ZERO); // clear nlr.ret_val, because it's passed to mp_native_raise regardless // of whether there was an exception or not @@ -1263,8 +1258,7 @@ static void emit_native_global_exc_entry(emit_t *emit) { ASM_JUMP_IF_REG_NONZERO(emit->as, REG_RET, global_except_label, true); // Clear PC of current code block, and jump there to resume execution - ASM_CLR_REG(emit->as, REG_ZERO); - ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_PC(emit), REG_ZERO); + ASM_MOV_LOCAL_MP_OBJ_NULL(emit->as, LOCAL_IDX_EXC_HANDLER_PC(emit), REG_ZERO); ASM_JUMP_REG(emit->as, REG_LOCAL_1); // Global exception handler: check for valid exception handler @@ -1945,7 +1939,7 @@ static void emit_native_delete_attr(emit_t *emit, qstr qst) { vtype_kind_t vtype_base; emit_pre_pop_reg(emit, &vtype_base, REG_ARG_1); // arg1 = base assert(vtype_base == VTYPE_PYOBJ); - ASM_XOR_REG_REG(emit->as, REG_ARG_3, REG_ARG_3); // arg3 = value (null for delete) + ASM_CLR_REG(emit->as, REG_ARG_3); // arg3 = value (null for delete) 
emit_call_with_qstr_arg(emit, MP_F_STORE_ATTR, qst, REG_ARG_2); // arg2 = attribute name emit_post(emit); } @@ -2091,7 +2085,7 @@ static void emit_native_unwind_jump(emit_t *emit, mp_uint_t label, mp_uint_t exc // No finally, handle the jump ourselves // First, restore the exception handler address for the jump if (e < emit->exc_stack) { - ASM_XOR_REG_REG(emit->as, REG_RET, REG_RET); + ASM_CLR_REG(emit->as, REG_RET); } else { ASM_MOV_REG_PCREL(emit->as, REG_RET, e->label); } diff --git a/py/emitndebug.c b/py/emitndebug.c index e49c5cdbf..2144d14e6 100644 --- a/py/emitndebug.c +++ b/py/emitndebug.c @@ -271,6 +271,9 @@ static void asm_debug_setcc_reg_reg_reg(asm_debug_t *as, int op, int reg1, int r #define ASM_STORE32_REG_REG(as, reg_src, reg_base) \ asm_debug_reg_reg(as, "store32", reg_src, reg_base) +#define ASM_CLR_REG(as, reg_dest) \ + asm_debug_reg(as, "clr", reg_dest) + // Word indices of REG_LOCAL_x in nlr_buf_t #define NLR_BUF_IDX_LOCAL_1 (5) // rbx |
