| author | Alessandro Gatti <a.gatti@frob.it> | 2024-07-31 16:56:42 +0200 |
|---|---|---|
| committer | Damien George <damien@micropython.org> | 2024-08-19 15:53:50 +1000 |
| commit | 326e1149eccc2521527e62be66f1a0ae9f6600d0 | |
| tree | 415840e6d918ff47395a6c4d55f495716e008aa5 | |
| parent | 6367099f8365c22b42474818ce5fdb9b35591ade | |
py/asmrv32: Fix short/long jumps scheduling.
The RV32 emitter always scheduled short jumps even outside the emit
compiler pass. Running the full test suite through the native emitter
instead of just the tests that depend on the emitter at runtime (namely the
`micropython/native_*` and `micropython/viper_*` tests) uncovered more
places where the invalid behaviour was still present.
Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
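The core of the fix is a guard that only allows a short (compressed or near) branch when the target label is already resolved and lies behind the current emission point, so the generated code keeps the same size between the compute and emit passes. Below is a minimal standalone sketch of that idea; the struct, the `UNRESOLVED` sentinel and the function names are made up for illustration and are not MicroPython's actual API.

```c
/*
 * Illustration only, not MicroPython source: shows why only a resolved,
 * backwards label can safely select a short branch on every compiler pass.
 */
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define UNRESOLVED ((size_t)-1)

typedef struct {
    size_t label_offsets[8]; // per-label code offset, or UNRESOLVED
    size_t code_offset;      // current emission position
} emitter_state_t;

static bool short_branch_ok(const emitter_state_t *state, size_t label, ptrdiff_t *displacement) {
    assert(displacement != NULL);
    size_t target = state->label_offsets[label];
    *displacement = (ptrdiff_t)(target - state->code_offset);
    // Unresolved (forward) labels force the long form so the code size
    // stays identical between the compute and emit passes.
    return (target != UNRESOLVED) && (*displacement < 0);
}

int main(void) {
    emitter_state_t state = { .code_offset = 64 };
    state.label_offsets[0] = 16;         // backwards jump, already resolved
    state.label_offsets[1] = UNRESOLVED; // forwards jump, not resolved yet

    ptrdiff_t d = 0;
    bool ok = short_branch_ok(&state, 0, &d);
    printf("label 0: short branch ok = %d (displacement %td)\n", ok, d);
    ok = short_branch_ok(&state, 1, &d);
    printf("label 1: short branch ok = %d\n", ok);
    return 0;
}
```

In the diff below, the equivalent check is the new `calculate_displacement_for_label()` helper, and each `asm_rv32_emit_jump*` routine now consults its `can_emit_short_jump` result before picking a compressed or short encoding.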
| -rw-r--r-- | py/asmrv32.c | 69 |
| -rw-r--r-- | py/asmrv32.h | 4 |

2 files changed, 39 insertions, 34 deletions
diff --git a/py/asmrv32.c b/py/asmrv32.c
index 7dd58beca..a68401cf3 100644
--- a/py/asmrv32.c
+++ b/py/asmrv32.c
@@ -132,9 +132,9 @@ static void split_immediate(mp_int_t immediate, mp_uint_t *upper, mp_uint_t *low
 
 static void load_upper_immediate(asm_rv32_t *state, mp_uint_t rd, mp_uint_t immediate) {
     // if immediate fits in 17 bits and is ≠ 0:
-    //   c.lui rd, HI(immediate)
+    //   c.lui rd, HI(immediate)
     // else:
-    //   lui rd, HI(immediate)
+    //   lui rd, HI(immediate)
     if (FIT_SIGNED(immediate, 17) && ((immediate >> 12) != 0)) {
         asm_rv32_opcode_clui(state, rd, immediate);
     } else {
@@ -270,6 +270,14 @@ static void emit_function_epilogue(asm_rv32_t *state, mp_uint_t registers) {
     state->saved_registers_mask = old_saved_registers_mask;
 }
 
+static bool calculate_displacement_for_label(asm_rv32_t *state, mp_uint_t label, ptrdiff_t *displacement) {
+    assert(displacement != NULL && "Displacement pointer is NULL");
+
+    mp_uint_t label_offset = state->base.label_offsets[label];
+    *displacement = (ptrdiff_t)(label_offset - state->base.code_offset);
+    return (label_offset != (mp_uint_t)-1) && (*displacement < 0);
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 
 void asm_rv32_entry(asm_rv32_t *state, mp_uint_t locals) {
@@ -326,10 +334,10 @@ void asm_rv32_emit_call_ind(asm_rv32_t *state, mp_uint_t index) {
 }
 
 void asm_rv32_emit_jump_if_reg_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t label) {
-    ptrdiff_t displacement = (ptrdiff_t)(state->base.label_offsets[label] - state->base.code_offset);
+    ptrdiff_t displacement = 0;
+    bool can_emit_short_jump = calculate_displacement_for_label(state, label, &displacement);
 
-    // The least significant bit is ignored anyway.
-    if (FIT_SIGNED(displacement, 13)) {
+    if (can_emit_short_jump && FIT_SIGNED(displacement, 13)) {
         // beq rs1, rs2, displacement
         asm_rv32_opcode_beq(state, rs1, rs2, displacement);
         return;
@@ -354,31 +362,24 @@ void asm_rv32_emit_jump_if_reg_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs
 }
 
 void asm_rv32_emit_jump_if_reg_nonzero(asm_rv32_t *state, mp_uint_t rs, mp_uint_t label) {
-    ptrdiff_t displacement = (ptrdiff_t)(state->base.label_offsets[label] - state->base.code_offset);
+    ptrdiff_t displacement = 0;
+    bool can_emit_short_jump = calculate_displacement_for_label(state, label, &displacement);
 
-    if (FIT_SIGNED(displacement, 9) && IS_IN_C_REGISTER_WINDOW(rs)) {
+    if (can_emit_short_jump && FIT_SIGNED(displacement, 8) && IS_IN_C_REGISTER_WINDOW(rs)) {
         // c.bnez rs', displacement
         asm_rv32_opcode_cbnez(state, MAP_IN_C_REGISTER_WINDOW(rs), displacement);
         return;
     }
 
-    // The least significant bit is ignored anyway.
-    if (FIT_SIGNED(displacement, 13)) {
+    if (can_emit_short_jump && FIT_SIGNED(displacement, 13)) {
         // bne rs, zero, displacement
         asm_rv32_opcode_bne(state, rs, ASM_RV32_REG_ZERO, displacement);
         return;
     }
 
-    // Compensate for the initial C.BEQZ/BEQ opcode.
-    displacement -= IS_IN_C_REGISTER_WINDOW(rs) ? ASM_HALFWORD_SIZE : ASM_WORD_SIZE;
-
-    mp_uint_t upper = 0;
-    mp_uint_t lower = 0;
-    split_immediate(displacement, &upper, &lower);
-
     // TODO: Can this clobber REG_TEMP[0:2]?
     //
-    // if rs1 in C window (the offset always fits):
+    // if rs1 in C window and displacement is negative:
     //   c.beqz rs', 10                          ; PC + 0
     //   auipc temporary, HI(displacement)       ; PC + 2
     //   jalr zero, temporary, LO(displacement)  ; PC + 6
@@ -388,11 +389,20 @@ asm_rv32_emit_jump_if_reg_nonzero(asm_rv32_t *state, mp_uint_t rs, mp_uint_
     //   auipc temporary, HI(displacement)       ; PC + 4
     //   jalr zero, temporary, LO(displacement)  ; PC + 8
     //   ...                                     ; PC + 12
-    if (IS_IN_C_REGISTER_WINDOW(rs)) {
+
+    if (can_emit_short_jump && IS_IN_C_REGISTER_WINDOW(rs)) {
         asm_rv32_opcode_cbeqz(state, MAP_IN_C_REGISTER_WINDOW(rs), 10);
+        // Compensate for the C.BEQZ opcode.
+        displacement -= ASM_HALFWORD_SIZE;
     } else {
         asm_rv32_opcode_beq(state, rs, ASM_RV32_REG_ZERO, 12);
+        // Compensate for the BEQ opcode.
+        displacement -= ASM_WORD_SIZE;
     }
+
+    mp_uint_t upper = 0;
+    mp_uint_t lower = 0;
+    split_immediate(displacement, &upper, &lower);
     asm_rv32_opcode_auipc(state, INTERNAL_TEMPORARY, upper);
     asm_rv32_opcode_jalr(state, ASM_RV32_REG_ZERO, INTERNAL_TEMPORARY, lower);
 }
@@ -502,10 +512,10 @@ void asm_rv32_emit_load_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_
 }
 
 void asm_rv32_emit_jump(asm_rv32_t *state, mp_uint_t label) {
-    ptrdiff_t displacement = (ptrdiff_t)(state->base.label_offsets[label] - state->base.code_offset);
+    ptrdiff_t displacement = 0;
+    bool can_emit_short_jump = calculate_displacement_for_label(state, label, &displacement);
 
-    // The least significant bit is ignored anyway.
-    if (FIT_SIGNED(displacement, 13)) {
+    if (can_emit_short_jump && FIT_SIGNED(displacement, 12)) {
         // c.j displacement
         asm_rv32_opcode_cj(state, displacement);
         return;
@@ -536,12 +546,12 @@ void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint
     mp_uint_t lower = 0;
     split_immediate(scaled_offset, &upper, &lower);
 
-    // lui rd, HI(offset)           ; Or c.lui if possible
-    // c.add rd, rs
-    // sw rd, LO(offset)(rd)
-    load_upper_immediate(state, rd, upper);
-    asm_rv32_opcode_cadd(state, rd, rs);
-    asm_rv32_opcode_sw(state, rd, rd, lower);
+    // lui temporary, HI(offset)    ; Or c.lui if possible
+    // c.add temporary, rs
+    // sw rd, LO(offset)(temporary)
+    load_upper_immediate(state, INTERNAL_TEMPORARY, upper);
+    asm_rv32_opcode_cadd(state, INTERNAL_TEMPORARY, rs);
+    asm_rv32_opcode_sw(state, rd, INTERNAL_TEMPORARY, lower);
 }
 
 void asm_rv32_emit_mov_reg_pcrel(asm_rv32_t *state, mp_uint_t rd, mp_uint_t label) {
@@ -550,11 +560,6 @@ void asm_rv32_emit_mov_reg_pcrel(asm_rv32_t *state, mp_uint_t rd, mp_uint_t labe
     mp_uint_t lower = 0;
    split_immediate(displacement, &upper, &lower);
 
-    // Compressed instructions are not used even if they may allow for code size
-    // savings as the code changes size between compute and emit passes
-    // otherwise. If that happens then the assertion at asmbase.c:93 triggers
-    // when built in debug mode.
-
     // auipc rd, HI(relative)
     // addi rd, rd, LO(relative)
     asm_rv32_opcode_auipc(state, rd, upper);
diff --git a/py/asmrv32.h b/py/asmrv32.h
index 77a5b0ab6..775cf1ffc 100644
--- a/py/asmrv32.h
+++ b/py/asmrv32.h
@@ -331,7 +331,7 @@ static inline void asm_rv32_opcode_lw(asm_rv32_t *state, mp_uint_t rd, mp_uint_t
 }
 
 // MUL RD, RS1, RS2
-static inline void asm_rv32m_opcode_mul(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2) {
+static inline void asm_rv32_opcode_mul(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2) {
     // R: 0000001 ..... ..... 000 ..... 0110011
     asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_R(0x33, 0x00, 0x01, rd, rs1, rs2));
 }
@@ -479,7 +479,7 @@ void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t source, mp_
 #define ASM_MOV_REG_LOCAL(state, rd, local) asm_rv32_emit_mov_reg_local(state, rd, local)
 #define ASM_MOV_REG_PCREL(state, rd, label) asm_rv32_emit_mov_reg_pcrel(state, rd, label)
 #define ASM_MOV_REG_REG(state, rd, rs) asm_rv32_opcode_cmv(state, rd, rs)
-#define ASM_MUL_REG_REG(state, rd, rs) asm_rv32m_opcode_mul(state, rd, rd, rs)
+#define ASM_MUL_REG_REG(state, rd, rs) asm_rv32_opcode_mul(state, rd, rd, rs)
 #define ASM_NEG_REG(state, rd) asm_rv32_opcode_sub(state, rd, ASM_RV32_REG_ZERO, rd)
 #define ASM_NOT_REG(state, rd) asm_rv32_opcode_xori(state, rd, rd, -1)
 #define ASM_OR_REG_REG(state, rd, rs) asm_rv32_opcode_or(state, rd, rd, rs)
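The `FIT_SIGNED()` widths used in the patched checks (8 and 13 for the conditional branches, 12 for `c.j`) decide which encoding can reach the target. The sketch below shows how such a signed-range test classifies displacements; `fits_signed()` is a stand-in written for this example rather than MicroPython's `FIT_SIGNED` macro, and the byte ranges in the comment are the architectural RV32C/RV32I branch reaches, not values taken from the patch.

```c
/*
 * Standalone illustration of a signed-range check: a value fits a B-bit
 * signed immediate when -(2^(B-1)) <= value < 2^(B-1).
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static bool fits_signed(long value, unsigned bits) {
    long limit = 1L << (bits - 1);
    return value >= -limit && value < limit;
}

int main(void) {
    // Architectural branch reaches on RV32: C.BEQZ/C.BNEZ use a 9-bit signed
    // byte offset (about +/-256 B), C.J a 12-bit one (about +/-2 KiB), and
    // the full-width BEQ/BNE a 13-bit one (about +/-4 KiB).
    long displacements[] = { -100, -300, -3000, -5000 };
    for (size_t i = 0; i < sizeof(displacements) / sizeof(displacements[0]); i++) {
        long d = displacements[i];
        printf("%6ld bytes: 9-bit=%d 12-bit=%d 13-bit=%d\n",
            d, fits_signed(d, 9), fits_signed(d, 12), fits_signed(d, 13));
    }
    return 0;
}
```

Displacements that fail the relevant check fall through to the longer sequences, such as the `auipc` + `jalr` pair emitted in `asm_rv32_emit_jump_if_reg_nonzero` above.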
