diff options
| author | Alessandro Gatti <a.gatti@frob.it> | 2025-09-19 14:50:05 +0200 |
|---|---|---|
| committer | Alessandro Gatti <a.gatti@frob.it> | 2025-09-19 15:51:37 +0200 |
| commit | cb7ca6f1bc9c77969d465025f63afdd96892577a (patch) | |
| tree | 54550c5794f8a3d4ac45fc1123e55d98d1fab62c /py/asmrv32.c | |
| parent | 42ea7bc32af49f305ece4f4e4601accea9df0534 (diff) | |
py/asmrv32: Use RV32 Zba opcodes if possible.
This commit adds optional support for selected Zba opcodes (address
generation) to speed up Viper and native code generation on MCUs where
those opcodes are supported (namely RP2350).
Right now support for these opcodes is opt-in, as extension detection
granularity on the RISC-V platform is still a bit in flux. Relying on
the 'B' bit in the MISA register may yield both false positives and
false negatives depending on the RISC-V implementation the check runs
on.
As a side-effect of Zba support, regular non-byte load/stores have been
made shorter by two bytes. Whilst this makes code using Zba take up the
same space as non-Zba code, the former will still be faster as it will
have to process just one instruction instead of two, without stalling
registers between the shift and the addition needed to compute the final
offset.
Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
Diffstat (limited to 'py/asmrv32.c')
| -rw-r--r-- | py/asmrv32.c | 42 |
1 files changed, 42 insertions, 0 deletions
diff --git a/py/asmrv32.c b/py/asmrv32.c index 723d32cdd..9313e61cc 100644 --- a/py/asmrv32.c +++ b/py/asmrv32.c @@ -36,6 +36,8 @@ #if MICROPY_EMIT_RV32 #include "py/asmrv32.h" +#include "py/mpstate.h" +#include "py/persistentcode.h" #if MICROPY_DEBUG_VERBOSE #define DEBUG_PRINT (1) @@ -555,6 +557,46 @@ void asm_rv32_emit_optimised_xor(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs) asm_rv32_opcode_xor(state, rd, rd, rs); } +static bool asm_rv32_allow_zba_opcodes(void) { + return asm_rv32_allowed_extensions() & RV32_EXT_ZBA; +} + +static void asm_rv32_fix_up_scaled_reg_reg_reg(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t operation_size) { + assert(operation_size <= 2 && "Operation size value out of range."); + + if (operation_size > 0 && asm_rv32_allow_zba_opcodes()) { + // sh{1,2}add rs1, rs2, rs1 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_R(0x33, 1 << operation_size, 0x10, rs1, rs2, rs1)); + } else { + if (operation_size > 0) { + asm_rv32_opcode_cslli(state, rs2, operation_size); + } + asm_rv32_opcode_cadd(state, rs1, rs2); + } +} + +void asm_rv32_emit_load_reg_reg_reg(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t operation_size) { + asm_rv32_fix_up_scaled_reg_reg_reg(state, rs1, rs2, operation_size); + if (operation_size == 2 && RV32_IS_IN_C_REGISTER_WINDOW(rd) && RV32_IS_IN_C_REGISTER_WINDOW(rs1)) { + // c.lw rd', offset(rs') + asm_rv32_opcode_clw(state, RV32_MAP_IN_C_REGISTER_WINDOW(rd), RV32_MAP_IN_C_REGISTER_WINDOW(rs1), 0); + } else { + // lbu|lhu|lw rd, offset(rs) + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_I(0x03, RV32_LOAD_OPCODE_TABLE[operation_size], rd, rs1, 0)); + } +} + +void asm_rv32_emit_store_reg_reg_reg(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t operation_size) { + asm_rv32_fix_up_scaled_reg_reg_reg(state, rs1, rs2, operation_size); + if (operation_size == 2 && RV32_IS_IN_C_REGISTER_WINDOW(rd) && RV32_IS_IN_C_REGISTER_WINDOW(rs1)) { + // c.sw rd', offset(rs') + asm_rv32_opcode_csw(state, RV32_MAP_IN_C_REGISTER_WINDOW(rd), RV32_MAP_IN_C_REGISTER_WINDOW(rs1), 0); + } else { + // sb|sh|sw rd, offset(rs) + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_S(0x23, operation_size, rs1, rd, 0)); + } +} + void asm_rv32_meta_comparison_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd) { // c.li rd, 1 ; // beq rs1, rs2, 6 ; PC + 0 |
