diff options
| author | Alessandro Gatti <a.gatti@frob.it> | 2025-09-19 14:50:05 +0200 |
|---|---|---|
| committer | Alessandro Gatti <a.gatti@frob.it> | 2025-09-19 15:51:37 +0200 |
| commit | cb7ca6f1bc9c77969d465025f63afdd96892577a (patch) | |
| tree | 54550c5794f8a3d4ac45fc1123e55d98d1fab62c /py | |
| parent | 42ea7bc32af49f305ece4f4e4601accea9df0534 (diff) | |
py/asmrv32: Use RV32 Zba opcodes if possible.
This commit adds optional support for selected Zba (address generation)
opcodes, to speed up the code emitted by the Viper and native code
generators on MCUs where those opcodes are supported (namely RP2350).
Right now support for these opcodes is opt-in, as extension detection
granularity on the RISC-V platform is still a bit in flux. Relying on
the 'B' bit in the MISA register may yield both false positives and
false negatives depending on the RISC-V implementation the check runs
on.
As a side effect of Zba support, regular non-byte loads/stores have been
made shorter by two bytes even when Zba is not in use, as the shift now
uses a compressed opcode. Whilst this makes code using Zba take up the
same space as non-Zba code, the former will still be faster: the CPU has
to execute just one instruction instead of two, with no stall between the
shift and the addition needed to compute the final offset.
Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
Diffstat (limited to 'py')
| -rw-r--r-- | py/asmrv32.c | 42 | ||||
| -rw-r--r-- | py/asmrv32.h | 65 | ||||
| -rw-r--r-- | py/mpconfig.h | 5 |
3 files changed, 78 insertions, 34 deletions
diff --git a/py/asmrv32.c b/py/asmrv32.c index 723d32cdd..9313e61cc 100644 --- a/py/asmrv32.c +++ b/py/asmrv32.c @@ -36,6 +36,8 @@ #if MICROPY_EMIT_RV32 #include "py/asmrv32.h" +#include "py/mpstate.h" +#include "py/persistentcode.h" #if MICROPY_DEBUG_VERBOSE #define DEBUG_PRINT (1) @@ -555,6 +557,46 @@ void asm_rv32_emit_optimised_xor(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs) asm_rv32_opcode_xor(state, rd, rd, rs); } +static bool asm_rv32_allow_zba_opcodes(void) { + return asm_rv32_allowed_extensions() & RV32_EXT_ZBA; +} + +static void asm_rv32_fix_up_scaled_reg_reg_reg(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t operation_size) { + assert(operation_size <= 2 && "Operation size value out of range."); + + if (operation_size > 0 && asm_rv32_allow_zba_opcodes()) { + // sh{1,2}add rs1, rs2, rs1 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_R(0x33, 1 << operation_size, 0x10, rs1, rs2, rs1)); + } else { + if (operation_size > 0) { + asm_rv32_opcode_cslli(state, rs2, operation_size); + } + asm_rv32_opcode_cadd(state, rs1, rs2); + } +} + +void asm_rv32_emit_load_reg_reg_reg(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t operation_size) { + asm_rv32_fix_up_scaled_reg_reg_reg(state, rs1, rs2, operation_size); + if (operation_size == 2 && RV32_IS_IN_C_REGISTER_WINDOW(rd) && RV32_IS_IN_C_REGISTER_WINDOW(rs1)) { + // c.lw rd', offset(rs') + asm_rv32_opcode_clw(state, RV32_MAP_IN_C_REGISTER_WINDOW(rd), RV32_MAP_IN_C_REGISTER_WINDOW(rs1), 0); + } else { + // lbu|lhu|lw rd, offset(rs) + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_I(0x03, RV32_LOAD_OPCODE_TABLE[operation_size], rd, rs1, 0)); + } +} + +void asm_rv32_emit_store_reg_reg_reg(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t operation_size) { + asm_rv32_fix_up_scaled_reg_reg_reg(state, rs1, rs2, operation_size); + if (operation_size == 2 && RV32_IS_IN_C_REGISTER_WINDOW(rd) && RV32_IS_IN_C_REGISTER_WINDOW(rs1)) { + // c.sw rd', offset(rs') 
+ asm_rv32_opcode_csw(state, RV32_MAP_IN_C_REGISTER_WINDOW(rd), RV32_MAP_IN_C_REGISTER_WINDOW(rs1), 0); + } else { + // sb|sh|sw rd, offset(rs) + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_S(0x23, operation_size, rs1, rd, 0)); + } +} + void asm_rv32_meta_comparison_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd) { // c.li rd, 1 ; // beq rs1, rs2, 6 ; PC + 0 diff --git a/py/asmrv32.h b/py/asmrv32.h index dac9c028b..f320d3ee0 100644 --- a/py/asmrv32.h +++ b/py/asmrv32.h @@ -122,6 +122,16 @@ typedef struct _asm_rv32_t { mp_uint_t locals_stack_offset; } asm_rv32_t; +enum { + RV32_EXT_NONE = 0, + RV32_EXT_ZBA = 1 << 0, +}; + +typedef struct _asm_rv32_backend_options_t { + // This is a bitmask holding a combination of RV32_EXT_* entries. + uint8_t allowed_extensions; +} asm_rv32_backend_options_t; + void asm_rv32_entry(asm_rv32_t *state, mp_uint_t locals); void asm_rv32_exit(asm_rv32_t *state); void asm_rv32_end_pass(asm_rv32_t *state); @@ -679,6 +689,19 @@ static inline void asm_rv32_opcode_xori(asm_rv32_t *state, mp_uint_t rd, mp_uint asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_I(0x13, 0x04, rd, rs, immediate)); } +static inline uint8_t asm_rv32_allowed_extensions(void) { + uint8_t extensions = 0; + #if MICROPY_EMIT_RV32_ZBA + extensions |= RV32_EXT_ZBA; + #endif + #if MICROPY_DYNAMIC_COMPILER + if (mp_dynamic_compiler.backend_options != NULL) { + extensions |= ((asm_rv32_backend_options_t *)mp_dynamic_compiler.backend_options)->allowed_extensions; + } + #endif + return extensions; +} + #define ASM_WORD_SIZE (4) #define ASM_HALFWORD_SIZE (2) @@ -702,6 +725,8 @@ void asm_rv32_meta_comparison_lt(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2 void asm_rv32_meta_comparison_le(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd, bool unsigned_comparison); void asm_rv32_emit_optimised_load_immediate(asm_rv32_t *state, mp_uint_t rd, mp_int_t immediate); +void asm_rv32_emit_load_reg_reg_reg(asm_rv32_t *state, mp_uint_t rd, mp_uint_t 
rs1, mp_uint_t rs2, mp_uint_t operation_size); +void asm_rv32_emit_store_reg_reg_reg(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t operation_size); #ifdef GENERIC_ASM_API @@ -760,40 +785,12 @@ void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t source, mp_ #define ASM_SUB_REG_REG(state, rd, rs) asm_rv32_opcode_sub(state, rd, rd, rs) #define ASM_XOR_REG_REG(state, rd, rs) asm_rv32_emit_optimised_xor(state, rd, rs) #define ASM_CLR_REG(state, rd) -#define ASM_LOAD8_REG_REG_REG(state, rd, rs1, rs2) \ - do { \ - asm_rv32_opcode_cadd(state, rs1, rs2); \ - asm_rv32_opcode_lbu(state, rd, rs1, 0); \ - } while (0) -#define ASM_LOAD16_REG_REG_REG(state, rd, rs1, rs2) \ - do { \ - asm_rv32_opcode_slli(state, rs2, rs2, 1); \ - asm_rv32_opcode_cadd(state, rs1, rs2); \ - asm_rv32_opcode_lhu(state, rd, rs1, 0); \ - } while (0) -#define ASM_LOAD32_REG_REG_REG(state, rd, rs1, rs2) \ - do { \ - asm_rv32_opcode_slli(state, rs2, rs2, 2); \ - asm_rv32_opcode_cadd(state, rs1, rs2); \ - asm_rv32_opcode_lw(state, rd, rs1, 0); \ - } while (0) -#define ASM_STORE8_REG_REG_REG(state, rd, rs1, rs2) \ - do { \ - asm_rv32_opcode_cadd(state, rs1, rs2); \ - asm_rv32_opcode_sb(state, rd, rs1, 0); \ - } while (0) -#define ASM_STORE16_REG_REG_REG(state, rd, rs1, rs2) \ - do { \ - asm_rv32_opcode_slli(state, rs2, rs2, 1); \ - asm_rv32_opcode_cadd(state, rs1, rs2); \ - asm_rv32_opcode_sh(state, rd, rs1, 0); \ - } while (0) -#define ASM_STORE32_REG_REG_REG(state, rd, rs1, rs2) \ - do { \ - asm_rv32_opcode_slli(state, rs2, rs2, 2); \ - asm_rv32_opcode_cadd(state, rs1, rs2); \ - asm_rv32_opcode_sw(state, rd, rs1, 0); \ - } while (0) +#define ASM_LOAD8_REG_REG_REG(state, rd, rs1, rs2) asm_rv32_emit_load_reg_reg_reg(state, rd, rs1, rs2, 0) +#define ASM_LOAD16_REG_REG_REG(state, rd, rs1, rs2) asm_rv32_emit_load_reg_reg_reg(state, rd, rs1, rs2, 1) +#define ASM_LOAD32_REG_REG_REG(state, rd, rs1, rs2) asm_rv32_emit_load_reg_reg_reg(state, rd, rs1, rs2, 2) +#define 
ASM_STORE8_REG_REG_REG(state, rd, rs1, rs2) asm_rv32_emit_store_reg_reg_reg(state, rd, rs1, rs2, 0) +#define ASM_STORE16_REG_REG_REG(state, rd, rs1, rs2) asm_rv32_emit_store_reg_reg_reg(state, rd, rs1, rs2, 1) +#define ASM_STORE32_REG_REG_REG(state, rd, rs1, rs2) asm_rv32_emit_store_reg_reg_reg(state, rd, rs1, rs2, 2) #endif diff --git a/py/mpconfig.h b/py/mpconfig.h index 303eb08f9..ae44b461d 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -421,6 +421,11 @@ #define MICROPY_EMIT_RV32 (0) #endif +// Whether to emit RISC-V RV32 Zba opcodes in native code +#ifndef MICROPY_EMIT_RV32_ZBA +#define MICROPY_EMIT_RV32_ZBA (0) +#endif + // Whether to enable the RISC-V RV32 inline assembler #ifndef MICROPY_EMIT_INLINE_RV32 #define MICROPY_EMIT_INLINE_RV32 (0) |
