author     Alessandro Gatti <a.gatti@frob.it>    2025-09-19 14:50:05 +0200
committer  Alessandro Gatti <a.gatti@frob.it>    2025-09-19 15:51:37 +0200
commit     cb7ca6f1bc9c77969d465025f63afdd96892577a
tree       54550c5794f8a3d4ac45fc1123e55d98d1fab62c
parent     42ea7bc32af49f305ece4f4e4601accea9df0534
py/asmrv32: Use RV32 Zba opcodes if possible.
This commit adds optional support for selected Zba opcodes (address
generation) to speed up Viper and native code generation on MCUs where
those opcodes are supported (namely RP2350).

Right now support for these opcodes is opt-in, as extension detection
granularity on the RISC-V platform is still a bit in flux. Relying on
the 'B' bit in the MISA register may yield both false positives and
false negatives, depending on the RISC-V implementation the check runs
on.

As a side effect of Zba support, regular non-byte loads/stores have
been made shorter by two bytes. Whilst this makes code using Zba take
up the same space as non-Zba code, the former will still be faster as
it has to process just one instruction instead of two, without stalling
registers between the shift and the addition needed to compute the
final offset.

Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
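To make the size/speed trade-off concrete, here is an illustrative sketch (not part of the commit) of what the emitter produces for a 32-bit indexed load, i.e. rd = *(uint32_t *)(rs1 + (rs2 << 2)); state and the register arguments are placeholders, and the c.lw shortcut for compressible registers is left out:

    // Non-Zba path: scale the index, add it to the base, then load.
    asm_rv32_opcode_cslli(state, rs2, 2);  // c.slli rs2, 2
    asm_rv32_opcode_cadd(state, rs1, rs2); // c.add  rs1, rs2
    asm_rv32_opcode_lw(state, rd, rs1, 0); // lw     rd, 0(rs1)

    // Zba path: sh2add folds the shift and the addition into one opcode.
    // sh2add rs1, rs2, rs1 computes rs1 = (rs2 << 2) + rs1.
    asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_R(0x33, 0x04, 0x10, rs1, rs2, rs1));
    asm_rv32_opcode_lw(state, rd, rs1, 0); // lw     rd, 0(rs1)

Both sequences are eight bytes (versus ten before this commit, which used a full-width slli), but the Zba version executes one fewer instruction and has no register dependency between the shift and the add.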
Diffstat (limited to 'py')
-rw-r--r--  py/asmrv32.c   42
-rw-r--r--  py/asmrv32.h   65
-rw-r--r--  py/mpconfig.h   5
3 files changed, 78 insertions(+), 34 deletions(-)
diff --git a/py/asmrv32.c b/py/asmrv32.c
index 723d32cdd..9313e61cc 100644
--- a/py/asmrv32.c
+++ b/py/asmrv32.c
@@ -36,6 +36,8 @@
#if MICROPY_EMIT_RV32
#include "py/asmrv32.h"
+#include "py/mpstate.h"
+#include "py/persistentcode.h"
#if MICROPY_DEBUG_VERBOSE
#define DEBUG_PRINT (1)
@@ -555,6 +557,46 @@ void asm_rv32_emit_optimised_xor(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs)
asm_rv32_opcode_xor(state, rd, rd, rs);
}
+static bool asm_rv32_allow_zba_opcodes(void) {
+ return asm_rv32_allowed_extensions() & RV32_EXT_ZBA;
+}
+
+static void asm_rv32_fix_up_scaled_reg_reg_reg(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t operation_size) {
+ assert(operation_size <= 2 && "Operation size value out of range.");
+
+ if (operation_size > 0 && asm_rv32_allow_zba_opcodes()) {
+ // sh{1,2}add rs1, rs2, rs1
+ asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_R(0x33, 1 << operation_size, 0x10, rs1, rs2, rs1));
+ } else {
+ if (operation_size > 0) {
+ asm_rv32_opcode_cslli(state, rs2, operation_size);
+ }
+ asm_rv32_opcode_cadd(state, rs1, rs2);
+ }
+}
+
+void asm_rv32_emit_load_reg_reg_reg(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t operation_size) {
+ asm_rv32_fix_up_scaled_reg_reg_reg(state, rs1, rs2, operation_size);
+ if (operation_size == 2 && RV32_IS_IN_C_REGISTER_WINDOW(rd) && RV32_IS_IN_C_REGISTER_WINDOW(rs1)) {
+ // c.lw rd', offset(rs')
+ asm_rv32_opcode_clw(state, RV32_MAP_IN_C_REGISTER_WINDOW(rd), RV32_MAP_IN_C_REGISTER_WINDOW(rs1), 0);
+ } else {
+ // lbu|lhu|lw rd, offset(rs)
+ asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_I(0x03, RV32_LOAD_OPCODE_TABLE[operation_size], rd, rs1, 0));
+ }
+}
+
+void asm_rv32_emit_store_reg_reg_reg(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t operation_size) {
+ asm_rv32_fix_up_scaled_reg_reg_reg(state, rs1, rs2, operation_size);
+ if (operation_size == 2 && RV32_IS_IN_C_REGISTER_WINDOW(rd) && RV32_IS_IN_C_REGISTER_WINDOW(rs1)) {
+ // c.sw rd', offset(rs')
+ asm_rv32_opcode_csw(state, RV32_MAP_IN_C_REGISTER_WINDOW(rd), RV32_MAP_IN_C_REGISTER_WINDOW(rs1), 0);
+ } else {
+ // sb|sh|sw rd, offset(rs)
+ asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_S(0x23, operation_size, rs1, rd, 0));
+ }
+}
+
void asm_rv32_meta_comparison_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd) {
// c.li rd, 1 ;
// beq rs1, rs2, 6 ; PC + 0
diff --git a/py/asmrv32.h b/py/asmrv32.h
index dac9c028b..f320d3ee0 100644
--- a/py/asmrv32.h
+++ b/py/asmrv32.h
@@ -122,6 +122,16 @@ typedef struct _asm_rv32_t {
mp_uint_t locals_stack_offset;
} asm_rv32_t;
+enum {
+ RV32_EXT_NONE = 0,
+ RV32_EXT_ZBA = 1 << 0,
+};
+
+typedef struct _asm_rv32_backend_options_t {
+ // This is a bitmask holding a combination of RV32_EXT_* entries.
+ uint8_t allowed_extensions;
+} asm_rv32_backend_options_t;
+
void asm_rv32_entry(asm_rv32_t *state, mp_uint_t locals);
void asm_rv32_exit(asm_rv32_t *state);
void asm_rv32_end_pass(asm_rv32_t *state);
@@ -679,6 +689,19 @@ static inline void asm_rv32_opcode_xori(asm_rv32_t *state, mp_uint_t rd, mp_uint
asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_I(0x13, 0x04, rd, rs, immediate));
}
+static inline uint8_t asm_rv32_allowed_extensions(void) {
+ uint8_t extensions = 0;
+ #if MICROPY_EMIT_RV32_ZBA
+ extensions |= RV32_EXT_ZBA;
+ #endif
+ #if MICROPY_DYNAMIC_COMPILER
+ if (mp_dynamic_compiler.backend_options != NULL) {
+ extensions |= ((asm_rv32_backend_options_t *)mp_dynamic_compiler.backend_options)->allowed_extensions;
+ }
+ #endif
+ return extensions;
+}
+
#define ASM_WORD_SIZE (4)
#define ASM_HALFWORD_SIZE (2)
@@ -702,6 +725,8 @@ void asm_rv32_meta_comparison_lt(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2
void asm_rv32_meta_comparison_le(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd, bool unsigned_comparison);
void asm_rv32_emit_optimised_load_immediate(asm_rv32_t *state, mp_uint_t rd, mp_int_t immediate);
+void asm_rv32_emit_load_reg_reg_reg(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t operation_size);
+void asm_rv32_emit_store_reg_reg_reg(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t operation_size);
#ifdef GENERIC_ASM_API
@@ -760,40 +785,12 @@ void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t source, mp_
#define ASM_SUB_REG_REG(state, rd, rs) asm_rv32_opcode_sub(state, rd, rd, rs)
#define ASM_XOR_REG_REG(state, rd, rs) asm_rv32_emit_optimised_xor(state, rd, rs)
#define ASM_CLR_REG(state, rd)
-#define ASM_LOAD8_REG_REG_REG(state, rd, rs1, rs2) \
- do { \
- asm_rv32_opcode_cadd(state, rs1, rs2); \
- asm_rv32_opcode_lbu(state, rd, rs1, 0); \
- } while (0)
-#define ASM_LOAD16_REG_REG_REG(state, rd, rs1, rs2) \
- do { \
- asm_rv32_opcode_slli(state, rs2, rs2, 1); \
- asm_rv32_opcode_cadd(state, rs1, rs2); \
- asm_rv32_opcode_lhu(state, rd, rs1, 0); \
- } while (0)
-#define ASM_LOAD32_REG_REG_REG(state, rd, rs1, rs2) \
- do { \
- asm_rv32_opcode_slli(state, rs2, rs2, 2); \
- asm_rv32_opcode_cadd(state, rs1, rs2); \
- asm_rv32_opcode_lw(state, rd, rs1, 0); \
- } while (0)
-#define ASM_STORE8_REG_REG_REG(state, rd, rs1, rs2) \
- do { \
- asm_rv32_opcode_cadd(state, rs1, rs2); \
- asm_rv32_opcode_sb(state, rd, rs1, 0); \
- } while (0)
-#define ASM_STORE16_REG_REG_REG(state, rd, rs1, rs2) \
- do { \
- asm_rv32_opcode_slli(state, rs2, rs2, 1); \
- asm_rv32_opcode_cadd(state, rs1, rs2); \
- asm_rv32_opcode_sh(state, rd, rs1, 0); \
- } while (0)
-#define ASM_STORE32_REG_REG_REG(state, rd, rs1, rs2) \
- do { \
- asm_rv32_opcode_slli(state, rs2, rs2, 2); \
- asm_rv32_opcode_cadd(state, rs1, rs2); \
- asm_rv32_opcode_sw(state, rd, rs1, 0); \
- } while (0)
+#define ASM_LOAD8_REG_REG_REG(state, rd, rs1, rs2) asm_rv32_emit_load_reg_reg_reg(state, rd, rs1, rs2, 0)
+#define ASM_LOAD16_REG_REG_REG(state, rd, rs1, rs2) asm_rv32_emit_load_reg_reg_reg(state, rd, rs1, rs2, 1)
+#define ASM_LOAD32_REG_REG_REG(state, rd, rs1, rs2) asm_rv32_emit_load_reg_reg_reg(state, rd, rs1, rs2, 2)
+#define ASM_STORE8_REG_REG_REG(state, rd, rs1, rs2) asm_rv32_emit_store_reg_reg_reg(state, rd, rs1, rs2, 0)
+#define ASM_STORE16_REG_REG_REG(state, rd, rs1, rs2) asm_rv32_emit_store_reg_reg_reg(state, rd, rs1, rs2, 1)
+#define ASM_STORE32_REG_REG_REG(state, rd, rs1, rs2) asm_rv32_emit_store_reg_reg_reg(state, rd, rs1, rs2, 2)
#endif
diff --git a/py/mpconfig.h b/py/mpconfig.h
index 303eb08f9..ae44b461d 100644
--- a/py/mpconfig.h
+++ b/py/mpconfig.h
@@ -421,6 +421,11 @@
#define MICROPY_EMIT_RV32 (0)
#endif
+// Whether to emit RISC-V RV32 Zba opcodes in native code
+#ifndef MICROPY_EMIT_RV32_ZBA
+#define MICROPY_EMIT_RV32_ZBA (0)
+#endif
+
// Whether to enable the RISC-V RV32 inline assembler
#ifndef MICROPY_EMIT_INLINE_RV32
#define MICROPY_EMIT_INLINE_RV32 (0)
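As a usage note: opting in is a port decision. A minimal sketch, assuming a port whose core implements Zba (such as the RP2350) and following the usual MicroPython pattern of overriding config options in the port's mpconfigport.h (the file name and placement are illustrative, not part of this commit):

    // In the port's mpconfigport.h: opt in to Zba address-generation opcodes
    // (sh1add/sh2add) for Viper and native code. Only enable this when the
    // target core is known to implement the Zba extension.
    #define MICROPY_EMIT_RV32_ZBA (1)

When MICROPY_DYNAMIC_COMPILER is enabled, the opt-in can instead come from the runtime: asm_rv32_allowed_extensions() also ORs in the allowed_extensions bitmask of the asm_rv32_backend_options_t pointed to by mp_dynamic_compiler.backend_options, so RV32_EXT_ZBA can be requested without rebuilding with the option above.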