summaryrefslogtreecommitdiff
path: root/py/asmrv32.c
diff options
context:
space:
mode:
author: Alessandro Gatti <a.gatti@frob.it> 2024-06-25 15:45:13 +0200
committer: Damien George <damien@micropython.org> 2024-07-01 11:36:03 +1000
commit: 0600e4f27333092884358c967123dcc040d7a1cc (patch)
tree: 6f553ad86e19e0282c2322f55deb47a0dfefc9af /py/asmrv32.c
parent: 0e261443be9a9201fd786b54aa2d0198f21543f7 (diff)
py/asmrv32: Make some code sequences smaller.
This commit changes a few code sequences to use more compressed opcodes where possible. The sequences in question are the ones that show up the most in the test suite and require the least amount of code changes, namely short offset loads from memory to RET/ARG registers, indirect calls through the function table, register-based jumps, locals' offset calculation, reg-is-null jumps, and register comparisons. There are no speed losses or gains from these changes, but there is an average 15-20% generated code size reduction. Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
Diffstat (limited to 'py/asmrv32.c')
-rw-r--r--  py/asmrv32.c | 71
1 file changed, 56 insertions(+), 15 deletions(-)
diff --git a/py/asmrv32.c b/py/asmrv32.c
index 228edfc22..7dd58beca 100644
--- a/py/asmrv32.c
+++ b/py/asmrv32.c
@@ -64,9 +64,14 @@ static uint32_t fallback_popcount(uint32_t value) {
#endif
#endif
-#define INTERNAL_TEMPORARY ASM_RV32_REG_T4
+#define INTERNAL_TEMPORARY ASM_RV32_REG_S0
#define AVAILABLE_REGISTERS_COUNT 32
+#define IS_IN_C_REGISTER_WINDOW(register_number) \
+ (((register_number) >= ASM_RV32_REG_X8) && ((register_number) <= ASM_RV32_REG_X15))
+#define MAP_IN_C_REGISTER_WINDOW(register_number) \
+ ((register_number) - ASM_RV32_REG_X8)
+
#define FIT_UNSIGNED(value, bits) (((value) & ~((1U << (bits)) - 1)) == 0)
#define FIT_SIGNED(value, bits) \
((((value) & ~((1U << ((bits) - 1)) - 1)) == 0) || \
@@ -269,7 +274,7 @@ static void emit_function_epilogue(asm_rv32_t *state, mp_uint_t registers) {
void asm_rv32_entry(asm_rv32_t *state, mp_uint_t locals) {
state->saved_registers_mask |= (1U << REG_FUN_TABLE) | (1U << REG_LOCAL_1) | \
- (1U << REG_LOCAL_2) | (1U << REG_LOCAL_3);
+ (1U << REG_LOCAL_2) | (1U << REG_LOCAL_3) | (1U << INTERNAL_TEMPORARY);
state->locals_count = locals;
emit_function_prologue(state, state->saved_registers_mask);
}
@@ -288,6 +293,14 @@ void asm_rv32_emit_call_ind(asm_rv32_t *state, mp_uint_t index) {
mp_uint_t offset = index * ASM_WORD_SIZE;
state->saved_registers_mask |= (1U << ASM_RV32_REG_RA);
+ if (IS_IN_C_REGISTER_WINDOW(REG_FUN_TABLE) && IS_IN_C_REGISTER_WINDOW(INTERNAL_TEMPORARY) && FIT_SIGNED(offset, 7)) {
+ // c.lw temporary, offset(fun_table)
+ // c.jalr temporary
+ asm_rv32_opcode_clw(state, MAP_IN_C_REGISTER_WINDOW(INTERNAL_TEMPORARY), MAP_IN_C_REGISTER_WINDOW(REG_FUN_TABLE), offset);
+ asm_rv32_opcode_cjalr(state, INTERNAL_TEMPORARY);
+ return;
+ }
+
if (FIT_UNSIGNED(offset, 11)) {
// lw temporary, offset(fun_table)
// c.jalr temporary
@@ -343,6 +356,12 @@ void asm_rv32_emit_jump_if_reg_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs
void asm_rv32_emit_jump_if_reg_nonzero(asm_rv32_t *state, mp_uint_t rs, mp_uint_t label) {
ptrdiff_t displacement = (ptrdiff_t)(state->base.label_offsets[label] - state->base.code_offset);
+ if (FIT_SIGNED(displacement, 9) && IS_IN_C_REGISTER_WINDOW(rs)) {
+ // c.bnez rs', displacement
+ asm_rv32_opcode_cbnez(state, MAP_IN_C_REGISTER_WINDOW(rs), displacement);
+ return;
+ }
+
// The least significant bit is ignored anyway.
if (FIT_SIGNED(displacement, 13)) {
// bne rs, zero, displacement
@@ -350,8 +369,8 @@ void asm_rv32_emit_jump_if_reg_nonzero(asm_rv32_t *state, mp_uint_t rs, mp_uint_
return;
}
- // Compensate for the initial BEQ opcode.
- displacement -= ASM_WORD_SIZE;
+ // Compensate for the initial C.BEQZ/BEQ opcode.
+ displacement -= IS_IN_C_REGISTER_WINDOW(rs) ? ASM_HALFWORD_SIZE : ASM_WORD_SIZE;
mp_uint_t upper = 0;
mp_uint_t lower = 0;
@@ -359,11 +378,21 @@ void asm_rv32_emit_jump_if_reg_nonzero(asm_rv32_t *state, mp_uint_t rs, mp_uint_
// TODO: Can this clobber REG_TEMP[0:2]?
- // beq rs1, zero, 12 ; PC + 0
- // auipc temporary, HI(displacement) ; PC + 4
- // jalr zero, temporary, LO(displacement) ; PC + 8
- // ... ; PC + 12
- asm_rv32_opcode_beq(state, rs, ASM_RV32_REG_ZERO, 12);
+ // if rs1 in C window (the offset always fits):
+ // c.beqz rs', 10 ; PC + 0
+ // auipc temporary, HI(displacement) ; PC + 2
+ // jalr zero, temporary, LO(displacement) ; PC + 6
+ // ... ; PC + 10
+ // else:
+ // beq rs, zero, 12 ; PC + 0
+ // auipc temporary, HI(displacement) ; PC + 4
+ // jalr zero, temporary, LO(displacement) ; PC + 8
+ // ... ; PC + 12
+ if (IS_IN_C_REGISTER_WINDOW(rs)) {
+ asm_rv32_opcode_cbeqz(state, MAP_IN_C_REGISTER_WINDOW(rs), 10);
+ } else {
+ asm_rv32_opcode_beq(state, rs, ASM_RV32_REG_ZERO, 12);
+ }
asm_rv32_opcode_auipc(state, INTERNAL_TEMPORARY, upper);
asm_rv32_opcode_jalr(state, ASM_RV32_REG_ZERO, INTERNAL_TEMPORARY, lower);
}
@@ -427,7 +456,13 @@ void asm_rv32_emit_mov_reg_local(asm_rv32_t *state, mp_uint_t rd, mp_uint_t loca
void asm_rv32_emit_mov_reg_local_addr(asm_rv32_t *state, mp_uint_t rd, mp_uint_t local) {
mp_uint_t offset = state->locals_stack_offset + (local * ASM_WORD_SIZE);
- if (FIT_SIGNED(offset, 11)) {
+ if (FIT_UNSIGNED(offset, 10) && offset != 0 && IS_IN_C_REGISTER_WINDOW(rd)) {
+ // c.addi4spn rd', offset
+ asm_rv32_opcode_caddi4spn(state, MAP_IN_C_REGISTER_WINDOW(rd), offset);
+ return;
+ }
+
+ if (FIT_UNSIGNED(offset, 11)) {
// addi rd, sp, offset
asm_rv32_opcode_addi(state, rd, ASM_RV32_REG_SP, offset);
return;
@@ -442,6 +477,12 @@ void asm_rv32_emit_mov_reg_local_addr(asm_rv32_t *state, mp_uint_t rd, mp_uint_t
void asm_rv32_emit_load_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) {
mp_int_t scaled_offset = offset * sizeof(ASM_WORD_SIZE);
+ if (IS_IN_C_REGISTER_WINDOW(rd) && IS_IN_C_REGISTER_WINDOW(rs) && FIT_SIGNED(offset, 7)) {
+ // c.lw rd', offset(rs')
+ asm_rv32_opcode_clw(state, MAP_IN_C_REGISTER_WINDOW(rd), MAP_IN_C_REGISTER_WINDOW(rs), scaled_offset);
+ return;
+ }
+
if (FIT_SIGNED(scaled_offset, 12)) {
// lw rd, offset(rs)
asm_rv32_opcode_lw(state, rd, rs, scaled_offset);
@@ -554,12 +595,12 @@ void asm_rv32_emit_optimised_xor(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs)
void asm_rv32_meta_comparison_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd) {
// c.li rd, 1 ;
- // beq rs1, rs2, 8 ; PC + 0
- // addi rd, zero, 0 ; PC + 4
- // ... ; PC + 8
+ // beq rs1, rs2, 6 ; PC + 0
+ // c.li rd, 0 ; PC + 4
+ // ... ; PC + 6
asm_rv32_opcode_cli(state, rd, 1);
- asm_rv32_opcode_beq(state, rs1, rs2, 8);
- asm_rv32_opcode_addi(state, rd, ASM_RV32_REG_ZERO, 0);
+ asm_rv32_opcode_beq(state, rs1, rs2, 6);
+ asm_rv32_opcode_cli(state, rd, 0);
}
void asm_rv32_meta_comparison_ne(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd) {