diff options
Diffstat (limited to 'py')
| -rw-r--r-- | py/asmrv32.c | 77 | ||||
| -rw-r--r-- | py/asmrv32.h | 20 |
2 files changed, 86 insertions, 11 deletions
diff --git a/py/asmrv32.c b/py/asmrv32.c index 1d0cea6c0..8b643af56 100644 --- a/py/asmrv32.c +++ b/py/asmrv32.c @@ -53,6 +53,14 @@ ((((value) & ~((1U << ((bits) - 1)) - 1)) == 0) || \ (((value) & ~((1U << ((bits) - 1)) - 1)) == ~((1U << ((bits) - 1)) - 1))) +static bool asm_rv32_allow_zba_opcodes(void) { + return asm_rv32_allowed_extensions() & RV32_EXT_ZBA; +} + +static bool asm_rv32_allow_zcmp_opcodes(void) { + return asm_rv32_allowed_extensions() & RV32_EXT_ZCMP; +} + /////////////////////////////////////////////////////////////////////////////// void asm_rv32_emit_word_opcode(asm_rv32_t *state, mp_uint_t word) { @@ -214,6 +222,14 @@ static void adjust_stack(asm_rv32_t *state, mp_int_t stack_size) { return; } + // WARNING: If REG_TEMP0 is not set to a caller-saved register, then this + // bit has to be rewritten to avoid clobbering the temporary + // register when performing the stack adjustment. + + MP_STATIC_ASSERT(((REG_TEMP0 >= ASM_RV32_REG_T0) && (REG_TEMP0 <= ASM_RV32_REG_T2)) || \ + ((REG_TEMP0 >= ASM_RV32_REG_A0) && (REG_TEMP0 <= ASM_RV32_REG_A7)) || \ + ((REG_TEMP0 >= ASM_RV32_REG_T3) && (REG_TEMP0 <= ASM_RV32_REG_T6))); + // li temporary, stack_size // c.add sp, temporary load_full_immediate(state, REG_TEMP0, stack_size); @@ -245,6 +261,45 @@ static void emit_function_epilogue(asm_rv32_t *state, mp_uint_t registers) { state->saved_registers_mask = old_saved_registers_mask; } +static mp_uint_t compute_zcmp_sequence_length(mp_uint_t registers) { + // Can only handle RA and S0..S11 and must have at least one entry. + assert((registers != 0) && (registers & (~0x0FFC0302U)) == 0 && "Invalid Zcmp registers set."); + mp_uint_t length = 32 - mp_clz(((registers & 0x00000002) >> 1) | ((registers & 0x00000300) >> 7) | ((registers & 0x0FFC0000) >> 15)); + return length == 12 ? 13 : length; +} + +#define EMIT_ASSERT(state, condition, message) assert((((state)->base.pass != MP_ASM_PASS_EMIT) ? true : (condition)) && (message)) + +static void emit_compressed_function_prologue(asm_rv32_t *state, mp_uint_t registers_mask) { + mp_uint_t sequence_length = compute_zcmp_sequence_length(registers_mask); + mp_uint_t allocated_stack = (sequence_length + 3) & (mp_uint_t)-4; + EMIT_ASSERT(state, allocated_stack >= sequence_length, "Incorrect allocated stack calculation."); + mp_uint_t tail_slack = allocated_stack - sequence_length; + mp_uint_t locals_left = (state->locals_count < tail_slack) ? 0 : (state->locals_count - tail_slack); + mp_uint_t adjustment_chunks = MIN(3, locals_left / 4); + EMIT_ASSERT(state, (adjustment_chunks * 4) <= locals_left, "Incorrect adjustment chunks rounding."); + locals_left -= adjustment_chunks * 4; + EMIT_ASSERT(state, locals_left <= (MP_INT_MAX / sizeof(uint32_t)), "Too many locals."); + mp_int_t stack_size = (mp_int_t)(locals_left * sizeof(uint32_t)); + asm_rv32_opcode_cmpush(state, MIN(3 + sequence_length, 15), adjustment_chunks); + // CM.PUSH allocates a stack block and then puts the registers *at the end* + // of the block, so for example "CM.PUSH {RA, S0-S11}, -64" will put RA at + // SP + 60, not at SP + 0. + adjust_stack(state, -stack_size); + // The stack size is expressed in bytes and as a multiple of 4, hence the + // bottom two bits are not used. Since there can be up to three adjustment + // chunks, that number can be expressed in two bits, fitting nicely in the + // existing variable. + state->stack_size = ((mp_uint_t)stack_size) | adjustment_chunks; +} + +static void emit_compressed_function_epilogue(asm_rv32_t *state, mp_uint_t registers_mask) { + mp_uint_t sequence_length = compute_zcmp_sequence_length(registers_mask); + mp_uint_t stack_size = state->stack_size & (mp_uint_t)(~0x03U); + adjust_stack(state, stack_size); + asm_rv32_opcode_cmpopret(state, MIN(3 + sequence_length, 15), state->stack_size & 0x03); +} + static bool calculate_displacement_for_label(asm_rv32_t *state, mp_uint_t label, ptrdiff_t *displacement) { assert(displacement != NULL && "Displacement pointer is NULL"); @@ -256,16 +311,24 @@ static bool calculate_displacement_for_label(asm_rv32_t *state, mp_uint_t label, /////////////////////////////////////////////////////////////////////////////// void asm_rv32_entry(asm_rv32_t *state, mp_uint_t locals) { + state->locals_count = locals; state->saved_registers_mask |= (1U << REG_FUN_TABLE) | (1U << REG_LOCAL_1) | \ (1U << REG_LOCAL_2) | (1U << REG_LOCAL_3); - state->locals_count = locals; - emit_function_prologue(state, state->saved_registers_mask); + if (asm_rv32_allow_zcmp_opcodes()) { + emit_compressed_function_prologue(state, state->saved_registers_mask); + } else { + emit_function_prologue(state, state->saved_registers_mask); + } } void asm_rv32_exit(asm_rv32_t *state) { - emit_function_epilogue(state, state->saved_registers_mask); - // c.jr ra - asm_rv32_opcode_cjr(state, ASM_RV32_REG_RA); + if (asm_rv32_allow_zcmp_opcodes()) { + emit_compressed_function_epilogue(state, state->saved_registers_mask); + } else { + emit_function_epilogue(state, state->saved_registers_mask); + // c.jr ra + asm_rv32_opcode_cjr(state, ASM_RV32_REG_RA); + } } void asm_rv32_end_pass(asm_rv32_t *state) { @@ -557,10 +620,6 @@ void asm_rv32_emit_optimised_xor(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs) asm_rv32_opcode_xor(state, rd, rd, rs); } -static bool asm_rv32_allow_zba_opcodes(void) { - return asm_rv32_allowed_extensions() & RV32_EXT_ZBA; -} - static void asm_rv32_fix_up_scaled_reg_reg_reg(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t operation_size) { assert(operation_size <= 2 && "Operation size value out of range."); diff --git a/py/asmrv32.h b/py/asmrv32.h index ed1b5a835..c25b1aa4e 100644 --- a/py/asmrv32.h +++ b/py/asmrv32.h @@ -197,6 +197,10 @@ void asm_rv32_end_pass(asm_rv32_t *state); ((rs & 0x07) << 7) | ((imm & 0x40) >> 1) | ((imm & 0x38) << 7) | \ ((imm & 0x04) << 4)) +#define RV32_ENCODE_TYPE_CMPP(op, ft6, ft2, rlist, imm) \ + ((op & 0x03) | ((ft6 & 0x3F) << 10) | ((ft2 & 0x03) << 8) | \ + ((rlist & 0x0F) << 4) | ((imm & 0x03) << 2)) + #define RV32_ENCODE_TYPE_CR(op, ft4, rs1, rs2) \ ((op & 0x03) | ((rs2 & 0x1F) << 2) | ((rs1 & 0x1F) << 7) | ((ft4 & 0x0F) << 12)) @@ -440,6 +444,18 @@ static inline void asm_rv32_opcode_cxor(asm_rv32_t *state, mp_uint_t rd, mp_uint asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CA(0x01, 0x23, 0x01, rd, rs)); } +// CM.POPRET {REG_LIST}, IMMEDIATE +static inline void asm_rv32_opcode_cmpopret(asm_rv32_t *state, mp_uint_t reg_list, mp_uint_t immediate) { + // CMPP: 10111110 ... .. 10 + asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CMPP(0x02, 0x2F, 0x02, reg_list, immediate)); +} + +// CM.PUSH {REG_LIST}, -IMMEDIATE +static inline void asm_rv32_opcode_cmpush(asm_rv32_t *state, mp_uint_t reg_list, mp_uint_t immediate) { + // CMPP: 10111000 .... .. 10 + asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CMPP(0x02, 0x2E, 0x00, reg_list, immediate)); +} + // CSRRC RD, RS, IMMEDIATE static inline void asm_rv32_opcode_csrrc(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t immediate) { // I: ............ ..... 011 ..... 1110011 @@ -737,8 +753,8 @@ static inline uint8_t asm_rv32_allowed_extensions(void) { #define REG_TEMP2 ASM_RV32_REG_T3 #define REG_FUN_TABLE ASM_RV32_REG_S1 #define REG_LOCAL_1 ASM_RV32_REG_S3 -#define REG_LOCAL_2 ASM_RV32_REG_S4 -#define REG_LOCAL_3 ASM_RV32_REG_S5 +#define REG_LOCAL_2 ASM_RV32_REG_S2 +#define REG_LOCAL_3 ASM_RV32_REG_S4 #define REG_ZERO ASM_RV32_REG_ZERO void asm_rv32_meta_comparison_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd); |
