diff options
| -rw-r--r-- | py/asmrv32.c | 71 | ||||
| -rw-r--r-- | py/asmrv32.h | 57 |
2 files changed, 105 insertions, 23 deletions
diff --git a/py/asmrv32.c b/py/asmrv32.c index 228edfc22..7dd58beca 100644 --- a/py/asmrv32.c +++ b/py/asmrv32.c @@ -64,9 +64,14 @@ static uint32_t fallback_popcount(uint32_t value) { #endif #endif -#define INTERNAL_TEMPORARY ASM_RV32_REG_T4 +#define INTERNAL_TEMPORARY ASM_RV32_REG_S0 #define AVAILABLE_REGISTERS_COUNT 32 +#define IS_IN_C_REGISTER_WINDOW(register_number) \ + (((register_number) >= ASM_RV32_REG_X8) && ((register_number) <= ASM_RV32_REG_X15)) +#define MAP_IN_C_REGISTER_WINDOW(register_number) \ + ((register_number) - ASM_RV32_REG_X8) + #define FIT_UNSIGNED(value, bits) (((value) & ~((1U << (bits)) - 1)) == 0) #define FIT_SIGNED(value, bits) \ ((((value) & ~((1U << ((bits) - 1)) - 1)) == 0) || \ @@ -269,7 +274,7 @@ static void emit_function_epilogue(asm_rv32_t *state, mp_uint_t registers) { void asm_rv32_entry(asm_rv32_t *state, mp_uint_t locals) { state->saved_registers_mask |= (1U << REG_FUN_TABLE) | (1U << REG_LOCAL_1) | \ - (1U << REG_LOCAL_2) | (1U << REG_LOCAL_3); + (1U << REG_LOCAL_2) | (1U << REG_LOCAL_3) | (1U << INTERNAL_TEMPORARY); state->locals_count = locals; emit_function_prologue(state, state->saved_registers_mask); } @@ -288,6 +293,14 @@ void asm_rv32_emit_call_ind(asm_rv32_t *state, mp_uint_t index) { mp_uint_t offset = index * ASM_WORD_SIZE; state->saved_registers_mask |= (1U << ASM_RV32_REG_RA); + if (IS_IN_C_REGISTER_WINDOW(REG_FUN_TABLE) && IS_IN_C_REGISTER_WINDOW(INTERNAL_TEMPORARY) && FIT_SIGNED(offset, 7)) { + // c.lw temporary, offset(fun_table) + // c.jalr temporary + asm_rv32_opcode_clw(state, MAP_IN_C_REGISTER_WINDOW(INTERNAL_TEMPORARY), MAP_IN_C_REGISTER_WINDOW(REG_FUN_TABLE), offset); + asm_rv32_opcode_cjalr(state, INTERNAL_TEMPORARY); + return; + } + if (FIT_UNSIGNED(offset, 11)) { // lw temporary, offset(fun_table) // c.jalr temporary @@ -343,6 +356,12 @@ void asm_rv32_emit_jump_if_reg_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs void asm_rv32_emit_jump_if_reg_nonzero(asm_rv32_t *state, mp_uint_t rs, mp_uint_t label) { ptrdiff_t displacement = (ptrdiff_t)(state->base.label_offsets[label] - state->base.code_offset); + if (FIT_SIGNED(displacement, 9) && IS_IN_C_REGISTER_WINDOW(rs)) { + // c.bnez rs', displacement + asm_rv32_opcode_cbnez(state, MAP_IN_C_REGISTER_WINDOW(rs), displacement); + return; + } + // The least significant bit is ignored anyway. if (FIT_SIGNED(displacement, 13)) { // bne rs, zero, displacement @@ -350,8 +369,8 @@ void asm_rv32_emit_jump_if_reg_nonzero(asm_rv32_t *state, mp_uint_t rs, mp_uint_ return; } - // Compensate for the initial BEQ opcode. - displacement -= ASM_WORD_SIZE; + // Compensate for the initial C.BEQZ/BEQ opcode. + displacement -= IS_IN_C_REGISTER_WINDOW(rs) ? ASM_HALFWORD_SIZE : ASM_WORD_SIZE; mp_uint_t upper = 0; mp_uint_t lower = 0; @@ -359,11 +378,21 @@ void asm_rv32_emit_jump_if_reg_nonzero(asm_rv32_t *state, mp_uint_t rs, mp_uint_ // TODO: Can this clobber REG_TEMP[0:2]? - // beq rs1, zero, 12 ; PC + 0 - // auipc temporary, HI(displacement) ; PC + 4 - // jalr zero, temporary, LO(displacement) ; PC + 8 - // ... ; PC + 12 - asm_rv32_opcode_beq(state, rs, ASM_RV32_REG_ZERO, 12); + // if rs1 in C window (the offset always fits): + // c.beqz rs', 10 ; PC + 0 + // auipc temporary, HI(displacement) ; PC + 2 + // jalr zero, temporary, LO(displacement) ; PC + 6 + // ... ; PC + 10 + // else: + // beq rs, zero, 12 ; PC + 0 + // auipc temporary, HI(displacement) ; PC + 4 + // jalr zero, temporary, LO(displacement) ; PC + 8 + // ... ; PC + 12 + if (IS_IN_C_REGISTER_WINDOW(rs)) { + asm_rv32_opcode_cbeqz(state, MAP_IN_C_REGISTER_WINDOW(rs), 10); + } else { + asm_rv32_opcode_beq(state, rs, ASM_RV32_REG_ZERO, 12); + } asm_rv32_opcode_auipc(state, INTERNAL_TEMPORARY, upper); asm_rv32_opcode_jalr(state, ASM_RV32_REG_ZERO, INTERNAL_TEMPORARY, lower); } @@ -427,7 +456,13 @@ void asm_rv32_emit_mov_reg_local(asm_rv32_t *state, mp_uint_t rd, mp_uint_t loca void asm_rv32_emit_mov_reg_local_addr(asm_rv32_t *state, mp_uint_t rd, mp_uint_t local) { mp_uint_t offset = state->locals_stack_offset + (local * ASM_WORD_SIZE); - if (FIT_SIGNED(offset, 11)) { + if (FIT_UNSIGNED(offset, 10) && offset != 0 && IS_IN_C_REGISTER_WINDOW(rd)) { + // c.addi4spn rd', offset + asm_rv32_opcode_caddi4spn(state, MAP_IN_C_REGISTER_WINDOW(rd), offset); + return; + } + + if (FIT_UNSIGNED(offset, 11)) { // addi rd, sp, offset asm_rv32_opcode_addi(state, rd, ASM_RV32_REG_SP, offset); return; @@ -442,6 +477,12 @@ void asm_rv32_emit_mov_reg_local_addr(asm_rv32_t *state, mp_uint_t rd, mp_uint_t void asm_rv32_emit_load_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) { mp_int_t scaled_offset = offset * sizeof(ASM_WORD_SIZE); + if (IS_IN_C_REGISTER_WINDOW(rd) && IS_IN_C_REGISTER_WINDOW(rs) && FIT_SIGNED(offset, 7)) { + // c.lw rd', offset(rs') + asm_rv32_opcode_clw(state, MAP_IN_C_REGISTER_WINDOW(rd), MAP_IN_C_REGISTER_WINDOW(rs), scaled_offset); + return; + } + if (FIT_SIGNED(scaled_offset, 12)) { // lw rd, offset(rs) asm_rv32_opcode_lw(state, rd, rs, scaled_offset); @@ -554,12 +595,12 @@ void asm_rv32_emit_optimised_xor(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs) void asm_rv32_meta_comparison_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd) { // c.li rd, 1 ; - // beq rs1, rs2, 8 ; PC + 0 - // addi rd, zero, 0 ; PC + 4 - // ... ; PC + 8 + // beq rs1, rs2, 6 ; PC + 0 + // c.li rd, 0 ; PC + 4 + // ... ; PC + 6 asm_rv32_opcode_cli(state, rd, 1); - asm_rv32_opcode_beq(state, rs1, rs2, 8); - asm_rv32_opcode_addi(state, rd, ASM_RV32_REG_ZERO, 0); + asm_rv32_opcode_beq(state, rs1, rs2, 6); + asm_rv32_opcode_cli(state, rd, 0); } void asm_rv32_meta_comparison_ne(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd) { diff --git a/py/asmrv32.h b/py/asmrv32.h index 4061fd5f8..7f7602471 100644 --- a/py/asmrv32.h +++ b/py/asmrv32.h @@ -151,10 +151,21 @@ void asm_rv32_end_pass(asm_rv32_t *state); ((op & 0b1111111) | ((rd & 0b11111) << 7) | \ (imm & 0b11111111111111111111000000000000)) +#define RV32_ENCODE_TYPE_CB(op, ft3, rs, imm) \ + ((op & 0b11) | ((ft3 & 0b111) << 13) | ((rs & 0b111) << 7) | \ + (((imm) & 0b100000000) << 4) | (((imm) & 0b11000000) >> 1) | \ + (((imm) & 0b100000) >> 3) | (((imm) & 0b11000) << 7) | \ + (((imm) & 0b110) << 2)) + #define RV32_ENCODE_TYPE_CI(op, ft3, rd, imm) \ ((op & 0b11) | ((ft3 & 0b111) << 13) | ((rd & 0b11111) << 7) | \ (((imm) & 0b100000) << 7) | (((imm) & 0b11111) << 2)) +#define RV32_ENCODE_TYPE_CIW(op, ft3, rd, imm) \ + ((op & 0b11) | ((ft3 & 0b111) << 13) | ((rd & 0b111) << 2) | \ + ((imm & 0b1111000000) << 1) | ((imm & 0b110000) << 7) | \ + ((imm & 0b1000) << 2) | ((imm & 0b100) << 4)) + #define RV32_ENCODE_TYPE_CJ(op, ft3, imm) \ ((op & 0b11) | ((ft3 & 0b111) << 13) | \ ((imm & 0b1110) << 2) | ((imm & 0b1100000000) << 1) | \ @@ -162,12 +173,18 @@ void asm_rv32_end_pass(asm_rv32_t *state); ((imm & 0b10000000) >> 1) | ((imm & 0b1000000) << 1) | \ ((imm & 0b100000) >> 3) | ((imm & 0b10000) << 7)) +#define RV32_ENCODE_TYPE_CL(op, ft3, rd, rs, imm) \ + ((op & 0b11) | ((ft3 & 0b111) << 13) | ((rd & 0b111) << 2) | \ + ((rs & 0b111) << 7) | ((imm & 0b1000000) >> 1) | \ + ((imm & 0b111000) << 7) | ((imm & 0b100) << 4)) + #define RV32_ENCODE_TYPE_CR(op, ft4, rs1, rs2) \ ((op & 0b11) | ((rs2 & 0b11111) << 2) | ((rs1 & 0b11111) << 7) | \ ((ft4 & 0b1111) << 12)) #define RV32_ENCODE_TYPE_CSS(op, ft3, rs, imm) \ - ((op & 0b11) | ((ft3 & 0b111) << 13) | ((rs & 0b11111) << 2) | ((imm) & 0b111111) << 7) + ((op & 0b11) | ((ft3 & 0b111) << 13) | ((rs & 0b11111) << 2) | \ + ((imm) & 0b111111) << 7) void asm_rv32_emit_word_opcode(asm_rv32_t *state, mp_uint_t opcode); void asm_rv32_emit_halfword_opcode(asm_rv32_t *state, mp_uint_t opcode); @@ -220,10 +237,28 @@ static inline void asm_rv32_opcode_caddi(asm_rv32_t *state, mp_uint_t rd, mp_int asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CI(0b01, 0b000, rd, immediate)); } +// C.ADDI4SPN RD', IMMEDIATE +static inline void asm_rv32_opcode_caddi4spn(asm_rv32_t *state, mp_uint_t rd, mp_uint_t immediate) { + // CIW: 000 ........ ... 00 + asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CIW(0b00, 0b000, rd, immediate)); +} + +// C.BEQZ RS', IMMEDIATE +static inline void asm_rv32_opcode_cbeqz(asm_rv32_t *state, mp_uint_t rs, mp_int_t offset) { + // CB: 110 ... ... ..... 01 + asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CB(0b01, 0b110, rs, offset)); +} + +// C.BNEZ RS', IMMEDIATE +static inline void asm_rv32_opcode_cbnez(asm_rv32_t *state, mp_uint_t rs, mp_int_t offset) { + // CB: 111 ... ... ..... 01 + asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CB(0b01, 0b111, rs, offset)); +} + // C.J OFFSET -static inline void asm_rv32_opcode_cj(asm_rv32_t *state, mp_uint_t offset) { +static inline void asm_rv32_opcode_cj(asm_rv32_t *state, mp_int_t offset) { // CJ: 101 ........... 01 - asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CJ(0b01, 0b001, offset)); + asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CJ(0b01, 0b101, offset)); } // C.JALR RS @@ -250,6 +285,12 @@ static inline void asm_rv32_opcode_clui(asm_rv32_t *state, mp_uint_t rd, mp_int_ asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CI(0b01, 0b011, rd, immediate >> 12)); } +// C.LW RD', OFFSET(RS') +static inline void asm_rv32_opcode_clw(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) { + // CL: 010 ... ... .. ... 00 + asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CL(0b00, 0b010, rd, rs, offset)); +} + // C.LWSP RD, OFFSET static inline void asm_rv32_opcode_clwsp(asm_rv32_t *state, mp_uint_t rd, mp_uint_t offset) { // CI: 010 . ..... ..... 10 @@ -383,6 +424,7 @@ static inline void asm_rv32_opcode_xori(asm_rv32_t *state, mp_uint_t rd, mp_uint } #define ASM_WORD_SIZE (4) +#define ASM_HALFWORD_SIZE (2) #define REG_RET ASM_RV32_REG_A0 #define REG_ARG_1 ASM_RV32_REG_A0 @@ -392,8 +434,7 @@ static inline void asm_rv32_opcode_xori(asm_rv32_t *state, mp_uint_t rd, mp_uint #define REG_TEMP0 ASM_RV32_REG_T1 #define REG_TEMP1 ASM_RV32_REG_T2 #define REG_TEMP2 ASM_RV32_REG_T3 -// S0 may be used as the frame pointer by the compiler. -#define REG_FUN_TABLE ASM_RV32_REG_S2 +#define REG_FUN_TABLE ASM_RV32_REG_S1 #define REG_LOCAL_1 ASM_RV32_REG_S3 #define REG_LOCAL_2 ASM_RV32_REG_S4 #define REG_LOCAL_3 ASM_RV32_REG_S5 @@ -432,10 +473,10 @@ void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t source, mp_ #define ASM_JUMP_IF_REG_EQ(state, rs1, rs2, label) asm_rv32_emit_jump_if_reg_eq(state, rs1, rs2, label) #define ASM_JUMP_IF_REG_NONZERO(state, rs, label, bool_test) asm_rv32_emit_jump_if_reg_nonzero(state, rs, label) #define ASM_JUMP_IF_REG_ZERO(state, rs, label, bool_test) asm_rv32_emit_jump_if_reg_eq(state, rs, ASM_RV32_REG_ZERO, label) -#define ASM_JUMP_REG(state, rs) asm_rv32_opcode_jalr(state, ASM_RV32_REG_ZERO, rs, 0) +#define ASM_JUMP_REG(state, rs) asm_rv32_opcode_cjr(state, rs) #define ASM_LOAD16_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_load16_reg_reg_offset(state, rd, rs, offset) #define ASM_LOAD16_REG_REG(state, rd, rs) asm_rv32_opcode_lhu(state, rd, rs, 0) -#define ASM_LOAD32_REG_REG(state, rd, rs) asm_rv32_opcode_lw(state, rd, rs, 0) +#define ASM_LOAD32_REG_REG(state, rd, rs) ASM_LOAD_REG_REG_OFFSET(state, rd, rs, 0) #define ASM_LOAD8_REG_REG(state, rd, rs) asm_rv32_opcode_lbu(state, rd, rs, 0) #define ASM_LOAD_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_load_reg_reg_offset(state, rd, rs, offset) #define ASM_LOAD_REG_REG(state, rd, rs) ASM_LOAD32_REG_REG(state, rd, rs) @@ -452,7 +493,7 @@ void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t source, mp_ #define ASM_NOT_REG(state, rd) asm_rv32_opcode_xori(state, rd, rd, -1) #define ASM_OR_REG_REG(state, rd, rs) asm_rv32_opcode_or(state, rd, rd, rs) #define ASM_STORE16_REG_REG(state, rs1, rs2) asm_rv32_opcode_sh(state, rs1, rs2, 0) -#define ASM_STORE32_REG_REG(state, rs1, rs2) asm_rv32_opcode_sw(state, rs1, rs2, 0) +#define ASM_STORE32_REG_REG(state, rs1, rs2) ASM_STORE_REG_REG_OFFSET(state, rs1, rs2, 0) #define ASM_STORE8_REG_REG(state, rs1, rs2) asm_rv32_opcode_sb(state, rs1, rs2, 0) #define ASM_STORE_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_store_reg_reg_offset(state, rd, rs, offset) #define ASM_STORE_REG_REG(state, rs1, rs2) ASM_STORE32_REG_REG(state, rs1, rs2) |
