diff options
author | Alessandro Gatti <a.gatti@frob.it> | 2025-04-17 00:27:28 +0200 |
---|---|---|
committer | Alessandro Gatti <a.gatti@frob.it> | 2025-05-21 01:50:11 +0200 |
commit | 1d37caa3679c86ef4fe43d2f9e6ac13074498433 (patch) | |
tree | 8830966badd8c401730eccf6a894ceb71f321e0d | |
parent | 186caf9f0326c9d61494a7d5c6d0408c0fef8485 (diff) |
py/emitnative: Improve Viper register-indexed code for Arm.
This commit lets the Viper code generator use optimised code sequences
for register-indexed load and store operations when generating Arm code.
The existing code defaulted to generic multi-operation code sequences
for Arm code in most cases. Now optimised implementations are provided
for register-indexed loads and stores of all data sizes, taking at most
two machine opcodes for each operation.
Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
-rw-r--r-- | py/asmarm.c | 16 | ||||
-rw-r--r-- | py/asmarm.h | 6 | ||||
-rw-r--r-- | py/emitnative.c | 14 |
3 files changed, 34 insertions, 2 deletions
diff --git a/py/asmarm.c b/py/asmarm.c index 6fa751b32..d30456788 100644 --- a/py/asmarm.c +++ b/py/asmarm.c @@ -343,6 +343,12 @@ void asm_arm_ldrh_reg_reg(asm_arm_t *as, uint rd, uint rn) { emit_al(as, 0x1d000b0 | (rn << 16) | (rd << 12)); } +void asm_arm_ldrh_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn) { + // ldrh doesn't support scaled register index + emit_al(as, 0x1a00080 | (ASM_ARM_REG_R8 << 12) | rn); // mov r8, rn, lsl #1 + emit_al(as, 0x19000b0 | (rm << 16) | (rd << 12) | ASM_ARM_REG_R8); // ldrh rd, [rm, r8]; +} + void asm_arm_ldrh_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offset) { if (byte_offset < 0x100) { // ldrh rd, [rn, #off] @@ -360,6 +366,16 @@ void asm_arm_ldrb_reg_reg(asm_arm_t *as, uint rd, uint rn) { emit_al(as, 0x5d00000 | (rn << 16) | (rd << 12)); } +void asm_arm_ldrb_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn) { + // ldrb rd, [rm, rn] + emit_al(as, 0x7d00000 | (rm << 16) | (rd << 12) | rn); +} + +void asm_arm_ldr_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn) { + // ldr rd, [rm, rn, lsl #2] + emit_al(as, 0x7900100 | (rm << 16) | (rd << 12) | rn); +} + void asm_arm_str_reg_reg(asm_arm_t *as, uint rd, uint rm, uint byte_offset) { // str rd, [rm, #off] emit_al(as, 0x5800000 | (rm << 16) | (rd << 12) | byte_offset); diff --git a/py/asmarm.h b/py/asmarm.h index 4a4253aef..20b4757d2 100644 --- a/py/asmarm.h +++ b/py/asmarm.h @@ -116,6 +116,12 @@ void asm_arm_ldrb_reg_reg(asm_arm_t *as, uint rd, uint rn); void asm_arm_str_reg_reg(asm_arm_t *as, uint rd, uint rm, uint byte_offset); void asm_arm_strh_reg_reg(asm_arm_t *as, uint rd, uint rm); void asm_arm_strb_reg_reg(asm_arm_t *as, uint rd, uint rm); + +// load from array +void asm_arm_ldr_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn); +void asm_arm_ldrh_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn); +void asm_arm_ldrb_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn); + // store to array void asm_arm_str_reg_reg_reg(asm_arm_t *as, uint 
rd, uint rm, uint rn); void asm_arm_strh_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn); diff --git a/py/emitnative.c b/py/emitnative.c index 1aab0a9eb..2fb4bdb42 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -1638,6 +1638,10 @@ static void emit_native_load_subscr(emit_t *emit) { switch (vtype_base) { case VTYPE_PTR8: { // pointer to 8-bit memory + #if N_ARM + asm_arm_ldrb_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, reg_index); + break; + #endif // TODO optimise to use thumb ldrb r1, [r2, r3] ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base ASM_LOAD8_REG_REG(emit->as, REG_RET, REG_ARG_1); // store value to (base+index) @@ -1645,7 +1649,10 @@ static void emit_native_load_subscr(emit_t *emit) { } case VTYPE_PTR16: { // pointer to 16-bit memory - #if N_XTENSA || N_XTENSAWIN + #if N_ARM + asm_arm_ldrh_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, reg_index); + break; + #elif N_XTENSA || N_XTENSAWIN asm_xtensa_op_addx2(emit->as, REG_ARG_1, reg_index, REG_ARG_1); asm_xtensa_op_l16ui(emit->as, REG_RET, REG_ARG_1, 0); break; @@ -1657,7 +1664,10 @@ static void emit_native_load_subscr(emit_t *emit) { } case VTYPE_PTR32: { // pointer to word-size memory - #if N_RV32 + #if N_ARM + asm_arm_ldr_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, reg_index); + break; + #elif N_RV32 asm_rv32_opcode_slli(emit->as, REG_TEMP2, reg_index, 2); asm_rv32_opcode_cadd(emit->as, REG_ARG_1, REG_TEMP2); asm_rv32_opcode_lw(emit->as, REG_RET, REG_ARG_1, 0); |