diff options
| author | Alessandro Gatti <a.gatti@frob.it> | 2025-05-22 12:34:12 +0200 |
|---|---|---|
| committer | Damien George <damien@micropython.org> | 2025-06-10 11:29:02 +1000 |
| commit | 1f5ba6998bb1895354f5afcd7bb52d83a02733be (patch) | |
| tree | 71345700615e2d3397e77dc79415c343bdec73fd | |
| parent | 78ee1bac60a89f13d62395c1f7b6b122e26af98a (diff) | |
py/asmthumb: Extend load/store generators with ARMv7-M opcodes.
This commit lets the Thumb native code generator backend emit ARMv7-M
specific opcodes for indexed load/store operations if possible.
Now T3 opcode encodings are used if the generator backend is configured
to allow emitting ARMv7-M opcodes and if the (unsigned) scaled index
fits in 12 bits. Or, in other words, LDR{B,H}.W and STR{B,H}.W opcodes
are now emitted if possible.
Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
| -rw-r--r-- | py/asmthumb.c | 66 | ||||
| -rw-r--r-- | py/asmthumb.h | 27 | ||||
| -rw-r--r-- | py/emitnative.c | 43 |
3 files changed, 57 insertions, 79 deletions
diff --git a/py/asmthumb.c b/py/asmthumb.c index 06021f2bc..c0ba6222c 100644 --- a/py/asmthumb.c +++ b/py/asmthumb.c @@ -40,6 +40,7 @@ #define UNSIGNED_FIT5(x) ((uint32_t)(x) < 32) #define UNSIGNED_FIT7(x) ((uint32_t)(x) < 128) #define UNSIGNED_FIT8(x) (((x) & 0xffffff00) == 0) +#define UNSIGNED_FIT12(x) (((x) & 0xfffff000) == 0) #define UNSIGNED_FIT16(x) (((x) & 0xffff0000) == 0) #define SIGNED_FIT8(x) (((x) & 0xffffff80) == 0) || (((x) & 0xffffff80) == 0xffffff80) #define SIGNED_FIT9(x) (((x) & 0xffffff00) == 0) || (((x) & 0xffffff00) == 0xffffff00) @@ -52,12 +53,6 @@ #define OP_SUB_W_RRI_HI(reg_src) (0xf2a0 | (reg_src)) #define OP_SUB_W_RRI_LO(reg_dest, imm11) ((imm11 << 4 & 0x7000) | reg_dest << 8 | (imm11 & 0xff)) -#define OP_LDR_W_HI(reg_base) (0xf8d0 | (reg_base)) -#define OP_LDR_W_LO(reg_dest, imm12) ((reg_dest) << 12 | (imm12)) - -#define OP_LDRH_W_HI(reg_base) (0xf8b0 | (reg_base)) -#define OP_LDRH_W_LO(reg_dest, imm12) ((reg_dest) << 12 | (imm12)) - static inline byte *asm_thumb_get_cur_to_write_bytes(asm_thumb_t *as, int n) { return mp_asm_base_get_cur_to_write_bytes(&as->base, n); } @@ -432,11 +427,6 @@ void asm_thumb_mov_reg_pcrel(asm_thumb_t *as, uint rlo_dest, uint label) { asm_thumb_add_reg_reg(as, rlo_dest, ASM_THUMB_REG_R15); // 2 bytes } -// ARMv7-M only -static inline void asm_thumb_ldr_reg_reg_i12(asm_thumb_t *as, uint reg_dest, uint reg_base, uint word_offset) { - asm_thumb_op32(as, OP_LDR_W_HI(reg_base), OP_LDR_W_LO(reg_dest, word_offset * 4)); -} - // emits code for: reg_dest = reg_base + offset << offset_shift static void asm_thumb_add_reg_reg_offset(asm_thumb_t *as, uint reg_dest, uint reg_base, uint offset, uint offset_shift) { if (reg_dest < ASM_THUMB_REG_R8 && reg_base < ASM_THUMB_REG_R8) { @@ -464,30 +454,44 @@ static void asm_thumb_add_reg_reg_offset(asm_thumb_t *as, uint reg_dest, uint re } } -void asm_thumb_ldr_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint word_offset) { - if (reg_dest < ASM_THUMB_REG_R8 && reg_base < ASM_THUMB_REG_R8 && UNSIGNED_FIT5(word_offset)) { - asm_thumb_ldr_rlo_rlo_i5(as, reg_dest, reg_base, word_offset); - } else if (asm_thumb_allow_armv7m(as)) { - asm_thumb_ldr_reg_reg_i12(as, reg_dest, reg_base, word_offset); +#define OP_LDR_STR_W_HI(shift, reg) ((0xf880 | (shift) << 5) | (reg)) +#define OP_LDR_STR_W_LO(reg, imm12) (((reg) << 12) | (imm12)) + +#define OP_LDR 0x01 +#define OP_STR 0x00 + +#define OP_LDR_W 0x10 +#define OP_STR_W 0x00 + +static const uint8_t OP_LDR_STR_TABLE[3] = { + 0x0E, 0x10, 0x0C +}; + +void asm_thumb_load_reg_reg_offset(asm_thumb_t *as, uint reg_dest, uint reg_base, uint offset, uint shift) { + if (UNSIGNED_FIT5(offset) && (reg_dest < ASM_THUMB_REG_R8) && (reg_base < ASM_THUMB_REG_R8)) { + // Can use T1 encoding + asm_thumb_op16(as, ((OP_LDR_STR_TABLE[shift] | OP_LDR) << 11) | (offset << 6) | (reg_base << 3) | reg_dest); + } else if (asm_thumb_allow_armv7m(as) && UNSIGNED_FIT12(offset << shift)) { + // Can use T3 encoding + asm_thumb_op32(as, (OP_LDR_STR_W_HI(shift, reg_base) | OP_LDR_W), OP_LDR_STR_W_LO(reg_dest, (offset << shift))); } else { - asm_thumb_add_reg_reg_offset(as, reg_dest, reg_base, word_offset - 31, 2); - asm_thumb_ldr_rlo_rlo_i5(as, reg_dest, reg_dest, 31); + // Must use the generic sequence + asm_thumb_add_reg_reg_offset(as, reg_dest, reg_base, offset - 31, shift); + asm_thumb_op16(as, ((OP_LDR_STR_TABLE[shift] | OP_LDR) << 11) | (31 << 6) | (reg_dest << 3) | (reg_dest)); } } -// ARMv7-M only -static inline void asm_thumb_ldrh_reg_reg_i12(asm_thumb_t *as, uint reg_dest, uint reg_base, uint uint16_offset) { - asm_thumb_op32(as, OP_LDRH_W_HI(reg_base), OP_LDRH_W_LO(reg_dest, uint16_offset * 2)); -} - -void asm_thumb_ldrh_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint uint16_offset) { - if (reg_dest < ASM_THUMB_REG_R8 && reg_base < ASM_THUMB_REG_R8 && UNSIGNED_FIT5(uint16_offset)) { - asm_thumb_ldrh_rlo_rlo_i5(as, reg_dest, reg_base, uint16_offset); - } else if (asm_thumb_allow_armv7m(as)) { - asm_thumb_ldrh_reg_reg_i12(as, reg_dest, reg_base, uint16_offset); +void asm_thumb_store_reg_reg_offset(asm_thumb_t *as, uint reg_src, uint reg_base, uint offset, uint shift) { + if (UNSIGNED_FIT5(offset) && (reg_src < ASM_THUMB_REG_R8) && (reg_base < ASM_THUMB_REG_R8)) { + // Can use T1 encoding + asm_thumb_op16(as, ((OP_LDR_STR_TABLE[shift] | OP_STR) << 11) | (offset << 6) | (reg_base << 3) | reg_src); + } else if (asm_thumb_allow_armv7m(as) && UNSIGNED_FIT12(offset << shift)) { + // Can use T3 encoding + asm_thumb_op32(as, (OP_LDR_STR_W_HI(shift, reg_base) | OP_STR_W), OP_LDR_STR_W_LO(reg_src, (offset << shift))); } else { - asm_thumb_add_reg_reg_offset(as, reg_dest, reg_base, uint16_offset - 31, 1); - asm_thumb_ldrh_rlo_rlo_i5(as, reg_dest, reg_dest, 31); + // Must use the generic sequence + asm_thumb_add_reg_reg_offset(as, reg_base, reg_base, offset - 31, shift); + asm_thumb_op16(as, ((OP_LDR_STR_TABLE[shift] | OP_STR) << 11) | (31 << 6) | (reg_base << 3) | reg_src); } } @@ -569,7 +573,7 @@ void asm_thumb_b_rel12(asm_thumb_t *as, int rel) { void asm_thumb_bl_ind(asm_thumb_t *as, uint fun_id, uint reg_temp) { // Load ptr to function from table, indexed by fun_id, then call it - asm_thumb_ldr_reg_reg_i12_optimised(as, reg_temp, ASM_THUMB_REG_FUN_TABLE, fun_id); + asm_thumb_load_reg_reg_offset(as, reg_temp, ASM_THUMB_REG_FUN_TABLE, fun_id, 2); asm_thumb_op16(as, OP_BLX(reg_temp)); } diff --git a/py/asmthumb.h b/py/asmthumb.h index 570b44b96..6c83b583e 100644 --- a/py/asmthumb.h +++ b/py/asmthumb.h @@ -382,8 +382,10 @@ void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num); // void asm_thumb_mov_reg_local_addr(asm_thumb_t *as, uint rlo_dest, int local_num); // convenience void asm_thumb_mov_reg_pcrel(asm_thumb_t *as, uint rlo_dest, uint label); -void asm_thumb_ldr_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint word_offset); // convenience -void asm_thumb_ldrh_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint uint16_offset); // convenience +// Generate optimised load dest, [src, #offset] sequence +void asm_thumb_load_reg_reg_offset(asm_thumb_t *as, uint reg_dest, uint reg_base, uint offset, uint shift); +// Generate optimised store src, [dest, #offset] sequence +void asm_thumb_store_reg_reg_offset(asm_thumb_t *as, uint reg_src, uint reg_base, uint offset, uint shift); void asm_thumb_b_label(asm_thumb_t *as, uint label); // convenience: picks narrow or wide branch void asm_thumb_bcc_label(asm_thumb_t *as, int cc, uint label); // convenience: picks narrow or wide branch @@ -463,17 +465,20 @@ void asm_thumb_b_rel12(asm_thumb_t *as, int rel); #define ASM_MUL_REG_REG(as, reg_dest, reg_src) asm_thumb_format_4((as), ASM_THUMB_FORMAT_4_MUL, (reg_dest), (reg_src)) #define ASM_LOAD_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) ASM_LOAD32_REG_REG_OFFSET((as), (reg_dest), (reg_base), (word_offset)) -#define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) asm_thumb_ldrb_rlo_rlo_i5((as), (reg_dest), (reg_base), 0) -#define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) asm_thumb_ldrh_rlo_rlo_i5((as), (reg_dest), (reg_base), 0) -#define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, uint16_offset) asm_thumb_ldrh_reg_reg_i12_optimised((as), (reg_dest), (reg_base), (uint16_offset)) -#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) asm_thumb_ldr_rlo_rlo_i5((as), (reg_dest), (reg_base), 0) -#define ASM_LOAD32_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_thumb_ldr_reg_reg_i12_optimised((as), (reg_dest), (reg_base), (word_offset)) +#define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) ASM_LOAD8_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0) +#define ASM_LOAD8_REG_REG_OFFSET(as, reg_dest, reg_base, byte_offset) asm_thumb_load_reg_reg_offset((as), (reg_dest), (reg_base), (byte_offset), 0) +#define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) ASM_LOAD16_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0) +#define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, halfword_offset) asm_thumb_load_reg_reg_offset((as), (reg_dest), (reg_base), (halfword_offset), 1) +#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) ASM_LOAD32_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0) +#define ASM_LOAD32_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_thumb_load_reg_reg_offset((as), (reg_dest), (reg_base), (word_offset), 2) #define ASM_STORE_REG_REG_OFFSET(as, reg_src, reg_base, word_offset) ASM_STORE32_REG_REG_OFFSET((as), (reg_src), (reg_base), (word_offset)) -#define ASM_STORE8_REG_REG(as, reg_src, reg_base) asm_thumb_strb_rlo_rlo_i5((as), (reg_src), (reg_base), 0) -#define ASM_STORE16_REG_REG(as, reg_src, reg_base) asm_thumb_strh_rlo_rlo_i5((as), (reg_src), (reg_base), 0) -#define ASM_STORE32_REG_REG(as, reg_src, reg_base) asm_thumb_str_rlo_rlo_i5((as), (reg_src), (reg_base), 0) -#define ASM_STORE32_REG_REG_OFFSET(as, reg_src, reg_base, word_offset) asm_thumb_str_rlo_rlo_i5((as), (reg_src), (reg_base), (word_offset)) +#define ASM_STORE8_REG_REG(as, reg_src, reg_base) ASM_STORE8_REG_REG_OFFSET((as), (reg_src), (reg_base), 0) +#define ASM_STORE8_REG_REG_OFFSET(as, reg_src, reg_base, byte_offset) asm_thumb_store_reg_reg_offset((as), (reg_src), (reg_base), (byte_offset), 0) +#define ASM_STORE16_REG_REG(as, reg_src, reg_base) ASM_STORE16_REG_REG_OFFSET((as), (reg_src), (reg_base), 0) +#define ASM_STORE16_REG_REG_OFFSET(as, reg_src, reg_base, halfword_offset) asm_thumb_store_reg_reg_offset((as), (reg_src), (reg_base), (halfword_offset), 1) +#define ASM_STORE32_REG_REG(as, reg_src, reg_base) ASM_STORE32_REG_REG_OFFSET((as), (reg_src), (reg_base), 0) +#define ASM_STORE32_REG_REG_OFFSET(as, reg_src, reg_base, word_offset) asm_thumb_store_reg_reg_offset((as), (reg_src), (reg_base), (word_offset), 2) #define ASM_LOAD8_REG_REG_REG(as, reg_dest, reg_base, reg_index) asm_thumb_ldrb_rlo_rlo_rlo((as), (reg_dest), (reg_base), (reg_index)) #define ASM_LOAD16_REG_REG_REG(as, reg_dest, reg_base, reg_index) \ diff --git a/py/emitnative.c b/py/emitnative.c index 577bcddfb..c1965fc91 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -1540,12 +1540,7 @@ static void emit_native_load_subscr(emit_t *emit) { #ifdef ASM_LOAD8_REG_REG_OFFSET ASM_LOAD8_REG_REG_OFFSET(emit->as, REG_RET, reg_base, index_value); #else - #if N_THUMB - if (index_value >= 0 && index_value < 32) { - asm_thumb_ldrb_rlo_rlo_i5(emit->as, REG_RET, reg_base, index_value); - break; - } - #elif N_RV32 + #if N_RV32 if (FIT_SIGNED(index_value, 12)) { asm_rv32_opcode_lbu(emit->as, REG_RET, reg_base, index_value); break; @@ -1572,12 +1567,7 @@ static void emit_native_load_subscr(emit_t *emit) { #ifdef ASM_LOAD16_REG_REG_OFFSET ASM_LOAD16_REG_REG_OFFSET(emit->as, REG_RET, reg_base, index_value); #else - #if N_THUMB - if (index_value >= 0 && index_value < 32) { - asm_thumb_ldrh_rlo_rlo_i5(emit->as, REG_RET, reg_base, index_value); - break; - } - #elif N_RV32 + #if N_RV32 if (FIT_SIGNED(index_value, 11)) { asm_rv32_opcode_lhu(emit->as, REG_RET, reg_base, index_value << 1); break; @@ -1604,12 +1594,7 @@ static void emit_native_load_subscr(emit_t *emit) { #ifdef ASM_LOAD32_REG_REG_OFFSET ASM_LOAD32_REG_REG_OFFSET(emit->as, REG_RET, reg_base, index_value); #else - #if N_THUMB - if (index_value >= 0 && index_value < 32) { - asm_thumb_ldr_rlo_rlo_i5(emit->as, REG_RET, reg_base, index_value); - break; - } - #elif N_RV32 + #if N_RV32 if (FIT_SIGNED(index_value, 10)) { asm_rv32_opcode_lw(emit->as, REG_RET, reg_base, index_value << 2); break; @@ -1824,13 +1809,7 @@ static void emit_native_store_subscr(emit_t *emit) { #ifdef ASM_STORE8_REG_REG_OFFSET ASM_STORE8_REG_REG_OFFSET(emit->as, reg_value, reg_base, index_value); #else - // TODO optimise to use thumb strb r1, [r2, r3] - #if N_THUMB - if (index_value >= 0 && index_value < 32) { - asm_thumb_strb_rlo_rlo_i5(emit->as, reg_value, reg_base, index_value); - break; - } - #elif N_RV32 + #if N_RV32 if (FIT_SIGNED(index_value, 12)) { asm_rv32_opcode_sb(emit->as, reg_value, reg_base, index_value); break; @@ -1860,12 +1839,7 @@ static void emit_native_store_subscr(emit_t *emit) { #ifdef ASM_STORE16_REG_REG_OFFSET ASM_STORE16_REG_REG_OFFSET(emit->as, reg_value, reg_base, index_value); #else - #if N_THUMB - if (index_value >= 0 && index_value < 32) { - asm_thumb_strh_rlo_rlo_i5(emit->as, reg_value, reg_base, index_value); - break; - } - #elif N_RV32 + #if N_RV32 if (FIT_SIGNED(index_value, 11)) { asm_rv32_opcode_sh(emit->as, reg_value, reg_base, index_value << 1); break; @@ -1891,12 +1865,7 @@ static void emit_native_store_subscr(emit_t *emit) { #ifdef ASM_STORE32_REG_REG_OFFSET ASM_STORE32_REG_REG_OFFSET(emit->as, reg_value, reg_base, index_value); #else - #if N_THUMB - if (index_value >= 0 && index_value < 32) { - asm_thumb_str_rlo_rlo_i5(emit->as, reg_value, reg_base, index_value); - break; - } - #elif N_RV32 + #if N_RV32 if (FIT_SIGNED(index_value, 10)) { asm_rv32_opcode_sw(emit->as, reg_value, reg_base, index_value << 2); break; |
