author    Alessandro Gatti <a.gatti@frob.it>    2025-06-10 19:24:10 +0200
committer Damien George <damien@micropython.org>    2025-07-01 15:34:29 +1000
commit    cd1b921bf296da72cee4f6135ad8bd74e6217d2f (patch)
tree      2a17ff35d4b34be3755cffb1a7c6fc607f9101e7
parent    a8e036982677077e27ab3bdfb0792a8de14f8b77 (diff)
py/asmarm: Implement the full set of Viper load/store operations.
This commit expands the implementation of Viper load/store operations
that are optimised for the Arm platform. Both load and store emitters
should now generate the shortest possible sequence in all cases.
Redundant specialised operation emitters have been folded into the
general case implementation; this was the case for integer-indexed
load/store operations with a fixed offset of zero.

Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
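For context on what "shortest possible sequence" means here: on Arm (A32),
word and byte loads/stores accept a 12-bit immediate offset, while halfword
accesses only accept an 8-bit immediate, split across two nibble fields of
the encoding. Every emitter in the diff below follows the same shape,
sketched here in simplified form (an illustrative outline only, not the
actual MicroPython code; imm_limit, imm_opcode, and reg_opcode are
hypothetical stand-ins for the per-instruction details, and the halfword
variants additionally split the immediate field):

    // Illustrative sketch of the shared emitter pattern.
    static void emit_load_store(asm_arm_t *as, uint rd, uint rn,
        uint byte_offset, uint imm_limit, uint imm_opcode, uint reg_opcode) {
        if (byte_offset < imm_limit) {
            // The offset fits the immediate field: emit one instruction.
            emit_al(as, imm_opcode | (rn << 16) | (rd << 12) | byte_offset);
        } else {
            // Otherwise materialise the offset in a temporary register
            // and use the register-offset form of the instruction.
            asm_arm_mov_reg_i32_optimised(as, REG_TEMP, byte_offset);
            emit_al(as, reg_opcode | (rn << 16) | (rd << 12) | REG_TEMP);
        }
    }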
-rw-r--r--    py/asmarm.c        52
-rw-r--r--    py/asmarm.h        24
-rw-r--r--    py/emitnative.c     4
3 files changed, 46 insertions(+), 34 deletions(-)
diff --git a/py/asmarm.c b/py/asmarm.c
index be50a991b..15bc73b61 100644
--- a/py/asmarm.c
+++ b/py/asmarm.c
@@ -38,8 +38,6 @@
#define REG_TEMP ASM_ARM_REG_R8
-#define SIGNED_FIT24(x) (((x) & 0xff800000) == 0) || (((x) & 0xff000000) == 0xff000000)
-
// Insert word into instruction flow
static void emit(asm_arm_t *as, uint op) {
uint8_t *c = mp_asm_base_get_cur_to_write_bytes(&as->base, 4);
@@ -347,11 +345,6 @@ void asm_arm_ldr_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offse
}
}
-void asm_arm_ldrh_reg_reg(asm_arm_t *as, uint rd, uint rn) {
- // ldrh rd, [rn]
- emit_al(as, 0x1d000b0 | (rn << 16) | (rd << 12));
-}
-
void asm_arm_ldrh_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn) {
// ldrh doesn't support scaled register index
emit_al(as, 0x1a00080 | (REG_TEMP << 12) | rn); // mov temp, rn, lsl #1
@@ -370,16 +363,23 @@ void asm_arm_ldrh_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offs
}
}
-void asm_arm_ldrb_reg_reg(asm_arm_t *as, uint rd, uint rn) {
- // ldrb rd, [rn]
- emit_al(as, 0x5d00000 | (rn << 16) | (rd << 12));
-}
-
void asm_arm_ldrb_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn) {
// ldrb rd, [rm, rn]
emit_al(as, 0x7d00000 | (rm << 16) | (rd << 12) | rn);
}
+void asm_arm_ldrb_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offset) {
+ if (byte_offset < 0x1000) {
+ // ldrb rd, [rn, #off]
+ emit_al(as, 0x5d00000 | (rn << 16) | (rd << 12) | byte_offset);
+ } else {
+ // mov temp, #off
+ // ldrb rd, [rn, temp]
+ asm_arm_mov_reg_i32_optimised(as, REG_TEMP, byte_offset);
+ emit_al(as, 0x7d00000 | (rn << 16) | (rd << 12) | REG_TEMP);
+ }
+}
+
void asm_arm_ldr_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn) {
// ldr rd, [rm, rn, lsl #2]
emit_al(as, 0x7900100 | (rm << 16) | (rd << 12) | rn);
@@ -397,14 +397,28 @@ void asm_arm_str_reg_reg_offset(asm_arm_t *as, uint rd, uint rm, uint byte_offse
}
}
-void asm_arm_strh_reg_reg(asm_arm_t *as, uint rd, uint rm) {
- // strh rd, [rm]
- emit_al(as, 0x1c000b0 | (rm << 16) | (rd << 12));
+void asm_arm_strh_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offset) {
+ if (byte_offset < 0x100) {
+ // strh rd, [rn, #off]
+ emit_al(as, 0x1c000b0 | (rn << 16) | (rd << 12) | ((byte_offset & 0xf0) << 4) | (byte_offset & 0xf));
+ } else {
+ // mov temp, #off
+ // strh rd, [rn, temp]
+ asm_arm_mov_reg_i32_optimised(as, REG_TEMP, byte_offset);
+ emit_al(as, 0x18000b0 | (rn << 16) | (rd << 12) | REG_TEMP);
+ }
}
-void asm_arm_strb_reg_reg(asm_arm_t *as, uint rd, uint rm) {
- // strb rd, [rm]
- emit_al(as, 0x5c00000 | (rm << 16) | (rd << 12));
+void asm_arm_strb_reg_reg_offset(asm_arm_t *as, uint rd, uint rm, uint byte_offset) {
+ if (byte_offset < 0x1000) {
+ // strb rd, [rm, #off]
+ emit_al(as, 0x5c00000 | (rm << 16) | (rd << 12) | byte_offset);
+ } else {
+ // mov temp, #off
+ // strb rd, [rm, temp]
+ asm_arm_mov_reg_i32_optimised(as, REG_TEMP, byte_offset);
+ emit_al(as, 0x7c00000 | (rm << 16) | (rd << 12) | REG_TEMP);
+ }
}
void asm_arm_str_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn) {
@@ -430,7 +444,7 @@ void asm_arm_bcc_label(asm_arm_t *as, int cond, uint label) {
rel -= 8; // account for instruction prefetch, PC is 8 bytes ahead of this instruction
rel >>= 2; // in ARM mode the branch target is 32-bit aligned, so the 2 LSB are omitted
- if (SIGNED_FIT24(rel)) {
+ if (MP_FIT_SIGNED(24, rel)) {
emit(as, cond | 0xa000000 | (rel & 0xffffff));
} else {
printf("asm_arm_bcc: branch does not fit in 24 bits\n");
diff --git a/py/asmarm.h b/py/asmarm.h
index 07ed425c9..0d6881214 100644
--- a/py/asmarm.h
+++ b/py/asmarm.h
@@ -110,12 +110,11 @@ void asm_arm_asr_reg_reg(asm_arm_t *as, uint rd, uint rs);
// memory
void asm_arm_ldr_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offset);
-void asm_arm_ldrh_reg_reg(asm_arm_t *as, uint rd, uint rn);
void asm_arm_ldrh_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offset);
-void asm_arm_ldrb_reg_reg(asm_arm_t *as, uint rd, uint rn);
+void asm_arm_ldrb_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offset);
void asm_arm_str_reg_reg_offset(asm_arm_t *as, uint rd, uint rm, uint byte_offset);
-void asm_arm_strh_reg_reg(asm_arm_t *as, uint rd, uint rm);
-void asm_arm_strb_reg_reg(asm_arm_t *as, uint rd, uint rm);
+void asm_arm_strh_reg_reg_offset(asm_arm_t *as, uint rd, uint rm, uint byte_offset);
+void asm_arm_strb_reg_reg_offset(asm_arm_t *as, uint rd, uint rm, uint byte_offset);
// load from array
void asm_arm_ldr_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn);
@@ -209,16 +208,19 @@ void asm_arm_bx_reg(asm_arm_t *as, uint reg_src);
#define ASM_MUL_REG_REG(as, reg_dest, reg_src) asm_arm_mul_reg_reg_reg((as), (reg_dest), (reg_dest), (reg_src))
#define ASM_LOAD_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) ASM_LOAD32_REG_REG_OFFSET((as), (reg_dest), (reg_base), (word_offset))
-#define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) asm_arm_ldrb_reg_reg((as), (reg_dest), (reg_base))
-#define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) asm_arm_ldrh_reg_reg((as), (reg_dest), (reg_base))
-#define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, uint16_offset) asm_arm_ldrh_reg_reg_offset((as), (reg_dest), (reg_base), 2 * (uint16_offset))
-#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) asm_arm_ldr_reg_reg_offset((as), (reg_dest), (reg_base), 0)
+#define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) ASM_LOAD8_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
+#define ASM_LOAD8_REG_REG_OFFSET(as, reg_dest, reg_base, byte_offset) asm_arm_ldrb_reg_reg_offset((as), (reg_dest), (reg_base), (byte_offset))
+#define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) ASM_LOAD16_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
+#define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, halfword_offset) asm_arm_ldrh_reg_reg_offset((as), (reg_dest), (reg_base), 2 * (halfword_offset))
+#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) ASM_LOAD32_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
#define ASM_LOAD32_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_arm_ldr_reg_reg_offset((as), (reg_dest), (reg_base), 4 * (word_offset))
#define ASM_STORE_REG_REG_OFFSET(as, reg_value, reg_base, word_offset) ASM_STORE32_REG_REG_OFFSET((as), (reg_value), (reg_base), (word_offset))
-#define ASM_STORE8_REG_REG(as, reg_value, reg_base) asm_arm_strb_reg_reg((as), (reg_value), (reg_base))
-#define ASM_STORE16_REG_REG(as, reg_value, reg_base) asm_arm_strh_reg_reg((as), (reg_value), (reg_base))
-#define ASM_STORE32_REG_REG(as, reg_value, reg_base) asm_arm_str_reg_reg_offset((as), (reg_value), (reg_base), 0)
+#define ASM_STORE8_REG_REG(as, reg_value, reg_base) ASM_STORE8_REG_REG_OFFSET((as), (reg_value), (reg_base), 0)
+#define ASM_STORE8_REG_REG_OFFSET(as, reg_value, reg_base, byte_offset) asm_arm_strb_reg_reg_offset((as), (reg_value), (reg_base), (byte_offset))
+#define ASM_STORE16_REG_REG(as, reg_value, reg_base) ASM_STORE16_REG_REG_OFFSET((as), (reg_value), (reg_base), 0)
+#define ASM_STORE16_REG_REG_OFFSET(as, reg_value, reg_base, halfword_offset) asm_arm_strh_reg_reg_offset((as), (reg_value), (reg_base), 2 * (halfword_offset))
+#define ASM_STORE32_REG_REG(as, reg_value, reg_base) ASM_STORE32_REG_REG_OFFSET((as), (reg_value), (reg_base), 0)
#define ASM_STORE32_REG_REG_OFFSET(as, reg_value, reg_base, word_offset) asm_arm_str_reg_reg_offset((as), (reg_value), (reg_base), 4 * (word_offset))
#define ASM_LOAD8_REG_REG_REG(as, reg_dest, reg_base, reg_index) asm_arm_ldrb_reg_reg_reg((as), (reg_dest), (reg_base), (reg_index))
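With these header changes the zero-offset macros become thin wrappers over
the offset variants instead of mapping to separate emitters, and the 16-bit
macros take their offset in halfword units, scaling it to a byte offset
before calling the emitter. A hypothetical call site expands as follows (an
illustrative trace of the macros defined above, not code from the commit):

    // Hypothetical call site:
    ASM_STORE16_REG_REG(as, reg_value, reg_base);
    // expands to: ASM_STORE16_REG_REG_OFFSET(as, reg_value, reg_base, 0)
    // expands to: asm_arm_strh_reg_reg_offset(as, reg_value, reg_base, 2 * 0)
    // emits the single instruction: strh reg_value, [reg_base]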
diff --git a/py/emitnative.c b/py/emitnative.c
index 2cce31dae..36e9719db 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -1785,10 +1785,6 @@ static void emit_native_store_subscr(emit_t *emit) {
if (index_value != 0) {
// index is non-zero
ASM_MOV_REG_IMM(emit->as, reg_index, index_value);
- #if N_ARM
- asm_arm_strb_reg_reg_reg(emit->as, reg_value, reg_base, reg_index);
- break;
- #endif
ASM_ADD_REG_REG(emit->as, reg_index, reg_base); // add index to base
reg_base = reg_index;
}
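Finally, the emitnative.c hunk drops the Arm-only special case from the
generic byte-store path. Since asmarm.h now defines
ASM_STORE8_REG_REG_OFFSET, Arm can presumably take the same offset-based
store path as the other ports (that path is outside this diff), which for
small constant indices is strictly shorter than the removed sequence. A
rough comparison, with illustrative register names:

    // Removed Arm-only sequence (two instructions):
    //   mov  temp, #index
    //   strb value, [base, temp]
    // Immediate-offset form for index < 0x1000 (one instruction):
    //   strb value, [base, #index]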