diff options
| author | Alessandro Gatti <a.gatti@frob.it> | 2024-06-08 11:00:08 +0200 |
|---|---|---|
| committer | Damien George <damien@micropython.org> | 2024-06-21 15:06:07 +1000 |
| commit | 8338f663523d675847b8c0b9b92977b76995de8f (patch) | |
| tree | 7058f782f01fd1a211a18c73456565f4f26f77aa /py | |
| parent | 5a778ebc378d7a1bc9716177950c9e8ac000bb56 (diff) | |
py/asmrv32: Add RISC-V RV32IMC native code emitter.
This adds a native code generation backend for RISC-V RV32 CPUs, currently
limited to the I base instruction set plus the M and C extensions (RV32IMC).
Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
Diffstat (limited to 'py')
| -rw-r--r-- | py/asmbase.h | 1 | ||||
| -rw-r--r-- | py/asmrv32.c | 595 | ||||
| -rw-r--r-- | py/asmrv32.h | 464 | ||||
| -rw-r--r-- | py/compile.c | 3 | ||||
| -rw-r--r-- | py/emit.h | 3 | ||||
| -rw-r--r-- | py/emitglue.c | 2 | ||||
| -rw-r--r-- | py/emitnative.c | 32 | ||||
| -rw-r--r-- | py/emitnrv32.c | 44 | ||||
| -rw-r--r-- | py/mpconfig.h | 7 | ||||
| -rw-r--r-- | py/py.cmake | 2 | ||||
| -rw-r--r-- | py/py.mk | 2 |
11 files changed, 1152 insertions, 3 deletions
diff --git a/py/asmbase.h b/py/asmbase.h index 352d2f54c..461393fe7 100644 --- a/py/asmbase.h +++ b/py/asmbase.h @@ -27,6 +27,7 @@ #define MICROPY_INCLUDED_PY_ASMBASE_H #include <stdint.h> +#include <stddef.h> #include <stdbool.h> #define MP_ASM_PASS_COMPUTE (1) diff --git a/py/asmrv32.c b/py/asmrv32.c new file mode 100644 index 000000000..228edfc22 --- /dev/null +++ b/py/asmrv32.c @@ -0,0 +1,595 @@ +/* + * This file is part of the MicroPython project, https://micropython.org/ + * + * The MIT License (MIT) + * + * Copyright (c) 2024 Alessandro Gatti + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "py/emit.h" +#include "py/mpconfig.h" + +// wrapper around everything in this file +#if MICROPY_EMIT_RV32 + +#include "py/asmrv32.h" + +#if MICROPY_DEBUG_VERBOSE +#define DEBUG_PRINT (1) +#define DEBUG_printf DEBUG_printf +#else +#define DEBUG_printf(...) 
(void)0 +#endif + +#ifndef MP_POPCOUNT +#ifdef _MSC_VER +#include <intrin.h> +#define MP_POPCOUNT __popcnt +#else +#if defined __has_builtin +#if __has_builtin(__builtin_popcount) +#define MP_POPCOUNT __builtin_popcount +#endif +#else +static uint32_t fallback_popcount(uint32_t value) { + value = value - ((value >> 1) & 0x55555555); + value = (value & 0x33333333) + ((value >> 2) & 0x33333333); + value = (value + (value >> 4)) & 0x0F0F0F0F; + return value * 0x01010101; +} +#define MP_POPCOUNT fallback_popcount +#endif +#endif +#endif + +#define INTERNAL_TEMPORARY ASM_RV32_REG_T4 +#define AVAILABLE_REGISTERS_COUNT 32 + +#define FIT_UNSIGNED(value, bits) (((value) & ~((1U << (bits)) - 1)) == 0) +#define FIT_SIGNED(value, bits) \ + ((((value) & ~((1U << ((bits) - 1)) - 1)) == 0) || \ + (((value) & ~((1U << ((bits) - 1)) - 1)) == ~((1U << ((bits) - 1)) - 1))) + +/////////////////////////////////////////////////////////////////////////////// + +void asm_rv32_emit_word_opcode(asm_rv32_t *state, mp_uint_t word) { + uint8_t *cursor = mp_asm_base_get_cur_to_write_bytes(&state->base, sizeof(uint32_t)); + if (cursor == NULL) { + return; + } + + #if MP_ENDIANNESS_LITTLE + cursor[0] = word & 0xFF; + cursor[1] = (word >> 8) & 0xFF; + cursor[2] = (word >> 16) & 0xFF; + cursor[3] = (word >> 24) & 0xFF; + #else + cursor[0] = (word >> 24) & 0xFF; + cursor[1] = (word >> 16) & 0xFF; + cursor[2] = (word >> 8) & 0xFF; + cursor[3] = word & 0xFF; + #endif +} + +void asm_rv32_emit_halfword_opcode(asm_rv32_t *state, mp_uint_t word) { + uint8_t *cursor = mp_asm_base_get_cur_to_write_bytes(&state->base, sizeof(uint16_t)); + if (cursor == NULL) { + return; + } + + #if MP_ENDIANNESS_LITTLE + cursor[0] = word & 0xFF; + cursor[1] = (word >> 8) & 0xFF; + #else + cursor[0] = (word >> 8) & 0xFF; + cursor[1] = word & 0xFF; + #endif +} + +/////////////////////////////////////////////////////////////////////////////// + +static void split_immediate(mp_int_t immediate, mp_uint_t *upper, mp_uint_t *lower) 
{ + assert(upper != NULL && "Upper pointer is NULL."); + assert(lower != NULL && "Lower pointer is NULL."); + + mp_uint_t unsigned_immediate = *((mp_uint_t *)&immediate); + *upper = unsigned_immediate & 0xFFFFF000; + *lower = unsigned_immediate & 0x00000FFF; + + // Turn the lower half from unsigned to signed. + if ((*lower & 0x800) != 0) { + *upper += 0x1000; + *lower -= 0x1000; + } +} + +static void load_upper_immediate(asm_rv32_t *state, mp_uint_t rd, mp_uint_t immediate) { + // if immediate fits in 17 bits and is ≠ 0: + // c.lui rd, HI(immediate) + // else: + // lui rd, HI(immediate) + if (FIT_SIGNED(immediate, 17) && ((immediate >> 12) != 0)) { + asm_rv32_opcode_clui(state, rd, immediate); + } else { + asm_rv32_opcode_lui(state, rd, immediate); + } +} + +static void load_lower_immediate(asm_rv32_t *state, mp_uint_t rd, mp_uint_t immediate) { + // WARNING: This must be executed on a register that has either been + // previously cleared or was the target of a LUI/C.LUI or + // AUIPC opcode. 
+ + if (immediate == 0) { + return; + } + + // if LO(immediate) fits in 6 bits: + // c.addi rd, LO(immediate) + // else: + // addi rd, rd, LO(immediate) + if (FIT_SIGNED(immediate, 6)) { + asm_rv32_opcode_caddi(state, rd, immediate); + } else { + asm_rv32_opcode_addi(state, rd, rd, immediate); + } +} + +static void load_full_immediate(asm_rv32_t *state, mp_uint_t rd, mp_int_t immediate) { + mp_uint_t upper = 0; + mp_uint_t lower = 0; + split_immediate(immediate, &upper, &lower); + + // if immediate fits in 17 bits: + // c.lui rd, HI(immediate) + // else: + // lui rd, HI(immediate) + // if LO(immediate) fits in 6 bits && LO(immediate) != 0: + // c.addi rd, LO(immediate) + // else: + // addi rd, rd, LO(immediate) + load_upper_immediate(state, rd, upper); + load_lower_immediate(state, rd, lower); +} + +void asm_rv32_emit_optimised_load_immediate(asm_rv32_t *state, mp_uint_t rd, mp_int_t immediate) { + if (FIT_SIGNED(immediate, 6)) { + // c.li rd, immediate + asm_rv32_opcode_cli(state, rd, immediate); + return; + } + + if (FIT_SIGNED(immediate, 12)) { + // addi rd, zero, immediate + asm_rv32_opcode_addi(state, rd, ASM_RV32_REG_ZERO, immediate); + return; + } + + load_full_immediate(state, rd, immediate); +} + +// RV32 does not have dedicated push/pop opcodes, so series of loads and +// stores are generated in their place. 
+ +static void emit_registers_store(asm_rv32_t *state, mp_uint_t registers_mask) { + mp_uint_t offset = 0; + for (mp_uint_t register_index = 0; register_index < AVAILABLE_REGISTERS_COUNT; register_index++) { + if (registers_mask & (1U << register_index)) { + assert(FIT_UNSIGNED(offset >> 2, 6) && "Registers save stack offset out of range."); + // c.swsp register, offset + asm_rv32_opcode_cswsp(state, register_index, offset); + offset += sizeof(uint32_t); + } + } +} + +static void emit_registers_load(asm_rv32_t *state, mp_uint_t registers_mask) { + mp_uint_t offset = 0; + for (mp_uint_t register_index = 0; register_index < AVAILABLE_REGISTERS_COUNT; register_index++) { + if (registers_mask & (1U << register_index)) { + assert(FIT_UNSIGNED(offset >> 2, 6) && "Registers load stack offset out of range."); + // c.lwsp register, offset + asm_rv32_opcode_clwsp(state, register_index, offset); + offset += sizeof(uint32_t); + } + } +} + +static void adjust_stack(asm_rv32_t *state, mp_int_t stack_size) { + if (stack_size == 0) { + return; + } + + if (FIT_SIGNED(stack_size, 6)) { + // c.addi sp, stack_size + asm_rv32_opcode_caddi(state, ASM_RV32_REG_SP, stack_size); + return; + } + + if (FIT_SIGNED(stack_size, 12)) { + // addi sp, sp, stack_size + asm_rv32_opcode_addi(state, ASM_RV32_REG_SP, ASM_RV32_REG_SP, stack_size); + return; + } + + // li temporary, stack_size + // c.add sp, temporary + load_full_immediate(state, REG_TEMP0, stack_size); + asm_rv32_opcode_cadd(state, ASM_RV32_REG_SP, REG_TEMP0); +} + +// Generate a generic function entry prologue code sequence, setting up the +// stack to hold all the tainted registers and an arbitrary amount of space +// for locals. +static void emit_function_prologue(asm_rv32_t *state, mp_uint_t registers) { + mp_uint_t registers_count = MP_POPCOUNT(registers); + state->stack_size = (registers_count + state->locals_count) * sizeof(uint32_t); + mp_uint_t old_saved_registers_mask = state->saved_registers_mask; + // Move stack pointer up. 
+ adjust_stack(state, -state->stack_size); + // Store registers at the top of the saved stack area. + emit_registers_store(state, registers); + state->locals_stack_offset = registers_count * sizeof(uint32_t); + state->saved_registers_mask = old_saved_registers_mask; +} + +// Restore registers and reset the stack pointer to its initial value. +static void emit_function_epilogue(asm_rv32_t *state, mp_uint_t registers) { + mp_uint_t old_saved_registers_mask = state->saved_registers_mask; + // Restore registers from the top of the stack area. + emit_registers_load(state, registers); + // Move stack pointer down. + adjust_stack(state, state->stack_size); + state->saved_registers_mask = old_saved_registers_mask; +} + +/////////////////////////////////////////////////////////////////////////////// + +void asm_rv32_entry(asm_rv32_t *state, mp_uint_t locals) { + state->saved_registers_mask |= (1U << REG_FUN_TABLE) | (1U << REG_LOCAL_1) | \ + (1U << REG_LOCAL_2) | (1U << REG_LOCAL_3); + state->locals_count = locals; + emit_function_prologue(state, state->saved_registers_mask); +} + +void asm_rv32_exit(asm_rv32_t *state) { + emit_function_epilogue(state, state->saved_registers_mask); + // c.jr ra + asm_rv32_opcode_cjr(state, ASM_RV32_REG_RA); +} + +void asm_rv32_end_pass(asm_rv32_t *state) { + (void)state; +} + +void asm_rv32_emit_call_ind(asm_rv32_t *state, mp_uint_t index) { + mp_uint_t offset = index * ASM_WORD_SIZE; + state->saved_registers_mask |= (1U << ASM_RV32_REG_RA); + + if (FIT_UNSIGNED(offset, 11)) { + // lw temporary, offset(fun_table) + // c.jalr temporary + asm_rv32_opcode_lw(state, INTERNAL_TEMPORARY, REG_FUN_TABLE, offset); + asm_rv32_opcode_cjalr(state, INTERNAL_TEMPORARY); + return; + } + + mp_uint_t upper = 0; + mp_uint_t lower = 0; + split_immediate(offset, &upper, &lower); + + // TODO: Can this clobber REG_TEMP[0:2]? 
+ + // lui temporary, HI(index) ; Or c.lui if possible + // c.add temporary, fun_table + // lw temporary, LO(index)(temporary) + // c.jalr temporary + load_upper_immediate(state, INTERNAL_TEMPORARY, upper); + asm_rv32_opcode_cadd(state, INTERNAL_TEMPORARY, REG_FUN_TABLE); + asm_rv32_opcode_lw(state, INTERNAL_TEMPORARY, INTERNAL_TEMPORARY, lower); + asm_rv32_opcode_cjalr(state, INTERNAL_TEMPORARY); +} + +void asm_rv32_emit_jump_if_reg_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t label) { + ptrdiff_t displacement = (ptrdiff_t)(state->base.label_offsets[label] - state->base.code_offset); + + // The least significant bit is ignored anyway. + if (FIT_SIGNED(displacement, 13)) { + // beq rs1, rs2, displacement + asm_rv32_opcode_beq(state, rs1, rs2, displacement); + return; + } + + // Compensate for the initial BNE opcode. + displacement -= ASM_WORD_SIZE; + + mp_uint_t upper = 0; + mp_uint_t lower = 0; + split_immediate(displacement, &upper, &lower); + + // TODO: Can this clobber REG_TEMP[0:2]? + + // bne rs1, rs2, 12 ; PC + 0 + // auipc temporary, HI(displacement) ; PC + 4 + // jalr zero, temporary, LO(displacement) ; PC + 8 + // ... ; PC + 12 + asm_rv32_opcode_bne(state, rs1, rs2, 12); + asm_rv32_opcode_auipc(state, INTERNAL_TEMPORARY, upper); + asm_rv32_opcode_jalr(state, ASM_RV32_REG_ZERO, INTERNAL_TEMPORARY, lower); +} + +void asm_rv32_emit_jump_if_reg_nonzero(asm_rv32_t *state, mp_uint_t rs, mp_uint_t label) { + ptrdiff_t displacement = (ptrdiff_t)(state->base.label_offsets[label] - state->base.code_offset); + + // The least significant bit is ignored anyway. + if (FIT_SIGNED(displacement, 13)) { + // bne rs, zero, displacement + asm_rv32_opcode_bne(state, rs, ASM_RV32_REG_ZERO, displacement); + return; + } + + // Compensate for the initial BEQ opcode. + displacement -= ASM_WORD_SIZE; + + mp_uint_t upper = 0; + mp_uint_t lower = 0; + split_immediate(displacement, &upper, &lower); + + // TODO: Can this clobber REG_TEMP[0:2]? 
+ + // beq rs1, zero, 12 ; PC + 0 + // auipc temporary, HI(displacement) ; PC + 4 + // jalr zero, temporary, LO(displacement) ; PC + 8 + // ... ; PC + 12 + asm_rv32_opcode_beq(state, rs, ASM_RV32_REG_ZERO, 12); + asm_rv32_opcode_auipc(state, INTERNAL_TEMPORARY, upper); + asm_rv32_opcode_jalr(state, ASM_RV32_REG_ZERO, INTERNAL_TEMPORARY, lower); +} + +void asm_rv32_emit_mov_local_reg(asm_rv32_t *state, mp_uint_t local, mp_uint_t rs) { + mp_uint_t offset = state->locals_stack_offset + (local * ASM_WORD_SIZE); + + if (FIT_UNSIGNED(offset >> 2, 6)) { + // c.swsp rs, offset + asm_rv32_opcode_cswsp(state, rs, offset); + return; + } + + if (FIT_UNSIGNED(offset, 11)) { + // sw rs, offset(sp) + asm_rv32_opcode_sw(state, rs, ASM_RV32_REG_SP, offset); + return; + } + + mp_uint_t upper = 0; + mp_uint_t lower = 0; + split_immediate(offset, &upper, &lower); + + // TODO: Can this clobber REG_TEMP[0:2]? + + // lui temporary, HI(offset) ; Or c.lui if possible + // c.add temporary, sp + // sw rs, LO(offset)(temporary) + load_upper_immediate(state, INTERNAL_TEMPORARY, upper); + asm_rv32_opcode_cadd(state, INTERNAL_TEMPORARY, ASM_RV32_REG_SP); + asm_rv32_opcode_sw(state, rs, INTERNAL_TEMPORARY, lower); +} + +void asm_rv32_emit_mov_reg_local(asm_rv32_t *state, mp_uint_t rd, mp_uint_t local) { + mp_uint_t offset = state->locals_stack_offset + (local * ASM_WORD_SIZE); + + if (FIT_UNSIGNED(offset >> 2, 6)) { + // c.lwsp rd, offset + asm_rv32_opcode_clwsp(state, rd, offset); + return; + } + + if (FIT_UNSIGNED(offset, 11)) { + // lw rd, offset(sp) + asm_rv32_opcode_lw(state, rd, ASM_RV32_REG_SP, offset); + return; + } + + mp_uint_t upper = 0; + mp_uint_t lower = 0; + split_immediate(offset, &upper, &lower); + + // lui rd, HI(offset) ; Or c.lui if possible + // c.add rd, sp + // lw rd, LO(offset)(rd) + load_upper_immediate(state, rd, upper); + asm_rv32_opcode_cadd(state, rd, ASM_RV32_REG_SP); + asm_rv32_opcode_lw(state, rd, rd, lower); +} + +void asm_rv32_emit_mov_reg_local_addr(asm_rv32_t 
*state, mp_uint_t rd, mp_uint_t local) { + mp_uint_t offset = state->locals_stack_offset + (local * ASM_WORD_SIZE); + + if (FIT_SIGNED(offset, 11)) { + // addi rd, sp, offset + asm_rv32_opcode_addi(state, rd, ASM_RV32_REG_SP, offset); + return; + } + + // li rd, offset + // c.add rd, sp + load_full_immediate(state, rd, offset); + asm_rv32_opcode_cadd(state, rd, ASM_RV32_REG_SP); +} + +void asm_rv32_emit_load_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) { + mp_int_t scaled_offset = offset * sizeof(ASM_WORD_SIZE); + + if (FIT_SIGNED(scaled_offset, 12)) { + // lw rd, offset(rs) + asm_rv32_opcode_lw(state, rd, rs, scaled_offset); + return; + } + + mp_uint_t upper = 0; + mp_uint_t lower = 0; + split_immediate(scaled_offset, &upper, &lower); + + // lui rd, HI(offset) ; Or c.lui if possible + // c.add rd, rs + // lw rd, LO(offset)(rd) + load_upper_immediate(state, rd, upper); + asm_rv32_opcode_cadd(state, rd, rs); + asm_rv32_opcode_lw(state, rd, rd, lower); +} + +void asm_rv32_emit_jump(asm_rv32_t *state, mp_uint_t label) { + ptrdiff_t displacement = (ptrdiff_t)(state->base.label_offsets[label] - state->base.code_offset); + + // The least significant bit is ignored anyway. + if (FIT_SIGNED(displacement, 13)) { + // c.j displacement + asm_rv32_opcode_cj(state, displacement); + return; + } + + mp_uint_t upper = 0; + mp_uint_t lower = 0; + split_immediate(displacement, &upper, &lower); + + // TODO: Can this clobber REG_TEMP[0:2]? 
+ + // auipc temporary, HI(displacement) + // jalr zero, temporary, LO(displacement) + asm_rv32_opcode_auipc(state, INTERNAL_TEMPORARY, upper); + asm_rv32_opcode_jalr(state, ASM_RV32_REG_ZERO, INTERNAL_TEMPORARY, lower); +} + +void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) { + mp_int_t scaled_offset = offset * ASM_WORD_SIZE; + + if (FIT_SIGNED(scaled_offset, 12)) { + // sw rd, offset(rs) + asm_rv32_opcode_sw(state, rd, rs, scaled_offset); + return; + } + + mp_uint_t upper = 0; + mp_uint_t lower = 0; + split_immediate(scaled_offset, &upper, &lower); + + // lui rd, HI(offset) ; Or c.lui if possible + // c.add rd, rs + // sw rd, LO(offset)(rd) + load_upper_immediate(state, rd, upper); + asm_rv32_opcode_cadd(state, rd, rs); + asm_rv32_opcode_sw(state, rd, rd, lower); +} + +void asm_rv32_emit_mov_reg_pcrel(asm_rv32_t *state, mp_uint_t rd, mp_uint_t label) { + ptrdiff_t displacement = (ptrdiff_t)(state->base.label_offsets[label] - state->base.code_offset); + mp_uint_t upper = 0; + mp_uint_t lower = 0; + split_immediate(displacement, &upper, &lower); + + // Compressed instructions are not used even if they may allow for code size + // savings as the code changes size between compute and emit passes + // otherwise. If that happens then the assertion at asmbase.c:93 triggers + // when built in debug mode. 
+ + // auipc rd, HI(relative) + // addi rd, rd, LO(relative) + asm_rv32_opcode_auipc(state, rd, upper); + asm_rv32_opcode_addi(state, rd, rd, lower); +} + +void asm_rv32_emit_load16_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) { + mp_int_t scaled_offset = offset * sizeof(uint16_t); + + if (FIT_SIGNED(scaled_offset, 12)) { + // lhu rd, offset(rs) + asm_rv32_opcode_lhu(state, rd, rs, scaled_offset); + return; + } + + mp_uint_t upper = 0; + mp_uint_t lower = 0; + split_immediate(scaled_offset, &upper, &lower); + + // lui rd, HI(offset) ; Or c.lui if possible + // c.add rd, rs + // lhu rd, LO(offset)(rd) + load_upper_immediate(state, rd, upper); + asm_rv32_opcode_cadd(state, rd, rs); + asm_rv32_opcode_lhu(state, rd, rd, lower); +} + +void asm_rv32_emit_optimised_xor(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs) { + if (rs == rd) { + // c.li rd, 0 + asm_rv32_opcode_cli(state, rd, 0); + return; + } + + // xor rd, rd, rs + asm_rv32_opcode_xor(state, rd, rd, rs); +} + +void asm_rv32_meta_comparison_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd) { + // c.li rd, 1 ; + // beq rs1, rs2, 8 ; PC + 0 + // addi rd, zero, 0 ; PC + 4 + // ... 
; PC + 8 + asm_rv32_opcode_cli(state, rd, 1); + asm_rv32_opcode_beq(state, rs1, rs2, 8); + asm_rv32_opcode_addi(state, rd, ASM_RV32_REG_ZERO, 0); +} + +void asm_rv32_meta_comparison_ne(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd) { + // sub rd, rs1, rs2 + // sltu rd, zero, rd + asm_rv32_opcode_sub(state, rd, rs1, rs2); + asm_rv32_opcode_sltu(state, rd, ASM_RV32_REG_ZERO, rd); +} + +void asm_rv32_meta_comparison_lt(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd, bool unsigned_comparison) { + // slt(u) rd, rs1, rs2 + if (unsigned_comparison) { + asm_rv32_opcode_sltu(state, rd, rs1, rs2); + } else { + asm_rv32_opcode_slt(state, rd, rs1, rs2); + } +} + +void asm_rv32_meta_comparison_le(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd, bool unsigned_comparison) { + // c.li rd, 1 ; + // beq rs1, rs2, 8 ; PC + 0 + // slt(u) rd, rs1, rs2 ; PC + 4 + // ... ; PC + 8 + asm_rv32_opcode_cli(state, rd, 1); + asm_rv32_opcode_beq(state, rs1, rs2, 8); + if (unsigned_comparison) { + asm_rv32_opcode_sltu(state, rd, rs1, rs2); + } else { + asm_rv32_opcode_slt(state, rd, rs1, rs2); + } +} + +#endif // MICROPY_EMIT_RV32 diff --git a/py/asmrv32.h b/py/asmrv32.h new file mode 100644 index 000000000..4061fd5f8 --- /dev/null +++ b/py/asmrv32.h @@ -0,0 +1,464 @@ +/* + * This file is part of the MicroPython project, https://micropython.org/ + * + * The MIT License (MIT) + * + * Copyright (c) 2024 Alessandro Gatti + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in 
+ * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MICROPY_INCLUDED_PY_ASMRV32_H +#define MICROPY_INCLUDED_PY_ASMRV32_H + +#include <assert.h> + +#include "py/asmbase.h" +#include "py/emit.h" +#include "py/misc.h" +#include "py/persistentcode.h" + +#define ASM_RV32_REG_X0 (0) // Zero +#define ASM_RV32_REG_X1 (1) // RA +#define ASM_RV32_REG_X2 (2) // SP +#define ASM_RV32_REG_X3 (3) // GP +#define ASM_RV32_REG_X4 (4) // TP +#define ASM_RV32_REG_X5 (5) // T0 +#define ASM_RV32_REG_X6 (6) // T1 +#define ASM_RV32_REG_X7 (7) // T2 +#define ASM_RV32_REG_X8 (8) // S0 +#define ASM_RV32_REG_X9 (9) // S1 +#define ASM_RV32_REG_X10 (10) // A0 +#define ASM_RV32_REG_X11 (11) // A1 +#define ASM_RV32_REG_X12 (12) // A2 +#define ASM_RV32_REG_X13 (13) // A3 +#define ASM_RV32_REG_X14 (14) // A4 +#define ASM_RV32_REG_X15 (15) // A5 +#define ASM_RV32_REG_X16 (16) // A6 +#define ASM_RV32_REG_X17 (17) // A7 +#define ASM_RV32_REG_X18 (18) // S2 +#define ASM_RV32_REG_X19 (19) // S3 +#define ASM_RV32_REG_X20 (20) // S4 +#define ASM_RV32_REG_X21 (21) // S5 +#define ASM_RV32_REG_X22 (22) // S6 +#define ASM_RV32_REG_X23 (23) // S7 +#define ASM_RV32_REG_X24 (24) // S8 +#define ASM_RV32_REG_X25 (25) // S9 +#define ASM_RV32_REG_X26 (26) // S10 +#define ASM_RV32_REG_X27 (27) // S11 +#define ASM_RV32_REG_X28 (28) // T3 +#define ASM_RV32_REG_X29 (29) // T4 +#define ASM_RV32_REG_X30 (30) // T5 +#define ASM_RV32_REG_X31 (31) // T6 + +// Alternate register names. 
+ +#define ASM_RV32_REG_ZERO (ASM_RV32_REG_X0) +#define ASM_RV32_REG_RA (ASM_RV32_REG_X1) +#define ASM_RV32_REG_SP (ASM_RV32_REG_X2) +#define ASM_RV32_REG_GP (ASM_RV32_REG_X3) +#define ASM_RV32_REG_TP (ASM_RV32_REG_X4) +#define ASM_RV32_REG_T0 (ASM_RV32_REG_X5) +#define ASM_RV32_REG_T1 (ASM_RV32_REG_X6) +#define ASM_RV32_REG_T2 (ASM_RV32_REG_X7) +#define ASM_RV32_REG_A0 (ASM_RV32_REG_X10) +#define ASM_RV32_REG_A1 (ASM_RV32_REG_X11) +#define ASM_RV32_REG_A2 (ASM_RV32_REG_X12) +#define ASM_RV32_REG_A3 (ASM_RV32_REG_X13) +#define ASM_RV32_REG_A4 (ASM_RV32_REG_X14) +#define ASM_RV32_REG_A5 (ASM_RV32_REG_X15) +#define ASM_RV32_REG_A6 (ASM_RV32_REG_X16) +#define ASM_RV32_REG_A7 (ASM_RV32_REG_X17) +#define ASM_RV32_REG_T3 (ASM_RV32_REG_X28) +#define ASM_RV32_REG_T4 (ASM_RV32_REG_X29) +#define ASM_RV32_REG_T5 (ASM_RV32_REG_X30) +#define ASM_RV32_REG_T6 (ASM_RV32_REG_X31) +#define ASM_RV32_REG_FP (ASM_RV32_REG_X8) +#define ASM_RV32_REG_S0 (ASM_RV32_REG_X8) +#define ASM_RV32_REG_S1 (ASM_RV32_REG_X9) +#define ASM_RV32_REG_S2 (ASM_RV32_REG_X18) +#define ASM_RV32_REG_S3 (ASM_RV32_REG_X19) +#define ASM_RV32_REG_S4 (ASM_RV32_REG_X20) +#define ASM_RV32_REG_S5 (ASM_RV32_REG_X21) +#define ASM_RV32_REG_S6 (ASM_RV32_REG_X22) +#define ASM_RV32_REG_S7 (ASM_RV32_REG_X23) +#define ASM_RV32_REG_S8 (ASM_RV32_REG_X24) +#define ASM_RV32_REG_S9 (ASM_RV32_REG_X25) +#define ASM_RV32_REG_S10 (ASM_RV32_REG_X26) +#define ASM_RV32_REG_S11 (ASM_RV32_REG_X27) + +typedef struct _asm_rv32_t { + // Opaque emitter state. + mp_asm_base_t base; + // Which registers are tainted and need saving/restoring. + mp_uint_t saved_registers_mask; + // How many locals must be stored on the stack. + mp_uint_t locals_count; + // The computed function stack size. + mp_uint_t stack_size; + // The stack offset where stack-based locals start to be stored. 
+ mp_uint_t locals_stack_offset; +} asm_rv32_t; + +void asm_rv32_entry(asm_rv32_t *state, mp_uint_t locals); +void asm_rv32_exit(asm_rv32_t *state); +void asm_rv32_end_pass(asm_rv32_t *state); + +//////////////////////////////////////////////////////////////////////////////// + +#define RV32_ENCODE_TYPE_B(op, ft3, rs1, rs2, imm) \ + ((op & 0b1111111) | ((ft3 & 0b111) << 12) | ((imm & 0b100000000000) >> 4) | \ + ((imm & 0b11110) << 7) | ((rs1 & 0b11111) << 15) | \ + ((rs2 & 0b11111) << 20) | ((imm & 0b11111100000) << 20) | \ + ((imm & 0b1000000000000) << 19)) + +#define RV32_ENCODE_TYPE_I(op, ft3, rd, rs, imm) \ + ((op & 0b1111111) | ((rd & 0b11111) << 7) | ((ft3 & 0b111) << 12) | \ + ((rs & 0b11111) << 15) | ((imm & 0b111111111111) << 20)) + +#define RV32_ENCODE_TYPE_J(op, rd, imm) \ + ((op & 0b1111111) | ((rd & 0b11111) << 7) | (imm & 0b11111111000000000000) | \ + ((imm & 0b100000000000) << 9) | ((imm & 0b11111111110) << 20) | \ + ((imm & 0b100000000000000000000) << 11)) + +#define RV32_ENCODE_TYPE_R(op, ft3, ft7, rd, rs1, rs2) \ + ((op & 0b1111111) | ((rd & 0b11111) << 7) | ((ft3 & 0b111) << 12) | \ + ((rs1 & 0b11111) << 15) | ((rs2 & 0b11111) << 20) | \ + ((ft7 & 0b1111111) << 25)) + +#define RV32_ENCODE_TYPE_S(op, ft3, rs1, rs2, imm) \ + ((op & 0b1111111) | ((imm & 0b11111) << 7) | ((ft3 & 0b111) << 12) | \ + ((rs1 & 0b11111) << 15) | ((rs2 & 0b11111) << 20) | \ + ((imm & 0b111111100000) << 20)) + +#define RV32_ENCODE_TYPE_U(op, rd, imm) \ + ((op & 0b1111111) | ((rd & 0b11111) << 7) | \ + (imm & 0b11111111111111111111000000000000)) + +#define RV32_ENCODE_TYPE_CI(op, ft3, rd, imm) \ + ((op & 0b11) | ((ft3 & 0b111) << 13) | ((rd & 0b11111) << 7) | \ + (((imm) & 0b100000) << 7) | (((imm) & 0b11111) << 2)) + +#define RV32_ENCODE_TYPE_CJ(op, ft3, imm) \ + ((op & 0b11) | ((ft3 & 0b111) << 13) | \ + ((imm & 0b1110) << 2) | ((imm & 0b1100000000) << 1) | \ + ((imm & 0b100000000000) << 1) | ((imm & 0b10000000000) >> 2) | \ + ((imm & 0b10000000) >> 1) | ((imm & 0b1000000) 
<< 1) | \ + ((imm & 0b100000) >> 3) | ((imm & 0b10000) << 7)) + +#define RV32_ENCODE_TYPE_CR(op, ft4, rs1, rs2) \ + ((op & 0b11) | ((rs2 & 0b11111) << 2) | ((rs1 & 0b11111) << 7) | \ + ((ft4 & 0b1111) << 12)) + +#define RV32_ENCODE_TYPE_CSS(op, ft3, rs, imm) \ + ((op & 0b11) | ((ft3 & 0b111) << 13) | ((rs & 0b11111) << 2) | ((imm) & 0b111111) << 7) + +void asm_rv32_emit_word_opcode(asm_rv32_t *state, mp_uint_t opcode); +void asm_rv32_emit_halfword_opcode(asm_rv32_t *state, mp_uint_t opcode); + +// ADD RD, RS1, RS2 +static inline void asm_rv32_opcode_add(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2) { + // R: 0000000 ..... ..... 000 ..... 0110011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_R(0b0110011, 0b000, 0b0000000, rd, rs1, rs2)); +} + +// ADDI RD, RS, IMMEDIATE +static inline void asm_rv32_opcode_addi(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t immediate) { + // I: ............ ..... 000 ..... 0010011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_I(0b0010011, 0b000, rd, rs, immediate)); +} + +// AND RD, RS1, RS2 +static inline void asm_rv32_opcode_and(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2) { + // R: 0000000 ..... ..... 111 ..... 0110011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_R(0b0110011, 0b111, 0b0000000, rd, rs1, rs2)); +} + +// AUIPC RD, offset +static inline void asm_rv32_opcode_auipc(asm_rv32_t *state, mp_uint_t rd, mp_int_t offset) { + // U: .................... ..... 0010111 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_U(0b0010111, rd, offset)); +} + +// BEQ RS1, RS2, OFFSET +static inline void asm_rv32_opcode_beq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_int_t offset) { + // B: . ...... ..... ..... 000 .... . 1100011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_B(0b1100011, 0b000, rs1, rs2, offset)); +} + +// BNE RS1, RS2, OFFSET +static inline void asm_rv32_opcode_bne(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_int_t offset) { + // B: . 
...... ..... ..... 001 .... . 1100011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_B(0b1100011, 0b001, rs1, rs2, offset)); +} + +// C.ADD RD, RS +static inline void asm_rv32_opcode_cadd(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs) { + // CR: 1001 ..... ..... 10 + asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CR(0b10, 0b1001, rd, rs)); +} + +// C.ADDI RD, IMMEDIATE +static inline void asm_rv32_opcode_caddi(asm_rv32_t *state, mp_uint_t rd, mp_int_t immediate) { + // CI: 000 . ..... ..... 01 + asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CI(0b01, 0b000, rd, immediate)); +} + +// C.J OFFSET +static inline void asm_rv32_opcode_cj(asm_rv32_t *state, mp_uint_t offset) { + // CJ: 101 ........... 01 + asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CJ(0b01, 0b001, offset)); +} + +// C.JALR RS +static inline void asm_rv32_opcode_cjalr(asm_rv32_t *state, mp_uint_t rs) { + // CR: 1001 ..... 00000 10 + asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CR(0b10, 0b1001, rs, 0)); +} + +// C.JR RS +static inline void asm_rv32_opcode_cjr(asm_rv32_t *state, mp_uint_t rs) { + // CR: 1000 ..... 00000 10 + asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CR(0b10, 0b1000, rs, 0)); +} + +// C.LI RD, IMMEDIATE +static inline void asm_rv32_opcode_cli(asm_rv32_t *state, mp_uint_t rd, mp_int_t immediate) { + // CI: 010 . ..... ..... 01 + asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CI(0b01, 0b010, rd, immediate)); +} + +// C.LUI RD, IMMEDIATE +static inline void asm_rv32_opcode_clui(asm_rv32_t *state, mp_uint_t rd, mp_int_t immediate) { + // CI: 011 . ..... ..... 01 + asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CI(0b01, 0b011, rd, immediate >> 12)); +} + +// C.LWSP RD, OFFSET +static inline void asm_rv32_opcode_clwsp(asm_rv32_t *state, mp_uint_t rd, mp_uint_t offset) { + // CI: 010 . ..... ..... 
10 + asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CI(0b10, 0b010, rd, ((offset & 0b11000000) >> 6) | (offset & 0b111100))); +} + +// C.MV RD, RS +static inline void asm_rv32_opcode_cmv(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs) { + // CR: 1000 ..... ..... 10 + asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CR(0b10, 0b1000, rd, rs)); +} + +// C.SWSP RS, OFFSET +static inline void asm_rv32_opcode_cswsp(asm_rv32_t *state, mp_uint_t rs, mp_uint_t offset) { + // CSS: 010 ...... ..... 10 + asm_rv32_emit_halfword_opcode(state, RV32_ENCODE_TYPE_CSS(0b10, 0b110, rs, ((offset & 0b11000000) >> 6) | (offset & 0b111100))); +} + +// JALR RD, RS, offset +static inline void asm_rv32_opcode_jalr(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) { + // I: ............ ..... 000 ..... 1100111 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_I(0b1100111, 0b000, rd, rs, offset)); +} + +// LBU RD, OFFSET(RS) +static inline void asm_rv32_opcode_lbu(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) { + // I: ............ ..... 100 ..... 0000011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_I(0b0000011, 0b100, rd, rs, offset)); +} + +// LHU RD, OFFSET(RS) +static inline void asm_rv32_opcode_lhu(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) { + // I: ............ ..... 101 ..... 0000011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_I(0b0000011, 0b101, rd, rs, offset)); +} + +// LUI RD, immediate +static inline void asm_rv32_opcode_lui(asm_rv32_t *state, mp_uint_t rd, mp_int_t immediate) { + // U: .................... ..... 0110111 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_U(0b0110111, rd, immediate)); +} + +// LW RD, OFFSET(RS) +static inline void asm_rv32_opcode_lw(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) { + // I: ............ ..... 010 ..... 
0000011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_I(0b0000011, 0b010, rd, rs, offset)); +} + +// MUL RD, RS1, RS2 +static inline void asm_rv32m_opcode_mul(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2) { + // R: 0000001 ..... ..... 000 ..... 0110011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_R(0b0110011, 0b000, 0b0000001, rd, rs1, rs2)); +} + +// OR RD, RS1, RS2 +static inline void asm_rv32_opcode_or(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2) { + // R: 0000000 ..... ..... 110 ..... 0110011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_R(0b0110011, 0b110, 0b0000000, rd, rs1, rs2)); +} + +// SLL RD, RS1, RS2 +static inline void asm_rv32_opcode_sll(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2) { + // R: 0000000 ..... ..... 001 ..... 0110011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_R(0b0110011, 0b001, 0b0000000, rd, rs1, rs2)); +} + +// SLLI RD, RS, IMMEDIATE +static inline void asm_rv32_opcode_slli(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_uint_t immediate) { + // I: 0000000..... ..... 001 ..... 0010011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_I(0b0010011, 0b001, rd, rs, immediate & 0x1F)); +} + +// SRL RD, RS1, RS2 +static inline void asm_rv32_opcode_srl(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2) { + // R: 0000000 ..... ..... 101 ..... 0110011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_R(0b0110011, 0b101, 0b0000000, rd, rs1, rs2)); +} + +// SLT RD, RS1, RS2 +static inline void asm_rv32_opcode_slt(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2) { + // R: 0000000 ..... ..... 010 ..... 0110011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_R(0b0110011, 0b010, 0b0000000, rd, rs1, rs2)); +} + +// SLTU RD, RS1, RS2 +static inline void asm_rv32_opcode_sltu(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2) { + // R: 0000000 ..... ..... 011 ..... 
0110011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_R(0b0110011, 0b011, 0b0000000, rd, rs1, rs2)); +} + +// SRA RD, RS1, RS2 +static inline void asm_rv32_opcode_sra(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2) { + // R: 0100000 ..... ..... 101 ..... 0110011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_R(0b0110011, 0b101, 0b0100000, rd, rs1, rs2)); +} + +// SUB RD, RS1, RS2 +static inline void asm_rv32_opcode_sub(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2) { + // R: 0100000 ..... ..... 000 ..... 0110011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_R(0b0110011, 0b000, 0b0100000, rd, rs1, rs2)); +} + +// SB RS2, OFFSET(RS1) +static inline void asm_rv32_opcode_sb(asm_rv32_t *state, mp_uint_t rs2, mp_uint_t rs1, mp_int_t offset) { + // S: ....... ..... ..... 000 ..... 0100011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_S(0b0100011, 0b000, rs1, rs2, offset)); +} + +// SH RS2, OFFSET(RS1) +static inline void asm_rv32_opcode_sh(asm_rv32_t *state, mp_uint_t rs2, mp_uint_t rs1, mp_int_t offset) { + // S: ....... ..... ..... 001 ..... 0100011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_S(0b0100011, 0b001, rs1, rs2, offset)); +} + +// SW RS2, OFFSET(RS1) +static inline void asm_rv32_opcode_sw(asm_rv32_t *state, mp_uint_t rs2, mp_uint_t rs1, mp_int_t offset) { + // S: ....... ..... ..... 010 ..... 0100011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_S(0b0100011, 0b010, rs1, rs2, offset)); +} + +// XOR RD, RS1, RS2 +static inline void asm_rv32_opcode_xor(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2) { + // R: 0000000 ..... ..... 100 ..... 0110011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_R(0b0110011, 0b100, 0b0000000, rd, rs1, rs2)); +} + +// XORI RD, RS, IMMEDIATE +static inline void asm_rv32_opcode_xori(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t immediate) { + // I: ............ ..... 100 ..... 
0010011 + asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_I(0b0010011, 0b100, rd, rs, immediate)); +} + +#define ASM_WORD_SIZE (4) + +#define REG_RET ASM_RV32_REG_A0 +#define REG_ARG_1 ASM_RV32_REG_A0 +#define REG_ARG_2 ASM_RV32_REG_A1 +#define REG_ARG_3 ASM_RV32_REG_A2 +#define REG_ARG_4 ASM_RV32_REG_A3 +#define REG_TEMP0 ASM_RV32_REG_T1 +#define REG_TEMP1 ASM_RV32_REG_T2 +#define REG_TEMP2 ASM_RV32_REG_T3 +// S0 may be used as the frame pointer by the compiler. +#define REG_FUN_TABLE ASM_RV32_REG_S2 +#define REG_LOCAL_1 ASM_RV32_REG_S3 +#define REG_LOCAL_2 ASM_RV32_REG_S4 +#define REG_LOCAL_3 ASM_RV32_REG_S5 + +void asm_rv32_meta_comparison_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd); +void asm_rv32_meta_comparison_ne(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd); +void asm_rv32_meta_comparison_lt(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd, bool unsigned_comparison); +void asm_rv32_meta_comparison_le(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd, bool unsigned_comparison); + +#ifdef GENERIC_ASM_API + +void asm_rv32_emit_call_ind(asm_rv32_t *state, mp_uint_t index); +void asm_rv32_emit_jump(asm_rv32_t *state, mp_uint_t label); +void asm_rv32_emit_jump_if_reg_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t label); +void asm_rv32_emit_jump_if_reg_nonzero(asm_rv32_t *state, mp_uint_t rs, mp_uint_t label); +void asm_rv32_emit_load16_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset); +void asm_rv32_emit_load_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset); +void asm_rv32_emit_mov_local_reg(asm_rv32_t *state, mp_uint_t local, mp_uint_t rs); +void asm_rv32_emit_mov_reg_local(asm_rv32_t *state, mp_uint_t rd, mp_uint_t local); +void asm_rv32_emit_mov_reg_local_addr(asm_rv32_t *state, mp_uint_t rd, mp_uint_t local); +void asm_rv32_emit_mov_reg_pcrel(asm_rv32_t *state, mp_uint_t rd, mp_uint_t label); +void 
asm_rv32_emit_optimised_load_immediate(asm_rv32_t *state, mp_uint_t rd, mp_int_t immediate); +void asm_rv32_emit_optimised_xor(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs); +void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t source, mp_uint_t base, mp_int_t offset); + +#define ASM_T asm_rv32_t +#define ASM_ENTRY(state, labels) asm_rv32_entry(state, labels) +#define ASM_EXIT(state) asm_rv32_exit(state) +#define ASM_END_PASS(state) asm_rv32_end_pass(state) + +#define ASM_ADD_REG_REG(state, rd, rs) asm_rv32_opcode_cadd(state, rd, rs) +#define ASM_AND_REG_REG(state, rd, rs) asm_rv32_opcode_and(state, rd, rs, rd) +#define ASM_ASR_REG_REG(state, rd, rs) asm_rv32_opcode_sra(state, rd, rd, rs) +#define ASM_CALL_IND(state, index) asm_rv32_emit_call_ind(state, index) +#define ASM_JUMP(state, label) asm_rv32_emit_jump(state, label) +#define ASM_JUMP_IF_REG_EQ(state, rs1, rs2, label) asm_rv32_emit_jump_if_reg_eq(state, rs1, rs2, label) +#define ASM_JUMP_IF_REG_NONZERO(state, rs, label, bool_test) asm_rv32_emit_jump_if_reg_nonzero(state, rs, label) +#define ASM_JUMP_IF_REG_ZERO(state, rs, label, bool_test) asm_rv32_emit_jump_if_reg_eq(state, rs, ASM_RV32_REG_ZERO, label) +#define ASM_JUMP_REG(state, rs) asm_rv32_opcode_jalr(state, ASM_RV32_REG_ZERO, rs, 0) +#define ASM_LOAD16_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_load16_reg_reg_offset(state, rd, rs, offset) +#define ASM_LOAD16_REG_REG(state, rd, rs) asm_rv32_opcode_lhu(state, rd, rs, 0) +#define ASM_LOAD32_REG_REG(state, rd, rs) asm_rv32_opcode_lw(state, rd, rs, 0) +#define ASM_LOAD8_REG_REG(state, rd, rs) asm_rv32_opcode_lbu(state, rd, rs, 0) +#define ASM_LOAD_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_load_reg_reg_offset(state, rd, rs, offset) +#define ASM_LOAD_REG_REG(state, rd, rs) ASM_LOAD32_REG_REG(state, rd, rs) +#define ASM_LSL_REG_REG(state, rd, rs) asm_rv32_opcode_sll(state, rd, rd, rs) +#define ASM_LSR_REG_REG(state, rd, rs) asm_rv32_opcode_srl(state, rd, rd, rs) +#define 
ASM_MOV_LOCAL_REG(state, local, rs) asm_rv32_emit_mov_local_reg(state, local, rs) +#define ASM_MOV_REG_IMM(state, rd, imm) asm_rv32_emit_optimised_load_immediate(state, rd, imm) +#define ASM_MOV_REG_LOCAL_ADDR(state, rd, local) asm_rv32_emit_mov_reg_local_addr(state, rd, local) +#define ASM_MOV_REG_LOCAL(state, rd, local) asm_rv32_emit_mov_reg_local(state, rd, local) +#define ASM_MOV_REG_PCREL(state, rd, label) asm_rv32_emit_mov_reg_pcrel(state, rd, label) +#define ASM_MOV_REG_REG(state, rd, rs) asm_rv32_opcode_cmv(state, rd, rs) +#define ASM_MUL_REG_REG(state, rd, rs) asm_rv32m_opcode_mul(state, rd, rd, rs) +#define ASM_NEG_REG(state, rd) asm_rv32_opcode_sub(state, rd, ASM_RV32_REG_ZERO, rd) +#define ASM_NOT_REG(state, rd) asm_rv32_opcode_xori(state, rd, rd, -1) +#define ASM_OR_REG_REG(state, rd, rs) asm_rv32_opcode_or(state, rd, rd, rs) +#define ASM_STORE16_REG_REG(state, rs1, rs2) asm_rv32_opcode_sh(state, rs1, rs2, 0) +#define ASM_STORE32_REG_REG(state, rs1, rs2) asm_rv32_opcode_sw(state, rs1, rs2, 0) +#define ASM_STORE8_REG_REG(state, rs1, rs2) asm_rv32_opcode_sb(state, rs1, rs2, 0) +#define ASM_STORE_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_store_reg_reg_offset(state, rd, rs, offset) +#define ASM_STORE_REG_REG(state, rs1, rs2) ASM_STORE32_REG_REG(state, rs1, rs2) +#define ASM_SUB_REG_REG(state, rd, rs) asm_rv32_opcode_sub(state, rd, rd, rs) +#define ASM_XOR_REG_REG(state, rd, rs) asm_rv32_emit_optimised_xor(state, rd, rs) + +#endif + +#endif // MICROPY_INCLUDED_PY_ASMRV32_H diff --git a/py/compile.c b/py/compile.c index 62757de3c..9b012f878 100644 --- a/py/compile.c +++ b/py/compile.c @@ -102,6 +102,7 @@ static const emit_method_table_t *emit_native_table[] = { &emit_native_thumb_method_table, &emit_native_xtensa_method_table, &emit_native_xtensawin_method_table, + &emit_native_rv32_method_table, }; #elif MICROPY_EMIT_NATIVE @@ -118,6 +119,8 @@ static const emit_method_table_t *emit_native_table[] = { #define NATIVE_EMITTER(f) 
emit_native_xtensa_##f #elif MICROPY_EMIT_XTENSAWIN #define NATIVE_EMITTER(f) emit_native_xtensawin_##f +#elif MICROPY_EMIT_RV32 +#define NATIVE_EMITTER(f) emit_native_rv32_##f #else #error "unknown native emitter" #endif @@ -201,6 +201,7 @@ extern const emit_method_table_t emit_native_thumb_method_table; extern const emit_method_table_t emit_native_arm_method_table; extern const emit_method_table_t emit_native_xtensa_method_table; extern const emit_method_table_t emit_native_xtensawin_method_table; +extern const emit_method_table_t emit_native_rv32_method_table; extern const mp_emit_method_table_id_ops_t mp_emit_bc_method_table_load_id_ops; extern const mp_emit_method_table_id_ops_t mp_emit_bc_method_table_store_id_ops; @@ -213,6 +214,7 @@ emit_t *emit_native_thumb_new(mp_emit_common_t *emit_common, mp_obj_t *error_slo emit_t *emit_native_arm_new(mp_emit_common_t *emit_common, mp_obj_t *error_slot, uint *label_slot, mp_uint_t max_num_labels); emit_t *emit_native_xtensa_new(mp_emit_common_t *emit_common, mp_obj_t *error_slot, uint *label_slot, mp_uint_t max_num_labels); emit_t *emit_native_xtensawin_new(mp_emit_common_t *emit_common, mp_obj_t *error_slot, uint *label_slot, mp_uint_t max_num_labels); +emit_t *emit_native_rv32_new(mp_emit_common_t *emit_common, mp_obj_t *error_slot, uint *label_slot, mp_uint_t max_num_labels); void emit_bc_set_max_num_labels(emit_t *emit, mp_uint_t max_num_labels); @@ -223,6 +225,7 @@ void emit_native_thumb_free(emit_t *emit); void emit_native_arm_free(emit_t *emit); void emit_native_xtensa_free(emit_t *emit); void emit_native_xtensawin_free(emit_t *emit); +void emit_native_rv32_free(emit_t *emit); void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope); bool mp_emit_bc_end_pass(emit_t *emit); diff --git a/py/emitglue.c b/py/emitglue.c index 6b6d5ccba..444e48047 100644 --- a/py/emitglue.c +++ b/py/emitglue.c @@ -158,7 +158,7 @@ void mp_emit_glue_assign_native(mp_raw_code_t *rc, mp_raw_code_kind_t kind, cons } 
DEBUG_printf("\n"); - #ifdef WRITE_CODE + #if WRITE_CODE FILE *fp_write_code = fopen("out-code", "wb"); fwrite(fun_data, fun_len, 1, fp_write_code); fclose(fp_write_code); diff --git a/py/emitnative.c b/py/emitnative.c index 0b84a2ec8..7557e4ba4 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -59,7 +59,7 @@ #endif // wrapper around everything in this file -#if N_X64 || N_X86 || N_THUMB || N_ARM || N_XTENSA || N_XTENSAWIN +#if N_X64 || N_X86 || N_THUMB || N_ARM || N_XTENSA || N_XTENSAWIN || N_RV32 // C stack layout for native functions: // 0: nlr_buf_t [optional] @@ -2522,6 +2522,36 @@ static void emit_native_binary_op(emit_t *emit, mp_binary_op_t op) { } else { asm_xtensa_setcc_reg_reg_reg(emit->as, cc & ~0x80, REG_RET, reg_rhs, REG_ARG_2); } + #elif N_RV32 + (void)op_idx; + switch (op) { + case MP_BINARY_OP_LESS: + asm_rv32_meta_comparison_lt(emit->as, REG_ARG_2, reg_rhs, REG_RET, vtype_lhs == VTYPE_UINT); + break; + + case MP_BINARY_OP_MORE: + asm_rv32_meta_comparison_lt(emit->as, reg_rhs, REG_ARG_2, REG_RET, vtype_lhs == VTYPE_UINT); + break; + + case MP_BINARY_OP_EQUAL: + asm_rv32_meta_comparison_eq(emit->as, REG_ARG_2, reg_rhs, REG_RET); + break; + + case MP_BINARY_OP_LESS_EQUAL: + asm_rv32_meta_comparison_le(emit->as, REG_ARG_2, reg_rhs, REG_RET, vtype_lhs == VTYPE_UINT); + break; + + case MP_BINARY_OP_MORE_EQUAL: + asm_rv32_meta_comparison_le(emit->as, reg_rhs, REG_ARG_2, REG_RET, vtype_lhs == VTYPE_UINT); + break; + + case MP_BINARY_OP_NOT_EQUAL: + asm_rv32_meta_comparison_ne(emit->as, reg_rhs, REG_ARG_2, REG_RET); + break; + + default: + break; + } #else #error not implemented #endif diff --git a/py/emitnrv32.c b/py/emitnrv32.c new file mode 100644 index 000000000..4a4410009 --- /dev/null +++ b/py/emitnrv32.c @@ -0,0 +1,44 @@ +/* + * This file is part of the MicroPython project, https://micropython.org/ + * + * The MIT License (MIT) + * + * Copyright (c) 2024 Alessandro Gatti + * + * Permission is hereby granted, free of charge, to any person 
obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +// RISC-V RV32 specific stuff + +#include "py/mpconfig.h" + +#if MICROPY_EMIT_RV32 + +// this is defined so that the assembler exports generic assembler API macros +#define GENERIC_ASM_API (1) +#include "py/asmrv32.h" + +// Word indices of REG_LOCAL_x in nlr_buf_t +#define NLR_BUF_IDX_LOCAL_1 (6) // S3 + +#define N_RV32 (1) +#define EXPORT_FUN(name) emit_native_rv32_##name +#include "py/emitnative.c" + +#endif diff --git a/py/mpconfig.h b/py/mpconfig.h index 2cbf5b88e..efb30ac04 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -406,8 +406,13 @@ #define MICROPY_EMIT_XTENSAWIN (0) #endif +// Whether to emit RISC-V RV32 native code +#ifndef MICROPY_EMIT_RV32 +#define MICROPY_EMIT_RV32 (0) +#endif + // Convenience definition for whether any native emitter is enabled -#define MICROPY_EMIT_NATIVE (MICROPY_EMIT_X64 || MICROPY_EMIT_X86 || MICROPY_EMIT_THUMB || MICROPY_EMIT_ARM || MICROPY_EMIT_XTENSA || MICROPY_EMIT_XTENSAWIN) +#define MICROPY_EMIT_NATIVE (MICROPY_EMIT_X64 || MICROPY_EMIT_X86 || MICROPY_EMIT_THUMB || MICROPY_EMIT_ARM || MICROPY_EMIT_XTENSA || MICROPY_EMIT_XTENSAWIN || MICROPY_EMIT_RV32) // Some architectures cannot read byte-wise from executable memory. 
In this case // the prelude for a native function (which usually sits after the machine code) diff --git a/py/py.cmake b/py/py.cmake index 74a433c97..f47661c40 100644 --- a/py/py.cmake +++ b/py/py.cmake @@ -9,6 +9,7 @@ set(MICROPY_SOURCE_PY ${MICROPY_PY_DIR}/argcheck.c ${MICROPY_PY_DIR}/asmarm.c ${MICROPY_PY_DIR}/asmbase.c + ${MICROPY_PY_DIR}/asmrv32.c ${MICROPY_PY_DIR}/asmthumb.c ${MICROPY_PY_DIR}/asmx64.c ${MICROPY_PY_DIR}/asmx86.c @@ -25,6 +26,7 @@ set(MICROPY_SOURCE_PY ${MICROPY_PY_DIR}/emitinlinethumb.c ${MICROPY_PY_DIR}/emitinlinextensa.c ${MICROPY_PY_DIR}/emitnarm.c + ${MICROPY_PY_DIR}/emitnrv32.c ${MICROPY_PY_DIR}/emitnthumb.c ${MICROPY_PY_DIR}/emitnx64.c ${MICROPY_PY_DIR}/emitnx86.c @@ -114,6 +114,8 @@ PY_CORE_O_BASENAME = $(addprefix py/,\ emitnxtensa.o \ emitinlinextensa.o \ emitnxtensawin.o \ + asmrv32.o \ + emitnrv32.o \ formatfloat.o \ parsenumbase.o \ parsenum.o \ |
