diff options
| -rw-r--r-- | py/.gitignore | 1 | ||||
| -rw-r--r-- | py/Makefile | 50 | ||||
| -rw-r--r-- | py/asmthumb.c | 421 | ||||
| -rw-r--r-- | py/asmthumb.h | 60 | ||||
| -rw-r--r-- | py/asmx64.c | 621 | ||||
| -rw-r--r-- | py/asmx64.h | 76 | ||||
| -rw-r--r-- | py/bc.c | 272 | ||||
| -rw-r--r-- | py/bc.h | 97 | ||||
| -rw-r--r-- | py/compile.c | 2510 | ||||
| -rw-r--r-- | py/compile.h | 1 | ||||
| -rw-r--r-- | py/emit.h | 120 | ||||
| -rw-r--r-- | py/emitbc.c | 692 | ||||
| -rw-r--r-- | py/emitcommon.c | 171 | ||||
| -rw-r--r-- | py/emitcpy.c | 834 | ||||
| -rw-r--r-- | py/emitthumb.c | 673 | ||||
| -rw-r--r-- | py/emitx64.c | 680 | ||||
| -rw-r--r-- | py/grammar.h | 300 | ||||
| -rw-r--r-- | py/lexer.c | 677 | ||||
| -rw-r--r-- | py/lexer.h | 141 | ||||
| -rw-r--r-- | py/lexerfile.c | 23 | ||||
| -rw-r--r-- | py/machine.h | 4 | ||||
| -rw-r--r-- | py/main.c | 58 | ||||
| -rw-r--r-- | py/malloc.c | 56 | ||||
| -rw-r--r-- | py/misc.c | 84 | ||||
| -rw-r--r-- | py/misc.h | 91 | ||||
| -rw-r--r-- | py/parse.c | 565 | ||||
| -rw-r--r-- | py/parse.h | 54 | ||||
| -rw-r--r-- | py/qstr.c | 56 | ||||
| -rw-r--r-- | py/runtime.c | 944 | ||||
| -rw-r--r-- | py/runtime.h | 121 | ||||
| -rw-r--r-- | py/scope.c | 218 | ||||
| -rw-r--r-- | py/scope.h | 58 | 
32 files changed, 10729 insertions, 0 deletions
| diff --git a/py/.gitignore b/py/.gitignore new file mode 100644 index 000000000..5761abcfd --- /dev/null +++ b/py/.gitignore @@ -0,0 +1 @@ +*.o diff --git a/py/Makefile b/py/Makefile new file mode 100644 index 000000000..669453dba --- /dev/null +++ b/py/Makefile @@ -0,0 +1,50 @@ +CC = gcc +CFLAGS = -Wall -ansi -std=gnu99 -Os #-DNDEBUG +LDFLAGS = + +SRC = \ +	malloc.c \ +	misc.c \ +	qstr.c \ +	lexer.c \ +	lexerfile.c \ +	parse.c \ +	scope.c \ +	compile.c \ +	emitcommon.c \ +	emitcpy.c \ +	emitbc.c \ +	asmx64.c \ +	emitx64v2.c \ +	emitthumb.c \ +	asmthumb.c \ +	runtime.c \ +	bc.c \ +	main.c \ + +SRC_ASM = \ +	runtime1.s \ + +OBJ = $(SRC:.c=.o) $(SRC_ASM:.s=.o) +LIB = +PROG = py + +$(PROG): $(OBJ) +	$(CC) -o $@ $(OBJ) $(LIB) $(LDFLAGS) + +runtime.o: runtime.c +	$(CC) $(CFLAGS) -O3 -c -o $@ $< + +bc.o: bc.c +	$(CC) $(CFLAGS) -O3 -c -o $@ $< + +parse.o: grammar.h +compile.o: grammar.h +emitcpy.o: emit.h +emitbc.o: emit.h +emitx64.o: emit.h +emitx64v2.o: emit.h +emitthumb.o: emit.h + +clean: +	/bin/rm $(OBJ) diff --git a/py/asmthumb.c b/py/asmthumb.c new file mode 100644 index 000000000..ea7547d4b --- /dev/null +++ b/py/asmthumb.c @@ -0,0 +1,421 @@ +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <string.h> + +#include "misc.h" +#include "machine.h" +#include "asmthumb.h" + +#define UNSIGNED_FIT8(x) (((x) & 0xffffff00) == 0) +#define UNSIGNED_FIT16(x) (((x) & 0xffff0000) == 0) +#define SIGNED_FIT8(x) (((x) & 0xffffff80) == 0) || (((x) & 0xffffff80) == 0xffffff80) +#define SIGNED_FIT9(x) (((x) & 0xffffff00) == 0) || (((x) & 0xffffff00) == 0xffffff00) +#define SIGNED_FIT12(x) (((x) & 0xfffff800) == 0) || (((x) & 0xfffff800) == 0xfffff800) + +struct _asm_thumb_t { +    int pass; +    uint code_offset; +    uint code_size; +    byte *code_base; +    byte dummy_data[8]; + +    int next_label; +    int max_num_labels; +    int *label_offsets; +    int num_locals; +    uint push_reglist; +    uint stack_adjust; +}; + +asm_thumb_t *asm_thumb_new() { +    
asm_thumb_t *as; + +    as = m_new(asm_thumb_t, 1); +    as->pass = 0; +    as->code_offset = 0; +    as->code_size = 0; +    as->code_base = NULL; +    as->label_offsets = NULL; +    as->num_locals = 0; + +    return as; +} + +void asm_thumb_free(asm_thumb_t *as, bool free_code) { +    if (free_code) { +        m_free(as->code_base); +    } +    /* +    if (as->label != NULL) { +        int i; +        for (i = 0; i < as->label->len; ++i) +        { +            Label *lab = &g_array_index(as->label, Label, i); +            if (lab->unresolved != NULL) +                g_array_free(lab->unresolved, true); +        } +        g_array_free(as->label, true); +    } +    */ +    m_free(as); +} + +void asm_thumb_start_pass(asm_thumb_t *as, int pass) { +    as->pass = pass; +    as->code_offset = 0; +    as->next_label = 1; +    if (pass == ASM_THUMB_PASS_1) { +        as->max_num_labels = 0; +    } else { +        if (pass == ASM_THUMB_PASS_2) { +            memset(as->label_offsets, -1, as->max_num_labels * sizeof(int)); +        } +    } +} + +void asm_thumb_end_pass(asm_thumb_t *as) { +    if (as->pass == ASM_THUMB_PASS_1) { +        // calculate number of labels need +        if (as->next_label > as->max_num_labels) { +            as->max_num_labels = as->next_label; +        } +        as->label_offsets = m_new(int, as->max_num_labels); +    } else if (as->pass == ASM_THUMB_PASS_2) { +        // calculate size of code in bytes +        as->code_size = as->code_offset; +        as->code_base = m_new(byte, as->code_size); +        printf("code_size: %u\n", as->code_size); +    } + +    /* +    // check labels are resolved +    if (as->label != NULL) +    { +        int i; +        for (i = 0; i < as->label->len; ++i) +            if (g_array_index(as->label, Label, i).unresolved != NULL) +                return false; +    } +    */ +} + +// all functions must go through this one to emit bytes +static byte *asm_thumb_get_cur_to_write_bytes(asm_thumb_t *as, int 
num_bytes_to_write) { +    //printf("emit %d\n", num_bytes_to_write); +    if (as->pass < ASM_THUMB_PASS_3) { +        as->code_offset += num_bytes_to_write; +        return as->dummy_data; +    } else { +        assert(as->code_offset + num_bytes_to_write <= as->code_size); +        byte *c = as->code_base + as->code_offset; +        as->code_offset += num_bytes_to_write; +        return c; +    } +} + +uint asm_thumb_get_code_size(asm_thumb_t *as) { +    return as->code_size; +} + +void *asm_thumb_get_code(asm_thumb_t *as) { +    // need to set low bit to indicate that it's thumb code +    return (void *)(((machine_uint_t)as->code_base) | 1); +} + +/* +static void asm_thumb_write_byte_1(asm_thumb_t *as, byte b1) { +    byte *c = asm_thumb_get_cur_to_write_bytes(as, 1); +    c[0] = b1; +} +*/ + +static void asm_thumb_write_op16(asm_thumb_t *as, uint op) { +    byte *c = asm_thumb_get_cur_to_write_bytes(as, 2); +    // little endian +    c[0] = op; +    c[1] = op >> 8; +} + +static void asm_thumb_write_op32(asm_thumb_t *as, uint op1, uint op2) { +    byte *c = asm_thumb_get_cur_to_write_bytes(as, 4); +    // little endian, op1 then op2 +    c[0] = op1; +    c[1] = op1 >> 8; +    c[2] = op2; +    c[3] = op2 >> 8; +} + +/* +#define IMM32_L0(x) ((x) & 0xff) +#define IMM32_L1(x) (((x) >> 8) & 0xff) +#define IMM32_L2(x) (((x) >> 16) & 0xff) +#define IMM32_L3(x) (((x) >> 24) & 0xff) + +static void asm_thumb_write_word32(asm_thumb_t *as, int w32) { +    byte *c = asm_thumb_get_cur_to_write_bytes(as, 4); +    c[0] = IMM32_L0(w32); +    c[1] = IMM32_L1(w32); +    c[2] = IMM32_L2(w32); +    c[3] = IMM32_L3(w32); +} +*/ + +// rlolist is a bit map indicating desired lo-registers +#define OP_PUSH_RLIST(rlolist)      (0xb400 | (rlolist)) +#define OP_PUSH_RLIST_LR(rlolist)   (0xb400 | 0x0100 | (rlolist)) +#define OP_POP_RLIST(rlolist)       (0xbc00 | (rlolist)) +#define OP_POP_RLIST_PC(rlolist)    (0xbc00 | 0x0100 | (rlolist)) + +#define OP_ADD_SP(num_words) (0xb000 | 
(num_words)) +#define OP_SUB_SP(num_words) (0xb080 | (num_words)) + +void asm_thumb_entry(asm_thumb_t *as, int num_locals) { +    // work out what to push and how many extra space to reserve on stack +    // so that we have enough for all locals and it's aligned an 8-byte boundary +    uint reglist; +    uint stack_adjust; +    if (num_locals < 0) { +        num_locals = 0; +    } +    // don't ppop r0 because it's used for return value +    switch (num_locals) { +        case 0: +            reglist = 0xf2; +            stack_adjust = 0; +            break; + +        case 1: +            reglist = 0xf2; +            stack_adjust = 0; +            break; + +        case 2: +            reglist = 0xfe; +            stack_adjust = 0; +            break; + +        case 3: +            reglist = 0xfe; +            stack_adjust = 0; +            break; + +        default: +            reglist = 0xfe; +            stack_adjust = ((num_locals - 3) + 1) & (~1); +            break; +    } +    asm_thumb_write_op16(as, OP_PUSH_RLIST_LR(reglist)); +    if (stack_adjust > 0) { +        asm_thumb_write_op16(as, OP_SUB_SP(stack_adjust)); +    } +    as->push_reglist = reglist; +    as->stack_adjust = stack_adjust; +    as->num_locals = num_locals; +} + +void asm_thumb_exit(asm_thumb_t *as) { +    if (as->stack_adjust > 0) { +        asm_thumb_write_op16(as, OP_ADD_SP(as->stack_adjust)); +    } +    asm_thumb_write_op16(as, OP_POP_RLIST_PC(as->push_reglist)); +} + +int asm_thumb_label_new(asm_thumb_t *as) { +    return as->next_label++; +} + +void asm_thumb_label_assign(asm_thumb_t *as, int label) { +    if (as->pass > ASM_THUMB_PASS_1) { +        assert(label < as->max_num_labels); +        if (as->pass == ASM_THUMB_PASS_2) { +            // assign label offset +            assert(as->label_offsets[label] == -1); +            as->label_offsets[label] = as->code_offset; +        } else if (as->pass == ASM_THUMB_PASS_3) { +            // ensure label offset has not changed from 
PASS_2 to PASS_3 +            //printf("l%d: (at %d=%ld)\n", label, as->label_offsets[label], as->code_offset); +            assert(as->label_offsets[label] == as->code_offset); +        } +    } +} + +// the i8 value will be zero extended into the r32 register! +void asm_thumb_mov_reg_i8(asm_thumb_t *as, uint rlo_dest, int i8) { +    assert(rlo_dest < REG_R8); +    // movs rlo_dest, #i8 +    asm_thumb_write_op16(as, 0x2000 | (rlo_dest << 8) | i8); +} + +// if loading lo half, the i16 value will be zero extended into the r32 register! +void asm_thumb_mov_i16_to_reg(asm_thumb_t *as, int i16, uint reg_dest, bool load_hi_half) { +    assert(reg_dest < REG_R15); +    uint op; +    if (load_hi_half) { +        // movt reg_dest, #i16 +        op = 0xf2c0; +    } else { +        // movw reg_dest, #i16 +        op = 0xf240; +    } +    asm_thumb_write_op32(as, op | ((i16 >> 1) & 0x0400) | ((i16 >> 12) & 0xf), ((i16 << 4) & 0x7000) | (reg_dest << 8) | (i16 & 0xff)); +} + +void asm_thumb_mov_reg_i32(asm_thumb_t *as, uint reg_dest, machine_uint_t i32) { +    // movw, movt does it in 8 bytes +    // ldr [pc, #], dw does it in 6 bytes, but we might not reach to end of code for dw + +    asm_thumb_mov_i16_to_reg(as, i32, reg_dest, false); +    asm_thumb_mov_i16_to_reg(as, i32 >> 16, reg_dest, true); +} + +void asm_thumb_mov_reg_i32_optimised(asm_thumb_t *as, uint reg_dest, int i32) { +    if (reg_dest < 8 && UNSIGNED_FIT8(i32)) { +        asm_thumb_mov_reg_i8(as, reg_dest, i32); +    } else if (UNSIGNED_FIT16(i32)) { +        asm_thumb_mov_i16_to_reg(as, i32, reg_dest, false); +    } else { +        asm_thumb_mov_reg_i32(as, reg_dest, i32); +    } +} + +void asm_thumb_mov_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_src) { +    uint op_lo; +    if (reg_src < 8) { +        op_lo = reg_src << 3; +    } else { +        op_lo = 0x40 | ((reg_src - 8) << 3); +    } +    if (reg_dest < 8) { +        op_lo |= reg_dest; +    } else { +        op_lo |= 0x80 | (reg_dest - 8); +    } +  
  asm_thumb_write_op16(as, 0x4600 | op_lo); +} + +#define OP_STR_TO_SP_OFFSET(rlo_dest, word_offset) (0x9000 | ((rlo_dest) << 8) | ((word_offset) & 0x00ff)) +#define OP_LDR_FROM_SP_OFFSET(rlo_dest, word_offset) (0x9800 | ((rlo_dest) << 8) | ((word_offset) & 0x00ff)) + +void asm_thumb_mov_local_reg(asm_thumb_t *as, int local_num, uint rlo_src) { +    assert(rlo_src < REG_R8); +    int word_offset = as->num_locals - local_num - 1; +    assert(as->pass < ASM_THUMB_PASS_3 || word_offset >= 0); +    asm_thumb_write_op16(as, OP_STR_TO_SP_OFFSET(rlo_src, word_offset)); +} + +void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num) { +    assert(rlo_dest < REG_R8); +    int word_offset = as->num_locals - local_num - 1; +    assert(as->pass < ASM_THUMB_PASS_3 || word_offset >= 0); +    asm_thumb_write_op16(as, OP_LDR_FROM_SP_OFFSET(rlo_dest, word_offset)); +} + +void asm_thumb_mov_reg_local_addr(asm_thumb_t *as, uint reg_dest, int local_num) { +    assert(0); +    // see format 12, load address +    asm_thumb_write_op16(as, 0x0000); +} + +#define OP_ADD_REG_REG_REG(rlo_dest, rlo_src_a, rlo_src_b) (0x1800 | ((rlo_src_b) << 6) | ((rlo_src_a) << 3) | (rlo_dest)) + +void asm_thumb_add_reg_reg_reg(asm_thumb_t *as, uint rlo_dest, uint rlo_src_a, uint rlo_src_b) { +    asm_thumb_write_op16(as, OP_ADD_REG_REG_REG(rlo_dest, rlo_src_a, rlo_src_b)); +} + +#define OP_CMP_REG_REG(rlo_a, rlo_b) (0x4280 | ((rlo_b) << 3) | (rlo_a)) + +void asm_thumb_cmp_reg_reg(asm_thumb_t *as, uint rlo_a, uint rlo_b) { +    asm_thumb_write_op16(as, OP_CMP_REG_REG(rlo_a, rlo_b)); +} + +void asm_thumb_ite_ge(asm_thumb_t *as) { +    asm_thumb_write_op16(as, 0xbfac); +} + +#define OP_B(byte_offset) (0xe000 | (((byte_offset) >> 1) & 0x07ff)) +// this could be wrong, because it should have a range of +/- 16MiB... 
+#define OP_BW_HI(byte_offset) (0xf000 | (((byte_offset) >> 12) & 0x07ff)) +#define OP_BW_LO(byte_offset) (0xb800 | (((byte_offset) >> 1) & 0x07ff)) + +void asm_thumb_b_label(asm_thumb_t *as, int label) { +    if (as->pass > ASM_THUMB_PASS_1) { +        int dest = as->label_offsets[label]; +        int rel = dest - as->code_offset; +        rel -= 4; // account for instruction prefetch, PC is 4 bytes ahead of this instruction +        if (dest >= 0 && rel <= -4) { +            // is a backwards jump, so we know the size of the jump on the first pass +            // calculate rel assuming 12 bit relative jump +            if (SIGNED_FIT12(rel)) { +                asm_thumb_write_op16(as, OP_B(rel)); +            } else { +                goto large_jump; +            } +        } else { +            // is a forwards jump, so need to assume it's large +            large_jump: +            asm_thumb_write_op32(as, OP_BW_HI(rel), OP_BW_LO(rel)); +        } +    } +} + +#define OP_CMP_REG_IMM(rlo, i8) (0x2800 | ((rlo) << 8) | (i8)) +// all these bit arithmetics need coverage testing! 
+#define OP_BEQ(byte_offset) (0xd000 | (((byte_offset) >> 1) & 0x00ff)) +#define OP_BEQW_HI(byte_offset) (0xf000 | (((byte_offset) >> 10) & 0x0400) | (((byte_offset) >> 14) & 0x003f)) +#define OP_BEQW_LO(byte_offset) (0x8000 | ((byte_offset) & 0x2000) | (((byte_offset) >> 1) & 0x0fff)) + +void asm_thumb_cmp_reg_bz_label(asm_thumb_t *as, uint rlo, int label) { +    assert(rlo < REG_R8); + +    // compare reg with 0 +    asm_thumb_write_op16(as, OP_CMP_REG_IMM(rlo, 0)); + +    // branch if equal +    if (as->pass > ASM_THUMB_PASS_1) { +        int dest = as->label_offsets[label]; +        int rel = dest - as->code_offset; +        rel -= 4; // account for instruction prefetch, PC is 4 bytes ahead of this instruction +        if (dest >= 0 && rel <= -4) { +            // is a backwards jump, so we know the size of the jump on the first pass +            // calculate rel assuming 12 bit relative jump +            if (SIGNED_FIT9(rel)) { +                asm_thumb_write_op16(as, OP_BEQ(rel)); +            } else { +                goto large_jump; +            } +        } else { +            // is a forwards jump, so need to assume it's large +            large_jump: +            asm_thumb_write_op32(as, OP_BEQW_HI(rel), OP_BEQW_LO(rel)); +        } +    } +} + +#define OP_BLX(reg) (0x4780 | ((reg) << 3)) +#define OP_SVC(arg) (0xdf00 | (arg)) +#define OP_LDR_FROM_BASE_OFFSET(rlo_dest, rlo_base, word_offset) (0x6800 | (((word_offset) << 6) & 0x07c0) | ((rlo_base) << 3) | (rlo_dest)) + +void asm_thumb_bl_ind(asm_thumb_t *as, void *fun_ptr, uint fun_id, uint reg_temp) { +    /* TODO make this use less bytes +    uint rlo_base = REG_R3; +    uint rlo_dest = REG_R7; +    uint word_offset = 4; +    asm_thumb_write_op16(as, 0x0000); +    asm_thumb_write_op16(as, 0x6800 | (word_offset << 6) | (rlo_base << 3) | rlo_dest); // ldr rlo_dest, [rlo_base, #offset] +    asm_thumb_write_op16(as, 0x4780 | (REG_R9 << 3)); // blx reg +    */ + +    if (0) { +        // load ptr to 
function into register using immediate, then branch +        // not relocatable +        asm_thumb_mov_reg_i32(as, reg_temp, (machine_uint_t)fun_ptr); +        asm_thumb_write_op16(as, OP_BLX(reg_temp)); +    } else if (1) { +        asm_thumb_write_op16(as, OP_LDR_FROM_BASE_OFFSET(reg_temp, REG_R7, fun_id)); +        asm_thumb_write_op16(as, OP_BLX(reg_temp)); +    } else { +        // use SVC +        asm_thumb_write_op16(as, OP_SVC(fun_id)); +    } +} diff --git a/py/asmthumb.h b/py/asmthumb.h new file mode 100644 index 000000000..d3ffb9a00 --- /dev/null +++ b/py/asmthumb.h @@ -0,0 +1,60 @@ +#define ASM_THUMB_PASS_1 (1) +#define ASM_THUMB_PASS_2 (2) +#define ASM_THUMB_PASS_3 (3) + +#define REG_R0  (0) +#define REG_R1  (1) +#define REG_R2  (2) +#define REG_R3  (3) +#define REG_R4  (4) +#define REG_R5  (5) +#define REG_R6  (6) +#define REG_R7  (7) +#define REG_R8  (8) +#define REG_R9  (9) +#define REG_R10 (10) +#define REG_R11 (11) +#define REG_R12 (12) +#define REG_R13 (13) +#define REG_R14 (14) +#define REG_R15 (15) +#define REG_LR  (REG_R14) + +#define REG_RET REG_R0 +#define REG_ARG_1 REG_R0 +#define REG_ARG_2 REG_R1 +#define REG_ARG_3 REG_R2 +#define REG_ARG_4 REG_R3 + +typedef struct _asm_thumb_t asm_thumb_t; + +asm_thumb_t *asm_thumb_new(); +void asm_thumb_free(asm_thumb_t *as, bool free_code); +void asm_thumb_start_pass(asm_thumb_t *as, int pass); +void asm_thumb_end_pass(asm_thumb_t *as); +uint asm_thumb_get_code_size(asm_thumb_t *as); +void *asm_thumb_get_code(asm_thumb_t *as); + +void asm_thumb_entry(asm_thumb_t *as, int num_locals); +void asm_thumb_exit(asm_thumb_t *as); + +int asm_thumb_label_new(asm_thumb_t *as); +void asm_thumb_label_assign(asm_thumb_t *as, int label); + +// argument order follows ARM, in general dest is first + +void asm_thumb_mov_reg_i8(asm_thumb_t *as, uint rlo_dest, int i8_src); +void asm_thumb_mov_reg_i32(asm_thumb_t *as, uint reg_dest, machine_uint_t i32_src); +void asm_thumb_mov_reg_i32_optimised(asm_thumb_t *as, uint 
reg_dest, int i32_src); +void asm_thumb_mov_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_src); +void asm_thumb_mov_local_reg(asm_thumb_t *as, int local_num_dest, uint rlo_src); +void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num); +void asm_thumb_mov_reg_local_addr(asm_thumb_t *as, uint reg_dest, int local_num); + +void asm_thumb_add_reg_reg_reg(asm_thumb_t *as, uint rlo_dest, uint rlo_src_a, uint rlo_src_b); +void asm_thumb_cmp_reg_reg(asm_thumb_t *as, uint rlo_a, uint rlo_b); +void asm_thumb_ite_ge(asm_thumb_t *as); + +void asm_thumb_b_label(asm_thumb_t *as, int label); +void asm_thumb_cmp_reg_bz_label(asm_thumb_t *as, uint rlo, int label); +void asm_thumb_bl_ind(asm_thumb_t *as, void *fun_ptr, uint fun_id, uint reg_temp); diff --git a/py/asmx64.c b/py/asmx64.c new file mode 100644 index 000000000..59c8113bc --- /dev/null +++ b/py/asmx64.c @@ -0,0 +1,621 @@ +#include <stdio.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <string.h> + +#include "misc.h" +#include "asmx64.h" + +/* all offsets are measured in multiples of 8 bytes */ +#define WORD_SIZE                (8) + +#define OPCODE_NOP               (0x90) +#define OPCODE_PUSH_R64          (0x50) +#define OPCODE_PUSH_I64          (0x68) +#define OPCODE_PUSH_M64          (0xff) /* /6 */ +#define OPCODE_POP_R64           (0x58) +#define OPCODE_RET               (0xc3) +#define OPCODE_MOV_I8_TO_R8      (0xb0) /* +rb */ +#define OPCODE_MOV_I64_TO_R64    (0xb8) +#define OPCODE_MOV_I32_TO_RM32   (0xc7) +#define OPCODE_MOV_R64_TO_RM64   (0x89) +#define OPCODE_MOV_RM64_TO_R64   (0x8b) +#define OPCODE_LEA_MEM_TO_R64    (0x8d) /* /r */ +#define OPCODE_XOR_R64_TO_RM64   (0x31) /* /r */ +#define OPCODE_ADD_R64_TO_RM64   (0x01) +#define OPCODE_ADD_I32_TO_RM32   (0x81) /* /0 */ +#define OPCODE_ADD_I8_TO_RM32    (0x83) /* /0 */ +#define OPCODE_SUB_R64_FROM_RM64 (0x29) +#define OPCODE_SUB_I32_FROM_RM64 (0x81) /* /5 */ +#define OPCODE_SUB_I8_FROM_RM64  (0x83) /* /5 
*/ +#define OPCODE_SHL_RM32_BY_I8    (0xc1) /* /4 */ +#define OPCODE_SHR_RM32_BY_I8    (0xc1) /* /5 */ +#define OPCODE_SAR_RM32_BY_I8    (0xc1) /* /7 */ +#define OPCODE_CMP_I32_WITH_RM32 (0x81) /* /7 */ +#define OPCODE_CMP_I8_WITH_RM32  (0x83) /* /7 */ +#define OPCODE_CMP_R64_WITH_RM64 (0x39) +#define OPCODE_CMP_RM32_WITH_R32 (0x3b) +#define OPCODE_TEST_R8_WITH_RM8  (0x84) /* /r */ +#define OPCODE_JMP_REL8          (0xeb) +#define OPCODE_JMP_REL32         (0xe9) +#define OPCODE_JCC_REL8          (0x70) /* | jcc type */ +#define OPCODE_JCC_REL32_A       (0x0f) +#define OPCODE_JCC_REL32_B       (0x80) /* | jcc type */ +#define OPCODE_SETCC_RM8_A       (0x0f) +#define OPCODE_SETCC_RM8_B       (0x90) /* | jcc type, /0 */ +#define OPCODE_CALL_REL32        (0xe8) +#define OPCODE_CALL_RM32         (0xff) /* /2 */ +#define OPCODE_LEAVE             (0xc9) + +#define MODRM_R64(x)    ((x) << 3) +#define MODRM_RM_DISP0  (0x00) +#define MODRM_RM_DISP8  (0x40) +#define MODRM_RM_DISP32 (0x80) +#define MODRM_RM_REG    (0xc0) +#define MODRM_RM_R64(x) (x) + +#define REX_PREFIX  (0x40) +#define REX_W       (0x08)  // width +#define REX_R       (0x04)  // register +#define REX_X       (0x02)  // index +#define REX_B       (0x01)  // base + +#define IMM32_L0(x) ((x) & 0xff) +#define IMM32_L1(x) (((x) >> 8) & 0xff) +#define IMM32_L2(x) (((x) >> 16) & 0xff) +#define IMM32_L3(x) (((x) >> 24) & 0xff) +#define IMM64_L4(x) (((x) >> 32) & 0xff) +#define IMM64_L5(x) (((x) >> 40) & 0xff) +#define IMM64_L6(x) (((x) >> 48) & 0xff) +#define IMM64_L7(x) (((x) >> 56) & 0xff) + +#define UNSIGNED_FIT8(x) (((x) & 0xffffffffffffff00) == 0) +#define UNSIGNED_FIT32(x) (((x) & 0xffffffff00000000) == 0) +#define SIGNED_FIT8(x) (((x) & 0xffffff80) == 0) || (((x) & 0xffffff80) == 0xffffff80) + +struct _asm_x64_t { +    int pass; +    uint code_offset; +    uint code_size; +    byte *code_base; +    byte dummy_data[8]; + +    int next_label; +    int max_num_labels; +    int *label_offsets; +}; + +// for 
allocating memory, see src/v8/src/platform-linux.cc +void *alloc_mem(uint req_size, uint *alloc_size, bool is_exec) { +    req_size = (req_size + 0xfff) & (~0xfff); +    int prot = PROT_READ | PROT_WRITE | (is_exec ? PROT_EXEC : 0); +    void *ptr = mmap(NULL, req_size, prot, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +    if (ptr == MAP_FAILED) { +        assert(0); +    } +    *alloc_size = req_size; +    return ptr; +} + +asm_x64_t* asm_x64_new() { +    asm_x64_t* as; + +    as = m_new(asm_x64_t, 1); +    as->pass = 0; +    as->code_offset = 0; +    as->code_size = 0; +    as->code_base = NULL; +    as->label_offsets = NULL; + +    return as; +} + +void asm_x64_free(asm_x64_t* as, bool free_code) { +    if (free_code) { +        m_free(as->code_base); +    } +    /* +    if (as->label != NULL) { +        int i; +        for (i = 0; i < as->label->len; ++i) +        { +            Label* lab = &g_array_index(as->label, Label, i); +            if (lab->unresolved != NULL) +                g_array_free(lab->unresolved, true); +        } +        g_array_free(as->label, true); +    } +    */ +    m_free(as); +} + +void asm_x64_start_pass(asm_x64_t *as, int pass) { +    as->pass = pass; +    as->code_offset = 0; +    as->next_label = 1; +    if (pass == ASM_X64_PASS_1) { +        as->max_num_labels = 0; +    } else { +        if (pass == ASM_X64_PASS_2) { +            memset(as->label_offsets, -1, as->max_num_labels * sizeof(int)); +        } +    } +} + +void asm_x64_end_pass(asm_x64_t *as) { +    if (as->pass == ASM_X64_PASS_1) { +        // calculate number of labels need +        if (as->next_label > as->max_num_labels) { +            as->max_num_labels = as->next_label; +        } +        as->label_offsets = m_new(int, as->max_num_labels); +    } else if (as->pass == ASM_X64_PASS_2) { +        // calculate size of code in bytes +        as->code_size = as->code_offset; +        as->code_base = m_new(byte, as->code_size); +        printf("code_size: %u\n", 
as->code_size); +    } + +    /* +    // check labels are resolved +    if (as->label != NULL) +    { +        int i; +        for (i = 0; i < as->label->len; ++i) +            if (g_array_index(as->label, Label, i).unresolved != NULL) +                return false; +    } +    */ +} + +// all functions must go through this one to emit bytes +static byte* asm_x64_get_cur_to_write_bytes(asm_x64_t* as, int num_bytes_to_write) { +    //printf("emit %d\n", num_bytes_to_write); +    if (as->pass < ASM_X64_PASS_3) { +        as->code_offset += num_bytes_to_write; +        return as->dummy_data; +    } else { +        assert(as->code_offset + num_bytes_to_write <= as->code_size); +        byte *c = as->code_base + as->code_offset; +        as->code_offset += num_bytes_to_write; +        return c; +    } +} + +uint asm_x64_get_code_size(asm_x64_t* as) { +    return as->code_size; +} + +void* asm_x64_get_code(asm_x64_t* as) { +    return as->code_base; +} + +static void asm_x64_write_byte_1(asm_x64_t* as, byte b1) { +    byte* c = asm_x64_get_cur_to_write_bytes(as, 1); +    c[0] = b1; +} + +static void asm_x64_write_byte_2(asm_x64_t* as, byte b1, byte b2) { +    byte* c = asm_x64_get_cur_to_write_bytes(as, 2); +    c[0] = b1; +    c[1] = b2; +} + +static void asm_x64_write_byte_3(asm_x64_t* as, byte b1, byte b2, byte b3) { +    byte* c = asm_x64_get_cur_to_write_bytes(as, 3); +    c[0] = b1; +    c[1] = b2; +    c[2] = b3; +} + +static void asm_x64_write_word32(asm_x64_t* as, int w32) { +    byte* c = asm_x64_get_cur_to_write_bytes(as, 4); +    c[0] = IMM32_L0(w32); +    c[1] = IMM32_L1(w32); +    c[2] = IMM32_L2(w32); +    c[3] = IMM32_L3(w32); +} + +static void asm_x64_write_word64(asm_x64_t* as, int64_t w64) { +    byte* c = asm_x64_get_cur_to_write_bytes(as, 8); +    c[0] = IMM32_L0(w64); +    c[1] = IMM32_L1(w64); +    c[2] = IMM32_L2(w64); +    c[3] = IMM32_L3(w64); +    c[4] = IMM64_L4(w64); +    c[5] = IMM64_L5(w64); +    c[6] = IMM64_L6(w64); +    c[7] = 
IMM64_L7(w64); +} + +/* unused +static void asm_x64_write_word32_to(asm_x64_t* as, int offset, int w32) { +    byte* c; +    assert(offset + 4 <= as->code_size); +    c = as->code_base + offset; +    c[0] = IMM32_L0(w32); +    c[1] = IMM32_L1(w32); +    c[2] = IMM32_L2(w32); +    c[3] = IMM32_L3(w32); +} +*/ + +static void asm_x64_write_r64_disp(asm_x64_t* as, int r64, int disp_r64, int disp_offset) { +    assert(disp_r64 != REG_RSP); + +    if (disp_offset == 0 && disp_r64 != REG_RBP) { +        asm_x64_write_byte_1(as, MODRM_R64(r64) | MODRM_RM_DISP0 | MODRM_RM_R64(disp_r64)); +    } else if (SIGNED_FIT8(disp_offset)) { +        asm_x64_write_byte_2(as, MODRM_R64(r64) | MODRM_RM_DISP8 | MODRM_RM_R64(disp_r64), IMM32_L0(disp_offset)); +    } else { +        asm_x64_write_byte_1(as, MODRM_R64(r64) | MODRM_RM_DISP32 | MODRM_RM_R64(disp_r64)); +        asm_x64_write_word32(as, disp_offset); +    } +} + +void asm_x64_nop(asm_x64_t* as) +{ +    asm_x64_write_byte_1(as, OPCODE_NOP); +} + +void asm_x64_push_r64(asm_x64_t* as, int src_r64) +{ +    asm_x64_write_byte_1(as, OPCODE_PUSH_R64 | src_r64); +} + +void asm_x64_push_i32(asm_x64_t* as, int src_i32) +{ +    asm_x64_write_byte_1(as, OPCODE_PUSH_I64); +    asm_x64_write_word32(as, src_i32); // will be sign extended to 64 bits +} + +void asm_x64_push_disp(asm_x64_t* as, int src_r64, int src_offset) { +    asm_x64_write_byte_1(as, OPCODE_PUSH_M64); +    asm_x64_write_r64_disp(as, 6, src_r64, src_offset); +} + +void asm_x64_pop_r64(asm_x64_t* as, int dest_r64) +{ +    asm_x64_write_byte_1(as, OPCODE_POP_R64 | dest_r64); +} + +static void asm_x64_ret(asm_x64_t* as) +{ +    asm_x64_write_byte_1(as, OPCODE_RET); +} + +void asm_x64_mov_r32_to_r32(asm_x64_t* as, int src_r32, int dest_r32) { +    // defaults to 32 bit operation +    asm_x64_write_byte_2(as, OPCODE_MOV_R64_TO_RM64, MODRM_R64(src_r32) | MODRM_RM_REG | MODRM_RM_R64(dest_r32)); +} + +void asm_x64_mov_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64) { +    // 
use REX prefix for 64 bit operation +    asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_MOV_R64_TO_RM64, MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64)); +} + +void asm_x64_mov_r64_to_disp(asm_x64_t* as, int src_r64, int dest_r64, int dest_disp) { +    // use REX prefix for 64 bit operation +    asm_x64_write_byte_2(as, REX_PREFIX | REX_W, OPCODE_MOV_R64_TO_RM64); +    asm_x64_write_r64_disp(as, src_r64, dest_r64, dest_disp); +} + +void asm_x64_mov_disp_to_r64(asm_x64_t* as, int src_r64, int src_disp, int dest_r64) { +    // use REX prefix for 64 bit operation +    asm_x64_write_byte_2(as, REX_PREFIX | REX_W, OPCODE_MOV_RM64_TO_R64); +    asm_x64_write_r64_disp(as, dest_r64, src_r64, src_disp); +} + +void asm_x64_lea_disp_to_r64(asm_x64_t* as, int src_r64, int src_disp, int dest_r64) { +    // use REX prefix for 64 bit operation +    asm_x64_write_byte_2(as, REX_PREFIX | REX_W, OPCODE_LEA_MEM_TO_R64); +    asm_x64_write_r64_disp(as, dest_r64, src_r64, src_disp); +} + +void asm_x64_mov_i8_to_r8(asm_x64_t *as, int src_i8, int dest_r64) { +    asm_x64_write_byte_2(as, OPCODE_MOV_I8_TO_R8 | dest_r64, src_i8); +} + +void asm_x64_mov_i32_to_r64(asm_x64_t* as, int src_i32, int dest_r64) { +    // cpu defaults to i32 to r64, with zero extension +    asm_x64_write_byte_1(as, OPCODE_MOV_I64_TO_R64 | dest_r64); +    asm_x64_write_word32(as, src_i32); +} + +void asm_x64_mov_i64_to_r64(asm_x64_t* as, int64_t src_i64, int dest_r64) { +    // cpu defaults to i32 to r64 +    // to mov i64 to r64 need to use REX prefix +    asm_x64_write_byte_2(as, REX_PREFIX | REX_W, OPCODE_MOV_I64_TO_R64 | dest_r64); +    asm_x64_write_word64(as, src_i64); +} + +void asm_x64_mov_i64_to_r64_optimised(asm_x64_t *as, int64_t src_i64, int dest_r64) { +    if (UNSIGNED_FIT32(src_i64)) { +        // 5 bytes +        asm_x64_mov_i32_to_r64(as, src_i64 & 0xffffffff, dest_r64); +    } else { +        // 10 bytes +        asm_x64_mov_i64_to_r64(as, src_i64, dest_r64); +    } +} + +void 
asm_x64_mov_i32_to_disp(asm_x64_t* as, int src_i32, int dest_r32, int dest_disp) +{ +    assert(0); +    asm_x64_write_byte_1(as, OPCODE_MOV_I32_TO_RM32); +    //asm_x64_write_r32_disp(as, 0, dest_r32, dest_disp); +    asm_x64_write_word32(as, src_i32); +} + +void asm_x64_xor_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64) { +    asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_XOR_R64_TO_RM64, MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64)); +} + +void asm_x64_add_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64) { +    asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_ADD_R64_TO_RM64, MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64)); +} + +void asm_x64_add_i32_to_r32(asm_x64_t* as, int src_i32, int dest_r32) +{ +    assert(dest_r32 != REG_RSP); // in this case i think src_i32 must be 64 bits +    if (SIGNED_FIT8(src_i32)) +    { +        asm_x64_write_byte_2(as, OPCODE_ADD_I8_TO_RM32, MODRM_R64(0) | MODRM_RM_REG | MODRM_RM_R64(dest_r32)); +        asm_x64_write_byte_1(as, src_i32 & 0xff); +    } +    else +    { +        asm_x64_write_byte_2(as, OPCODE_ADD_I32_TO_RM32, MODRM_R64(0) | MODRM_RM_REG | MODRM_RM_R64(dest_r32)); +        asm_x64_write_word32(as, src_i32); +    } +} + +void asm_x64_sub_r32_from_r32(asm_x64_t* as, int src_r32, int dest_r32) { +    // defaults to 32 bit operation +    asm_x64_write_byte_2(as, OPCODE_SUB_R64_FROM_RM64, MODRM_R64(src_r32) | MODRM_RM_REG | MODRM_RM_R64(dest_r32)); +} + +void asm_x64_sub_r64_from_r64(asm_x64_t* as, int src_r64, int dest_r64) { +    // use REX prefix for 64 bit operation +    asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_SUB_R64_FROM_RM64, MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64)); +} + +void asm_x64_sub_i32_from_r32(asm_x64_t* as, int src_i32, int dest_r32) { +    if (SIGNED_FIT8(src_i32)) { +        // defaults to 32 bit operation +        asm_x64_write_byte_2(as, OPCODE_SUB_I8_FROM_RM64, MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r32)); +      
  asm_x64_write_byte_1(as, src_i32 & 0xff); +    } else { +        // defaults to 32 bit operation +        asm_x64_write_byte_2(as, OPCODE_SUB_I32_FROM_RM64, MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r32)); +        asm_x64_write_word32(as, src_i32); +    } +} + +void asm_x64_sub_i32_from_r64(asm_x64_t* as, int src_i32, int dest_r64) { +    if (SIGNED_FIT8(src_i32)) { +        // use REX prefix for 64 bit operation +        asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_SUB_I8_FROM_RM64, MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r64)); +        asm_x64_write_byte_1(as, src_i32 & 0xff); +    } else { +        // use REX prefix for 64 bit operation +        asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_SUB_I32_FROM_RM64, MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r64)); +        asm_x64_write_word32(as, src_i32); +    } +} + +/* shifts not tested */ +void asm_x64_shl_r32_by_imm(asm_x64_t* as, int r32, int imm) { +    asm_x64_write_byte_2(as, OPCODE_SHL_RM32_BY_I8, MODRM_R64(4) | MODRM_RM_REG | MODRM_RM_R64(r32)); +    asm_x64_write_byte_1(as, imm); +} + +void asm_x64_shr_r32_by_imm(asm_x64_t* as, int r32, int imm) { +    asm_x64_write_byte_2(as, OPCODE_SHR_RM32_BY_I8, MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(r32)); +    asm_x64_write_byte_1(as, imm); +} + +void asm_x64_sar_r32_by_imm(asm_x64_t* as, int r32, int imm) { +    asm_x64_write_byte_2(as, OPCODE_SAR_RM32_BY_I8, MODRM_R64(7) | MODRM_RM_REG | MODRM_RM_R64(r32)); +    asm_x64_write_byte_1(as, imm); +} + +void asm_x64_cmp_r64_with_r64(asm_x64_t* as, int src_r64_a, int src_r64_b) { +    asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_CMP_R64_WITH_RM64, MODRM_R64(src_r64_a) | MODRM_RM_REG | MODRM_RM_R64(src_r64_b)); +} + +void asm_x64_cmp_r32_with_disp(asm_x64_t* as, int src_r32_a, int src_r32_b, int src_disp_b) { +    assert(0); +    asm_x64_write_byte_1(as, OPCODE_CMP_R64_WITH_RM64); +    //asm_x64_write_r32_disp(as, src_r32_a, src_r32_b, src_disp_b); +} + +void 
asm_x64_cmp_disp_with_r32(asm_x64_t* as, int src_r32_a, int src_disp_a, int src_r32_b) { +    assert(0); +    asm_x64_write_byte_1(as, OPCODE_CMP_RM32_WITH_R32); +    //asm_x64_write_r32_disp(as, src_r32_b, src_r32_a, src_disp_a); +} + +void asm_x64_cmp_i32_with_r32(asm_x64_t* as, int src_i32, int src_r32) { +    if (SIGNED_FIT8(src_i32)) { +        asm_x64_write_byte_2(as, OPCODE_CMP_I8_WITH_RM32, MODRM_R64(7) | MODRM_RM_REG | MODRM_RM_R64(src_r32)); +        asm_x64_write_byte_1(as, src_i32 & 0xff); +    } else { +        asm_x64_write_byte_2(as, OPCODE_CMP_I32_WITH_RM32, MODRM_R64(7) | MODRM_RM_REG | MODRM_RM_R64(src_r32)); +        asm_x64_write_word32(as, src_i32); +    } +} + +void asm_x64_test_r8_with_r8(asm_x64_t* as, int src_r64_a, int src_r64_b) { +    asm_x64_write_byte_2(as, OPCODE_TEST_R8_WITH_RM8, MODRM_R64(src_r64_a) | MODRM_RM_REG | MODRM_RM_R64(src_r64_b)); +} + +void asm_x64_setcc_r8(asm_x64_t* as, int jcc_type, int dest_r8) { +    asm_x64_write_byte_3(as, OPCODE_SETCC_RM8_A, OPCODE_SETCC_RM8_B | jcc_type, MODRM_R64(0) | MODRM_RM_REG | MODRM_RM_R64(dest_r8)); +} + +int asm_x64_label_new(asm_x64_t* as) { +    return as->next_label++; +} + +void asm_x64_label_assign(asm_x64_t* as, int label) { +    if (as->pass > ASM_X64_PASS_1) { +        assert(label < as->max_num_labels); +        if (as->pass == ASM_X64_PASS_2) { +            // assign label offset +            assert(as->label_offsets[label] == -1); +            as->label_offsets[label] = as->code_offset; +        } else if (as->pass == ASM_X64_PASS_3) { +            // ensure label offset has not changed from PASS_2 to PASS_3 +            //printf("l%d: (at %d=%ld)\n", label, as->label_offsets[label], as->code_offset); +            assert(as->label_offsets[label] == as->code_offset); +        } +    } +} + +void asm_x64_jmp_label(asm_x64_t* as, int label) { +    if (as->pass > ASM_X64_PASS_1) { +        int dest = as->label_offsets[label]; +        int rel = dest - as->code_offset; +        if 
(dest >= 0 && rel < 0) { +            // is a backwards jump, so we know the size of the jump on the first pass +            // calculate rel assuming 8 bit relative jump +            rel -= 2; +            if (SIGNED_FIT8(rel)) { +                asm_x64_write_byte_2(as, OPCODE_JMP_REL8, rel & 0xff); +            } else { +                rel += 2; +                goto large_jump; +            } +        } else { +            // is a forwards jump, so need to assume it's large +            large_jump: +            rel -= 5; +            asm_x64_write_byte_1(as, OPCODE_JMP_REL32); +            asm_x64_write_word32(as, rel); +        } +    } +} + +void asm_x64_jcc_label(asm_x64_t* as, int jcc_type, int label) { +    if (as->pass > ASM_X64_PASS_1) { +        int dest = as->label_offsets[label]; +        int rel = dest - as->code_offset; +        if (dest >= 0 && rel < 0) { +            // is a backwards jump, so we know the size of the jump on the first pass +            // calculate rel assuming 8 bit relative jump +            rel -= 2; +            if (SIGNED_FIT8(rel)) { +                asm_x64_write_byte_2(as, OPCODE_JCC_REL8 | jcc_type, rel & 0xff); +            } else { +                rel += 2; +                goto large_jump; +            } +        } else { +            // is a forwards jump, so need to assume it's large +            large_jump: +            rel -= 6; +            asm_x64_write_byte_2(as, OPCODE_JCC_REL32_A, OPCODE_JCC_REL32_B | jcc_type); +            asm_x64_write_word32(as, rel); +        } +    } +} + +void asm_x64_entry(asm_x64_t* as, int num_locals) { +    asm_x64_push_r64(as, REG_RBP); +    asm_x64_mov_r64_to_r64(as, REG_RSP, REG_RBP); +    if (num_locals < 0) { +        num_locals = 0; +    } +    num_locals |= 1; // make it odd so stack is aligned on 16 byte boundary +    asm_x64_sub_i32_from_r64(as, num_locals * WORD_SIZE, REG_RSP); +    asm_x64_push_r64(as, REG_RBX); +} + +void asm_x64_exit(asm_x64_t* as) { +    
asm_x64_pop_r64(as, REG_RBX); +    asm_x64_write_byte_1(as, OPCODE_LEAVE); +    asm_x64_ret(as); +} + +void asm_x64_push_arg(asm_x64_t* as, int src_arg_num) { +    assert(0); +    asm_x64_push_disp(as, REG_RBP, 8 + src_arg_num * WORD_SIZE); +} + +void asm_x64_mov_arg_to_r32(asm_x64_t* as, int src_arg_num, int dest_r32) { +    assert(0); +    //asm_x64_mov_disp_to_r32(as, REG_RBP, 8 + src_arg_num * WORD_SIZE, dest_r32); +} + +void asm_x64_mov_r32_to_arg(asm_x64_t* as, int src_r32, int dest_arg_num) { +    assert(0); +    //asm_x64_mov_r32_to_disp(as, src_r32, REG_RBP, 8 + dest_arg_num * WORD_SIZE); +} + +static int asm_x64_local_offset_from_ebp(int local_num) +{ +    return -(local_num + 1) * WORD_SIZE; +} + +void asm_x64_mov_local_to_r64(asm_x64_t* as, int src_local_num, int dest_r64) { +    asm_x64_mov_disp_to_r64(as, REG_RBP, asm_x64_local_offset_from_ebp(src_local_num), dest_r64); +} + +void asm_x64_mov_r64_to_local(asm_x64_t* as, int src_r64, int dest_local_num) { +    asm_x64_mov_r64_to_disp(as, src_r64, REG_RBP, asm_x64_local_offset_from_ebp(dest_local_num)); +} + +void asm_x64_mov_local_addr_to_r64(asm_x64_t* as, int local_num, int dest_r64) { +    int offset = asm_x64_local_offset_from_ebp(local_num); +    if (offset == 0) { +        asm_x64_mov_r64_to_r64(as, REG_RBP, dest_r64); +    } else { +        asm_x64_lea_disp_to_r64(as, REG_RBP, offset, dest_r64); +    } +} + +void asm_x64_push_local(asm_x64_t* as, int local_num) { +    asm_x64_push_disp(as, REG_RBP, asm_x64_local_offset_from_ebp(local_num)); +} + +void asm_x64_push_local_addr(asm_x64_t* as, int local_num, int temp_r64) +{ +    asm_x64_mov_r64_to_r64(as, REG_RBP, temp_r64); +    asm_x64_add_i32_to_r32(as, asm_x64_local_offset_from_ebp(local_num), temp_r64); +    asm_x64_push_r64(as, temp_r64); +} + +/* +   can't use these because code might be relocated when resized + +void asm_x64_call(asm_x64_t* as, void* func) +{ +    asm_x64_sub_i32_from_r32(as, 8, REG_RSP); +    asm_x64_write_byte_1(as, 
OPCODE_CALL_REL32); +    asm_x64_write_word32(as, func - (void*)(as->code_cur + 4)); +    asm_x64_mov_r64_to_r64(as, REG_RBP, REG_RSP); +} + +void asm_x64_call_i1(asm_x64_t* as, void* func, int i1) +{ +    asm_x64_sub_i32_from_r32(as, 8, REG_RSP); +    asm_x64_sub_i32_from_r32(as, 12, REG_RSP); +    asm_x64_push_i32(as, i1); +    asm_x64_write_byte_1(as, OPCODE_CALL_REL32); +    asm_x64_write_word32(as, func - (void*)(as->code_cur + 4)); +    asm_x64_add_i32_to_r32(as, 16, REG_RSP); +    asm_x64_mov_r64_to_r64(as, REG_RBP, REG_RSP); +} +*/ + +void asm_x64_call_ind(asm_x64_t* as, void *ptr, int temp_r64) { +    /* +    asm_x64_mov_i64_to_r64_optimised(as, (int64_t)ptr, temp_r64); +    asm_x64_write_byte_2(as, OPCODE_CALL_RM32, MODRM_R64(2) | MODRM_RM_REG | MODRM_RM_R64(temp_r64)); +    */ +    // this reduces code size by 2 bytes per call, but doesn't seem to speed it up at all +    asm_x64_write_byte_1(as, OPCODE_CALL_REL32); +    asm_x64_write_word32(as, ptr - (void*)(as->code_base + as->code_offset + 4)); +} diff --git a/py/asmx64.h b/py/asmx64.h new file mode 100644 index 000000000..4871dbff8 --- /dev/null +++ b/py/asmx64.h @@ -0,0 +1,76 @@ +#define ASM_X64_PASS_1 (1) +#define ASM_X64_PASS_2 (2) +#define ASM_X64_PASS_3 (3) + +#define REG_RAX (0) +#define REG_RCX (1) +#define REG_RDX (2) +#define REG_RBX (3) +#define REG_RSP (4) +#define REG_RBP (5) +#define REG_RSI (6) +#define REG_RDI (7) + +// condition codes, used for jcc and setcc (desipite their j-name!) 
+#define JCC_JB  (0x2) // below, unsigned +#define JCC_JZ  (0x4) +#define JCC_JE  (0x4) +#define JCC_JNZ (0x5) +#define JCC_JNE (0x5) +#define JCC_JL  (0xc) // less, signed + +#define REG_RET REG_RAX +#define REG_ARG_1 REG_RDI +#define REG_ARG_2 REG_RSI +#define REG_ARG_3 REG_RDX + +typedef struct _asm_x64_t asm_x64_t; + +asm_x64_t* asm_x64_new(); +void asm_x64_free(asm_x64_t* as, bool free_code); +void asm_x64_start_pass(asm_x64_t *as, int pass); +void asm_x64_end_pass(asm_x64_t *as); +uint asm_x64_get_code_size(asm_x64_t* as); +void* asm_x64_get_code(asm_x64_t* as); + +void asm_x64_nop(asm_x64_t* as); +void asm_x64_push_r64(asm_x64_t* as, int src_r64); +void asm_x64_push_i32(asm_x64_t* as, int src_i32); // will be sign extended to 64 bits +void asm_x64_push_disp(asm_x64_t* as, int src_r32, int src_offset); +void asm_x64_pop_r64(asm_x64_t* as, int dest_r64); +void asm_x64_mov_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64); +void asm_x64_mov_r32_to_disp(asm_x64_t* as, int src_r32, int dest_r32, int dest_disp); +void asm_x64_mov_disp_to_r32(asm_x64_t* as, int src_r32, int src_disp, int dest_r32); +void asm_x64_mov_i32_to_r64(asm_x64_t* as, int src_i32, int dest_r64); +void asm_x64_mov_i64_to_r64(asm_x64_t* as, int64_t src_i64, int dest_r64); +void asm_x64_mov_i32_to_disp(asm_x64_t* as, int src_i32, int dest_r32, int dest_disp); +void asm_x64_mov_i64_to_r64_optimised(asm_x64_t *as, int64_t src_i64, int dest_r64); +void asm_x64_xor_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64); +void asm_x64_add_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64); +void asm_x64_add_i32_to_r32(asm_x64_t* as, int src_i32, int dest_r32); +void asm_x64_sub_r32_from_r32(asm_x64_t* as, int src_r32, int dest_r32); +void asm_x64_sub_i32_from_r32(asm_x64_t* as, int src_i32, int dest_r32); +void asm_x64_shl_r32_by_imm(asm_x64_t* as, int r32, int imm); +void asm_x64_shr_r32_by_imm(asm_x64_t* as, int r32, int imm); +void asm_x64_sar_r32_by_imm(asm_x64_t* as, int r32, int imm); +void 
asm_x64_cmp_r64_with_r64(asm_x64_t* as, int src_r64_a, int src_r64_b); +void asm_x64_cmp_r32_with_disp(asm_x64_t* as, int src_r32_a, int src_r32_b, int src_disp_b); +void asm_x64_cmp_disp_with_r32(asm_x64_t* as, int src_r32_a, int src_disp_a, int src_r32_b); +void asm_x64_cmp_i32_with_r32(asm_x64_t* as, int src_i32, int src_r32); +void asm_x64_test_r8_with_r8(asm_x64_t* as, int src_r64_a, int src_r64_b); +void asm_x64_setcc_r8(asm_x64_t* as, int jcc_type, int dest_r8); +int asm_x64_label_new(asm_x64_t* as); +void asm_x64_label_assign(asm_x64_t* as, int label); +void asm_x64_jmp_label(asm_x64_t* as, int label); +void asm_x64_jcc_label(asm_x64_t* as, int jcc_type, int label); +void asm_x64_entry(asm_x64_t* as, int num_locals); +void asm_x64_exit(asm_x64_t* as); +void asm_x64_push_arg(asm_x64_t* as, int src_arg_num); +void asm_x64_mov_arg_to_r32(asm_x64_t* as, int src_arg_num, int dest_r32); +void asm_x64_mov_r32_to_arg(asm_x64_t* as, int src_r32, int dest_arg_num); +void asm_x64_mov_local_to_r64(asm_x64_t* as, int src_local_num, int dest_r64); +void asm_x64_mov_r64_to_local(asm_x64_t* as, int src_r64, int dest_local_num); +void asm_x64_mov_local_addr_to_r64(asm_x64_t* as, int local_num, int dest_r64); +void asm_x64_push_local(asm_x64_t* as, int local_num); +void asm_x64_push_local_addr(asm_x64_t* as, int local_num, int temp_r32); +void asm_x64_call_ind(asm_x64_t* as, void* ptr, int temp_r32); diff --git a/py/bc.c b/py/bc.c new file mode 100644 index 000000000..1edd911ab --- /dev/null +++ b/py/bc.c @@ -0,0 +1,272 @@ +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> + +#include "misc.h" +#include "machine.h" +#include "runtime.h" +#include "bc.h" + +#define DECODE_UINT do { unum = *ip++; if (unum > 127) { unum = ((unum & 0x3f) << 8) | (*ip++); } } while (0) +#define DECODE_QSTR do { qstr = *ip++; if (qstr > 127) { qstr = ((qstr & 0x3f) << 8) | (*ip++); } } while (0) +#define PUSH(val) *--sp = (val) +#define POP() 
(*sp++) + +py_obj_t py_execute_byte_code(byte *code, uint len, py_obj_t *args, uint n_args) { +    byte *ip = code; +    py_obj_t stack[10]; +    py_obj_t *sp = &stack[10]; // stack grows down, sp points to top of stack +    machine_uint_t unum; +    machine_int_t snum; +    qstr qstr; +    py_obj_t obj1, obj2; +    py_obj_t fast0 = NULL, fast1 = NULL, fast2 = NULL, fastn[4] = {NULL, NULL, NULL, NULL}; + +    // init args +    for (int i = 0; i < n_args; i++) { +        if (i == 0) { +            fast0 = args[0]; +        } else if (i == 1) { +            fast1 = args[1]; +        } else if (i == 2) { +            fast2 = args[2]; +        } else { +            assert(i - 3 < 4); +            fastn[i - 3] = args[i]; +        } +    } + +    // execute byte code +    for (;;) { +        int op = *ip++; +        switch (op) { +            case PYBC_LOAD_CONST_FALSE: +                PUSH(py_const_false); +                break; + +            case PYBC_LOAD_CONST_NONE: +                PUSH(py_const_none); +                break; + +            case PYBC_LOAD_CONST_TRUE: +                PUSH(py_const_true); +                break; + +            case PYBC_LOAD_CONST_SMALL_INT: +                snum = ip[0] | (ip[1] << 8); +                if (snum & 0x8000) { +                    snum |= ~0xffff; +                } +                ip += 2; +                PUSH((py_obj_t)(snum << 1 | 1)); +                break; + +            case PYBC_LOAD_CONST_ID: +                DECODE_QSTR; +                PUSH(rt_load_const_str(qstr)); // TODO +                break; + +            case PYBC_LOAD_CONST_STRING: +                DECODE_QSTR; +                PUSH(rt_load_const_str(qstr)); +                break; + +            case PYBC_LOAD_FAST_0: +                PUSH(fast0); +                break; + +            case PYBC_LOAD_FAST_1: +                PUSH(fast1); +                break; + +            case PYBC_LOAD_FAST_2: +                PUSH(fast2); +               
 break; + +            case PYBC_LOAD_FAST_N: +                DECODE_UINT; +                PUSH(fastn[unum - 3]); +                break; + +            case PYBC_LOAD_NAME: +                DECODE_QSTR; +                PUSH(rt_load_name(qstr)); +                break; + +            case PYBC_LOAD_GLOBAL: +                DECODE_QSTR; +                PUSH(rt_load_global(qstr)); +                break; + +            case PYBC_LOAD_ATTR: +                DECODE_QSTR; +                *sp = rt_load_attr(*sp, qstr); +                break; + +            case PYBC_LOAD_METHOD: +                DECODE_QSTR; +                sp -= 1; +                rt_load_method(sp[1], qstr, sp); +                break; + +            case PYBC_LOAD_BUILD_CLASS: +                PUSH(rt_load_build_class()); +                break; + +            case PYBC_STORE_FAST_0: +                fast0 = POP(); +                break; + +            case PYBC_STORE_FAST_1: +                fast1 = POP(); +                break; + +            case PYBC_STORE_FAST_2: +                fast2 = POP(); +                break; + +            case PYBC_STORE_FAST_N: +                DECODE_UINT; +                fastn[unum - 3] = POP(); +                break; + +            case PYBC_STORE_NAME: +                DECODE_QSTR; +                rt_store_name(qstr, POP()); +                break; + +            case PYBC_STORE_SUBSCR: +                rt_store_subscr(sp[1], sp[0], sp[2]); +                sp += 3; +                break; + +            case PYBC_DUP_TOP: +                obj1 = *sp; +                PUSH(obj1); +                break; + +            case PYBC_DUP_TOP_TWO: +                sp -= 2; +                sp[0] = sp[2]; +                sp[1] = sp[3]; +                break; + +            case PYBC_POP_TOP: +                ++sp; +                break; + +            case PYBC_ROT_THREE: +                obj1 = sp[0]; +                sp[0] = sp[1]; +                sp[1] 
= sp[2]; +                sp[2] = obj1; +                break; + +            case PYBC_JUMP: +                DECODE_UINT; +                ip = code + unum; +                break; + +            case PYBC_POP_JUMP_IF_FALSE: +                DECODE_UINT; +                if (!rt_is_true(POP())) { +                    ip = code + unum; +                } +                break; + +            case PYBC_SETUP_LOOP: +                DECODE_UINT; +                break; + +            case PYBC_POP_BLOCK: +                break; + +            case PYBC_BINARY_OP: +                unum = *ip++; +                obj2 = POP(); +                obj1 = *sp; +                *sp = rt_binary_op(unum, obj1, obj2); +                break; + +            case PYBC_COMPARE_OP: +                unum = *ip++; +                obj2 = POP(); +                obj1 = *sp; +                *sp = rt_compare_op(unum, obj1, obj2); +                break; + +            case PYBC_BUILD_LIST: +                DECODE_UINT; +                obj1 = rt_build_list(unum, sp); +                sp += unum - 1; +                *sp = obj1; +                break; + +            case PYBC_BUILD_MAP: +                DECODE_UINT; +                PUSH(rt_build_map(unum)); +                break; + +            case PYBC_STORE_MAP: +                sp += 2; +                rt_store_map(sp[0], sp[-2], sp[-1]); +                break; + +            case PYBC_BUILD_SET: +                DECODE_UINT; +                obj1 = rt_build_set(unum, sp); +                sp += unum - 1; +                *sp = obj1; +                break; + +            case PYBC_MAKE_FUNCTION: +                DECODE_UINT; +                PUSH(rt_make_function_from_id(unum)); +                break; + +            case PYBC_CALL_FUNCTION: +                DECODE_UINT; +                assert((unum & 0xff00) == 0); // n_keyword +                // switch on n_positional +                if ((unum & 0xff) == 0) { +           
         *sp = rt_call_function_0(*sp); +                } else if ((unum & 0xff) == 1) { +                    obj1 = *sp++; // the single argument +                    *sp = rt_call_function_1(*sp, obj1); +                } else if ((unum & 0xff) == 2) { +                    obj2 = *sp++; // the second argument +                    obj1 = *sp++; // the first argument +                    *sp = rt_call_function_2(*sp, obj1, obj2); +                } else { +                    assert(0); +                } +                break; + +            case PYBC_CALL_METHOD: +                DECODE_UINT; +                assert((unum & 0xff00) == 0); // n_keyword +                // switch on n_positional +                if ((unum & 0xff) == 0) { +                    obj1 = *sp++; // the self object (or NULL) +                    *sp = rt_call_method_1(*sp, obj1); +                } else if ((unum & 0xff) == 1) { +                    obj2 = *sp++; // the first argument +                    obj1 = *sp++; // the self object (or NULL) +                    *sp = rt_call_function_2(*sp, obj1, obj2); +                } else { +                    assert(0); +                } +                break; + +            case PYBC_RETURN_VALUE: +                return *sp; + +            default: +                printf("code %p, offset %u, byte code 0x%02x not implemented\n", code, (uint)(ip - code), op); +                assert(0); +                return py_const_none; +        } +    } +} diff --git a/py/bc.h b/py/bc.h new file mode 100644 index 000000000..f09843a96 --- /dev/null +++ b/py/bc.h @@ -0,0 +1,97 @@ +#define PYBC_LOAD_CONST_FALSE   (0x10) +#define PYBC_LOAD_CONST_NONE    (0x11) +#define PYBC_LOAD_CONST_TRUE    (0x12) +#define PYBC_LOAD_CONST_SMALL_INT   (0x13) // int +#define PYBC_LOAD_CONST_INT     (0x14) // qstr +#define PYBC_LOAD_CONST_DEC     (0x15) // qstr +#define PYBC_LOAD_CONST_ID      (0x16) // qstr +#define PYBC_LOAD_CONST_BYTES   (0x17) // qstr +#define 
PYBC_LOAD_CONST_STRING  (0x18) // qstr + +#define PYBC_LOAD_FAST_0        (0x20) +#define PYBC_LOAD_FAST_1        (0x21) +#define PYBC_LOAD_FAST_2        (0x22) +#define PYBC_LOAD_FAST_N        (0x23) // uint +#define PYBC_LOAD_NAME          (0x24) // qstr +#define PYBC_LOAD_GLOBAL        (0x25) // qstr +#define PYBC_LOAD_ATTR          (0x26) // qstr +#define PYBC_LOAD_METHOD        (0x27) // qstr +#define PYBC_LOAD_BUILD_CLASS   (0x28) + +#define PYBC_STORE_FAST_0       (0x30) +#define PYBC_STORE_FAST_1       (0x31) +#define PYBC_STORE_FAST_2       (0x32) +#define PYBC_STORE_FAST_N       (0x33) // uint +#define PYBC_STORE_NAME         (0x34) // qstr +#define PYBC_STORE_GLOBAL       (0x35) // qstr +#define PYBC_STORE_ATTR         (0x36) // qstr +#define PYBC_STORE_LOCALS       (0x37) +#define PYBC_STORE_SUBSCR       (0x38) + +#define PYBC_DELETE_FAST_N      (0x39) // uint +#define PYBC_DELETE_NAME        (0x3a) // qstr +#define PYBC_DELETE_GLOBAL      (0x3b) // qstr +#define PYBC_DELETE_DEREF       (0x3c) // qstr +#define PYBC_DELETE_ATTR        (0x3d) // qstr +#define PYBC_DELETE_SUBSCR      (0x3e) + +#define PYBC_DUP_TOP            (0x40) +#define PYBC_DUP_TOP_TWO        (0x41) +#define PYBC_POP_TOP            (0x42) +#define PYBC_ROT_TWO            (0x43) +#define PYBC_ROT_THREE          (0x44) +#define PYBC_JUMP               (0x45) // pos +#define PYBC_POP_JUMP_IF_TRUE   (0x46) // pos +#define PYBC_POP_JUMP_IF_FALSE  (0x47) // pos +#define PYBC_JUMP_IF_TRUE_OR_POP    (0x48) // pos +#define PYBC_JUMP_IF_FALSE_OR_POP   (0x49) // pos +#define PYBC_SETUP_LOOP         (0x4a) // pos +#define PYBC_BREAK_LOOP         (0x4b) // pos +#define PYBC_CONTINUE_LOOP      (0x4c) // pos +#define PYBC_SETUP_WITH         (0x4d) // pos +#define PYBC_WITH_CLEANUP       (0x4e) +#define PYBC_SETUP_EXCEPT       (0x4f) // pos +#define PYBC_SETUP_FINALLY      (0x50) // pos +#define PYBC_END_FINALLY        (0x51) +#define PYBC_GET_ITER           (0x52) +#define PYBC_FOR_ITER           
(0x53) // pos +#define PYBC_POP_BLOCK          (0x54) +#define PYBC_POP_EXCEPT         (0x55) + +#define PYBC_UNARY_OP           (0x60) // byte +#define PYBC_BINARY_OP          (0x61) // byte +#define PYBC_COMPARE_OP         (0x62) // byte + +#define PYBC_BUILD_TUPLE        (0x70) // uint +#define PYBC_BUILD_LIST         (0x71) // uint +#define PYBC_LIST_APPEND        (0x72) // uint +#define PYBC_BUILD_MAP          (0x73) // uint +#define PYBC_STORE_MAP          (0x74) +#define PYBC_MAP_ADD            (0x75) // uint +#define PYBC_BUILD_SET          (0x76) // uint +#define PYBC_SET_ADD            (0x77) // uint +#define PYBC_BUILD_SLICE        (0x78) // uint +#define PYBC_UNPACK_SEQUENCE    (0x79) // uint +#define PYBC_UNPACK_EX          (0x7a) // uint + +#define PYBC_RETURN_VALUE       (0x80) +#define PYBC_RAISE_VARARGS      (0x81) // uint +#define PYBC_YIELD_VALUE        (0x82) +#define PYBC_YIELD_FROM         (0x83) + +#define PYBC_MAKE_FUNCTION      (0x90) // uint +#define PYBC_MAKE_CLOSURE       (0x91) // uint? 
+#define PYBC_CALL_FUNCTION      (0x92) // uint +#define PYBC_CALL_FUNCTION_VAR  (0x93) // uint +#define PYBC_CALL_FUNCTION_KW   (0x94) // uint +#define PYBC_CALL_FUNCTION_VAR_KW   (0x95) // uint +#define PYBC_CALL_METHOD        (0x96) // uint +#define PYBC_CALL_METHOD_VAR    (0x97) // uint +#define PYBC_CALL_METHOD_KW     (0x98) // uint +#define PYBC_CALL_METHOD_VAR_KW (0x99) // uint + +#define PYBC_IMPORT_NAME (0xe0) +#define PYBC_IMPORT_FROM (0xe1) +#define PYBC_IMPORT_STAR (0xe2) + +py_obj_t py_execute_byte_code(byte *code, uint len, py_obj_t *args, uint n_args); diff --git a/py/compile.c b/py/compile.c new file mode 100644 index 000000000..0e6ce4443 --- /dev/null +++ b/py/compile.c @@ -0,0 +1,2510 @@ +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> + +#include "misc.h" +#include "lexer.h" +#include "machine.h" +#include "parse.h" +#include "scope.h" +#include "compile.h" +#include "runtime.h" +#include "emit.h" + +// TODO need to mangle __attr names + +typedef enum { +    PN_none = 0, +#define DEF_RULE(rule, comp, kind, arg...) PN_##rule, +#include "grammar.h" +#undef DEF_RULE +    PN_maximum_number_of, +} pn_kind_t; + +#define EMIT(fun, arg...) 
(emit_##fun(comp->emit, ##arg)) + +typedef struct _compiler_t { +    qstr qstr___class__; +    qstr qstr___locals__; +    qstr qstr___name__; +    qstr qstr___module__; +    qstr qstr___qualname__; +    qstr qstr___doc__; +    qstr qstr_assertion_error; + +    pass_kind_t pass; + +    int break_label; +    int continue_label; +    int except_nest_level; + +    int n_arg_keyword; +    bool have_star_arg; +    bool have_dbl_star_arg; +    bool have_bare_star; +    int param_pass; +    int param_pass_num_dict_params; +    int param_pass_num_default_params; + +    scope_t *scope_head; +    scope_t *scope_cur; + +    emitter_t *emit; +} compiler_t; + +py_parse_node_t fold_constants(py_parse_node_t pn) { +    if (PY_PARSE_NODE_IS_STRUCT(pn)) { +        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; +        int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + +        // fold arguments first +        for (int i = 0; i < n; i++) { +            pns->nodes[i] = fold_constants(pns->nodes[i]); +        } + +        switch (PY_PARSE_NODE_STRUCT_KIND(pns)) { +            case PN_shift_expr: +                if (n == 3 && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) { +                    int arg0 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]); +                    int arg1 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[2]); +                    if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_DBL_LESS)) { +                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 << arg1); // XXX can overflow; enabled only to compare with CPython +                    } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_DBL_MORE)) { +                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 >> arg1); +                    } else { +                        // shouldn't happen +                        assert(0); +                    } +                } +                break; + +            case 
PN_arith_expr: +                // XXX can overflow; enabled only to compare with CPython +                if (n == 3 && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) { +                    int arg0 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]); +                    int arg1 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[2]); +                    if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_PLUS)) { +                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 + arg1); +                    } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_MINUS)) { +                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 - arg1); +                    } else { +                        // shouldn't happen +                        assert(0); +                    } +                } +                break; + +            case PN_term: +                // XXX can overflow; enabled only to compare with CPython +                if (n == 3 && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) { +                    int arg0 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]); +                    int arg1 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[2]); +                    if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_STAR)) { +                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 * arg1); +                    } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_SLASH)) { +                        ; // pass +                    //} else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_)) { +                        //pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 - arg1); +                    } else { +                        // shouldn't happen +                        assert(0); +                    } +                } +                break; + +            case PN_factor_2: +                if 
(PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[1])) { +                    machine_int_t arg = PY_PARSE_NODE_LEAF_ARG(pns->nodes[1]); +                    if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], PY_TOKEN_OP_PLUS)) { +                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg); +                    } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], PY_TOKEN_OP_MINUS)) { +                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, -arg); +                    } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], PY_TOKEN_OP_TILDE)) { +                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, ~arg); +                    } else { +                        // shouldn't happen +                        assert(0); +                    } +                } +                break; + +            case PN_power: +                // XXX can overflow; enabled only to compare with CPython +                if (PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && PY_PARSE_NODE_IS_NULL(pns->nodes[1]) && !PY_PARSE_NODE_IS_NULL(pns->nodes[2])) { +                    py_parse_node_struct_t* pns2 = (py_parse_node_struct_t*)pns->nodes[2]; +                    if (PY_PARSE_NODE_IS_SMALL_INT(pns2->nodes[0])) { +                        int power = PY_PARSE_NODE_LEAF_ARG(pns2->nodes[0]); +                        if (power >= 0) { +                            int ans = 1; +                            int base = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]); +                            for (; power > 0; power--) { +                                ans *= base; +                            } +                            pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, ans); +                        } +                    } +                } +                break; +        } +    } + +    return pn; +} + +void compile_node(compiler_t *comp, py_parse_node_t pn); + +scope_t *scope_new_and_link(compiler_t *comp, scope_kind_t kind, py_parse_node_t pn) 
{ +    scope_t *scope = scope_new(kind, pn); +    scope->parent = comp->scope_cur; +    scope->next = NULL; +    if (comp->scope_head == NULL) { +        comp->scope_head = scope; +    } else { +        scope_t *s = comp->scope_head; +        while (s->next != NULL) { +            s = s->next; +        } +        s->next = scope; +    } +    return scope; +} + +int list_len(py_parse_node_t pn, int pn_kind) { +    if (PY_PARSE_NODE_IS_NULL(pn)) { +        return 0; +    } else if (PY_PARSE_NODE_IS_LEAF(pn)) { +        return 1; +    } else { +        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; +        if (PY_PARSE_NODE_STRUCT_KIND(pns) != pn_kind) { +            return 1; +        } else { +            return PY_PARSE_NODE_STRUCT_NUM_NODES(pns); +        } +    } +} + +void apply_to_single_or_list(compiler_t *comp, py_parse_node_t pn, int pn_list_kind, void (*f)(compiler_t*, py_parse_node_t)) { +    if (PY_PARSE_NODE_IS_STRUCT(pn) && PY_PARSE_NODE_STRUCT_KIND((py_parse_node_struct_t*)pn) == pn_list_kind) { +        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; +        int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); +        for (int i = 0; i < num_nodes; i++) { +            f(comp, pns->nodes[i]); +        } +    } else if (!PY_PARSE_NODE_IS_NULL(pn)) { +        f(comp, pn); +    } +} + +int list_get(py_parse_node_t *pn, int pn_kind, py_parse_node_t **nodes) { +    if (PY_PARSE_NODE_IS_NULL(*pn)) { +        *nodes = NULL; +        return 0; +    } else if (PY_PARSE_NODE_IS_LEAF(*pn)) { +        *nodes = pn; +        return 1; +    } else { +        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)(*pn); +        if (PY_PARSE_NODE_STRUCT_KIND(pns) != pn_kind) { +            *nodes = pn; +            return 1; +        } else { +            *nodes = pns->nodes; +            return PY_PARSE_NODE_STRUCT_NUM_NODES(pns); +        } +    } +} + +void compile_do_nothing(compiler_t *comp, py_parse_node_struct_t *pns) { +} + +void 
compile_generic_all_nodes(compiler_t *comp, py_parse_node_struct_t *pns) { +    int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); +    for (int i = 0; i < num_nodes; i++) { +        compile_node(comp, pns->nodes[i]); +    } +} + +bool c_tuple_is_const(py_parse_node_t pn) { +    if (!PY_PARSE_NODE_IS_LEAF(pn)) { +        return false; +    } +    if (PY_PARSE_NODE_IS_ID(pn)) { +        return false; +    } +    return true; +} + +void c_tuple_emit_const(compiler_t *comp, py_parse_node_t pn) { +    assert(PY_PARSE_NODE_IS_LEAF(pn)); +    int arg = PY_PARSE_NODE_LEAF_ARG(pn); +    switch (PY_PARSE_NODE_LEAF_KIND(pn)) { +        case PY_PARSE_NODE_ID: assert(0); +        case PY_PARSE_NODE_SMALL_INT: EMIT(load_const_verbatim_int, arg); break; +        case PY_PARSE_NODE_INTEGER: EMIT(load_const_verbatim_str, qstr_str(arg)); break; +        case PY_PARSE_NODE_DECIMAL: EMIT(load_const_verbatim_str, qstr_str(arg)); break; +        case PY_PARSE_NODE_STRING: EMIT(load_const_verbatim_quoted_str, arg, false); break; +        case PY_PARSE_NODE_BYTES: EMIT(load_const_verbatim_quoted_str, arg, true); break; +        case PY_PARSE_NODE_TOKEN: +            switch (arg) { +                case PY_TOKEN_KW_FALSE: EMIT(load_const_verbatim_str, "False"); break; +                case PY_TOKEN_KW_NONE: EMIT(load_const_verbatim_str, "None"); break; +                case PY_TOKEN_KW_TRUE: EMIT(load_const_verbatim_str, "True"); break; +                default: assert(0); +            } +            break; +        default: assert(0); +    } +} + +// funnelling all tuple creations through this function and all this constant stuff is purely to agree with CPython +void c_tuple(compiler_t *comp, py_parse_node_t pn, py_parse_node_struct_t *pns_list) { +    int n = 0; +    if (pns_list != NULL) { +        n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns_list); +    } +    int total = n; +    bool is_const = true; +    if (!PY_PARSE_NODE_IS_NULL(pn)) { +        total += 1; +        if 
(!c_tuple_is_const(pn)) { +            is_const = false; +        } +    } +    for (int i = 0; i < n; i++) { +        if (!c_tuple_is_const(pns_list->nodes[i])) { +            is_const = false; +            break; +        } +    } +    if (total > 0 && is_const) { +        bool need_comma = false; +        EMIT(load_const_verbatim_start); +        EMIT(load_const_verbatim_str, "("); +        if (!PY_PARSE_NODE_IS_NULL(pn)) { +            c_tuple_emit_const(comp, pn); +            need_comma = true; +        } +        for (int i = 0; i < n; i++) { +            if (need_comma) { +                EMIT(load_const_verbatim_str, ", "); +            } +            c_tuple_emit_const(comp, pns_list->nodes[i]); +            need_comma = true; +        } +        if (total == 1) { +            EMIT(load_const_verbatim_str, ",)"); +        } else { +            EMIT(load_const_verbatim_str, ")"); +        } +        EMIT(load_const_verbatim_end); +    } else { +        if (!PY_PARSE_NODE_IS_NULL(pn)) { +            compile_node(comp, pn); +        } +        for (int i = 0; i < n; i++) { +            compile_node(comp, pns_list->nodes[i]); +        } +        EMIT(build_tuple, total); +    } +} + +void compile_generic_tuple(compiler_t *comp, py_parse_node_struct_t *pns) { +    // a simple tuple expression +    /* +    int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); +    for (int i = 0; i < n; i++) { +        compile_node(comp, pns->nodes[i]); +    } +    EMIT(build_tuple, n); +    */ +    c_tuple(comp, PY_PARSE_NODE_NULL, pns); +} + +bool node_is_const_false(py_parse_node_t pn) { +    return PY_PARSE_NODE_IS_TOKEN_KIND(pn, PY_TOKEN_KW_FALSE); +    // untested: || (PY_PARSE_NODE_IS_SMALL_INT(pn) && PY_PARSE_NODE_LEAF_ARG(pn) == 1); +} + +bool node_is_const_true(py_parse_node_t pn) { +    return PY_PARSE_NODE_IS_TOKEN_KIND(pn, PY_TOKEN_KW_TRUE) || (PY_PARSE_NODE_IS_SMALL_INT(pn) && PY_PARSE_NODE_LEAF_ARG(pn) == 1); +} + +// having c_if_cond_2 and the is_nested variable is 
purely to match with CPython, which doesn't fully optimise not's +void c_if_cond_2(compiler_t *comp, py_parse_node_t pn, bool jump_if, int label, bool is_nested) { +    if (node_is_const_false(pn)) { +        if (jump_if == false) { +            EMIT(jump, label); +        } +        return; +    } else if (node_is_const_true(pn)) { +        if (jump_if == true) { +            EMIT(jump, label); +        } +        return; +    } else if (PY_PARSE_NODE_IS_STRUCT(pn)) { +        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; +        int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); +        if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_or_test) { +            if (jump_if == false) { +                int label2 = EMIT(label_new); +                for (int i = 0; i < n - 1; i++) { +                    c_if_cond_2(comp, pns->nodes[i], true, label2, true); +                } +                c_if_cond_2(comp, pns->nodes[n - 1], false, label, true); +                EMIT(label_assign, label2); +            } else { +                for (int i = 0; i < n; i++) { +                    c_if_cond_2(comp, pns->nodes[i], true, label, true); +                } +            } +            return; +        } else if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_and_test) { +            if (jump_if == false) { +                for (int i = 0; i < n; i++) { +                    c_if_cond_2(comp, pns->nodes[i], false, label, true); +                } +            } else { +                int label2 = EMIT(label_new); +                for (int i = 0; i < n - 1; i++) { +                    c_if_cond_2(comp, pns->nodes[i], false, label2, true); +                } +                c_if_cond_2(comp, pns->nodes[n - 1], true, label, true); +                EMIT(label_assign, label2); +            } +            return; +        } else if (!is_nested && PY_PARSE_NODE_STRUCT_KIND(pns) == PN_not_test_2) { +            c_if_cond_2(comp, pns->nodes[0], !jump_if, label, true); +            return; +  
      } +    } + +    // nothing special, fall back to default compiling for node and jump +    compile_node(comp, pn); +    if (jump_if == false) { +        EMIT(pop_jump_if_false, label); +    } else { +        EMIT(pop_jump_if_true, label); +    } +} + +void c_if_cond(compiler_t *comp, py_parse_node_t pn, bool jump_if, int label) { +    c_if_cond_2(comp, pn, jump_if, label, false); +} + +typedef enum { ASSIGN_STORE, ASSIGN_AUG_LOAD, ASSIGN_AUG_STORE } assign_kind_t; +void c_assign(compiler_t *comp, py_parse_node_t pn, assign_kind_t kind); + +void c_assign_power(compiler_t *comp, py_parse_node_struct_t *pns, assign_kind_t assign_kind) { +    if (assign_kind != ASSIGN_AUG_STORE) { +        compile_node(comp, pns->nodes[0]); +    } + +    if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) { +        py_parse_node_struct_t *pns1 = (py_parse_node_struct_t*)pns->nodes[1]; +        if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_power_trailers) { +            int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns1); +            if (assign_kind != ASSIGN_AUG_STORE) { +                for (int i = 0; i < n - 1; i++) { +                    compile_node(comp, pns1->nodes[i]); +                } +            } +            assert(PY_PARSE_NODE_IS_STRUCT(pns1->nodes[n - 1])); +            pns1 = (py_parse_node_struct_t*)pns1->nodes[n - 1]; +        } +        if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_paren) { +            printf("SyntaxError: can't assign to function call\n"); +            return; +        } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_bracket) { +            if (assign_kind == ASSIGN_AUG_STORE) { +                EMIT(rot_three); +                EMIT(store_subscr); +            } else { +                compile_node(comp, pns1->nodes[0]); +                if (assign_kind == ASSIGN_AUG_LOAD) { +                    EMIT(dup_top_two); +                    EMIT(binary_op, RT_BINARY_OP_SUBSCR); +                } else { +                    EMIT(store_subscr); +      
          } +            } +        } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_period) { +            assert(PY_PARSE_NODE_IS_ID(pns1->nodes[0])); +            if (assign_kind == ASSIGN_AUG_LOAD) { +                EMIT(dup_top); +                EMIT(load_attr, PY_PARSE_NODE_LEAF_ARG(pns1->nodes[0])); +            } else { +                if (assign_kind == ASSIGN_AUG_STORE) { +                    EMIT(rot_two); +                } +                EMIT(store_attr, PY_PARSE_NODE_LEAF_ARG(pns1->nodes[0])); +            } +        } else { +            // shouldn't happen +            assert(0); +        } +    } else { +        // shouldn't happen +        assert(0); +    } + +    if (!PY_PARSE_NODE_IS_NULL(pns->nodes[2])) { +        // SyntaxError, cannot assign +        assert(0); +    } +} + +void c_assign_tuple(compiler_t *comp, int n, py_parse_node_t *nodes) { +    assert(n >= 0); +    int have_star_index = -1; +    for (int i = 0; i < n; i++) { +        if (PY_PARSE_NODE_IS_STRUCT_KIND(nodes[i], PN_star_expr)) { +            if (have_star_index < 0) { +                EMIT(unpack_ex, i, n - i - 1); +                have_star_index = i; +            } else { +                printf("SyntaxError: two starred expressions in assignment\n"); +                return; +            } +        } +    } +    if (have_star_index < 0) { +        EMIT(unpack_sequence, n); +    } +    for (int i = 0; i < n; i++) { +        if (i == have_star_index) { +            c_assign(comp, ((py_parse_node_struct_t*)nodes[i])->nodes[0], ASSIGN_STORE); +        } else { +            c_assign(comp, nodes[i], ASSIGN_STORE); +        } +    } +} + +// assigns top of stack to pn +void c_assign(compiler_t *comp, py_parse_node_t pn, assign_kind_t assign_kind) { +    tail_recursion: +    if (PY_PARSE_NODE_IS_NULL(pn)) { +        assert(0); +    } else if (PY_PARSE_NODE_IS_LEAF(pn)) { +        if (PY_PARSE_NODE_IS_ID(pn)) { +            int arg = PY_PARSE_NODE_LEAF_ARG(pn); +      
      switch (assign_kind) { +                case ASSIGN_STORE: +                case ASSIGN_AUG_STORE: +                    emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, arg); +                    break; +                case ASSIGN_AUG_LOAD: +                    emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, arg); +                    break; +            } +        } else { +            printf("SyntaxError: can't assign to literal\n"); +            return; +        } +    } else { +        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; +        switch (PY_PARSE_NODE_STRUCT_KIND(pns)) { +            case PN_power: +                // lhs is an index or attribute +                c_assign_power(comp, pns, assign_kind); +                break; + +            case PN_testlist_star_expr: +            case PN_exprlist: +                // lhs is a tuple +                if (assign_kind != ASSIGN_STORE) { +                    goto bad_aug; +                } +                c_assign_tuple(comp, PY_PARSE_NODE_STRUCT_NUM_NODES(pns), pns->nodes); +                break; + +            case PN_atom_paren: +                // lhs is something in parenthesis +                if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { +                    // empty tuple +                    printf("SyntaxError: can't assign to ()\n"); +                    return; +                } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_comp)) { +                    pns = (py_parse_node_struct_t*)pns->nodes[0]; +                    goto testlist_comp; +                } else { +                    // parenthesis around 1 item, is just that item +                    pn = pns->nodes[0]; +                    goto tail_recursion; +                } +                break; + +            case PN_atom_bracket: +                // lhs is something in brackets +                if (assign_kind != ASSIGN_STORE) { +               
     goto bad_aug; +                } +                if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { +                    // empty list, assignment allowed +                    c_assign_tuple(comp, 0, NULL); +                } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_comp)) { +                    pns = (py_parse_node_struct_t*)pns->nodes[0]; +                    goto testlist_comp; +                } else { +                    // brackets around 1 item +                    c_assign_tuple(comp, 1, &pns->nodes[0]); +                } +                break; + +            default: +                printf("unknown assign, %u\n", (uint)PY_PARSE_NODE_STRUCT_KIND(pns)); +                assert(0); +        } +        return; + +        testlist_comp: +        // lhs is a sequence +        if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) { +            py_parse_node_struct_t *pns2 = (py_parse_node_struct_t*)pns->nodes[1]; +            if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_testlist_comp_3b) { +                // sequence of one item, with trailing comma +                assert(PY_PARSE_NODE_IS_NULL(pns2->nodes[0])); +                c_assign_tuple(comp, 1, &pns->nodes[0]); +            } else if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_testlist_comp_3c) { +                // sequence of many items +                // TODO call c_assign_tuple instead +                int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns2); +                EMIT(unpack_sequence, 1 + n); +                c_assign(comp, pns->nodes[0], ASSIGN_STORE); +                for (int i = 0; i < n; i++) { +                    c_assign(comp, pns2->nodes[i], ASSIGN_STORE); +                } +            } else if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_comp_for) { +                // TODO not implemented +                assert(0); +            } else { +                // sequence with 2 items +                goto sequence_with_2_items; +            } +        } else { +            // sequence 
with 2 items +            sequence_with_2_items: +            c_assign_tuple(comp, 2, pns->nodes); +        } +        return; +    } +    return; + +    bad_aug: +    printf("SyntaxError: illegal expression for augmented assignment\n"); +} + +// stuff for lambda and comprehensions and generators +void close_over_variables_etc(compiler_t *comp, scope_t *this_scope, int n_dict_params, int n_default_params) { +    // make closed over variables, if any +    int nfree = 0; +    if (comp->scope_cur->kind != SCOPE_MODULE) { +        for (int i = 0; i < this_scope->id_info_len; i++) { +            id_info_t *id_info = &this_scope->id_info[i]; +            if (id_info->kind == ID_INFO_KIND_FREE) { +                EMIT(load_closure, id_info->qstr); +                nfree += 1; +            } +        } +    } +    if (nfree > 0) { +        EMIT(build_tuple, nfree); +    } + +    // make the function/closure +    if (nfree == 0) { +        EMIT(make_function, this_scope, n_dict_params, n_default_params); +    } else { +        EMIT(make_closure, this_scope, n_dict_params, n_default_params); +    } +} + +void compile_funcdef_param(compiler_t *comp, py_parse_node_t pn) { +    assert(PY_PARSE_NODE_IS_STRUCT(pn)); +    py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; +    if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_typedargslist_name) { +        if (!PY_PARSE_NODE_IS_NULL(pns->nodes[2])) { +            // this parameter has a default value +            // in CPython, None (and True, False?) 
as default parameters are loaded with LOAD_NAME; don't understandy why +            if (comp->have_bare_star) { +                comp->param_pass_num_dict_params += 1; +                if (comp->param_pass == 1) { +                    EMIT(load_const_id, PY_PARSE_NODE_LEAF_ARG(pns->nodes[0])); +                    compile_node(comp, pns->nodes[2]); +                } +            } else { +                comp->param_pass_num_default_params += 1; +                if (comp->param_pass == 2) { +                    compile_node(comp, pns->nodes[2]); +                } +            } +        } +    } else if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_typedargslist_star) { +        if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { +            // bare star +            comp->have_bare_star = true; +        } +    } +} + +// leaves function object on stack +// returns function name +qstr compile_funcdef_helper(compiler_t *comp, py_parse_node_struct_t *pns) { +    if (comp->pass == PASS_1) { +        // create a new scope for this function +        scope_t *s = scope_new_and_link(comp, SCOPE_FUNCTION, (py_parse_node_t)pns); +        // store the function scope so the compiling function can use it at each pass +        pns->nodes[4] = (py_parse_node_t)s; +    } + +    // save variables (probably don't need to do this, since we can't have nested definitions..?) 
+    bool old_have_bare_star = comp->have_bare_star; +    int old_param_pass = comp->param_pass; +    int old_param_pass_num_dict_params = comp->param_pass_num_dict_params; +    int old_param_pass_num_default_params = comp->param_pass_num_default_params; + +    // compile default parameters +    comp->have_bare_star = false; +    comp->param_pass = 1; // pass 1 does any default parameters after bare star +    comp->param_pass_num_dict_params = 0; +    comp->param_pass_num_default_params = 0; +    apply_to_single_or_list(comp, pns->nodes[1], PN_typedargslist, compile_funcdef_param); +    comp->have_bare_star = false; +    comp->param_pass = 2; // pass 2 does any default parameters before bare star +    comp->param_pass_num_dict_params = 0; +    comp->param_pass_num_default_params = 0; +    apply_to_single_or_list(comp, pns->nodes[1], PN_typedargslist, compile_funcdef_param); + +    // get the scope for this function +    scope_t *fscope = (scope_t*)pns->nodes[4]; + +    // make the function +    close_over_variables_etc(comp, fscope, comp->param_pass_num_dict_params, comp->param_pass_num_default_params); + +    // restore variables +    comp->have_bare_star = old_have_bare_star; +    comp->param_pass = old_param_pass; +    comp->param_pass_num_dict_params = old_param_pass_num_dict_params; +    comp->param_pass_num_default_params = old_param_pass_num_default_params; + +    // return its name (the 'f' in "def f(...):") +    return fscope->simple_name; +} + +// leaves class object on stack +// returns class name +qstr compile_classdef_helper(compiler_t *comp, py_parse_node_struct_t *pns) { +    if (comp->pass == PASS_1) { +        // create a new scope for this class +        scope_t *s = scope_new_and_link(comp, SCOPE_CLASS, (py_parse_node_t)pns); +        // store the class scope so the compiling function can use it at each pass +        pns->nodes[3] = (py_parse_node_t)s; +    } + +    EMIT(load_build_class); + +    // scope for this class +    scope_t *cscope = 
(scope_t*)pns->nodes[3]; + +    // compile the class +    close_over_variables_etc(comp, cscope, 0, 0); + +    // get its name +    EMIT(load_const_id, cscope->simple_name); + +    // nodes[1] has parent classes, if any +    if (PY_PARSE_NODE_IS_NULL(pns->nodes[1])) { +        // no parent classes +        EMIT(call_function, 2, 0, false, false); +    } else { +        // have a parent class or classes +        // TODO what if we have, eg, *a or **a in the parent list? +        compile_node(comp, pns->nodes[1]); +        EMIT(call_function, 2 + list_len(pns->nodes[1], PN_arglist), 0, false, false); +    } + +    // return its name (the 'C' in class C(...):") +    return cscope->simple_name; +} + +void compile_decorated(compiler_t *comp, py_parse_node_struct_t *pns) { +    // get the list of decorators +    py_parse_node_t *nodes; +    int n = list_get(&pns->nodes[0], PN_decorators, &nodes); + +    // load each decorator +    for (int i = 0; i < n; i++) { +        assert(PY_PARSE_NODE_IS_STRUCT_KIND(nodes[i], PN_decorator)); // should be +        py_parse_node_struct_t *pns_decorator = (py_parse_node_struct_t*)nodes[i]; +        py_parse_node_t *nodes2; +        int n2 = list_get(&pns_decorator->nodes[0], PN_dotted_name, &nodes2); +        compile_node(comp, nodes2[0]); +        for (int i = 1; i < n2; i++) { +            EMIT(load_attr, PY_PARSE_NODE_LEAF_ARG(nodes2[i])); +        } +        if (!PY_PARSE_NODE_IS_NULL(pns_decorator->nodes[1])) { +            // first call the function with these arguments +            compile_node(comp, pns_decorator->nodes[1]); +        } +    } + +    // compile the body (funcdef or classdef) and get its name +    py_parse_node_struct_t *pns_body = (py_parse_node_struct_t*)pns->nodes[1]; +    qstr body_name = 0; +    if (PY_PARSE_NODE_STRUCT_KIND(pns_body) == PN_funcdef) { +        body_name = compile_funcdef_helper(comp, pns_body); +    } else if (PY_PARSE_NODE_STRUCT_KIND(pns_body) == PN_classdef) { +        body_name = 
compile_classdef_helper(comp, pns_body); +    } else { +        // shouldn't happen +        assert(0); +    } + +    // call each decorator +    for (int i = 0; i < n; i++) { +        EMIT(call_function, 1, 0, false, false); +    } + +    // store func/class object into name +    emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, body_name); +} + +void compile_funcdef(compiler_t *comp, py_parse_node_struct_t *pns) { +    qstr fname = compile_funcdef_helper(comp, pns); +    // store function object into function name +    emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, fname); +} + +void c_del_stmt(compiler_t *comp, py_parse_node_t pn) { +    if (PY_PARSE_NODE_IS_ID(pn)) { +        emit_common_delete_id(comp->pass, comp->scope_cur, comp->emit, PY_PARSE_NODE_LEAF_ARG(pn)); +    } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_power)) { +        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; + +        compile_node(comp, pns->nodes[0]); // base of the power node + +        if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) { +            py_parse_node_struct_t *pns1 = (py_parse_node_struct_t*)pns->nodes[1]; +            if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_power_trailers) { +                int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns1); +                for (int i = 0; i < n - 1; i++) { +                    compile_node(comp, pns1->nodes[i]); +                } +                assert(PY_PARSE_NODE_IS_STRUCT(pns1->nodes[n - 1])); +                pns1 = (py_parse_node_struct_t*)pns1->nodes[n - 1]; +            } +            if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_paren) { +                // SyntaxError: can't delete a function call +                assert(0); +            } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_bracket) { +                compile_node(comp, pns1->nodes[0]); +                EMIT(delete_subscr); +            } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_period) { +                
assert(PY_PARSE_NODE_IS_ID(pns1->nodes[0])); +                EMIT(delete_attr, PY_PARSE_NODE_LEAF_ARG(pns1->nodes[0])); +            } else { +                // shouldn't happen +                assert(0); +            } +        } else { +            // shouldn't happen +            assert(0); +        } + +        if (!PY_PARSE_NODE_IS_NULL(pns->nodes[2])) { +            // SyntaxError, cannot delete +            assert(0); +        } +    } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_atom_paren)) { +        pn = ((py_parse_node_struct_t*)pn)->nodes[0]; +        if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_testlist_comp)) { +            py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; +            // TODO perhaps factorise testlist_comp code with other uses of PN_testlist_comp + +            if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) { +                py_parse_node_struct_t *pns1 = (py_parse_node_struct_t*)pns->nodes[1]; +                if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_testlist_comp_3b) { +                    // sequence of one item, with trailing comma +                    assert(PY_PARSE_NODE_IS_NULL(pns1->nodes[0])); +                    c_del_stmt(comp, pns->nodes[0]); +                } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_testlist_comp_3c) { +                    // sequence of many items +                    int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns1); +                    c_del_stmt(comp, pns->nodes[0]); +                    for (int i = 0; i < n; i++) { +                        c_del_stmt(comp, pns1->nodes[i]); +                    } +                } else if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_comp_for) { +                    // TODO not implemented; can't del comprehension? 
+                    assert(0); +                } else { +                    // sequence with 2 items +                    goto sequence_with_2_items; +                } +            } else { +                // sequence with 2 items +                sequence_with_2_items: +                c_del_stmt(comp, pns->nodes[0]); +                c_del_stmt(comp, pns->nodes[1]); +            } +        } else { +            // tuple with 1 element +            c_del_stmt(comp, pn); +        } +    } else { +        // not implemented +        assert(0); +    } +} + +void compile_del_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { +    apply_to_single_or_list(comp, pns->nodes[0], PN_exprlist, c_del_stmt); +} + +void compile_break_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { +    if (comp->break_label == 0) { +        printf("ERROR: cannot break from here\n"); +    } +    EMIT(break_loop, comp->break_label); +} + +void compile_continue_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { +    if (comp->continue_label == 0) { +        printf("ERROR: cannot continue from here\n"); +    } +    if (comp->except_nest_level > 0) { +        EMIT(continue_loop, comp->continue_label); +    } else { +        EMIT(jump, comp->continue_label); +    } +} + +void compile_return_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { +    if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { +        EMIT(load_const_tok, PY_TOKEN_KW_NONE); +    } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_test_if_expr)) { +        // special case when returning an if-expression; to match CPython optimisation +        py_parse_node_struct_t *pns_test_if_expr = (py_parse_node_struct_t*)pns->nodes[0]; +        py_parse_node_struct_t *pns_test_if_else = (py_parse_node_struct_t*)pns_test_if_expr->nodes[1]; + +        int l_fail = EMIT(label_new); +        c_if_cond(comp, pns_test_if_else->nodes[0], false, l_fail); // condition +        compile_node(comp, pns_test_if_expr->nodes[0]); // 
success value +        EMIT(return_value); +        EMIT(label_assign, l_fail); +        compile_node(comp, pns_test_if_else->nodes[1]); // failure value +    } else { +        compile_node(comp, pns->nodes[0]); +    } +    EMIT(return_value); +} + +void compile_yield_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { +    compile_node(comp, pns->nodes[0]); +    EMIT(pop_top); +} + +void compile_raise_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { +    if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { +        // raise +        EMIT(raise_varargs, 0); +    } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_raise_stmt_arg)) { +        // raise x from y +        pns = (py_parse_node_struct_t*)pns->nodes[0]; +        compile_node(comp, pns->nodes[0]); +        compile_node(comp, pns->nodes[1]); +        EMIT(raise_varargs, 2); +    } else { +        // raise x +        compile_node(comp, pns->nodes[0]); +        EMIT(raise_varargs, 1); +    } +} + +// q1 holds the base, q2 the full name +// eg   a -> q1=q2=a +//      a.b.c -> q1=a, q2=a.b.c +void do_import_name(compiler_t *comp, py_parse_node_t pn, qstr *q1, qstr *q2) { +    bool is_as = false; +    if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_dotted_as_name)) { +        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; +        // a name of the form x as y; unwrap it +        *q1 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[1]); +        pn = pns->nodes[0]; +        is_as = true; +    } +    if (PY_PARSE_NODE_IS_ID(pn)) { +        // just a simple name +        *q2 = PY_PARSE_NODE_LEAF_ARG(pn); +        if (!is_as) { +            *q1 = *q2; +        } +        EMIT(import_name, *q2); +    } else if (PY_PARSE_NODE_IS_STRUCT(pn)) { +        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; +        if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_dotted_name) { +            // a name of the form a.b.c +            if (!is_as) { +                *q1 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]); +            } +        
    int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); +            int len = n - 1; +            for (int i = 0; i < n; i++) { +                len += strlen(qstr_str(PY_PARSE_NODE_LEAF_ARG(pns->nodes[i]))); +            } +            char *str = m_new(char, len + 1); +            str[0] = 0; +            for (int i = 0; i < n; i++) { +                if (i > 0) { +                    strcat(str, "."); +                } +                strcat(str, qstr_str(PY_PARSE_NODE_LEAF_ARG(pns->nodes[i]))); +            } +            *q2 = qstr_from_str_take(str); +            EMIT(import_name, *q2); +            if (is_as) { +                for (int i = 1; i < n; i++) { +                    EMIT(load_attr, PY_PARSE_NODE_LEAF_ARG(pns->nodes[i])); +                } +            } +        } else { +            // TODO not implemented +            assert(0); +        } +    } else { +        // TODO not implemented +        assert(0); +    } +} + +void compile_dotted_as_name(compiler_t *comp, py_parse_node_t pn) { +    EMIT(load_const_small_int, 0); // ?? +    EMIT(load_const_tok, PY_TOKEN_KW_NONE); +    qstr q1, q2; +    do_import_name(comp, pn, &q1, &q2); +    emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, q1); +} + +void compile_import_name(compiler_t *comp, py_parse_node_struct_t *pns) { +    apply_to_single_or_list(comp, pns->nodes[0], PN_dotted_as_names, compile_dotted_as_name); +} + +void compile_import_from(compiler_t *comp, py_parse_node_struct_t *pns) { +    if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_STAR)) { +        EMIT(load_const_small_int, 0); // what's this for?? 
+        EMIT(load_const_verbatim_start); +        EMIT(load_const_verbatim_str, "('*',)"); +        EMIT(load_const_verbatim_end); +        qstr dummy_q, id1; +        do_import_name(comp, pns->nodes[0], &dummy_q, &id1); +        EMIT(import_star); +    } else { +        py_parse_node_t *pn_nodes; +        int n = list_get(&pns->nodes[1], PN_import_as_names, &pn_nodes); + +        EMIT(load_const_small_int, 0); // what's this for?? +        EMIT(load_const_verbatim_start); +        EMIT(load_const_verbatim_str, "("); +        for (int i = 0; i < n; i++) { +            assert(PY_PARSE_NODE_IS_STRUCT_KIND(pn_nodes[i], PN_import_as_name)); +            py_parse_node_struct_t *pns3 = (py_parse_node_struct_t*)pn_nodes[i]; +            qstr id2 = PY_PARSE_NODE_LEAF_ARG(pns3->nodes[0]); // should be id +            if (i > 0) { +                EMIT(load_const_verbatim_str, ", "); +            } +            EMIT(load_const_verbatim_str, "'"); +            EMIT(load_const_verbatim_str, qstr_str(id2)); +            EMIT(load_const_verbatim_str, "'"); +        } +        if (n == 1) { +            EMIT(load_const_verbatim_str, ","); +        } +        EMIT(load_const_verbatim_str, ")"); +        EMIT(load_const_verbatim_end); +        qstr dummy_q, id1; +        do_import_name(comp, pns->nodes[0], &dummy_q, &id1); +        for (int i = 0; i < n; i++) { +            assert(PY_PARSE_NODE_IS_STRUCT_KIND(pn_nodes[i], PN_import_as_name)); +            py_parse_node_struct_t *pns3 = (py_parse_node_struct_t*)pn_nodes[i]; +            qstr id2 = PY_PARSE_NODE_LEAF_ARG(pns3->nodes[0]); // should be id +            EMIT(import_from, id2); +            if (PY_PARSE_NODE_IS_NULL(pns3->nodes[1])) { +                emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, id2); +            } else { +                emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, PY_PARSE_NODE_LEAF_ARG(pns3->nodes[1])); +            } +        } +        EMIT(pop_top); +    } +} + 
+void compile_global_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { +    if (PY_PARSE_NODE_IS_LEAF(pns->nodes[0])) { +        emit_common_declare_global(comp->pass, comp->scope_cur, PY_PARSE_NODE_LEAF_ARG(pns->nodes[0])); +    } else { +        pns = (py_parse_node_struct_t*)pns->nodes[0]; +        int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); +        for (int i = 0; i < num_nodes; i++) { +            emit_common_declare_global(comp->pass, comp->scope_cur, PY_PARSE_NODE_LEAF_ARG(pns->nodes[i])); +        } +    } +} + +void compile_nonlocal_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { +    if (PY_PARSE_NODE_IS_LEAF(pns->nodes[0])) { +        emit_common_declare_nonlocal(comp->pass, comp->scope_cur, PY_PARSE_NODE_LEAF_ARG(pns->nodes[0])); +    } else { +        pns = (py_parse_node_struct_t*)pns->nodes[0]; +        int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); +        for (int i = 0; i < num_nodes; i++) { +            emit_common_declare_nonlocal(comp->pass, comp->scope_cur, PY_PARSE_NODE_LEAF_ARG(pns->nodes[i])); +        } +    } +} + +void compile_assert_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { +    int l_end = EMIT(label_new); +    c_if_cond(comp, pns->nodes[0], true, l_end); +    emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, comp->qstr_assertion_error); +    if (!PY_PARSE_NODE_IS_NULL(pns->nodes[1])) { +        // assertion message +        compile_node(comp, pns->nodes[1]); +        EMIT(call_function, 1, 0, false, false); +    } +    EMIT(raise_varargs, 1); +    EMIT(label_assign, l_end); +} + +void compile_if_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { +    // TODO proper and/or short circuiting + +    int l_end = EMIT(label_new); + +    int l_fail = EMIT(label_new); +    c_if_cond(comp, pns->nodes[0], false, l_fail); // if condition + +    compile_node(comp, pns->nodes[1]); // if block +    //if (!(PY_PARSE_NODE_IS_NULL(pns->nodes[2]) && 
PY_PARSE_NODE_IS_NULL(pns->nodes[3]))) { // optimisation; doesn't align with CPython +        // jump over elif/else blocks if they exist +        if (!emit_last_emit_was_return_value(comp->emit)) { // simple optimisation to align with CPython +            EMIT(jump, l_end); +        } +    //} +    EMIT(label_assign, l_fail); + +    if (!PY_PARSE_NODE_IS_NULL(pns->nodes[2])) { +        // compile elif blocks + +        py_parse_node_struct_t *pns_elif = (py_parse_node_struct_t*)pns->nodes[2]; + +        if (PY_PARSE_NODE_STRUCT_KIND(pns_elif) == PN_if_stmt_elif_list) { +            // multiple elif blocks + +            int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns_elif); +            for (int i = 0; i < n; i++) { +                py_parse_node_struct_t *pns_elif2 = (py_parse_node_struct_t*)pns_elif->nodes[i]; +                l_fail = EMIT(label_new); +                c_if_cond(comp, pns_elif2->nodes[0], false, l_fail); // elif condition + +                compile_node(comp, pns_elif2->nodes[1]); // elif block +                if (!emit_last_emit_was_return_value(comp->emit)) { // simple optimisation to align with CPython +                    EMIT(jump, l_end); +                } +                EMIT(label_assign, l_fail); +            } + +        } else { +            // a single elif block + +            l_fail = EMIT(label_new); +            c_if_cond(comp, pns_elif->nodes[0], false, l_fail); // elif condition + +            compile_node(comp, pns_elif->nodes[1]); // elif block +            if (!emit_last_emit_was_return_value(comp->emit)) { // simple optimisation to align with CPython +                EMIT(jump, l_end); +            } +            EMIT(label_assign, l_fail); +        } +    } + +    // compile else block +    compile_node(comp, pns->nodes[3]); // can be null + +    EMIT(label_assign, l_end); +} + +void compile_while_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { +    int old_break_label = comp->break_label; +    int old_continue_label = 
comp->continue_label; + +    int done_label = EMIT(label_new); +    int end_label = EMIT(label_new); +    int break_label = EMIT(label_new); +    int continue_label = EMIT(label_new); + +    comp->break_label = break_label; +    comp->continue_label = continue_label; + +    EMIT(setup_loop, end_label); +    EMIT(label_assign, continue_label); +    c_if_cond(comp, pns->nodes[0], false, done_label); // condition +    compile_node(comp, pns->nodes[1]); // body +    if (!emit_last_emit_was_return_value(comp->emit)) { +        EMIT(jump, continue_label); +    } +    EMIT(label_assign, done_label); + +    // break/continue apply to outer loop (if any) in the else block +    comp->break_label = old_break_label; +    comp->continue_label = old_continue_label; + +    // CPython does not emit POP_BLOCK if the condition was a constant; don't undertand why +    // this is a small hack to agree with CPython +    if (!node_is_const_true(pns->nodes[0])) { +        EMIT(pop_block); +    } + +    compile_node(comp, pns->nodes[2]); // else + +    EMIT(label_assign, break_label); +    EMIT(label_assign, end_label); +} + +void compile_for_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { +    int old_break_label = comp->break_label; +    int old_continue_label = comp->continue_label; + +    int for_label = EMIT(label_new); +    int pop_label = EMIT(label_new); +    int end_label = EMIT(label_new); + +    int break_label = EMIT(label_new); + +    comp->continue_label = for_label; +    comp->break_label = break_label; + +    EMIT(setup_loop, end_label); +    compile_node(comp, pns->nodes[1]); // iterator +    EMIT(get_iter); +    EMIT(label_assign, for_label); +    EMIT(for_iter, pop_label); +    c_assign(comp, pns->nodes[0], ASSIGN_STORE); // variable +    compile_node(comp, pns->nodes[2]); // body +    if (!emit_last_emit_was_return_value(comp->emit)) { +        EMIT(jump, for_label); +    } +    EMIT(label_assign, pop_label); +    EMIT(for_iter_end); + +    // break/continue 
apply to outer loop (if any) in the else block +    comp->break_label = old_break_label; +    comp->continue_label = old_continue_label; + +    EMIT(pop_block); + +    compile_node(comp, pns->nodes[3]); // else (not tested) + +    EMIT(label_assign, break_label); +    EMIT(label_assign, end_label); +} + +void compile_try_except(compiler_t *comp, py_parse_node_t pn_body, int n_except, py_parse_node_t *pn_excepts, py_parse_node_t pn_else) { +    // this function is a bit of a hack at the moment +    // don't understand how the stack works with exceptions, so we force it to return to the correct value + +    // setup code +    int stack_size = EMIT(get_stack_size); +    int l1 = EMIT(label_new); +    int success_label = EMIT(label_new); +    comp->except_nest_level += 1; // for correct handling of continue +    EMIT(setup_except, l1); +    compile_node(comp, pn_body); // body +    EMIT(pop_block); +    EMIT(jump, success_label); +    EMIT(label_assign, l1); +    int l2 = EMIT(label_new); + +    for (int i = 0; i < n_except; i++) { +        assert(PY_PARSE_NODE_IS_STRUCT_KIND(pn_excepts[i], PN_try_stmt_except)); // should be +        py_parse_node_struct_t *pns_except = (py_parse_node_struct_t*)pn_excepts[i]; + +        qstr qstr_exception_local = 0; +        int end_finally_label = EMIT(label_new); + +        if (PY_PARSE_NODE_IS_NULL(pns_except->nodes[0])) { +            // this is a catch all exception handler +            if (i + 1 != n_except) { +                printf("SyntaxError: default 'except:' must be last\n"); +                return; +            } +        } else { +            // this exception handler requires a match to a certain type of exception +            py_parse_node_t pns_exception_expr = pns_except->nodes[0]; +            if (PY_PARSE_NODE_IS_STRUCT(pns_exception_expr)) { +                py_parse_node_struct_t *pns3 = (py_parse_node_struct_t*)pns_exception_expr; +                if (PY_PARSE_NODE_STRUCT_KIND(pns3) == PN_try_stmt_as_name) { + 
                   // handler binds the exception to a local +                    pns_exception_expr = pns3->nodes[0]; +                    qstr_exception_local = PY_PARSE_NODE_LEAF_ARG(pns3->nodes[1]); +                } +            } +            EMIT(dup_top); +            compile_node(comp, pns_exception_expr); +            EMIT(compare_op, RT_COMPARE_OP_EXCEPTION_MATCH); +            EMIT(pop_jump_if_false, end_finally_label); +        } + +        EMIT(pop_top); + +        if (qstr_exception_local == 0) { +            EMIT(pop_top); +        } else { +            emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, qstr_exception_local); +        } + +        EMIT(pop_top); + +        int l3; +        if (qstr_exception_local != 0) { +            l3 = EMIT(label_new); +            EMIT(setup_finally, l3); +        } +        compile_node(comp, pns_except->nodes[1]); +        if (qstr_exception_local != 0) { +            EMIT(pop_block); +        } +        EMIT(pop_except); +        if (qstr_exception_local != 0) { +            EMIT(load_const_tok, PY_TOKEN_KW_NONE); +            EMIT(label_assign, l3); +            EMIT(load_const_tok, PY_TOKEN_KW_NONE); +            emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, qstr_exception_local); +            emit_common_delete_id(comp->pass, comp->scope_cur, comp->emit, qstr_exception_local); +            EMIT(end_finally); +        } +        EMIT(jump, l2); +        EMIT(label_assign, end_finally_label); +    } + +    EMIT(end_finally); +    EMIT(label_assign, success_label); +    comp->except_nest_level -= 1; +    compile_node(comp, pn_else); // else block, can be null +    EMIT(label_assign, l2); +    EMIT(set_stack_size, stack_size); +} + +void compile_try_finally(compiler_t *comp, py_parse_node_t pn_body, int n_except, py_parse_node_t *pn_except, py_parse_node_t pn_else, py_parse_node_t pn_finally) { +    // don't understand how the stack works with exceptions, so we force it to return 
to the correct value +    int stack_size = EMIT(get_stack_size); +    int l_finally_block = EMIT(label_new); +    EMIT(setup_finally, l_finally_block); +    if (n_except == 0) { +        assert(PY_PARSE_NODE_IS_NULL(pn_else)); +        compile_node(comp, pn_body); +    } else { +        compile_try_except(comp, pn_body, n_except, pn_except, pn_else); +    } +    EMIT(pop_block); +    EMIT(load_const_tok, PY_TOKEN_KW_NONE); +    EMIT(label_assign, l_finally_block); +    compile_node(comp, pn_finally); +    EMIT(end_finally); +    EMIT(set_stack_size, stack_size); +} + +void compile_try_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { +    if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) { +        py_parse_node_struct_t *pns2 = (py_parse_node_struct_t*)pns->nodes[1]; +        if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_try_stmt_finally) { +            // just try-finally +            compile_try_finally(comp, pns->nodes[0], 0, NULL, PY_PARSE_NODE_NULL, pns2->nodes[0]); +        } else if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_try_stmt_except_and_more) { +            // try-except and possibly else and/or finally +            py_parse_node_t *pn_excepts; +            int n_except = list_get(&pns2->nodes[0], PN_try_stmt_except_list, &pn_excepts); +            if (PY_PARSE_NODE_IS_NULL(pns2->nodes[2])) { +                // no finally +                compile_try_except(comp, pns->nodes[0], n_except, pn_excepts, pns2->nodes[1]); +            } else { +                // have finally +                compile_try_finally(comp, pns->nodes[0], n_except, pn_excepts, pns2->nodes[1], ((py_parse_node_struct_t*)pns2->nodes[2])->nodes[0]); +            } +        } else { +            // just try-except +            py_parse_node_t *pn_excepts; +            int n_except = list_get(&pns->nodes[1], PN_try_stmt_except_list, &pn_excepts); +            compile_try_except(comp, pns->nodes[0], n_except, pn_excepts, PY_PARSE_NODE_NULL); +        } +    } else { +        // shouldn't 
happen +        assert(0); +    } +} + +void compile_with_stmt_helper(compiler_t *comp, int n, py_parse_node_t *nodes, py_parse_node_t body) { +    if (n == 0) { +        // no more pre-bits, compile the body of the with +        compile_node(comp, body); +    } else { +        int l_end = EMIT(label_new); +        if (PY_PARSE_NODE_IS_STRUCT_KIND(nodes[0], PN_with_item)) { +            // this pre-bit is of the form "a as b" +            py_parse_node_struct_t *pns = (py_parse_node_struct_t*)nodes[0]; +            compile_node(comp, pns->nodes[0]); +            EMIT(setup_with, l_end); +            c_assign(comp, pns->nodes[1], ASSIGN_STORE); +        } else { +            // this pre-bit is just an expression +            compile_node(comp, nodes[0]); +            EMIT(setup_with, l_end); +            EMIT(pop_top); +        } +        // compile additional pre-bits and the body +        compile_with_stmt_helper(comp, n - 1, nodes + 1, body); +        // finish this with block +        EMIT(pop_block); +        EMIT(load_const_tok, PY_TOKEN_KW_NONE); +        EMIT(label_assign, l_end); +        EMIT(with_cleanup); +        EMIT(end_finally); +    } +} + +void compile_with_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { +    // get the nodes for the pre-bit of the with (the a as b, c as d, ... 
bit) +    py_parse_node_t *nodes; +    int n = list_get(&pns->nodes[0], PN_with_stmt_list, &nodes); +    assert(n > 0); + +    // compile in a nested fashion +    compile_with_stmt_helper(comp, n, nodes, pns->nodes[1]); +} + +void compile_expr_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { +    if (PY_PARSE_NODE_IS_NULL(pns->nodes[1])) { +        if (PY_PARSE_NODE_IS_LEAF(pns->nodes[0]) && !PY_PARSE_NODE_IS_ID(pns->nodes[0])) { +            // do nothing with a lonely constant +        } else { +            compile_node(comp, pns->nodes[0]); // just an expression +            EMIT(pop_top); // discard last result since this is a statement and leaves nothing on the stack +        } +    } else { +        py_parse_node_struct_t *pns1 = (py_parse_node_struct_t*)pns->nodes[1]; +        int kind = PY_PARSE_NODE_STRUCT_KIND(pns1); +        if (kind == PN_expr_stmt_augassign) { +            c_assign(comp, pns->nodes[0], ASSIGN_AUG_LOAD); // lhs load for aug assign +            compile_node(comp, pns1->nodes[1]); // rhs +            assert(PY_PARSE_NODE_IS_TOKEN(pns1->nodes[0])); +            // note that we don't really need to implement separate inplace ops, just normal binary ops will suffice +            switch (PY_PARSE_NODE_LEAF_ARG(pns1->nodes[0])) { +                case PY_TOKEN_DEL_PIPE_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_OR); break; +                case PY_TOKEN_DEL_CARET_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_XOR); break; +                case PY_TOKEN_DEL_AMPERSAND_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_AND); break; +                case PY_TOKEN_DEL_DBL_LESS_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_LSHIFT); break; +                case PY_TOKEN_DEL_DBL_MORE_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_RSHIFT); break; +                case PY_TOKEN_DEL_PLUS_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_ADD); break; +                case PY_TOKEN_DEL_MINUS_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_SUBTRACT); break; +                
case PY_TOKEN_DEL_STAR_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_MULTIPLY); break; +                case PY_TOKEN_DEL_DBL_SLASH_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_FLOOR_DIVIDE); break; +                case PY_TOKEN_DEL_SLASH_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_TRUE_DIVIDE); break; +                case PY_TOKEN_DEL_PERCENT_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_MODULO); break; +                case PY_TOKEN_DEL_DBL_STAR_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_POWER); break; +                default: assert(0); // shouldn't happen +            } +            c_assign(comp, pns->nodes[0], ASSIGN_AUG_STORE); // lhs store for aug assign +        } else if (kind == PN_expr_stmt_assign_list) { +            int rhs = PY_PARSE_NODE_STRUCT_NUM_NODES(pns1) - 1; +            compile_node(comp, ((py_parse_node_struct_t*)pns1->nodes[rhs])->nodes[0]); // rhs +            // following CPython, we store left-most first +            if (rhs > 0) { +                EMIT(dup_top); +            } +            c_assign(comp, pns->nodes[0], ASSIGN_STORE); // lhs store +            for (int i = 0; i < rhs; i++) { +                if (i + 1 < rhs) { +                    EMIT(dup_top); +                } +                c_assign(comp, ((py_parse_node_struct_t*)pns1->nodes[i])->nodes[0], ASSIGN_STORE); // middle store +            } +        } else if (kind == PN_expr_stmt_assign) { +            if (PY_PARSE_NODE_IS_STRUCT_KIND(pns1->nodes[0], PN_testlist_star_expr) +                && PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_star_expr) +                && PY_PARSE_NODE_STRUCT_NUM_NODES((py_parse_node_struct_t*)pns1->nodes[0]) == 2 +                && PY_PARSE_NODE_STRUCT_NUM_NODES((py_parse_node_struct_t*)pns->nodes[0]) == 2) { +                // optimisation for a, b = c, d; to match CPython's optimisation +                py_parse_node_struct_t* pns10 = (py_parse_node_struct_t*)pns1->nodes[0]; +                py_parse_node_struct_t* pns0 = 
(py_parse_node_struct_t*)pns->nodes[0]; +                compile_node(comp, pns10->nodes[0]); // rhs +                compile_node(comp, pns10->nodes[1]); // rhs +                EMIT(rot_two); +                c_assign(comp, pns0->nodes[0], ASSIGN_STORE); // lhs store +                c_assign(comp, pns0->nodes[1], ASSIGN_STORE); // lhs store +            } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns1->nodes[0], PN_testlist_star_expr) +                && PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_star_expr) +                && PY_PARSE_NODE_STRUCT_NUM_NODES((py_parse_node_struct_t*)pns1->nodes[0]) == 3 +                && PY_PARSE_NODE_STRUCT_NUM_NODES((py_parse_node_struct_t*)pns->nodes[0]) == 3) { +                // optimisation for a, b, c = d, e, f; to match CPython's optimisation +                py_parse_node_struct_t* pns10 = (py_parse_node_struct_t*)pns1->nodes[0]; +                py_parse_node_struct_t* pns0 = (py_parse_node_struct_t*)pns->nodes[0]; +                compile_node(comp, pns10->nodes[0]); // rhs +                compile_node(comp, pns10->nodes[1]); // rhs +                compile_node(comp, pns10->nodes[2]); // rhs +                EMIT(rot_three); +                EMIT(rot_two); +                c_assign(comp, pns0->nodes[0], ASSIGN_STORE); // lhs store +                c_assign(comp, pns0->nodes[1], ASSIGN_STORE); // lhs store +                c_assign(comp, pns0->nodes[2], ASSIGN_STORE); // lhs store +            } else { +                compile_node(comp, pns1->nodes[0]); // rhs +                c_assign(comp, pns->nodes[0], ASSIGN_STORE); // lhs store +            } +        } else { +            // shouldn't happen +            assert(0); +        } +    } +} + +void c_binary_op(compiler_t *comp, py_parse_node_struct_t *pns, rt_binary_op_t binary_op) { +    int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); +    compile_node(comp, pns->nodes[0]); +    for (int i = 1; i < num_nodes; i += 1) { +        compile_node(comp, 
pns->nodes[i]); +        EMIT(binary_op, binary_op); +    } +} + +void compile_test_if_expr(compiler_t *comp, py_parse_node_struct_t *pns) { +    assert(PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[1], PN_test_if_else)); +    py_parse_node_struct_t *pns_test_if_else = (py_parse_node_struct_t*)pns->nodes[1]; + +    int stack_size = EMIT(get_stack_size); +    int l_fail = EMIT(label_new); +    int l_end = EMIT(label_new); +    c_if_cond(comp, pns_test_if_else->nodes[0], false, l_fail); // condition +    compile_node(comp, pns->nodes[0]); // success value +    EMIT(jump, l_end); +    EMIT(label_assign, l_fail); +    EMIT(set_stack_size, stack_size); // force stack size reset +    compile_node(comp, pns_test_if_else->nodes[1]); // failure value +    EMIT(label_assign, l_end); +} + +void compile_lambdef(compiler_t *comp, py_parse_node_struct_t *pns) { +    // TODO default params etc for lambda; possibly just use funcdef code +    //py_parse_node_t pn_params = pns->nodes[0]; +    //py_parse_node_t pn_body = pns->nodes[1]; + +    if (comp->pass == PASS_1) { +        // create a new scope for this lambda +        scope_t *s = scope_new_and_link(comp, SCOPE_LAMBDA, (py_parse_node_t)pns); +        // store the lambda scope so the compiling function (this one) can use it at each pass +        pns->nodes[2] = (py_parse_node_t)s; +    } + +    // get the scope for this lambda +    scope_t *this_scope = (scope_t*)pns->nodes[2]; + +    // make the lambda +    close_over_variables_etc(comp, this_scope, 0, 0); +} + +void compile_or_test(compiler_t *comp, py_parse_node_struct_t *pns) { +    int l_end = EMIT(label_new); +    int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); +    for (int i = 0; i < n; i += 1) { +        compile_node(comp, pns->nodes[i]); +        if (i + 1 < n) { +            EMIT(jump_if_true_or_pop, l_end); +        } +    } +    EMIT(label_assign, l_end); +} + +void compile_and_test(compiler_t *comp, py_parse_node_struct_t *pns) { +    int l_end = EMIT(label_new); +    int 
n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); +    for (int i = 0; i < n; i += 1) { +        compile_node(comp, pns->nodes[i]); +        if (i + 1 < n) { +            EMIT(jump_if_false_or_pop, l_end); +        } +    } +    EMIT(label_assign, l_end); +} + +void compile_not_test_2(compiler_t *comp, py_parse_node_struct_t *pns) { +    compile_node(comp, pns->nodes[0]); +    EMIT(unary_op, RT_UNARY_OP_NOT); +} + +void compile_comparison(compiler_t *comp, py_parse_node_struct_t *pns) { +    int stack_size = EMIT(get_stack_size); +    int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); +    compile_node(comp, pns->nodes[0]); +    bool multi = (num_nodes > 3); +    int l_fail = 0; +    if (multi) { +        l_fail = EMIT(label_new); +    } +    for (int i = 1; i + 1 < num_nodes; i += 2) { +        compile_node(comp, pns->nodes[i + 1]); +        if (i + 2 < num_nodes) { +            EMIT(dup_top); +            EMIT(rot_three); +        } +        if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_LESS)) { +            EMIT(compare_op, RT_COMPARE_OP_LESS); +        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_MORE)) { +            EMIT(compare_op, RT_COMPARE_OP_MORE); +        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_DBL_EQUAL)) { +            EMIT(compare_op, RT_COMPARE_OP_EQUAL); +        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_LESS_EQUAL)) { +            EMIT(compare_op, RT_COMPARE_OP_LESS_EQUAL); +        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_MORE_EQUAL)) { +            EMIT(compare_op, RT_COMPARE_OP_MORE_EQUAL); +        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_NOT_EQUAL)) { +            EMIT(compare_op, RT_COMPARE_OP_NOT_EQUAL); +        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_KW_IN)) { +            EMIT(compare_op, RT_COMPARE_OP_IN); +        } else if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[i])) { +            
py_parse_node_struct_t *pns2 = (py_parse_node_struct_t*)pns->nodes[i]; +            int kind = PY_PARSE_NODE_STRUCT_KIND(pns2); +            if (kind == PN_comp_op_not_in) { +                EMIT(compare_op, RT_COMPARE_OP_NOT_IN); +            } else if (kind == PN_comp_op_is) { +                if (PY_PARSE_NODE_IS_NULL(pns2->nodes[0])) { +                    EMIT(compare_op, RT_COMPARE_OP_IS); +                } else { +                    EMIT(compare_op, RT_COMPARE_OP_IS_NOT); +                } +            } else { +                // shouldn't happen +                assert(0); +            } +        } else { +            // shouldn't happen +            assert(0); +        } +        if (i + 2 < num_nodes) { +            EMIT(jump_if_false_or_pop, l_fail); +        } +    } +    if (multi) { +        int l_end = EMIT(label_new); +        EMIT(jump, l_end); +        EMIT(label_assign, l_fail); +        EMIT(rot_two); +        EMIT(pop_top); +        EMIT(label_assign, l_end); +        EMIT(set_stack_size, stack_size + 1); // force stack size +    } +} + +void compile_star_expr(compiler_t *comp, py_parse_node_struct_t *pns) { +    // TODO +    assert(0); +    compile_node(comp, pns->nodes[0]); +    //EMIT(unary_op, "UNARY_STAR"); +} + +void compile_expr(compiler_t *comp, py_parse_node_struct_t *pns) { +    c_binary_op(comp, pns, RT_BINARY_OP_OR); +} + +void compile_xor_expr(compiler_t *comp, py_parse_node_struct_t *pns) { +    c_binary_op(comp, pns, RT_BINARY_OP_XOR); +} + +void compile_and_expr(compiler_t *comp, py_parse_node_struct_t *pns) { +    c_binary_op(comp, pns, RT_BINARY_OP_AND); +} + +void compile_shift_expr(compiler_t *comp, py_parse_node_struct_t *pns) { +    int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); +    compile_node(comp, pns->nodes[0]); +    for (int i = 1; i + 1 < num_nodes; i += 2) { +        compile_node(comp, pns->nodes[i + 1]); +        if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_DBL_LESS)) { +            
EMIT(binary_op, RT_BINARY_OP_LSHIFT); +        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_DBL_MORE)) { +            EMIT(binary_op, RT_BINARY_OP_RSHIFT); +        } else { +            // shouldn't happen +            assert(0); +        } +    } +} + +void compile_arith_expr(compiler_t *comp, py_parse_node_struct_t *pns) { +    int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); +    compile_node(comp, pns->nodes[0]); +    for (int i = 1; i + 1 < num_nodes; i += 2) { +        compile_node(comp, pns->nodes[i + 1]); +        if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_PLUS)) { +            EMIT(binary_op, RT_BINARY_OP_ADD); +        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_MINUS)) { +            EMIT(binary_op, RT_BINARY_OP_SUBTRACT); +        } else { +            // shouldn't happen +            assert(0); +        } +    } +} + +void compile_term(compiler_t *comp, py_parse_node_struct_t *pns) { +    int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); +    compile_node(comp, pns->nodes[0]); +    for (int i = 1; i + 1 < num_nodes; i += 2) { +        compile_node(comp, pns->nodes[i + 1]); +        if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_STAR)) { +            EMIT(binary_op, RT_BINARY_OP_MULTIPLY); +        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_DBL_SLASH)) { +            EMIT(binary_op, RT_BINARY_OP_FLOOR_DIVIDE); +        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_SLASH)) { +            EMIT(binary_op, RT_BINARY_OP_TRUE_DIVIDE); +        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_PERCENT)) { +            EMIT(binary_op, RT_BINARY_OP_MODULO); +        } else { +            // shouldn't happen +            assert(0); +        } +    } +} + +void compile_factor_2(compiler_t *comp, py_parse_node_struct_t *pns) { +    compile_node(comp, pns->nodes[1]); +    if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], 
PY_TOKEN_OP_PLUS)) { +        EMIT(unary_op, RT_UNARY_OP_POSITIVE); +    } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], PY_TOKEN_OP_MINUS)) { +        EMIT(unary_op, RT_UNARY_OP_NEGATIVE); +    } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], PY_TOKEN_OP_TILDE)) { +        EMIT(unary_op, RT_UNARY_OP_INVERT); +    } else { +        // shouldn't happen +        assert(0); +    } +} + +void compile_trailer_paren_helper(compiler_t *comp, py_parse_node_struct_t *pns, bool is_method_call) { +    // function to call is on top of stack + +    int old_n_arg_keyword = comp->n_arg_keyword; +    bool old_have_star_arg = comp->have_star_arg; +    bool old_have_dbl_star_arg = comp->have_dbl_star_arg; +    comp->n_arg_keyword = 0; +    comp->have_star_arg = false; +    comp->have_dbl_star_arg = false; + +    compile_node(comp, pns->nodes[0]); // arguments to function call; can be null + +    // compute number of positional arguments +    int n_positional = list_len(pns->nodes[0], PN_arglist) - comp->n_arg_keyword; +    if (comp->have_star_arg) { +        n_positional -= 1; +    } +    if (comp->have_dbl_star_arg) { +        n_positional -= 1; +    } + +    if (is_method_call) { +        EMIT(call_method, n_positional, comp->n_arg_keyword, comp->have_star_arg, comp->have_dbl_star_arg); +    } else { +        EMIT(call_function, n_positional, comp->n_arg_keyword, comp->have_star_arg, comp->have_dbl_star_arg); +    } + +    comp->n_arg_keyword = old_n_arg_keyword; +    comp->have_star_arg = old_have_star_arg; +    comp->have_dbl_star_arg = old_have_dbl_star_arg; +} + +void compile_power_trailers(compiler_t *comp, py_parse_node_struct_t *pns) { +    int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); +    for (int i = 0; i < num_nodes; i++) { +        if (i + 1 < num_nodes && PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[i], PN_trailer_period) && PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[i + 1], PN_trailer_paren)) { +            // optimisation for method calls a.f(...), 
following PyPy +            py_parse_node_struct_t *pns_period = (py_parse_node_struct_t*)pns->nodes[i]; +            py_parse_node_struct_t *pns_paren = (py_parse_node_struct_t*)pns->nodes[i + 1]; +            EMIT(load_method, PY_PARSE_NODE_LEAF_ARG(pns_period->nodes[0])); // get the method +            compile_trailer_paren_helper(comp, pns_paren, true); +            i += 1; +        } else { +            compile_node(comp, pns->nodes[i]); +        } +    } +} + +void compile_power_dbl_star(compiler_t *comp, py_parse_node_struct_t *pns) { +    compile_node(comp, pns->nodes[0]); +    EMIT(binary_op, RT_BINARY_OP_POWER); +} + +void compile_atom_string(compiler_t *comp, py_parse_node_struct_t *pns) { +    // a list of strings +    EMIT(load_const_verbatim_start); +    EMIT(load_const_verbatim_str, "'"); +    int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); +    for (int i = 0; i < n; i++) { +        // TODO allow concatenation of either strings or bytes, but not mixed +        assert(PY_PARSE_NODE_IS_LEAF(pns->nodes[i])); +        assert(PY_PARSE_NODE_LEAF_KIND(pns->nodes[i]) == PY_PARSE_NODE_STRING); +        const char *str = qstr_str(PY_PARSE_NODE_LEAF_ARG(pns->nodes[i])); +        EMIT(load_const_verbatim_strn, str, strlen(str)); +    } +    EMIT(load_const_verbatim_str, "'"); +    EMIT(load_const_verbatim_end); +} + +// pns needs to have 2 nodes, first is lhs of comprehension, second is PN_comp_for node +void compile_comprehension(compiler_t *comp, py_parse_node_struct_t *pns, scope_kind_t kind) { +    assert(PY_PARSE_NODE_STRUCT_NUM_NODES(pns) == 2); +    assert(PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[1], PN_comp_for)); +    py_parse_node_struct_t *pns_comp_for = (py_parse_node_struct_t*)pns->nodes[1]; + +    if (comp->pass == PASS_1) { +        // create a new scope for this comprehension +        scope_t *s = scope_new_and_link(comp, kind, (py_parse_node_t)pns); +        // store the comprehension scope so the compiling function (this one) can use it at each 
pass +        pns_comp_for->nodes[3] = (py_parse_node_t)s; +    } + +    // get the scope for this comprehension +    scope_t *this_scope = (scope_t*)pns_comp_for->nodes[3]; + +    // compile the comprehension +    close_over_variables_etc(comp, this_scope, 0, 0); + +    compile_node(comp, pns_comp_for->nodes[1]); // source of the iterator +    EMIT(get_iter); +    EMIT(call_function, 1, 0, false, false); +} + +void compile_atom_paren(compiler_t *comp, py_parse_node_struct_t *pns) { +    if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { +        // an empty tuple +        /* +        EMIT(build_tuple, 0); +        */ +        c_tuple(comp, PY_PARSE_NODE_NULL, NULL); +    } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_comp)) { +        pns = (py_parse_node_struct_t*)pns->nodes[0]; +        assert(!PY_PARSE_NODE_IS_NULL(pns->nodes[1])); +        if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) { +            py_parse_node_struct_t *pns2 = (py_parse_node_struct_t*)pns->nodes[1]; +            if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_testlist_comp_3b) { +                // tuple of one item, with trailing comma +                assert(PY_PARSE_NODE_IS_NULL(pns2->nodes[0])); +                /* +                compile_node(comp, pns->nodes[0]); +                EMIT(build_tuple, 1); +                */ +                c_tuple(comp, pns->nodes[0], NULL); +            } else if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_testlist_comp_3c) { +                // tuple of many items +                /* +                compile_node(comp, pns->nodes[0]); +                compile_generic_all_nodes(comp, pns2); +                EMIT(build_tuple, 1 + PY_PARSE_NODE_STRUCT_NUM_NODES(pns2)); +                */ +                c_tuple(comp, pns->nodes[0], pns2); +            } else if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_comp_for) { +                // generator expression +                compile_comprehension(comp, pns, SCOPE_GEN_EXPR); +            } else { +            
    // tuple with 2 items +                goto tuple_with_2_items; +            } +        } else { +            // tuple with 2 items +            tuple_with_2_items: +            /* +            compile_node(comp, pns->nodes[0]); +            compile_node(comp, pns->nodes[1]); +            EMIT(build_tuple, 2); +            */ +            c_tuple(comp, PY_PARSE_NODE_NULL, pns); +        } +    } else { +        // parenthesis around a single item, is just that item +        compile_node(comp, pns->nodes[0]); +    } +} + +void compile_atom_bracket(compiler_t *comp, py_parse_node_struct_t *pns) { +    if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { +        // empty list +        EMIT(build_list, 0); +    } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_comp)) { +        py_parse_node_struct_t *pns2 = (py_parse_node_struct_t*)pns->nodes[0]; +        if (PY_PARSE_NODE_IS_STRUCT(pns2->nodes[1])) { +            py_parse_node_struct_t *pns3 = (py_parse_node_struct_t*)pns2->nodes[1]; +            if (PY_PARSE_NODE_STRUCT_KIND(pns3) == PN_testlist_comp_3b) { +                // list of one item, with trailing comma +                assert(PY_PARSE_NODE_IS_NULL(pns3->nodes[0])); +                compile_node(comp, pns2->nodes[0]); +                EMIT(build_list, 1); +            } else if (PY_PARSE_NODE_STRUCT_KIND(pns3) == PN_testlist_comp_3c) { +                // list of many items +                compile_node(comp, pns2->nodes[0]); +                compile_generic_all_nodes(comp, pns3); +                EMIT(build_list, 1 + PY_PARSE_NODE_STRUCT_NUM_NODES(pns3)); +            } else if (PY_PARSE_NODE_STRUCT_KIND(pns3) == PN_comp_for) { +                // list comprehension +                compile_comprehension(comp, pns2, SCOPE_LIST_COMP); +            } else { +                // list with 2 items +                goto list_with_2_items; +            } +        } else { +            // list with 2 items +            list_with_2_items: +            
compile_node(comp, pns2->nodes[0]); +            compile_node(comp, pns2->nodes[1]); +            EMIT(build_list, 2); +        } +    } else { +        // list with 1 item +        compile_node(comp, pns->nodes[0]); +        EMIT(build_list, 1); +    } +} + +void compile_atom_brace(compiler_t *comp, py_parse_node_struct_t *pns) { +    py_parse_node_t pn = pns->nodes[0]; +    if (PY_PARSE_NODE_IS_NULL(pn)) { +        // empty dict +        EMIT(build_map, 0); +    } else if (PY_PARSE_NODE_IS_STRUCT(pn)) { +        pns = (py_parse_node_struct_t*)pn; +        if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_dictorsetmaker_item) { +            // dict with one element +            EMIT(build_map, 1); +            compile_node(comp, pn); +            EMIT(store_map); +        } else if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_dictorsetmaker) { +            assert(PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])); // should succeed +            py_parse_node_struct_t *pns1 = (py_parse_node_struct_t*)pns->nodes[1]; +            if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_dictorsetmaker_list) { +                // dict/set with multiple elements + +                // get tail elements (2nd, 3rd, ...) 
+                py_parse_node_t *nodes; +                int n = list_get(&pns1->nodes[0], PN_dictorsetmaker_list2, &nodes); + +                // first element sets whether it's a dict or set +                bool is_dict; +                if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_dictorsetmaker_item)) { +                    // a dictionary +                    EMIT(build_map, 1 + n); +                    compile_node(comp, pns->nodes[0]); +                    EMIT(store_map); +                    is_dict = true; +                } else { +                    // a set +                    compile_node(comp, pns->nodes[0]); // 1st value of set +                    is_dict = false; +                } + +                // process rest of elements; each must have the same form (key:value or bare value) as the first +                for (int i = 0; i < n; i++) { +                    py_parse_node_t pn = nodes[i]; +                    bool is_key_value = PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_dictorsetmaker_item); +                    compile_node(comp, pn); +                    if (is_dict) { +                        if (!is_key_value) { +                            printf("SyntaxError?: expecting key:value for dictionary\n"); +                            return; +                        } +                        EMIT(store_map); +                    } else { +                        if (is_key_value) { +                            printf("SyntaxError?: expecting just a value for set\n"); +                            return; +                        } +                    } +                } + +                // if it's a set, build it +                if (!is_dict) { +                    EMIT(build_set, 1 + n); +                } +            } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_comp_for) { +                // dict/set comprehension +                if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_dictorsetmaker_item)) { +                    // a dictionary comprehension +                    
compile_comprehension(comp, pns, SCOPE_DICT_COMP); +                } else { +                    // a set comprehension +                    compile_comprehension(comp, pns, SCOPE_SET_COMP); +                } +            } else { +                // shouldn't happen +                assert(0); +            } +        } else { +            // set with one element +            goto set_with_one_element; +        } +    } else { +        // set with one element +        set_with_one_element: +        compile_node(comp, pn); +        EMIT(build_set, 1); +    } +} + +void compile_trailer_paren(compiler_t *comp, py_parse_node_struct_t *pns) { +    compile_trailer_paren_helper(comp, pns, false); +} + +void compile_trailer_bracket(compiler_t *comp, py_parse_node_struct_t *pns) { +    // object whose index we want is on top of stack +    compile_node(comp, pns->nodes[0]); // the index +    EMIT(binary_op, RT_BINARY_OP_SUBSCR); +} + +void compile_trailer_period(compiler_t *comp, py_parse_node_struct_t *pns) { +    // object whose attribute we want is on top of stack +    EMIT(load_attr, PY_PARSE_NODE_LEAF_ARG(pns->nodes[0])); // attribute to get +} + +void compile_subscript_3_helper(compiler_t *comp, py_parse_node_struct_t *pns) { +    assert(PY_PARSE_NODE_STRUCT_KIND(pns) == PN_subscript_3); // should always be +    py_parse_node_t pn = pns->nodes[0]; +    if (PY_PARSE_NODE_IS_NULL(pn)) { +        // [?:] +        EMIT(load_const_tok, PY_TOKEN_KW_NONE); +        EMIT(build_slice, 2); +    } else if (PY_PARSE_NODE_IS_STRUCT(pn)) { +        pns = (py_parse_node_struct_t*)pn; +        if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_subscript_3c) { +            EMIT(load_const_tok, PY_TOKEN_KW_NONE); +            pn = pns->nodes[0]; +            if (PY_PARSE_NODE_IS_NULL(pn)) { +                // [?::] +                EMIT(build_slice, 2); +            } else { +                // [?::x] +                compile_node(comp, pn); +                EMIT(build_slice, 3); +            
} +        } else if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_subscript_3d) { +            compile_node(comp, pns->nodes[0]); +            assert(PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])); // should always be +            pns = (py_parse_node_struct_t*)pns->nodes[1]; +            assert(PY_PARSE_NODE_STRUCT_KIND(pns) == PN_sliceop); // should always be +            if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { +                // [?:x:] +                EMIT(build_slice, 2); +            } else { +                // [?:x:x] +                compile_node(comp, pns->nodes[0]); +                EMIT(build_slice, 3); +            } +        } else { +            // [?:x] +            compile_node(comp, pn); +            EMIT(build_slice, 2); +        } +    } else { +        // [?:x] +        compile_node(comp, pn); +        EMIT(build_slice, 2); +    } +} + +void compile_subscript_2(compiler_t *comp, py_parse_node_struct_t *pns) { +    compile_node(comp, pns->nodes[0]); // start of slice +    assert(PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])); // should always be +    compile_subscript_3_helper(comp, (py_parse_node_struct_t*)pns->nodes[1]); +} + +void compile_subscript_3(compiler_t *comp, py_parse_node_struct_t *pns) { +    EMIT(load_const_tok, PY_TOKEN_KW_NONE); +    compile_subscript_3_helper(comp, pns); +} + +void compile_dictorsetmaker_item(compiler_t *comp, py_parse_node_struct_t *pns) { +    // if this is called then we are compiling a dict key:value pair +    compile_node(comp, pns->nodes[1]); // value +    compile_node(comp, pns->nodes[0]); // key +} + +void compile_classdef(compiler_t *comp, py_parse_node_struct_t *pns) { +    qstr cname = compile_classdef_helper(comp, pns); +    // store class object into class name +    emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, cname); +} + +void compile_arglist_star(compiler_t *comp, py_parse_node_struct_t *pns) { +    if (comp->have_star_arg) { +        printf("SyntaxError?: can't have multiple *x\n"); +        
return; +    } +    comp->have_star_arg = true; +    compile_node(comp, pns->nodes[0]); +} + +void compile_arglist_dbl_star(compiler_t *comp, py_parse_node_struct_t *pns) { +    if (comp->have_dbl_star_arg) { +        printf("SyntaxError?: can't have multiple **x\n"); +        return; +    } +    comp->have_dbl_star_arg = true; +    compile_node(comp, pns->nodes[0]); +} + +void compile_argument(compiler_t *comp, py_parse_node_struct_t *pns) { +    assert(PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])); // should always be +    py_parse_node_struct_t *pns2 = (py_parse_node_struct_t*)pns->nodes[1]; +    if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_argument_3) { +        if (!PY_PARSE_NODE_IS_ID(pns->nodes[0])) { +            printf("SyntaxError?: lhs of keyword argument must be an id\n"); +            return; +        } +        EMIT(load_const_id, PY_PARSE_NODE_LEAF_ARG(pns->nodes[0])); +        compile_node(comp, pns2->nodes[0]); +        comp->n_arg_keyword += 1; +    } else if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_comp_for) { +        compile_comprehension(comp, pns, SCOPE_GEN_EXPR); +    } else { +        // shouldn't happen +        assert(0); +    } +} + +void compile_yield_expr(compiler_t *comp, py_parse_node_struct_t *pns) { +    if (comp->scope_cur->kind != SCOPE_FUNCTION) { +        printf("SyntaxError: 'yield' outside function\n"); +        return; +    } +    if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { +        EMIT(load_const_tok, PY_TOKEN_KW_NONE); +        EMIT(yield_value); +    } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_yield_arg_from)) { +        pns = (py_parse_node_struct_t*)pns->nodes[0]; +        compile_node(comp, pns->nodes[0]); +        EMIT(get_iter); +        EMIT(load_const_tok, PY_TOKEN_KW_NONE); +        EMIT(yield_from); +    } else { +        compile_node(comp, pns->nodes[0]); +        EMIT(yield_value); +    } +} + +typedef void (*compile_function_t)(compiler_t*, py_parse_node_struct_t*); +static compile_function_t 
compile_function[] = { +    NULL, +#define nc NULL +#define c(f) compile_##f +#define DEF_RULE(rule, comp, kind, arg...) comp, +#include "grammar.h" +#undef nc +#undef c +#undef DEF_RULE +}; + +void compile_node(compiler_t *comp, py_parse_node_t pn) { +    if (PY_PARSE_NODE_IS_NULL(pn)) { +        // pass +    } else if (PY_PARSE_NODE_IS_LEAF(pn)) { +        int arg = PY_PARSE_NODE_LEAF_ARG(pn); +        switch (PY_PARSE_NODE_LEAF_KIND(pn)) { +            case PY_PARSE_NODE_ID: emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, arg); break; +            case PY_PARSE_NODE_SMALL_INT: EMIT(load_const_small_int, arg); break; +            case PY_PARSE_NODE_INTEGER: EMIT(load_const_int, arg); break; +            case PY_PARSE_NODE_DECIMAL: EMIT(load_const_dec, arg); break; +            case PY_PARSE_NODE_STRING: EMIT(load_const_str, arg, false); break; +            case PY_PARSE_NODE_BYTES: EMIT(load_const_str, arg, true); break; +            case PY_PARSE_NODE_TOKEN: EMIT(load_const_tok, arg); break; +            default: assert(0); +        } +    } else { +        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; +        compile_function_t f = compile_function[PY_PARSE_NODE_STRUCT_KIND(pns)]; +        if (f == NULL) { +            printf("node %u cannot be compiled\n", (uint)PY_PARSE_NODE_STRUCT_KIND(pns)); +            parse_node_show(pn, 0); +            assert(0); +        } else { +            f(comp, pns); +        } +    } +} + +void compile_scope_func_lambda_param(compiler_t *comp, py_parse_node_t pn, pn_kind_t pn_name, pn_kind_t pn_star, pn_kind_t pn_dbl_star, bool allow_annotations) { +    // TODO verify that *k and **k are last etc +    assert(PY_PARSE_NODE_IS_STRUCT(pn)); +    py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; +    qstr param_name = 0; +    py_parse_node_t pn_annotation = PY_PARSE_NODE_NULL; +    if (PY_PARSE_NODE_STRUCT_KIND(pns) == pn_name) { +        param_name = 
PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]); +        //int node_index = 1; unused +        if (allow_annotations) { +            if (!PY_PARSE_NODE_IS_NULL(pns->nodes[1])) { +                // this parameter has an annotation +                pn_annotation = pns->nodes[1]; +            } +            //node_index = 2; unused +        } +        /* this is obsolete now that num dict/default params are calculated in compile_funcdef_param +        if (!PY_PARSE_NODE_IS_NULL(pns->nodes[node_index])) { +            // this parameter has a default value +            if (comp->have_bare_star) { +                comp->scope_cur->num_dict_params += 1; +            } else { +                comp->scope_cur->num_default_params += 1; +            } +        } +        */ +        if (comp->have_bare_star) { +            // comes after a bare star, so doesn't count as a parameter +        } else { +            comp->scope_cur->num_params += 1; +        } +    } else if (PY_PARSE_NODE_STRUCT_KIND(pns) == pn_star) { +        if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { +            // bare star +            // TODO see http://www.python.org/dev/peps/pep-3102/ +            comp->have_bare_star = true; +            //assert(comp->scope_cur->num_dict_params == 0); +        } else if (PY_PARSE_NODE_IS_ID(pns->nodes[0])) { +            // named star +            comp->scope_cur->flags |= SCOPE_FLAG_VARARGS; +            param_name = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]); +        } else if (allow_annotations && PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_tfpdef)) { +            // named star with annotation +            comp->scope_cur->flags |= SCOPE_FLAG_VARARGS; +            pns = (py_parse_node_struct_t*)pns->nodes[0]; +            param_name = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]); +            pn_annotation = pns->nodes[1]; +        } else { +            // shouldn't happen +            assert(0); +        } +    } else if (PY_PARSE_NODE_STRUCT_KIND(pns) == pn_dbl_star) { +        
param_name = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]); +        if (allow_annotations && !PY_PARSE_NODE_IS_NULL(pns->nodes[1])) { +            // this parameter has an annotation +            pn_annotation = pns->nodes[1]; +        } +        comp->scope_cur->flags |= SCOPE_FLAG_VARKEYWORDS; +    } else { +        // TODO anything to implement? +        assert(0); +    } + +    if (param_name != 0) { +        if (!PY_PARSE_NODE_IS_NULL(pn_annotation)) { +            // TODO this parameter has an annotation +        } +        bool added; +        id_info_t *id_info = scope_find_or_add_id(comp->scope_cur, param_name, &added); +        if (!added) { +            printf("SyntaxError?: same name used for parameter; %s\n", qstr_str(param_name)); +            return; +        } +        id_info->param = true; +        id_info->kind = ID_INFO_KIND_LOCAL; +    } +} + +void compile_scope_func_param(compiler_t *comp, py_parse_node_t pn) { +    compile_scope_func_lambda_param(comp, pn, PN_typedargslist_name, PN_typedargslist_star, PN_typedargslist_dbl_star, true); +} + +void compile_scope_lambda_param(compiler_t *comp, py_parse_node_t pn) { +    compile_scope_func_lambda_param(comp, pn, PN_varargslist_name, PN_varargslist_star, PN_varargslist_dbl_star, false); +} + +void compile_scope_comp_iter(compiler_t *comp, py_parse_node_t pn_iter, py_parse_node_t pn_inner_expr, int l_top, int for_depth) { +    tail_recursion: +    if (PY_PARSE_NODE_IS_NULL(pn_iter)) { +        // no more nested if/for; compile inner expression +        compile_node(comp, pn_inner_expr); +        if (comp->scope_cur->kind == SCOPE_LIST_COMP) { +            EMIT(list_append, for_depth + 2); +        } else if (comp->scope_cur->kind == SCOPE_DICT_COMP) { +            EMIT(map_add, for_depth + 2); +        } else if (comp->scope_cur->kind == SCOPE_SET_COMP) { +            EMIT(set_add, for_depth + 2); +        } else { +            EMIT(yield_value); +            EMIT(pop_top); +        } +    } else if 
(PY_PARSE_NODE_IS_STRUCT_KIND(pn_iter, PN_comp_if)) { +        // if condition +        py_parse_node_struct_t *pns_comp_if = (py_parse_node_struct_t*)pn_iter; +        c_if_cond(comp, pns_comp_if->nodes[0], false, l_top); +        pn_iter = pns_comp_if->nodes[1]; +        goto tail_recursion; +    } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pn_iter, PN_comp_for)) { +        // for loop +        py_parse_node_struct_t *pns_comp_for2 = (py_parse_node_struct_t*)pn_iter; +        compile_node(comp, pns_comp_for2->nodes[1]); +        int l_end2 = EMIT(label_new); +        int l_top2 = EMIT(label_new); +        EMIT(get_iter); +        EMIT(label_assign, l_top2); +        EMIT(for_iter, l_end2); +        c_assign(comp, pns_comp_for2->nodes[0], ASSIGN_STORE); +        compile_scope_comp_iter(comp, pns_comp_for2->nodes[2], pn_inner_expr, l_top2, for_depth + 1); +        EMIT(jump, l_top2); +        EMIT(label_assign, l_end2); +        EMIT(for_iter_end); +    } else { +        // shouldn't happen +        assert(0); +    } +} + +void check_for_doc_string(compiler_t *comp, py_parse_node_t pn) { +    // see http://www.python.org/dev/peps/pep-0257/ + +    // look for the first statement +    if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_expr_stmt)) { +        // fall through +    } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_file_input_2)) { +        pn = ((py_parse_node_struct_t*)pn)->nodes[0]; +    } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_suite_block_stmts)) { +        pn = ((py_parse_node_struct_t*)pn)->nodes[0]; +    } else { +        return; +    } + +    // check the first statement for a doc string +    if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_expr_stmt)) { +        py_parse_node_struct_t* pns = (py_parse_node_struct_t*)pn; +        if (PY_PARSE_NODE_IS_LEAF(pns->nodes[0])) { +            int kind = PY_PARSE_NODE_LEAF_KIND(pns->nodes[0]); +            if (kind == PY_PARSE_NODE_STRING) { +                compile_node(comp, pns->nodes[0]); // a doc string +                
// store doc string +                emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, comp->qstr___doc__); +            } +        } +    } +} + +void compile_scope(compiler_t *comp, scope_t *scope, pass_kind_t pass) { +    comp->pass = pass; +    comp->scope_cur = scope; +    emit_start_pass(comp->emit, pass, scope); + +    if (comp->pass == PASS_1) { +        scope->stack_size = 0; +    } + +    if (comp->pass == PASS_3) { +        //printf("----\n"); +        scope_print_info(scope); +    } + +    // compile +    if (scope->kind == SCOPE_MODULE) { +        check_for_doc_string(comp, scope->pn); +        compile_node(comp, scope->pn); +        EMIT(load_const_tok, PY_TOKEN_KW_NONE); +        EMIT(return_value); +    } else if (scope->kind == SCOPE_FUNCTION) { +        assert(PY_PARSE_NODE_IS_STRUCT(scope->pn)); +        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)scope->pn; +        assert(PY_PARSE_NODE_STRUCT_KIND(pns) == PN_funcdef); + +        // work out number of parameters, keywords and default parameters, and add them to the id_info array +        if (comp->pass == PASS_1) { +            comp->have_bare_star = false; +            apply_to_single_or_list(comp, pns->nodes[1], PN_typedargslist, compile_scope_func_param); +        } + +        assert(pns->nodes[2] == 0); // 2 is something... 
+ +        compile_node(comp, pns->nodes[3]); // 3 is function body +        // emit return if it wasn't the last opcode +        if (!emit_last_emit_was_return_value(comp->emit)) { +            EMIT(load_const_tok, PY_TOKEN_KW_NONE); +            EMIT(return_value); +        } +    } else if (scope->kind == SCOPE_LAMBDA) { +        assert(PY_PARSE_NODE_IS_STRUCT(scope->pn)); +        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)scope->pn; +        assert(PY_PARSE_NODE_STRUCT_NUM_NODES(pns) == 3); + +        // work out number of parameters, keywords and default parameters, and add them to the id_info array +        if (comp->pass == PASS_1) { +            comp->have_bare_star = false; +            apply_to_single_or_list(comp, pns->nodes[0], PN_varargslist, compile_scope_lambda_param); +        } + +        compile_node(comp, pns->nodes[1]); // 1 is lambda body +        EMIT(return_value); +    } else if (scope->kind == SCOPE_LIST_COMP || scope->kind == SCOPE_DICT_COMP || scope->kind == SCOPE_SET_COMP || scope->kind == SCOPE_GEN_EXPR) { +        // a bit of a hack at the moment + +        assert(PY_PARSE_NODE_IS_STRUCT(scope->pn)); +        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)scope->pn; +        assert(PY_PARSE_NODE_STRUCT_NUM_NODES(pns) == 2); +        assert(PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[1], PN_comp_for)); +        py_parse_node_struct_t *pns_comp_for = (py_parse_node_struct_t*)pns->nodes[1]; + +        qstr qstr_arg = qstr_from_strn_copy(".0", 2); +        if (comp->pass == PASS_1) { +            bool added; +            id_info_t *id_info = scope_find_or_add_id(comp->scope_cur, qstr_arg, &added); +            assert(added); +            id_info->kind = ID_INFO_KIND_LOCAL; +            scope->num_params = 1; +        } + +        if (scope->kind == SCOPE_LIST_COMP) { +            EMIT(build_list, 0); +        } else if (scope->kind == SCOPE_DICT_COMP) { +            EMIT(build_map, 0); +        } else if (scope->kind == 
SCOPE_SET_COMP) { +            EMIT(build_set, 0); +        } + +        int l_end = EMIT(label_new); +        int l_top = EMIT(label_new); +        emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, qstr_arg); +        EMIT(label_assign, l_top); +        EMIT(for_iter, l_end); +        c_assign(comp, pns_comp_for->nodes[0], ASSIGN_STORE); +        compile_scope_comp_iter(comp, pns_comp_for->nodes[2], pns->nodes[0], l_top, 0); +        EMIT(jump, l_top); +        EMIT(label_assign, l_end); +        EMIT(for_iter_end); + +        if (scope->kind == SCOPE_GEN_EXPR) { +            EMIT(load_const_tok, PY_TOKEN_KW_NONE); +        } +        EMIT(return_value); +    } else { +        assert(scope->kind == SCOPE_CLASS); +        assert(PY_PARSE_NODE_IS_STRUCT(scope->pn)); +        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)scope->pn; +        assert(PY_PARSE_NODE_STRUCT_KIND(pns) == PN_classdef); + +        if (comp->pass == PASS_1) { +            bool added; +            id_info_t *id_info = scope_find_or_add_id(scope, comp->qstr___class__, &added); +            assert(added); +            id_info->kind = ID_INFO_KIND_LOCAL; +            id_info = scope_find_or_add_id(scope, comp->qstr___locals__, &added); +            assert(added); +            id_info->kind = ID_INFO_KIND_LOCAL; +            id_info->param = true; +            scope->num_params = 1; // __locals__ is the parameter +        } + +        emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, comp->qstr___locals__); +        EMIT(store_locals); +        emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, comp->qstr___name__); +        emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, comp->qstr___module__); +        EMIT(load_const_id, PY_PARSE_NODE_LEAF_ARG(pns->nodes[0])); // 0 is class name +        emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, comp->qstr___qualname__); + 
+        check_for_doc_string(comp, pns->nodes[2]); +        compile_node(comp, pns->nodes[2]); // 2 is class body + +        id_info_t *id = scope_find(scope, comp->qstr___class__); +        assert(id != NULL); +        if (id->kind == ID_INFO_KIND_LOCAL) { +            EMIT(load_const_tok, PY_TOKEN_KW_NONE); +        } else { +            EMIT(load_closure, comp->qstr___class__); +        } +        EMIT(return_value); +    } + +    emit_end_pass(comp->emit); +} + +void compile_scope_compute_things(compiler_t *comp, scope_t *scope) { +    // in functions, turn implicit globals into explicit globals +    // compute num_locals, and the index of each local +    scope->num_locals = 0; +    for (int i = 0; i < scope->id_info_len; i++) { +        id_info_t *id = &scope->id_info[i]; +        if (scope->kind == SCOPE_CLASS && id->qstr == comp->qstr___class__) { +            // __class__ is not counted as a local; if it's used then it becomes a ID_INFO_KIND_CELL +            continue; +        } +        if (scope->kind >= SCOPE_FUNCTION && scope->kind <= SCOPE_GEN_EXPR && id->kind == ID_INFO_KIND_GLOBAL_IMPLICIT) { +            id->kind = ID_INFO_KIND_GLOBAL_EXPLICIT; +        } +        if (id->param || id->kind == ID_INFO_KIND_LOCAL) { +            id->local_num = scope->num_locals; +            scope->num_locals += 1; +        } +    } + +    // compute flags +    //scope->flags = 0; since we set some things in parameters +    if (scope->kind != SCOPE_MODULE) { +        scope->flags |= SCOPE_FLAG_NEWLOCALS; +    } +    if (scope->kind == SCOPE_FUNCTION || scope->kind == SCOPE_LAMBDA || scope->kind == SCOPE_LIST_COMP || scope->kind == SCOPE_DICT_COMP || scope->kind == SCOPE_SET_COMP || scope->kind == SCOPE_GEN_EXPR) { +        assert(scope->parent != NULL); +        scope->flags |= SCOPE_FLAG_OPTIMISED; + +        // TODO possibly other ways it can be nested +        if (scope->parent->kind == SCOPE_FUNCTION || (scope->parent->kind == SCOPE_CLASS && 
scope->parent->parent->kind == SCOPE_FUNCTION)) { +            scope->flags |= SCOPE_FLAG_NESTED; +        } +    } +    int num_free = 0; +    for (int i = 0; i < scope->id_info_len; i++) { +        id_info_t *id = &scope->id_info[i]; +        if (id->kind == ID_INFO_KIND_CELL || id->kind == ID_INFO_KIND_FREE) { +            num_free += 1; +        } +    } +    if (num_free == 0) { +        scope->flags |= SCOPE_FLAG_NOFREE; +    } +} + +void py_compile(py_parse_node_t pn) { +    compiler_t *comp = m_new(compiler_t, 1); + +    comp->qstr___class__ = qstr_from_strn_copy("__class__", 9); +    comp->qstr___locals__ = qstr_from_strn_copy("__locals__", 10); +    comp->qstr___name__ = qstr_from_strn_copy("__name__", 8); +    comp->qstr___module__ = qstr_from_strn_copy("__module__", 10); +    comp->qstr___qualname__ = qstr_from_strn_copy("__qualname__", 12); +    comp->qstr___doc__ = qstr_from_strn_copy("__doc__", 7); +    comp->qstr_assertion_error = qstr_from_strn_copy("AssertionError", 14); + +    comp->break_label = 0; +    comp->continue_label = 0; +    comp->except_nest_level = 0; +    comp->scope_head = NULL; +    comp->scope_cur = NULL; + +    comp->emit = emit_new(comp->qstr___class__); + +    pn = fold_constants(pn); +    scope_new_and_link(comp, SCOPE_MODULE, pn); + +    for (scope_t *s = comp->scope_head; s != NULL; s = s->next) { +        compile_scope(comp, s, PASS_1); +    } + +    for (scope_t *s = comp->scope_head; s != NULL; s = s->next) { +        compile_scope_compute_things(comp, s); +    } + +    for (scope_t *s = comp->scope_head; s != NULL; s = s->next) { +        compile_scope(comp, s, PASS_2); +        compile_scope(comp, s, PASS_3); +    } + +    m_free(comp); +} diff --git a/py/compile.h b/py/compile.h new file mode 100644 index 000000000..339acca0c --- /dev/null +++ b/py/compile.h @@ -0,0 +1 @@ +void py_compile(py_parse_node_t pn); diff --git a/py/emit.h b/py/emit.h new file mode 100644 index 000000000..8cad745dd --- /dev/null +++ 
b/py/emit.h @@ -0,0 +1,120 @@ +//#define EMIT_DO_CPY +#define EMIT_DO_BC +//#define EMIT_DO_X64 +//#define EMIT_DO_THUMB + +/* Notes on passes: + * We don't know exactly the opcodes in pass 1 because they depend on the + * closing over of variables (LOAD_CLOSURE, BUILD_TUPLE, MAKE_CLOSURE), which + * depends on determining the scope of variables in each function, and this + * is not known until the end of pass 1. + * As a consequence, we don't know the maximum stack size until the end of pass 2. + * This is problematic for some emitters (x64) since they need to know the maximum + * stack size to compile the entry to the function, and this effects code size. + */ + +typedef enum { +    PASS_1 = 1, // work out id's and their kind, and number of labels +    PASS_2 = 2, // work out stack size and code size and label offsets +    PASS_3 = 3, // emit code +} pass_kind_t; + +typedef struct _emitter_t emitter_t; + +void emit_common_declare_global(pass_kind_t pass, scope_t *scope, qstr qstr); +void emit_common_declare_nonlocal(pass_kind_t pass, scope_t *scope, qstr qstr); +void emit_common_load_id(pass_kind_t pass, scope_t *scope, qstr qstr___class__, emitter_t *emit, qstr qstr); +void emit_common_store_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr); +void emit_common_delete_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr); + +emitter_t *emit_new(); +void emit_set_native_types(emitter_t *emit, bool do_native_types); +void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope); +void emit_end_pass(emitter_t *emit); +bool emit_last_emit_was_return_value(emitter_t *emit); +int emit_get_stack_size(emitter_t *emit); +void emit_set_stack_size(emitter_t *emit, int size); + +int emit_label_new(emitter_t *emit); +void emit_label_assign(emitter_t *emit, int l); +void emit_import_name(emitter_t *emit, qstr qstr); +void emit_import_from(emitter_t *emit, qstr qstr); +void emit_import_star(emitter_t *emit); +void emit_load_const_tok(emitter_t 
*emit, py_token_kind_t tok); +void emit_load_const_small_int(emitter_t *emit, int arg); +void emit_load_const_int(emitter_t *emit, qstr qstr); +void emit_load_const_dec(emitter_t *emit, qstr qstr); +void emit_load_const_id(emitter_t *emit, qstr qstr); +void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes); +void emit_load_const_verbatim_start(emitter_t *emit); +void emit_load_const_verbatim_int(emitter_t *emit, int val); +void emit_load_const_verbatim_str(emitter_t *emit, const char *str); +void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len); +void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes); +void emit_load_const_verbatim_end(emitter_t *emit); +void emit_load_fast(emitter_t *emit, qstr qstr, int local_num); +void emit_load_name(emitter_t *emit, qstr qstr); +void emit_load_global(emitter_t *emit, qstr qstr); +void emit_load_deref(emitter_t *emit, qstr qstr); +void emit_load_closure(emitter_t *emit, qstr qstr); +void emit_load_attr(emitter_t *emit, qstr qstr); +void emit_load_method(emitter_t *emit, qstr qstr); +void emit_load_build_class(emitter_t *emit); +void emit_store_fast(emitter_t *emit, qstr qstr, int local_num); +void emit_store_name(emitter_t *emit, qstr qstr); +void emit_store_global(emitter_t *emit, qstr qstr); +void emit_store_deref(emitter_t *emit, qstr qstr); +void emit_store_attr(emitter_t *emit, qstr qstr); +void emit_store_locals(emitter_t *emit); +void emit_store_subscr(emitter_t *emit); +void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num); +void emit_delete_name(emitter_t *emit, qstr qstr); +void emit_delete_global(emitter_t *emit, qstr qstr); +void emit_delete_deref(emitter_t *emit, qstr qstr); +void emit_delete_attr(emitter_t *emit, qstr qstr); +void emit_delete_subscr(emitter_t *emit); +void emit_dup_top(emitter_t *emit); +void emit_dup_top_two(emitter_t *emit); +void emit_pop_top(emitter_t *emit); +void emit_rot_two(emitter_t *emit); +void 
emit_rot_three(emitter_t *emit); +void emit_jump(emitter_t *emit, int label); +void emit_pop_jump_if_true(emitter_t *emit, int label); +void emit_pop_jump_if_false(emitter_t *emit, int label); +void emit_jump_if_true_or_pop(emitter_t *emit, int label); +void emit_jump_if_false_or_pop(emitter_t *emit, int label); +void emit_setup_loop(emitter_t *emit, int label); +void emit_break_loop(emitter_t *emit, int label); +void emit_continue_loop(emitter_t *emit, int label); +void emit_setup_with(emitter_t *emit, int label); +void emit_with_cleanup(emitter_t *emit); +void emit_setup_except(emitter_t *emit, int label); +void emit_setup_finally(emitter_t *emit, int label); +void emit_end_finally(emitter_t *emit); +void emit_get_iter(emitter_t *emit); // tos = getiter(tos) +void emit_for_iter(emitter_t *emit, int label); +void emit_for_iter_end(emitter_t *emit); +void emit_pop_block(emitter_t *emit); +void emit_pop_except(emitter_t *emit); +void emit_unary_op(emitter_t *emit, rt_unary_op_t op); +void emit_binary_op(emitter_t *emit, rt_binary_op_t op); +void emit_compare_op(emitter_t *emit, rt_compare_op_t op); +void emit_build_tuple(emitter_t *emit, int n_args); +void emit_build_list(emitter_t *emit, int n_args); +void emit_list_append(emitter_t *emit, int list_stack_index); +void emit_build_map(emitter_t *emit, int n_args); +void emit_store_map(emitter_t *emit); +void emit_map_add(emitter_t *emit, int map_stack_index); +void emit_build_set(emitter_t *emit, int n_args); +void emit_set_add(emitter_t *emit, int set_stack_index); +void emit_build_slice(emitter_t *emit, int n_args); +void emit_unpack_sequence(emitter_t *emit, int n_args); +void emit_unpack_ex(emitter_t *emit, int n_left, int n_right); +void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params); +void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params); +void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool 
have_star_arg, bool have_dbl_star_arg); +void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg); +void emit_return_value(emitter_t *emit); +void emit_raise_varargs(emitter_t *emit, int n_args); +void emit_yield_value(emitter_t *emit); +void emit_yield_from(emitter_t *emit); diff --git a/py/emitbc.c b/py/emitbc.c new file mode 100644 index 000000000..9d159ae60 --- /dev/null +++ b/py/emitbc.c @@ -0,0 +1,692 @@ +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> + +#include "misc.h" +#include "lexer.h" +#include "machine.h" +#include "parse.h" +#include "compile.h" +#include "scope.h" +#include "runtime.h" +#include "emit.h" +#include "bc.h" + +#ifdef EMIT_DO_BC + +struct _emitter_t { +    int pass; +    int next_label; +    int stack_size; +    bool last_emit_was_return_value; + +    scope_t *scope; + +    int max_num_labels; +    uint *label_offsets; + +    uint code_offset; +    uint code_size; +    byte *code_base; +    byte dummy_data[8]; +}; + +emitter_t *emit_new() { +    emitter_t *emit = m_new(emitter_t, 1); +    emit->max_num_labels = 0; +    emit->label_offsets = NULL; +    emit->code_offset = 0; +    emit->code_size = 0; +    emit->code_base = NULL; +    return emit; +} + +uint emit_get_code_size(emitter_t* emit) { +    return emit->code_size; +} + +void* emit_get_code(emitter_t* emit) { +    return emit->code_base; +} + +void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) { +    emit->pass = pass; +    emit->next_label = 1; +    emit->stack_size = 0; +    emit->last_emit_was_return_value = false; +    emit->scope = scope; +    if (pass == PASS_1) { +        scope->unique_code_id = rt_get_new_unique_code_id(); +    } else if (pass > PASS_1) { +        if (emit->label_offsets == NULL) { +            emit->label_offsets = m_new(uint, emit->max_num_labels); +        } +        if (pass == PASS_2) { +         
   memset(emit->label_offsets, -1, emit->max_num_labels * sizeof(uint)); +        } +    } +    emit->code_offset = 0; +} + +void emit_end_pass(emitter_t *emit) { +    // check stack is back to zero size +    if (emit->stack_size != 0) { +        printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size); +    } + +    if (emit->pass == PASS_1) { +        // calculate number of labels need +        if (emit->next_label > emit->max_num_labels) { +            emit->max_num_labels = emit->next_label; +        } + +    } else if (emit->pass == PASS_2) { +        // calculate size of code in bytes +        emit->code_size = emit->code_offset; +        emit->code_base = m_new(byte, emit->code_size); +        printf("code_size: %u\n", emit->code_size); + +    } else if (emit->pass == PASS_3) { +        rt_assign_byte_code(emit->scope->unique_code_id, emit->code_base, emit->code_size, emit->scope->num_params); +    } +} + +// all functions must go through this one to emit bytes +static byte* emit_get_cur_to_write_bytes(emitter_t* emit, int num_bytes_to_write) { +    //printf("emit %d\n", num_bytes_to_write); +    if (emit->pass < PASS_3) { +        emit->code_offset += num_bytes_to_write; +        return emit->dummy_data; +    } else { +        assert(emit->code_offset + num_bytes_to_write <= emit->code_size); +        byte *c = emit->code_base + emit->code_offset; +        emit->code_offset += num_bytes_to_write; +        return c; +    } +} + +static void emit_write_byte_1(emitter_t* emit, byte b1) { +    byte* c = emit_get_cur_to_write_bytes(emit, 1); +    c[0] = b1; +} + +static void emit_write_byte_1_byte(emitter_t* emit, byte b1, uint b2) { +    assert((b2 & (~0xff)) == 0); +    byte* c = emit_get_cur_to_write_bytes(emit, 2); +    c[0] = b1; +    c[1] = b2; +} + +static void emit_write_byte_1_int(emitter_t* emit, byte b1, int num) { +    assert((num & (~0x7fff)) == 0 || (num & (~0x7fff)) == (~0x7fff)); +    byte* c = emit_get_cur_to_write_bytes(emit, 
3); +    c[0] = b1; +    c[1] = num; +    c[2] = num >> 8; +} + +static void emit_write_byte_1_uint(emitter_t* emit, byte b1, uint num) { +    if (num <= 127) { // fits in 0x7f +        // fit argument in single byte +        byte* c = emit_get_cur_to_write_bytes(emit, 2); +        c[0] = b1; +        c[1] = num; +    } else if (num <= 16383) { // fits in 0x3fff +        // fit argument in two bytes +        byte* c = emit_get_cur_to_write_bytes(emit, 3); +        c[0] = b1; +        c[1] = (num >> 8) | 0x80; +        c[2] = num; +    } else { +        // larger numbers not implemented/supported +        assert(0); +    } +} + +static void emit_write_byte_1_qstr(emitter_t* emit, byte b1, qstr qstr) { +    emit_write_byte_1_uint(emit, b1, qstr); +} + +static void emit_write_byte_1_label(emitter_t* emit, byte b1, int label) { +    uint code_offset; +    if (emit->pass < PASS_3) { +        code_offset = 0; +    } else { +        code_offset = emit->label_offsets[label]; +    } +    emit_write_byte_1_uint(emit, b1, code_offset); +} + +bool emit_last_emit_was_return_value(emitter_t *emit) { +    return emit->last_emit_was_return_value; +} + +int emit_get_stack_size(emitter_t *emit) { +    return emit->stack_size; +} + +void emit_set_stack_size(emitter_t *emit, int size) { +    if (emit->pass > PASS_1) { +        emit->stack_size = size; +    } +} + +static void emit_pre(emitter_t *emit, int stack_size_delta) { +    if (emit->pass > PASS_1) { +        emit->stack_size += stack_size_delta; +        if (emit->stack_size > emit->scope->stack_size) { +            emit->scope->stack_size = emit->stack_size; +        } +    } +    emit->last_emit_was_return_value = false; +} + +int emit_label_new(emitter_t *emit) { +    return emit->next_label++; +} + +void emit_label_assign(emitter_t *emit, int l) { +    emit_pre(emit, 0); +    if (emit->pass > PASS_1) { +        assert(l < emit->max_num_labels); +        if (emit->pass == PASS_2) { +            // assign label offset +       
     assert(emit->label_offsets[l] == -1); +            emit->label_offsets[l] = emit->code_offset; +        } else if (emit->pass == PASS_3) { +            // ensure label offset has not changed from PASS_2 to PASS_3 +            assert(emit->label_offsets[l] == emit->code_offset); +            //printf("l%d: (at %d)\n", l, emit->code_offset); +        } +    } +} + +void emit_import_name(emitter_t *emit, qstr qstr) { +    emit_pre(emit, -1); +    emit_write_byte_1_qstr(emit, PYBC_IMPORT_NAME, qstr); +} + +void emit_import_from(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 1); +    emit_write_byte_1_qstr(emit, PYBC_IMPORT_FROM, qstr); +} + +void emit_import_star(emitter_t *emit) { +    emit_pre(emit, -1); +    emit_write_byte_1(emit, PYBC_IMPORT_STAR); +} + +void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) { +    emit_pre(emit, 1); +    switch (tok) { +        case PY_TOKEN_KW_FALSE: emit_write_byte_1(emit, PYBC_LOAD_CONST_FALSE); break; +        case PY_TOKEN_KW_NONE: emit_write_byte_1(emit, PYBC_LOAD_CONST_NONE); break; +        case PY_TOKEN_KW_TRUE: emit_write_byte_1(emit, PYBC_LOAD_CONST_TRUE); break; +        default: assert(0); +    } +} + +void emit_load_const_small_int(emitter_t *emit, int arg) { +    emit_pre(emit, 1); +    emit_write_byte_1_int(emit, PYBC_LOAD_CONST_SMALL_INT, arg); +} + +void emit_load_const_int(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 1); +    emit_write_byte_1_qstr(emit, PYBC_LOAD_CONST_INT, qstr); +} + +void emit_load_const_dec(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 1); +    emit_write_byte_1_qstr(emit, PYBC_LOAD_CONST_DEC, qstr); +} + +void emit_load_const_id(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 1); +    emit_write_byte_1_qstr(emit, PYBC_LOAD_CONST_ID, qstr); +} + +void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) { +    emit_pre(emit, 1); +    if (bytes) { +        emit_write_byte_1_qstr(emit, PYBC_LOAD_CONST_BYTES, qstr); +    } else { +        
emit_write_byte_1_qstr(emit, PYBC_LOAD_CONST_STRING, qstr); +    } +} + +void emit_load_const_verbatim_start(emitter_t *emit) { +    emit_pre(emit, 1); +    assert(0); +} + +void emit_load_const_verbatim_int(emitter_t *emit, int val) { +    assert(0); +} + +void emit_load_const_verbatim_str(emitter_t *emit, const char *str) { +    assert(0); +} + +void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) { +    assert(0); +} + +void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) { +    assert(0); +} + +void emit_load_const_verbatim_end(emitter_t *emit) { +    assert(0); +} + +void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) { +    assert(local_num >= 0); +    emit_pre(emit, 1); +    switch (local_num) { +        case 0: emit_write_byte_1(emit, PYBC_LOAD_FAST_0); break; +        case 1: emit_write_byte_1(emit, PYBC_LOAD_FAST_1); break; +        case 2: emit_write_byte_1(emit, PYBC_LOAD_FAST_2); break; +        default: emit_write_byte_1_uint(emit, PYBC_LOAD_FAST_N, local_num); break; +    } +} + +void emit_load_name(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 1); +    emit_write_byte_1_qstr(emit, PYBC_LOAD_NAME, qstr); +} + +void emit_load_global(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 1); +    emit_write_byte_1_qstr(emit, PYBC_LOAD_GLOBAL, qstr); +} + +void emit_load_deref(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 1); +    assert(0); +} + +void emit_load_closure(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 1); +    assert(0); +} + +void emit_load_attr(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 0); +    emit_write_byte_1_qstr(emit, PYBC_LOAD_ATTR, qstr); +} + +void emit_load_method(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 0); +    emit_write_byte_1_qstr(emit, PYBC_LOAD_METHOD, qstr); +} + +void emit_load_build_class(emitter_t *emit) { +    emit_pre(emit, 1); +    emit_write_byte_1(emit, PYBC_LOAD_BUILD_CLASS); +} + +void emit_store_fast(emitter_t *emit, qstr 
qstr, int local_num) { +    assert(local_num >= 0); +    emit_pre(emit, -1); +    switch (local_num) { +        case 0: emit_write_byte_1(emit, PYBC_STORE_FAST_0); break; +        case 1: emit_write_byte_1(emit, PYBC_STORE_FAST_1); break; +        case 2: emit_write_byte_1(emit, PYBC_STORE_FAST_2); break; +        default: emit_write_byte_1_uint(emit, PYBC_STORE_FAST_N, local_num); break; +    } +} + +void emit_store_name(emitter_t *emit, qstr qstr) { +    emit_pre(emit, -1); +    emit_write_byte_1_qstr(emit, PYBC_STORE_NAME, qstr); +} + +void emit_store_global(emitter_t *emit, qstr qstr) { +    emit_pre(emit, -1); +    emit_write_byte_1_qstr(emit, PYBC_STORE_GLOBAL, qstr); +} + +void emit_store_deref(emitter_t *emit, qstr qstr) { +    emit_pre(emit, -1); +    assert(0); +} + +void emit_store_attr(emitter_t *emit, qstr qstr) { +    emit_pre(emit, -2); +    emit_write_byte_1_qstr(emit, PYBC_STORE_ATTR, qstr); +} + +void emit_store_locals(emitter_t *emit) { +    emit_pre(emit, -1); +    emit_write_byte_1(emit, PYBC_STORE_LOCALS); +} + +void emit_store_subscr(emitter_t *emit) { +    emit_pre(emit, -3); +    emit_write_byte_1(emit, PYBC_STORE_SUBSCR); +} + +void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) { +    assert(local_num >= 0); +    emit_pre(emit, 0); +    emit_write_byte_1_uint(emit, PYBC_DELETE_FAST_N, local_num); +} + +void emit_delete_name(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 0); +    emit_write_byte_1_qstr(emit, PYBC_DELETE_NAME, qstr); +} + +void emit_delete_global(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 0); +    emit_write_byte_1_qstr(emit, PYBC_DELETE_GLOBAL, qstr); +} + +void emit_delete_deref(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 0); +    emit_write_byte_1_qstr(emit, PYBC_DELETE_DEREF, qstr); +} + +void emit_delete_attr(emitter_t *emit, qstr qstr) { +    emit_pre(emit, -1); +    emit_write_byte_1_qstr(emit, PYBC_DELETE_ATTR, qstr); +} + +void emit_delete_subscr(emitter_t *emit) { +    emit_pre(emit, 
-2); +    emit_write_byte_1(emit, PYBC_DELETE_SUBSCR); +} + +void emit_dup_top(emitter_t *emit) { +    emit_pre(emit, 1); +    emit_write_byte_1(emit, PYBC_DUP_TOP); +} + +void emit_dup_top_two(emitter_t *emit) { +    emit_pre(emit, 2); +    emit_write_byte_1(emit, PYBC_DUP_TOP_TWO); +} + +void emit_pop_top(emitter_t *emit) { +    emit_pre(emit, -1); +    emit_write_byte_1(emit, PYBC_POP_TOP); +} + +void emit_rot_two(emitter_t *emit) { +    emit_pre(emit, 0); +    emit_write_byte_1(emit, PYBC_ROT_TWO); +} + +void emit_rot_three(emitter_t *emit) { +    emit_pre(emit, 0); +    emit_write_byte_1(emit, PYBC_ROT_THREE); +} + +void emit_jump(emitter_t *emit, int label) { +    emit_pre(emit, 0); +    emit_write_byte_1_label(emit, PYBC_JUMP, label); +} + +void emit_pop_jump_if_true(emitter_t *emit, int label) { +    emit_pre(emit, -1); +    emit_write_byte_1_label(emit, PYBC_POP_JUMP_IF_TRUE, label); +} + +void emit_pop_jump_if_false(emitter_t *emit, int label) { +    emit_pre(emit, -1); +    emit_write_byte_1_label(emit, PYBC_POP_JUMP_IF_FALSE, label); +} + +void emit_jump_if_true_or_pop(emitter_t *emit, int label) { +    emit_pre(emit, -1); +    emit_write_byte_1_label(emit, PYBC_JUMP_IF_TRUE_OR_POP, label); +} + +void emit_jump_if_false_or_pop(emitter_t *emit, int label) { +    emit_pre(emit, -1); +    emit_write_byte_1_label(emit, PYBC_JUMP_IF_FALSE_OR_POP, label); +} + +void emit_setup_loop(emitter_t *emit, int label) { +    emit_pre(emit, 0); +    emit_write_byte_1_label(emit, PYBC_SETUP_LOOP, label); +} + +void emit_break_loop(emitter_t *emit, int label) { +    emit_pre(emit, 0); +    emit_write_byte_1_label(emit, PYBC_BREAK_LOOP, label); +} + +void emit_continue_loop(emitter_t *emit, int label) { +    emit_pre(emit, 0); +    emit_write_byte_1_label(emit, PYBC_CONTINUE_LOOP, label); +} + +void emit_setup_with(emitter_t *emit, int label) { +    emit_pre(emit, 7); +    emit_write_byte_1_label(emit, PYBC_SETUP_WITH, label); +} + +void emit_with_cleanup(emitter_t *emit) 
{ +    emit_pre(emit, -7); +    emit_write_byte_1(emit, PYBC_WITH_CLEANUP); +} + +void emit_setup_except(emitter_t *emit, int label) { +    emit_pre(emit, 6); +    emit_write_byte_1_label(emit, PYBC_SETUP_EXCEPT, label); +} + +void emit_setup_finally(emitter_t *emit, int label) { +    emit_pre(emit, 6); +    emit_write_byte_1_label(emit, PYBC_SETUP_FINALLY, label); +} + +void emit_end_finally(emitter_t *emit) { +    emit_pre(emit, -1); +    emit_write_byte_1(emit, PYBC_END_FINALLY); +} + +void emit_get_iter(emitter_t *emit) { +    emit_pre(emit, 0); +    emit_write_byte_1(emit, PYBC_GET_ITER); +} + +void emit_for_iter(emitter_t *emit, int label) { +    emit_pre(emit, 1); +    emit_write_byte_1_label(emit, PYBC_FOR_ITER, label); +} + +void emit_for_iter_end(emitter_t *emit) { +    emit_pre(emit, -1); +} + +void emit_pop_block(emitter_t *emit) { +    emit_pre(emit, 0); +    emit_write_byte_1(emit, PYBC_POP_BLOCK); +} + +void emit_pop_except(emitter_t *emit) { +    emit_pre(emit, 0); +    emit_write_byte_1(emit, PYBC_POP_EXCEPT); +} + +void emit_unary_op(emitter_t *emit, rt_unary_op_t op) { +    emit_pre(emit, 0); +    emit_write_byte_1_byte(emit, PYBC_UNARY_OP, op); +} + +void emit_binary_op(emitter_t *emit, rt_binary_op_t op) { +    emit_pre(emit, -1); +    emit_write_byte_1_byte(emit, PYBC_BINARY_OP, op); +} + +void emit_compare_op(emitter_t *emit, rt_compare_op_t op) { +    emit_pre(emit, -1); +    emit_write_byte_1_byte(emit, PYBC_COMPARE_OP, op); +} + +void emit_build_tuple(emitter_t *emit, int n_args) { +    assert(n_args >= 0); +    emit_pre(emit, 1 - n_args); +    emit_write_byte_1_uint(emit, PYBC_BUILD_TUPLE, n_args); +} + +void emit_build_list(emitter_t *emit, int n_args) { +    assert(n_args >= 0); +    emit_pre(emit, 1 - n_args); +    emit_write_byte_1_uint(emit, PYBC_BUILD_LIST, n_args); +} + +void emit_list_append(emitter_t *emit, int list_stack_index) { +    assert(list_stack_index >= 0); +    emit_pre(emit, -1); +    emit_write_byte_1_uint(emit, 
PYBC_LIST_APPEND, list_stack_index); +} + +void emit_build_map(emitter_t *emit, int n_args) { +    assert(n_args >= 0); +    emit_pre(emit, 1); +    emit_write_byte_1_uint(emit, PYBC_BUILD_MAP, n_args); +} + +void emit_store_map(emitter_t *emit) { +    emit_pre(emit, -2); +    emit_write_byte_1(emit, PYBC_STORE_MAP); +} + +void emit_map_add(emitter_t *emit, int map_stack_index) { +    assert(map_stack_index >= 0); +    emit_pre(emit, -2); +    emit_write_byte_1_uint(emit, PYBC_MAP_ADD, map_stack_index); +} + +void emit_build_set(emitter_t *emit, int n_args) { +    assert(n_args >= 0); +    emit_pre(emit, 1 - n_args); +    emit_write_byte_1_uint(emit, PYBC_BUILD_SET, n_args); +} + +void emit_set_add(emitter_t *emit, int set_stack_index) { +    assert(set_stack_index >= 0); +    emit_pre(emit, -1); +    emit_write_byte_1_uint(emit, PYBC_SET_ADD, set_stack_index); +} + +void emit_build_slice(emitter_t *emit, int n_args) { +    assert(n_args >= 0); +    emit_pre(emit, 1 - n_args); +    emit_write_byte_1_uint(emit, PYBC_BUILD_SLICE, n_args); +} + +void emit_unpack_sequence(emitter_t *emit, int n_args) { +    assert(n_args >= 0); +    emit_pre(emit, -1 + n_args); +    emit_write_byte_1_uint(emit, PYBC_UNPACK_SEQUENCE, n_args); +} + +void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) { +    assert(n_left >=0 && n_right >= 0); +    emit_pre(emit, -1 + n_left + n_right + 1); +    emit_write_byte_1_uint(emit, PYBC_UNPACK_EX, n_left | (n_right << 8)); +} + +void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { +    assert(n_default_params == 0 && n_dict_params == 0); +    emit_pre(emit, 1); +    emit_write_byte_1_uint(emit, PYBC_MAKE_FUNCTION, scope->unique_code_id); +} + +void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { +    assert(0); +    emit_pre(emit, -2 - n_default_params - 2 * n_dict_params); +    if (emit->pass == PASS_3) { +        printf("MAKE_CLOSURE %d\n", 
(n_dict_params << 8) | n_default_params); +    } +} + +void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { +    int s = 0; +    if (have_star_arg) { +        s += 1; +    } +    if (have_dbl_star_arg) { +        s += 1; +    } +    emit_pre(emit, -n_positional - 2 * n_keyword - s); +    int op; +    if (have_star_arg) { +        if (have_dbl_star_arg) { +            op = PYBC_CALL_FUNCTION_VAR_KW; +        } else { +            op = PYBC_CALL_FUNCTION_VAR; +        } +    } else { +        if (have_dbl_star_arg) { +            op = PYBC_CALL_FUNCTION_KW; +        } else { +            op = PYBC_CALL_FUNCTION; +        } +    } +    emit_write_byte_1_uint(emit, op, (n_keyword << 8) | n_positional); // TODO make it 2 separate uints +} + +void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { +    int s = 0; +    if (have_star_arg) { +        s += 1; +    } +    if (have_dbl_star_arg) { +        s += 1; +    } +    emit_pre(emit, -n_positional - 2 * n_keyword - s); +    int op; +    if (have_star_arg) { +        if (have_dbl_star_arg) { +            op = PYBC_CALL_METHOD_VAR_KW; +        } else { +            op = PYBC_CALL_METHOD_VAR; +        } +    } else { +        if (have_dbl_star_arg) { +            op = PYBC_CALL_METHOD_KW; +        } else { +            op = PYBC_CALL_METHOD; +        } +    } +    emit_write_byte_1_uint(emit, op, (n_keyword << 8) | n_positional); // TODO make it 2 separate uints +} + +void emit_return_value(emitter_t *emit) { +    emit_pre(emit, -1); +    emit->last_emit_was_return_value = true; +    emit_write_byte_1(emit, PYBC_RETURN_VALUE); +} + +void emit_raise_varargs(emitter_t *emit, int n_args) { +    assert(n_args >= 0); +    emit_pre(emit, -n_args); +    emit_write_byte_1_uint(emit, PYBC_RAISE_VARARGS, n_args); +} + +void emit_yield_value(emitter_t *emit) { +    emit_pre(emit, 0); +    if (emit->pass == 
PASS_2) { +        emit->scope->flags |= SCOPE_FLAG_GENERATOR; +    } +    emit_write_byte_1(emit, PYBC_YIELD_VALUE); +} + +void emit_yield_from(emitter_t *emit) { +    emit_pre(emit, -1); +    if (emit->pass == PASS_2) { +        emit->scope->flags |= SCOPE_FLAG_GENERATOR; +    } +    emit_write_byte_1(emit, PYBC_YIELD_FROM); +} + +#endif // EMIT_DO_BC diff --git a/py/emitcommon.c b/py/emitcommon.c new file mode 100644 index 000000000..1fd8697c3 --- /dev/null +++ b/py/emitcommon.c @@ -0,0 +1,171 @@ +#include <unistd.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <assert.h> + +#include "misc.h" +#include "lexer.h" +#include "machine.h" +#include "parse.h" +#include "scope.h" +#include "runtime.h" +#include "emit.h" + +#define EMIT(fun, arg...) (emit_##fun(emit, ##arg)) + +void emit_common_declare_global(pass_kind_t pass, scope_t *scope, qstr qstr) { +    if (pass == PASS_1) { +        if (scope->kind == SCOPE_MODULE) { +            printf("SyntaxError?: can't declare global in outer code\n"); +            return; +        } +        bool added; +        id_info_t *id_info = scope_find_or_add_id(scope, qstr, &added); +        if (!added) { +            printf("SyntaxError?: identifier already declared something\n"); +            return; +        } +        id_info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT; + +        // if the id exists in the global scope, set its kind to EXPLICIT_GLOBAL +        id_info = scope_find_global(scope, qstr); +        if (id_info != NULL) { +            id_info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT; +        } +    } +} + +void emit_common_declare_nonlocal(pass_kind_t pass, scope_t *scope, qstr qstr) { +    if (pass == PASS_1) { +        if (scope->kind == SCOPE_MODULE) { +            printf("SyntaxError?: can't declare nonlocal in outer code\n"); +            return; +        } +        bool added; +        id_info_t *id_info = scope_find_or_add_id(scope, qstr, &added); +        if (!added) { +            
printf("SyntaxError?: identifier already declared something\n"); +            return; +        } +        id_info_t *id_info2 = scope_find_local_in_parent(scope, qstr); +        if (id_info2 == NULL || !(id_info2->kind == ID_INFO_KIND_LOCAL || id_info2->kind == ID_INFO_KIND_CELL || id_info2->kind == ID_INFO_KIND_FREE)) { +            printf("SyntaxError: no binding for nonlocal '%s' found\n", qstr_str(qstr)); +            return; +        } +        id_info->kind = ID_INFO_KIND_FREE; +        scope_close_over_in_parents(scope, qstr); +    } +} + +void emit_common_load_id(pass_kind_t pass, scope_t *scope, qstr qstr___class__, emitter_t *emit, qstr qstr) { +    id_info_t *id_info = NULL; +    if (pass == PASS_1) { +        // name adding/lookup +        bool added; +        id_info = scope_find_or_add_id(scope, qstr, &added); +        if (added) { +            if (strcmp(qstr_str(qstr), "AssertionError") == 0) { +                id_info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT; +                // TODO how much of a hack is this? 
+            } else if (strcmp(qstr_str(qstr), "super") == 0 && scope->kind == SCOPE_FUNCTION) { +                // special case, super is a global, and also counts as use of __class__ +                id_info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT; +                id_info_t *id_info2 = scope_find_local_in_parent(scope, qstr___class__); +                if (id_info2 != NULL) { +                    id_info2 = scope_find_or_add_id(scope, qstr___class__, &added); +                    if (added) { +                        id_info2->kind = ID_INFO_KIND_FREE; +                        scope_close_over_in_parents(scope, qstr___class__); +                    } +                } +            } else { +                id_info_t *id_info2 = scope_find_local_in_parent(scope, qstr); +                if (id_info2 != NULL && (id_info2->kind == ID_INFO_KIND_LOCAL || id_info2->kind == ID_INFO_KIND_CELL || id_info2->kind == ID_INFO_KIND_FREE)) { +                    id_info->kind = ID_INFO_KIND_FREE; +                    scope_close_over_in_parents(scope, qstr); +                } else { +                    id_info->kind = ID_INFO_KIND_GLOBAL_IMPLICIT; +                } +            } +        } +    } else { +        id_info = scope_find(scope, qstr); +    } + +    assert(id_info != NULL); // TODO can this ever fail? 
+ +    // call the emit backend with the correct code +    if (id_info == NULL || id_info->kind == ID_INFO_KIND_GLOBAL_IMPLICIT) { +        EMIT(load_name, qstr); +    } else if (id_info->kind == ID_INFO_KIND_GLOBAL_EXPLICIT) { +        EMIT(load_global, qstr); +    } else if (id_info->kind == ID_INFO_KIND_LOCAL) { +        EMIT(load_fast, qstr, id_info->local_num); +    } else if (id_info->kind == ID_INFO_KIND_CELL || id_info->kind == ID_INFO_KIND_FREE) { +        EMIT(load_deref, qstr); +    } else { +        assert(0); +    } +} + +static id_info_t *get_id_for_modification(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr) { +    id_info_t *id_info = NULL; +    if (pass == PASS_1) { +        // name adding/lookup +        bool added; +        id_info = scope_find_or_add_id(scope, qstr, &added); +        if (added) { +            if (scope->kind == SCOPE_MODULE || scope->kind == SCOPE_CLASS) { +                id_info->kind = ID_INFO_KIND_GLOBAL_IMPLICIT; +            } else { +                id_info->kind = ID_INFO_KIND_LOCAL; +            } +        } else if (scope->kind >= SCOPE_FUNCTION && scope->kind <= SCOPE_GEN_EXPR && id_info->kind == ID_INFO_KIND_GLOBAL_IMPLICIT) { +            // rebind as a local variable +            id_info->kind = ID_INFO_KIND_LOCAL; +        } +    } else { +        id_info = scope_find(scope, qstr); +    } + +    assert(id_info != NULL); // TODO can this ever fail? 
+ +    return id_info; +} + +void emit_common_store_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr) { +    // create/get the id info +    id_info_t *id = get_id_for_modification(pass, scope, emit, qstr); + +    // call the emit backend with the correct code +    if (id == NULL || id->kind == ID_INFO_KIND_GLOBAL_IMPLICIT) { +        EMIT(store_name, qstr); +    } else if (id->kind == ID_INFO_KIND_GLOBAL_EXPLICIT) { +        EMIT(store_global, qstr); +    } else if (id->kind == ID_INFO_KIND_LOCAL) { +        EMIT(store_fast, qstr, id->local_num); +    } else if (id->kind == ID_INFO_KIND_CELL || id->kind == ID_INFO_KIND_FREE) { +        EMIT(store_deref, qstr); +    } else { +        assert(0); +    } +} + +void emit_common_delete_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr) { +    // create/get the id info +    id_info_t *id = get_id_for_modification(pass, scope, emit, qstr); + +    // call the emit backend with the correct code +    if (id == NULL || id->kind == ID_INFO_KIND_GLOBAL_IMPLICIT) { +        EMIT(delete_name, qstr); +    } else if (id->kind == ID_INFO_KIND_GLOBAL_EXPLICIT) { +        EMIT(delete_global, qstr); +    } else if (id->kind == ID_INFO_KIND_LOCAL) { +        EMIT(delete_fast, qstr, id->local_num); +    } else if (id->kind == ID_INFO_KIND_CELL || id->kind == ID_INFO_KIND_FREE) { +        EMIT(delete_deref, qstr); +    } else { +        assert(0); +    } +} diff --git a/py/emitcpy.c b/py/emitcpy.c new file mode 100644 index 000000000..637abd772 --- /dev/null +++ b/py/emitcpy.c @@ -0,0 +1,834 @@ +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> + +#include "misc.h" +#include "lexer.h" +#include "machine.h" +#include "parse.h" +#include "compile.h" +#include "scope.h" +#include "runtime.h" +#include "emit.h" + +#ifdef EMIT_DO_CPY + +struct _emitter_t { +    int pass; +    int next_label; +    int byte_code_offset; +    int stack_size; +    
bool last_emit_was_return_value; + +    scope_t *scope; + +    int max_num_labels; +    int *label_offsets; +}; + +emitter_t *emit_new() { +    emitter_t *emit = m_new(emitter_t, 1); +    emit->max_num_labels = 0; +    emit->label_offsets = NULL; +    return emit; +} + +void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) { +    emit->pass = pass; +    emit->next_label = 1; +    emit->byte_code_offset = 0; +    emit->stack_size = 0; +    emit->last_emit_was_return_value = false; +    emit->scope = scope; +    if (pass > PASS_1) { +        if (emit->label_offsets == NULL) { +            emit->label_offsets = m_new(int, emit->max_num_labels); +        } +        if (pass == PASS_2) { +            memset(emit->label_offsets, -1, emit->max_num_labels * sizeof(int)); +        } +    } +} + +void emit_end_pass(emitter_t *emit) { +    // check stack is back to zero size +    if (emit->stack_size != 0) { +        printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size); +    } + +    // calculate number of labels need +    if (emit->pass == PASS_1) { +        if (emit->next_label > emit->max_num_labels) { +            emit->max_num_labels = emit->next_label; +        } +    } +} + +bool emit_last_emit_was_return_value(emitter_t *emit) { +    return emit->last_emit_was_return_value; +} + +int emit_get_stack_size(emitter_t *emit) { +    return emit->stack_size; +} + +void emit_set_stack_size(emitter_t *emit, int size) { +    emit->stack_size = size; +} + +static void emit_pre(emitter_t *emit, int stack_size_delta, int byte_code_size) { +    emit->stack_size += stack_size_delta; +    if (emit->pass > PASS_1 && emit->stack_size > emit->scope->stack_size) { +        emit->scope->stack_size = emit->stack_size; +    } +    emit->last_emit_was_return_value = false; +    if (emit->pass == PASS_3 && byte_code_size > 0) { +        if (emit->byte_code_offset >= 1000) { +            printf("%d ", emit->byte_code_offset); +        } else { +            
printf("% 4d ", emit->byte_code_offset); +        } +    } +    emit->byte_code_offset += byte_code_size; +} + +int emit_label_new(emitter_t *emit) { +    return emit->next_label++; +} + +void emit_label_assign(emitter_t *emit, int l) { +    emit_pre(emit, 0, 0); +    if (emit->pass > PASS_1) { +        assert(l < emit->max_num_labels); +        if (emit->pass == PASS_2) { +            // assign label offset +            assert(emit->label_offsets[l] == -1); +            emit->label_offsets[l] = emit->byte_code_offset; +        } else if (emit->pass == PASS_3) { +            // ensure label offset has not changed from PASS_2 to PASS_3 +            assert(emit->label_offsets[l] == emit->byte_code_offset); +            //printf("l%d: (at %d)\n", l, emit->byte_code_offset); +        } +    } +} + +void emit_import_name(emitter_t *emit, qstr qstr) { +    emit_pre(emit, -1, 3); +    if (emit->pass == PASS_3) { +        printf("IMPORT_NAME %s\n", qstr_str(qstr)); +    } +} + +void emit_import_from(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 1, 3); +    if (emit->pass == PASS_3) { +        printf("IMPORT_FROM %s\n", qstr_str(qstr)); +    } +} + +void emit_import_star(emitter_t *emit) { +    emit_pre(emit, -1, 1); +    if (emit->pass == PASS_3) { +        printf("IMPORT_STAR\n"); +    } +} + +void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) { +    emit_pre(emit, 1, 3); +    if (emit->pass == PASS_3) { +        printf("LOAD_CONST "); +        switch (tok) { +            case PY_TOKEN_KW_FALSE: printf("False"); break; +            case PY_TOKEN_KW_NONE: printf("None"); break; +            case PY_TOKEN_KW_TRUE: printf("True"); break; +            default: printf("?=%d\n", tok); return; assert(0); +        } +        printf("\n"); +    } +} + +void emit_load_const_small_int(emitter_t *emit, int arg) { +    emit_pre(emit, 1, 3); +    if (emit->pass == PASS_3) { +        printf("LOAD_CONST %d\n", arg); +    } +} + +void emit_load_const_int(emitter_t *emit, 
qstr qstr) { +    emit_pre(emit, 1, 3); +    if (emit->pass == PASS_3) { +        printf("LOAD_CONST %s\n", qstr_str(qstr)); +    } +} + +void emit_load_const_dec(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 1, 3); +    if (emit->pass == PASS_3) { +        printf("LOAD_CONST %s\n", qstr_str(qstr)); +    } +} + +void emit_load_const_id(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 1, 3); +    if (emit->pass == PASS_3) { +        printf("LOAD_CONST '%s'\n", qstr_str(qstr)); +    } +} + +void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) { +    emit_pre(emit, 1, 3); +    if (emit->pass == PASS_3) { +        printf("LOAD_CONST "); +        emit_load_const_verbatim_quoted_str(emit, qstr, bytes); +        printf("\n"); +    } +} + +void emit_load_const_verbatim_start(emitter_t *emit) { +    emit_pre(emit, 1, 3); +    if (emit->pass == PASS_3) { +        printf("LOAD_CONST "); +    } +} + +void emit_load_const_verbatim_int(emitter_t *emit, int val) { +    if (emit->pass == PASS_3) { +        printf("%d", val); +    } +} + +void emit_load_const_verbatim_str(emitter_t *emit, const char *str) { +    if (emit->pass == PASS_3) { +        printf("%s", str); +    } +} + +void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) { +    if (emit->pass == PASS_3) { +        printf("%.*s", len, str); +    } +} + +void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) { +    // TODO strings should be escaped before we get here +    if (emit->pass == PASS_3) { +        const char *str = qstr_str(qstr); +        int len = strlen(str); +        bool has_single_quote = false; +        bool has_double_quote = false; +        for (int i = 0; i < len; i++) { +            if (str[i] == '\'') { +                has_single_quote = true; +            } else if (str[i] == '"') { +                has_double_quote = true; +            } +        } +        if (bytes) { +            printf("b"); +        } +        bool 
quote_single = false; +        if (has_single_quote && !has_double_quote) { +            printf("\""); +        } else { +            quote_single = true; +            printf("'"); +        } +        for (int i = 0; i < len; i++) { +            if (str[i] == '\n') { +                printf("\\n"); +            } else if (str[i] == '\\' && str[i + 1] == '\'') { +                i += 1; +                if (quote_single) { +                    printf("\\'"); +                } else { +                    printf("'"); +                } +            } else if (str[i] == '\'' && quote_single) { +                printf("\\'"); +            } else { +                printf("%c", str[i]); +            } +        } +        if (has_single_quote && !has_double_quote) { +            printf("\""); +        } else { +            printf("'"); +        } +    } +} + +void emit_load_const_verbatim_end(emitter_t *emit) { +    if (emit->pass == PASS_3) { +        printf("\n"); +    } +} + +void emit_load_name(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 1, 3); +    if (emit->pass == PASS_3) { +        printf("LOAD_NAME %s\n", qstr_str(qstr)); +    } +} + +void emit_load_global(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 1, 3); +    if (emit->pass == PASS_3) { +        printf("LOAD_GLOBAL %s\n", qstr_str(qstr)); +    } +} + +void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) { +    emit_pre(emit, 1, 3); +    if (emit->pass == PASS_3) { +        printf("LOAD_FAST %s\n", qstr_str(qstr)); +    } +} + +void emit_load_deref(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 1, 3); +    if (emit->pass == PASS_3) { +        printf("LOAD_DEREF %s\n", qstr_str(qstr)); +    } +} + +void emit_load_closure(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 1, 3); +    if (emit->pass == PASS_3) { +        printf("LOAD_CLOSURE %s\n", qstr_str(qstr)); +    } +} + +void emit_load_attr(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 0, 3); +    if (emit->pass == PASS_3) { +   
     printf("LOAD_ATTR %s\n", qstr_str(qstr)); +    } +} + +void emit_load_method(emitter_t *emit, qstr qstr) { +    emit_load_attr(emit, qstr); +} + +void emit_load_build_class(emitter_t *emit) { +    emit_pre(emit, 1, 1); +    if (emit->pass == PASS_3) { +        printf("LOAD_BUILD_CLASS\n"); +    } +} + +void emit_store_name(emitter_t *emit, qstr qstr) { +    emit_pre(emit, -1, 3); +    if (emit->pass == PASS_3) { +        printf("STORE_NAME %s\n", qstr_str(qstr)); +    } +} + +void emit_store_global(emitter_t *emit, qstr qstr) { +    emit_pre(emit, -1, 3); +    if (emit->pass == PASS_3) { +        printf("STORE_GLOBAL %s\n", qstr_str(qstr)); +    } +} + +void emit_store_fast(emitter_t *emit, qstr qstr, int local_num) { +    emit_pre(emit, -1, 3); +    if (emit->pass == PASS_3) { +        printf("STORE_FAST %s\n", qstr_str(qstr)); +    } +} + +void emit_store_deref(emitter_t *emit, qstr qstr) { +    emit_pre(emit, -1, 3); +    if (emit->pass == PASS_3) { +        printf("STORE_DEREF %s\n", qstr_str(qstr)); +    } +} + +void emit_store_attr(emitter_t *emit, qstr qstr) { +    emit_pre(emit, -2, 3); +    if (emit->pass == PASS_3) { +        printf("STORE_ATTR %s\n", qstr_str(qstr)); +    } +} + +void emit_store_locals(emitter_t *emit) { +    emit_pre(emit, -1, 1); +    if (emit->pass == PASS_3) { +        printf("STORE_LOCALS\n"); +    } +} + +void emit_store_subscr(emitter_t *emit) { +    emit_pre(emit, -3, 1); +    if (emit->pass == PASS_3) { +        printf("STORE_SUBSCR\n"); +    } +} + +void emit_delete_name(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 0, 3); +    if (emit->pass == PASS_3) { +        printf("DELETE_NAME %s\n", qstr_str(qstr)); +    } +} + +void emit_delete_global(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 0, 3); +    if (emit->pass == PASS_3) { +        printf("DELETE_GLOBAL %s\n", qstr_str(qstr)); +    } +} + +void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) { +    emit_pre(emit, 0, 3); +    if (emit->pass == 
PASS_3) { +        printf("DELETE_FAST %s\n", qstr_str(qstr)); +    } +} + +void emit_delete_deref(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 0, 3); +    if (emit->pass == PASS_3) { +        printf("DELETE_DEREF %s\n", qstr_str(qstr)); +    } +} + +void emit_delete_attr(emitter_t *emit, qstr qstr) { +    emit_pre(emit, -1, 3); +    if (emit->pass == PASS_3) { +        printf("DELETE_ATTR %s\n", qstr_str(qstr)); +    } +} + +void emit_delete_subscr(emitter_t *emit) { +    emit_pre(emit, -2, 1); +    if (emit->pass == PASS_3) { +        printf("DELETE_SUBSCR\n"); +    } +} + +void emit_dup_top(emitter_t *emit) { +    emit_pre(emit, 1, 1); +    if (emit->pass == PASS_3) { +        printf("DUP_TOP\n"); +    } +} + +void emit_dup_top_two(emitter_t *emit) { +    emit_pre(emit, 2, 1); +    if (emit->pass == PASS_3) { +        printf("DUP_TOP_TWO\n"); +    } +} + +void emit_pop_top(emitter_t *emit) { +    emit_pre(emit, -1, 1); +    if (emit->pass == PASS_3) { +        printf("POP_TOP\n"); +    } +} + +void emit_rot_two(emitter_t *emit) { +    emit_pre(emit, 0, 1); +    if (emit->pass == PASS_3) { +        printf("ROT_TWO\n"); +    } +} + +void emit_rot_three(emitter_t *emit) { +    emit_pre(emit, 0, 1); +    if (emit->pass == PASS_3) { +        printf("ROT_THREE\n"); +    } +} + +void emit_jump(emitter_t *emit, int label) { +    emit_pre(emit, 0, 3); +    if (emit->pass == PASS_3) { +        int dest = emit->label_offsets[label]; +        if (dest < emit->byte_code_offset) { +            printf("JUMP_ABSOLUTE %d\n", emit->label_offsets[label]); +        } else { +            printf("JUMP_FORWARD %d\n", emit->label_offsets[label]); +        } +    } +} + +void emit_pop_jump_if_true(emitter_t *emit, int label) { +    emit_pre(emit, -1, 3); +    if (emit->pass == PASS_3) { +        printf("POP_JUMP_IF_TRUE %d\n", emit->label_offsets[label]); +    } +} + +void emit_pop_jump_if_false(emitter_t *emit, int label) { +    emit_pre(emit, -1, 3); +    if (emit->pass == 
PASS_3) { +        printf("POP_JUMP_IF_FALSE %d\n", emit->label_offsets[label]); +    } +} + +void emit_jump_if_true_or_pop(emitter_t *emit, int label) { +    emit_pre(emit, -1, 3); +    if (emit->pass == PASS_3) { +        printf("JUMP_IF_TRUE_OR_POP %d\n", emit->label_offsets[label]); +    } +} + +void emit_jump_if_false_or_pop(emitter_t *emit, int label) { +    emit_pre(emit, -1, 3); +    if (emit->pass == PASS_3) { +        printf("JUMP_IF_FALSE_OR_POP %d\n", emit->label_offsets[label]); +    } +} + +void emit_setup_loop(emitter_t *emit, int label) { +    emit_pre(emit, 0, 3); +    if (emit->pass == PASS_3) { +        printf("SETUP_LOOP %d\n", emit->label_offsets[label]); +    } +} + +void emit_break_loop(emitter_t *emit, int label) { +    emit_pre(emit, 0, 1); +    if (emit->pass == PASS_3) { +        printf("BREAK_LOOP\n"); // CPython doesn't have label +        //printf("BREAK_LOOP %d\n", emit->label_offsets[label]); +    } +} + +void emit_continue_loop(emitter_t *emit, int label) { +    emit_pre(emit, 0, 3); +    if (emit->pass == PASS_3) { +        printf("CONTINUE_LOOP %d\n", emit->label_offsets[label]); +    } +} + +void emit_setup_with(emitter_t *emit, int label) { +    emit_pre(emit, 7, 3); +    if (emit->pass == PASS_3) { +        printf("SETUP_WITH %d\n", emit->label_offsets[label]); +    } +} + +void emit_with_cleanup(emitter_t *emit) { +    emit_pre(emit, -7, 1); +    if (emit->pass == PASS_3) { +        printf("WITH_CLEANUP\n"); +    } +} + +void emit_setup_except(emitter_t *emit, int label) { +    emit_pre(emit, 6, 3); +    if (emit->pass == PASS_3) { +        printf("SETUP_EXCEPT %d\n", emit->label_offsets[label]); +    } +} + +void emit_setup_finally(emitter_t *emit, int label) { +    emit_pre(emit, 6, 3); +    if (emit->pass == PASS_3) { +        printf("SETUP_FINALLY %d\n", emit->label_offsets[label]); +    } +} + +void emit_end_finally(emitter_t *emit) { +    emit_pre(emit, -1, 1); +    if (emit->pass == PASS_3) { +        
printf("END_FINALLY\n"); +    } +} + +void emit_get_iter(emitter_t *emit) { +    emit_pre(emit, 0, 1); +    if (emit->pass == PASS_3) { +        printf("GET_ITER\n"); +    } +} + +void emit_for_iter(emitter_t *emit, int label) { +    emit_pre(emit, 1, 3); +    if (emit->pass == PASS_3) { +        printf("FOR_ITER %d\n", emit->label_offsets[label]); +    } +} + +void emit_for_iter_end(emitter_t *emit) { +    emit_pre(emit, -1, 0); +} + +void emit_pop_block(emitter_t *emit) { +    emit_pre(emit, 0, 1); +    if (emit->pass == PASS_3) { +        printf("POP_BLOCK\n"); +    } +} + +void emit_pop_except(emitter_t *emit) { +    emit_pre(emit, 0, 1); +    if (emit->pass == PASS_3) { +        printf("POP_EXCEPT\n"); +    } +} + +void emit_unary_op(emitter_t *emit, rt_unary_op_t op) { +    emit_pre(emit, 0, 1); +    if (emit->pass == PASS_3) { +        switch (op) { +            case RT_UNARY_OP_NOT: printf("UNARY_NOT\n"); break; +            case RT_UNARY_OP_POSITIVE: printf("UNARY_POSITIVE\n"); break; +            case RT_UNARY_OP_NEGATIVE: printf("UNARY_NEGATIVE\n"); break; +            case RT_UNARY_OP_INVERT: printf("UNARY_INVERT\n"); break; +            default: assert(0); +        } +    } +} + +void emit_binary_op(emitter_t *emit, rt_binary_op_t op) { +    emit_pre(emit, -1, 1); +    if (emit->pass == PASS_3) { +        switch (op) { +            case RT_BINARY_OP_SUBSCR: printf("BINARY_SUBSCR\n"); break; +            case RT_BINARY_OP_OR: printf("BINARY_OR\n"); break; +            case RT_BINARY_OP_XOR: printf("BINARY_XOR\n"); break; +            case RT_BINARY_OP_AND: printf("BINARY_AND\n"); break; +            case RT_BINARY_OP_LSHIFT: printf("BINARY_LSHIFT\n"); break; +            case RT_BINARY_OP_RSHIFT: printf("BINARY_RSHIFT\n"); break; +            case RT_BINARY_OP_ADD: printf("BINARY_ADD\n"); break; +            case RT_BINARY_OP_SUBTRACT: printf("BINARY_SUBTRACT\n"); break; +            case RT_BINARY_OP_MULTIPLY: printf("BINARY_MULTIPLY\n"); break; +      
      case RT_BINARY_OP_FLOOR_DIVIDE: printf("BINARY_FLOOR_DIVIDE\n"); break; +            case RT_BINARY_OP_TRUE_DIVIDE: printf("BINARY_TRUE_DIVIDE\n"); break; +            case RT_BINARY_OP_MODULO: printf("BINARY_MODULO\n"); break; +            case RT_BINARY_OP_POWER: printf("BINARY_POWER\n"); break; +            case RT_BINARY_OP_INPLACE_OR: printf("INPLACE_OR\n"); break; +            case RT_BINARY_OP_INPLACE_XOR: printf("INPLACE_XOR\n"); break; +            case RT_BINARY_OP_INPLACE_AND: printf("INPLACE_AND\n"); break; +            case RT_BINARY_OP_INPLACE_LSHIFT: printf("INPLACE_LSHIFT\n"); break; +            case RT_BINARY_OP_INPLACE_RSHIFT: printf("INPLACE_RSHIFT\n"); break; +            case RT_BINARY_OP_INPLACE_ADD: printf("INPLACE_ADD\n"); break; +            case RT_BINARY_OP_INPLACE_SUBTRACT: printf("INPLACE_SUBTRACT\n"); break; +            case RT_BINARY_OP_INPLACE_MULTIPLY: printf("INPLACE_MULTIPLY\n"); break; +            case RT_BINARY_OP_INPLACE_FLOOR_DIVIDE: printf("INPLACE_FLOOR_DIVIDE\n"); break; +            case RT_BINARY_OP_INPLACE_TRUE_DIVIDE: printf("INPLACE_TRUE_DIVIDE\n"); break; +            case RT_BINARY_OP_INPLACE_MODULO: printf("INPLACE_MODULO\n"); break; +            case RT_BINARY_OP_INPLACE_POWER: printf("INPLACE_POWER\n"); break; +            default: assert(0); +        } +    } +} + +void emit_compare_op(emitter_t *emit, rt_compare_op_t op) { +    emit_pre(emit, -1, 3); +    if (emit->pass == PASS_3) { +        switch (op) { +            case RT_COMPARE_OP_LESS: printf("COMPARE_OP <\n"); break; +            case RT_COMPARE_OP_MORE: printf("COMPARE_OP >\n"); break; +            case RT_COMPARE_OP_EQUAL: printf("COMPARE_OP ==\n"); break; +            case RT_COMPARE_OP_LESS_EQUAL: printf("COMPARE_OP <=\n"); break; +            case RT_COMPARE_OP_MORE_EQUAL: printf("COMPARE_OP >=\n"); break; +            case RT_COMPARE_OP_NOT_EQUAL: printf("COMPARE_OP !=\n"); break; +            case RT_COMPARE_OP_IN: printf("COMPARE_OP 
in\n"); break; +            case RT_COMPARE_OP_NOT_IN: printf("COMPARE_OP not in\n"); break; +            case RT_COMPARE_OP_IS: printf("COMPARE_OP is\n"); break; +            case RT_COMPARE_OP_IS_NOT: printf("COMPARE_OP is not\n"); break; +            case RT_COMPARE_OP_EXCEPTION_MATCH: printf("COMPARE_OP exception match\n"); break; +            default: assert(0); +        } +    } +} + +void emit_build_tuple(emitter_t *emit, int n_args) { +    emit_pre(emit, 1 - n_args, 3); +    if (emit->pass == PASS_3) { +        printf("BUILD_TUPLE %d\n", n_args); +    } +} + +void emit_build_list(emitter_t *emit, int n_args) { +    emit_pre(emit, 1 - n_args, 3); +    if (emit->pass == PASS_3) { +        printf("BUILD_LIST %d\n", n_args); +    } +} + +void emit_list_append(emitter_t *emit, int list_index) { +    emit_pre(emit, -1, 3); +    if (emit->pass == PASS_3) { +        printf("LIST_APPEND %d\n", list_index); +    } +} + +void emit_build_map(emitter_t *emit, int n_args) { +    emit_pre(emit, 1, 3); +    if (emit->pass == PASS_3) { +        printf("BUILD_MAP %d\n", n_args); +    } +} + +void emit_store_map(emitter_t *emit) { +    emit_pre(emit, -2, 1); +    if (emit->pass == PASS_3) { +        printf("STORE_MAP\n"); +    } +} + +void emit_map_add(emitter_t *emit, int map_index) { +    emit_pre(emit, -2, 3); +    if (emit->pass == PASS_3) { +        printf("MAP_ADD %d\n", map_index); +    } +} + +void emit_build_set(emitter_t *emit, int n_args) { +    emit_pre(emit, 1 - n_args, 3); +    if (emit->pass == PASS_3) { +        printf("BUILD_SET %d\n", n_args); +    } +} + +void emit_set_add(emitter_t *emit, int set_index) { +    emit_pre(emit, -1, 3); +    if (emit->pass == PASS_3) { +        printf("SET_ADD %d\n", set_index); +    } +} + +void emit_build_slice(emitter_t *emit, int n_args) { +    emit_pre(emit, 1 - n_args, 3); +    if (emit->pass == PASS_3) { +        printf("BUILD_SLICE %d\n", n_args); +    } +} + +void emit_unpack_sequence(emitter_t *emit, int n_args) { +  
  emit_pre(emit, -1 + n_args, 3); +    if (emit->pass == PASS_3) { +        printf("UNPACK_SEQUENCE %d\n", n_args); +    } +} + +void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) { +    emit_pre(emit, -1 + n_left + n_right + 1, 3); +    if (emit->pass == PASS_3) { +        printf("UNPACK_EX %d\n", n_left | (n_right << 8)); +    } +} + +void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { +    int s = 0; +    if (have_star_arg) { +        s += 1; +    } +    if (have_dbl_star_arg) { +        s += 1; +    } +    emit_pre(emit, -n_positional - 2 * n_keyword - s, 3); +    if (emit->pass == PASS_3) { +        if (have_star_arg) { +            if (have_dbl_star_arg) { +                printf("CALL_FUNCTION_VAR_KW"); +            } else { +                printf("CALL_FUNCTION_VAR"); +            } +        } else { +            if (have_dbl_star_arg) { +                printf("CALL_FUNCTION_KW"); +            } else { +                printf("CALL_FUNCTION"); +            } +        } +        printf(" %d, %d\n", n_positional, n_keyword); +    } +} + +void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { +    emit_call_function(emit, n_positional, n_keyword, have_star_arg, have_dbl_star_arg); +} + +void emit_return_value(emitter_t *emit) { +    emit_pre(emit, -1, 1); +    emit->last_emit_was_return_value = true; +    if (emit->pass == PASS_3) { +        printf("RETURN_VALUE\n"); +    } +} + +void emit_raise_varargs(emitter_t *emit, int n_args) { +    emit_pre(emit, -n_args, 3); +    if (emit->pass == PASS_3) { +        printf("RAISE_VARARGS %d\n", n_args); +    } +} + +void load_const_code_and_name(emitter_t *emit, qstr qstr) { +    emit_pre(emit, 1, 3); +    if (emit->pass == PASS_3) { +        printf("LOAD_CONST code %s\n", qstr_str(qstr)); +    } +    // load qualified name +    emit_pre(emit, 1, 3); +    if (emit->pass == 
PASS_3) { +        printf("LOAD_CONST '"); +        // code just to work out the qualname (or whatever it is) +        { +            int depth = 0; +            for (scope_t *s = emit->scope; s->parent != NULL; s = s->parent) { +                depth += 1; +            } +            for (int wanted_depth = depth; wanted_depth >= 0; wanted_depth--) { +                scope_t *s = emit->scope; +                for (int i = 0; i < wanted_depth; i++) { +                    s = s->parent; +                } +                if (s->kind == SCOPE_FUNCTION) { +                    printf("%s.<locals>.", qstr_str(s->simple_name)); +                } else if (s->kind == SCOPE_CLASS) { +                    printf("%s.", qstr_str(s->simple_name)); +                } +            } +        } +        printf("%s'\n", qstr_str(qstr)); +    } +} + +void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { +    load_const_code_and_name(emit, scope->simple_name); +    emit_pre(emit, -1 - n_default_params - 2 * n_dict_params, 3); +    if (emit->pass == PASS_3) { +        printf("MAKE_FUNCTION %d\n", (n_dict_params << 8) | n_default_params); +    } +} + +void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { +    load_const_code_and_name(emit, scope->simple_name); +    emit_pre(emit, -2 - n_default_params - 2 * n_dict_params, 3); +    if (emit->pass == PASS_3) { +        printf("MAKE_CLOSURE %d\n", (n_dict_params << 8) | n_default_params); +    } +} + +void emit_yield_value(emitter_t *emit) { +    emit_pre(emit, 0, 1); +    if (emit->pass == PASS_2) { +        emit->scope->flags |= SCOPE_FLAG_GENERATOR; +    } +    if (emit->pass == PASS_3) { +        printf("YIELD_VALUE\n"); +    } +} + +void emit_yield_from(emitter_t *emit) { +    emit_pre(emit, -1, 1); +    if (emit->pass == PASS_2) { +        emit->scope->flags |= SCOPE_FLAG_GENERATOR; +    } +    if (emit->pass == PASS_3) { +        
printf("YIELD_FROM\n"); +    } +} + +#endif // EMIT_DO_CPY diff --git a/py/emitthumb.c b/py/emitthumb.c new file mode 100644 index 000000000..cad6b6504 --- /dev/null +++ b/py/emitthumb.c @@ -0,0 +1,673 @@ +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> + +#include "misc.h" +#include "lexer.h" +#include "machine.h" +#include "parse.h" +#include "scope.h" +#include "runtime.h" +#include "emit.h" +#include "asmthumb.h" + +#ifdef EMIT_DO_THUMB + +#define REG_LOCAL_1 (REG_R4) +#define REG_LOCAL_2 (REG_R5) +#define REG_LOCAL_3 (REG_R6) +#define REG_TEMP    (REG_R7) +#define REG_LOCAL_NUM (3) + +typedef enum { +    NEED_TO_PUSH_NOTHING, +    NEED_TO_PUSH_REG, +    NEED_TO_PUSH_I32, +} need_to_push_t; + +struct _emitter_t { +    int pass; +    int stack_start; +    int stack_size; +    bool last_emit_was_return_value; +    need_to_push_t need_to_push; +    int last_reg; +    int32_t last_i32; + +    scope_t *scope; + +    asm_thumb_t *as; +    bool do_native_types; +}; + +emitter_t *emit_new() { +    emitter_t *emit = m_new(emitter_t, 1); +    emit->as = asm_thumb_new(); +    emit->do_native_types = true; +    return emit; +} + +void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) { +    emit->pass = pass; +    emit->stack_start = 0; +    emit->stack_size = 0; +    emit->last_emit_was_return_value = false; +    emit->need_to_push = NEED_TO_PUSH_NOTHING; +    emit->scope = scope; +    if (pass == PASS_1) { +        scope->unique_code_id = rt_get_new_unique_code_id(); +    } + +    asm_thumb_start_pass(emit->as, pass); + +    // entry to function +    int num_locals = 0; +    if (pass > PASS_1) { +        num_locals = scope->num_locals - REG_LOCAL_NUM; +        if (num_locals < 0) { +            num_locals = 0; +        } +        emit->stack_start = num_locals; +        num_locals += scope->stack_size; +    } +    asm_thumb_entry(emit->as, num_locals); + +    // initialise locals 
from parameters +    for (int i = 0; i < scope->num_params; i++) { +        if (i == 0) { +            asm_thumb_mov_reg_reg(emit->as, REG_LOCAL_1, REG_ARG_1); +        } else if (i == 1) { +            asm_thumb_mov_reg_reg(emit->as, REG_LOCAL_2, REG_ARG_2); +        } else if (i == 2) { +            asm_thumb_mov_reg_reg(emit->as, REG_LOCAL_3, REG_ARG_3); +        } else if (i == 3) { +            asm_thumb_mov_local_reg(emit->as, i - REG_LOCAL_NUM, REG_ARG_4); +        } else { +            // TODO not implemented +            assert(0); +        } +    } + +    asm_thumb_mov_reg_i32(emit->as, REG_R7, (machine_uint_t)rt_fun_table); +} + +void emit_end_pass(emitter_t *emit) { +    if (!emit->last_emit_was_return_value) { +        asm_thumb_exit(emit->as); +    } +    asm_thumb_end_pass(emit->as); + +    // check stack is back to zero size +    if (emit->stack_size != 0) { +        printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size); +    } + +    if (emit->pass == PASS_3) { +        py_fun_t f = asm_thumb_get_code(emit->as); +        rt_assign_native_code(emit->scope->unique_code_id, f, asm_thumb_get_code_size(emit->as), emit->scope->num_params); +    } +} + +bool emit_last_emit_was_return_value(emitter_t *emit) { +    return emit->last_emit_was_return_value; +} + +int emit_get_stack_size(emitter_t *emit) { +    return emit->stack_size; +} + +void emit_set_stack_size(emitter_t *emit, int size) { +    emit->stack_size = size; +} + +static void adjust_stack(emitter_t *emit, int stack_size_delta) { +    emit->stack_size += stack_size_delta; +    assert(emit->stack_size >= 0); +    if (emit->pass > PASS_1 && emit->stack_size > emit->scope->stack_size) { +        emit->scope->stack_size = emit->stack_size; +    } +} + +static void stack_settle(emitter_t *emit) { +    switch (emit->need_to_push) { +        case NEED_TO_PUSH_NOTHING: +            break; + +        case NEED_TO_PUSH_REG: +            asm_thumb_mov_local_reg(emit->as, 
emit->stack_start + emit->stack_size, emit->last_reg); +            adjust_stack(emit, 1); +            break; + +        case NEED_TO_PUSH_I32: +            asm_thumb_mov_reg_i32_optimised(emit->as, REG_R0, emit->last_i32); +            asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size, REG_R0); +            adjust_stack(emit, 1); +            break; +    } +    emit->need_to_push = NEED_TO_PUSH_NOTHING; +} + +static void emit_pre_raw(emitter_t *emit, int stack_size_delta) { +    adjust_stack(emit, stack_size_delta); +    emit->last_emit_was_return_value = false; +} + +static void emit_pre(emitter_t *emit) { +    stack_settle(emit); +    emit_pre_raw(emit, 0); +} + +static void emit_pre_pop_reg(emitter_t *emit, int reg_dest) { +    switch (emit->need_to_push) { +        case NEED_TO_PUSH_NOTHING: +            asm_thumb_mov_reg_local(emit->as, reg_dest, emit->stack_start + emit->stack_size - 1); +            emit_pre_raw(emit, -1); +            break; + +        case NEED_TO_PUSH_REG: +            emit_pre_raw(emit, 0); +            if (emit->last_reg != reg_dest) { +                asm_thumb_mov_reg_reg(emit->as, reg_dest, emit->last_reg); +            } +            break; + +        case NEED_TO_PUSH_I32: +            emit_pre_raw(emit, 0); +            asm_thumb_mov_reg_i32_optimised(emit->as, reg_dest, emit->last_i32); +            break; +    } +    emit->need_to_push = NEED_TO_PUSH_NOTHING; +} + +static void emit_pre_pop_reg_reg(emitter_t *emit, int rega, int regb) { +    emit_pre_pop_reg(emit, rega); +    asm_thumb_mov_reg_local(emit->as, regb, emit->stack_start + emit->stack_size - 1); +    adjust_stack(emit, -1); +} + +static void emit_pre_pop_reg_reg_reg(emitter_t *emit, int rega, int regb, int regc) { +    emit_pre_pop_reg(emit, rega); +    asm_thumb_mov_reg_local(emit->as, regb, emit->stack_start + emit->stack_size - 1); +    asm_thumb_mov_reg_local(emit->as, regc, emit->stack_start + emit->stack_size - 2); +    adjust_stack(emit, 
-2); +} + +static void emit_post(emitter_t *emit) { +} + +static void emit_post_push_reg(emitter_t *emit, int reg) { +    emit->need_to_push = NEED_TO_PUSH_REG; +    emit->last_reg = reg; +} + +static void emit_post_push_i32(emitter_t *emit, int32_t i32) { +    emit->need_to_push = NEED_TO_PUSH_I32; +    emit->last_i32 = i32; +} + +static void emit_post_push_reg_reg(emitter_t *emit, int rega, int regb) { +    asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size, rega); +    emit->need_to_push = NEED_TO_PUSH_REG; +    emit->last_reg = regb; +    adjust_stack(emit, 1); +} + +static void emit_post_push_reg_reg_reg(emitter_t *emit, int rega, int regb, int regc) { +    asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size, rega); +    asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size + 1, regb); +    asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size + 2, regc); +    adjust_stack(emit, 3); +} + +static void emit_post_push_reg_reg_reg_reg(emitter_t *emit, int rega, int regb, int regc, int regd) { +    asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size, rega); +    asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size + 1, regb); +    asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size + 2, regc); +    asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size + 3, regd); +    adjust_stack(emit, 4); +} + +static void emit_get_stack_pointer_to_reg_for_pop(emitter_t *emit, int reg_dest, int n_pop) { +    asm_thumb_mov_reg_local_addr(emit->as, reg_dest, emit->stack_start + emit->stack_size - 1); +    adjust_stack(emit, -n_pop); +} + +static void emit_get_stack_pointer_to_reg_for_push(emitter_t *emit, int reg_dest, int n_push) { +    asm_thumb_mov_reg_local_addr(emit->as, reg_dest, emit->stack_start + emit->stack_size + n_push - 1); +    adjust_stack(emit, n_push); +} + +static void emit_call(emitter_t *emit, rt_fun_kind_t fun_kind) { 
+    asm_thumb_bl_ind(emit->as, rt_fun_table[fun_kind], fun_kind, REG_R3); +} + +static void emit_call_with_i32_arg(emitter_t *emit, rt_fun_kind_t fun_kind, int32_t arg_val, int arg_reg) { +    asm_thumb_mov_reg_i32_optimised(emit->as, arg_reg, arg_val); +    asm_thumb_bl_ind(emit->as, rt_fun_table[fun_kind], fun_kind, REG_R3); +} + +int emit_label_new(emitter_t *emit) { +    return asm_thumb_label_new(emit->as); +} + +void emit_label_assign(emitter_t *emit, int l) { +    asm_thumb_label_assign(emit->as, l); +} + +void emit_import_name(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_import_from(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_import_star(emitter_t *emit) { +    assert(0); +} + +void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) { +    emit_pre(emit); +    py_obj_t o; +    switch (tok) { +        case PY_TOKEN_KW_NONE: o = py_const_none; break; +        case PY_TOKEN_KW_FALSE: o = py_const_false; break; +        case PY_TOKEN_KW_TRUE: o = py_const_true; break; +        default: assert(0); o = 0; // shouldn't happen +    } +    emit_post_push_i32(emit, (machine_uint_t)o); +} + +void emit_load_const_small_int(emitter_t *emit, int arg) { +    emit_pre(emit); +    if (emit->do_native_types) { +        emit_post_push_i32(emit, arg); +    } else { +        emit_post_push_i32(emit, (arg << 1) | 1); +    } +} + +void emit_load_const_int(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_load_const_dec(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_load_const_id(emitter_t *emit, qstr qstr) { +    assert(0); +} + +void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) { +    emit_pre(emit); +    emit_call_with_i32_arg(emit, RT_F_LOAD_CONST_STR, qstr, REG_ARG_1); +    emit_post_push_reg(emit, REG_RET); +} + +void emit_load_const_verbatim_start(emitter_t *emit) { +    assert(0); +} +void emit_load_const_verbatim_int(emitter_t *emit, int val) { +    assert(0); +} +void 
emit_load_const_verbatim_str(emitter_t *emit, const char *str) { +    assert(0); +} +void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) { +    assert(0); +} +void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) { +    assert(0); +} +void emit_load_const_verbatim_end(emitter_t *emit) { +    assert(0); +} + +void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) { +    emit_pre(emit); +    if (local_num == 0) { +        emit_post_push_reg(emit, REG_LOCAL_1); +    } else if (local_num == 1) { +        emit_post_push_reg(emit, REG_LOCAL_2); +    } else if (local_num == 2) { +        emit_post_push_reg(emit, REG_LOCAL_3); +    } else { +        asm_thumb_mov_reg_local(emit->as, REG_R0, local_num - 1); +        emit_post_push_reg(emit, REG_R0); +    } +} + +void emit_load_name(emitter_t *emit, qstr qstr) { +    emit_pre(emit); +    emit_call_with_i32_arg(emit, RT_F_LOAD_NAME, qstr, REG_ARG_1); +    emit_post_push_reg(emit, REG_RET); +} + +void emit_load_global(emitter_t *emit, qstr qstr) { +    emit_pre(emit); +    emit_call_with_i32_arg(emit, RT_F_LOAD_GLOBAL, qstr, REG_ARG_1); +    emit_post_push_reg(emit, REG_RET); +} + +void emit_load_deref(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_load_closure(emitter_t *emit, qstr qstr) { +    assert(0); +} + +void emit_load_attr(emitter_t *emit, qstr qstr) { +    emit_pre_pop_reg(emit, REG_ARG_1); // arg1 = base +    emit_call_with_i32_arg(emit, RT_F_LOAD_ATTR, qstr, REG_ARG_2); // arg2 = attribute name +    emit_post_push_reg(emit, REG_RET); +} + +void emit_load_method(emitter_t *emit, qstr qstr) { +    emit_pre_pop_reg(emit, REG_ARG_1); // arg1 = base +    emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_3, 2); // arg3 = dest ptr +    emit_call_with_i32_arg(emit, RT_F_LOAD_METHOD, qstr, REG_ARG_2); // arg2 = method name +} + +void emit_load_build_class(emitter_t *emit) { +   assert(0); +} // basically load __build_class__ from builtins + +void 
emit_store_fast(emitter_t *emit, qstr qstr, int local_num) { +    if (local_num == 0) { +        emit_pre_pop_reg(emit, REG_LOCAL_1); +    } else if (local_num == 1) { +        emit_pre_pop_reg(emit, REG_LOCAL_2); +    } else if (local_num == 2) { +        emit_pre_pop_reg(emit, REG_LOCAL_3); +    } else { +        emit_pre_pop_reg(emit, REG_R0); +        asm_thumb_mov_local_reg(emit->as, local_num - 1, REG_R0); +    } +    emit_post(emit); +} + +void emit_store_name(emitter_t *emit, qstr qstr) { +    emit_pre_pop_reg(emit, REG_ARG_2); +    emit_call_with_i32_arg(emit, RT_F_STORE_NAME, qstr, REG_ARG_1); // arg1 = name +    emit_post(emit); +} + +void emit_store_global(emitter_t *emit, qstr qstr) { +    assert(0); +} + +void emit_store_deref(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_store_attr(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_store_locals(emitter_t *emit) { +    assert(0); +} + +void emit_store_subscr(emitter_t *emit) { +    emit_pre_pop_reg_reg_reg(emit, REG_ARG_2, REG_ARG_1, REG_ARG_3); // index, base, value to store +    emit_call(emit, RT_F_STORE_SUBSCR); +} + +void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) { +    assert(0); +} +void emit_delete_name(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_delete_global(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_delete_deref(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_delete_attr(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_delete_subscr(emitter_t *emit) { +    assert(0); +} + +void emit_dup_top(emitter_t *emit) { +    emit_pre_pop_reg(emit, REG_R0); +    emit_post_push_reg_reg(emit, REG_R0, REG_R0); +} + +void emit_dup_top_two(emitter_t *emit) { +    emit_pre_pop_reg_reg(emit, REG_R0, REG_R1); +    emit_post_push_reg_reg_reg_reg(emit, REG_R1, REG_R0, REG_R1, REG_R0); +} + +void emit_pop_top(emitter_t *emit) { +    emit_pre_pop_reg(emit, REG_R0); +    emit_post(emit); +} + +void 
emit_rot_two(emitter_t *emit) { +    assert(0); +} + +void emit_rot_three(emitter_t *emit) { +    emit_pre_pop_reg_reg_reg(emit, REG_R0, REG_R1, REG_R2); +    emit_post_push_reg_reg_reg(emit, REG_R0, REG_R2, REG_R1); +} + +void emit_jump(emitter_t *emit, int label) { +    emit_pre(emit); +    asm_thumb_b_label(emit->as, label); +    emit_post(emit); +} + +void emit_pop_jump_if_false(emitter_t *emit, int label) { +    if (emit->do_native_types) { +        emit_pre_pop_reg(emit, REG_RET); +        asm_thumb_cmp_reg_bz_label(emit->as, REG_RET, label); +        emit_post(emit); +    } else { +        emit_pre_pop_reg(emit, REG_ARG_1); +        emit_call(emit, RT_F_IS_TRUE); +        asm_thumb_cmp_reg_bz_label(emit->as, REG_RET, label); +        emit_post(emit); +    } +} + +void emit_pop_jump_if_true(emitter_t *emit, int label) { +    assert(0); +} +void emit_jump_if_true_or_pop(emitter_t *emit, int label) { +    assert(0); +} +void emit_jump_if_false_or_pop(emitter_t *emit, int label) { +    assert(0); +} + +void emit_setup_loop(emitter_t *emit, int label) { +    emit_pre(emit); +    emit_post(emit); +} + +void emit_break_loop(emitter_t *emit, int label) { +    assert(0); +} +void emit_continue_loop(emitter_t *emit, int label) { +    assert(0); +} +void emit_setup_with(emitter_t *emit, int label) { +    assert(0); +} +void emit_with_cleanup(emitter_t *emit) { +    assert(0); +} +void emit_setup_except(emitter_t *emit, int label) { +    assert(0); +} +void emit_setup_finally(emitter_t *emit, int label) { +    assert(0); +} +void emit_end_finally(emitter_t *emit) { +    assert(0); +} +void emit_get_iter(emitter_t *emit) { +    assert(0); +} // tos = getiter(tos) +void emit_for_iter(emitter_t *emit, int label) { +    assert(0); +} +void emit_for_iter_end(emitter_t *emit) { +    assert(0); +} +void emit_pop_except(emitter_t *emit) { +    assert(0); +} + +void emit_unary_op(emitter_t *emit, rt_unary_op_t op) { +    emit_pre_pop_reg(emit, REG_ARG_2); +    
emit_call_with_i32_arg(emit, RT_F_UNARY_OP, op, REG_ARG_1); +    emit_post_push_reg(emit, REG_RET); +} + +void emit_build_tuple(emitter_t *emit, int n_args) { +    assert(0); +} + +void emit_build_list(emitter_t *emit, int n_args) { +    emit_pre(emit); +    emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_2, n_args); // pointer to items in reverse order +    emit_call_with_i32_arg(emit, RT_F_BUILD_LIST, n_args, REG_ARG_1); +    emit_post_push_reg(emit, REG_RET); // new list +} + +void emit_list_append(emitter_t *emit, int list_index) { +    assert(0); +} + +void emit_build_map(emitter_t *emit, int n_args) { +    emit_pre(emit); +    emit_call_with_i32_arg(emit, RT_F_BUILD_MAP, n_args, REG_ARG_1); +    emit_post_push_reg(emit, REG_RET); // new map +} + +void emit_store_map(emitter_t *emit) { +    emit_pre_pop_reg_reg_reg(emit, REG_ARG_2, REG_ARG_3, REG_ARG_1); // key, value, map +    emit_call(emit, RT_F_STORE_MAP); +    emit_post_push_reg(emit, REG_RET); // map +} + +void emit_map_add(emitter_t *emit, int map_index) { +    assert(0); +} + +void emit_build_set(emitter_t *emit, int n_args) { +    emit_pre(emit); +    emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_2, n_args); // pointer to items in reverse order +    emit_call_with_i32_arg(emit, RT_F_BUILD_SET, n_args, REG_ARG_1); +    emit_post_push_reg(emit, REG_RET); // new set +} + +void emit_set_add(emitter_t *emit, int set_index) { +    assert(0); +} +void emit_build_slice(emitter_t *emit, int n_args) { +    assert(0); +} +void emit_unpack_sequence(emitter_t *emit, int n_args) { +    assert(0); +} +void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) { +    assert(0); +} + +void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { +    assert(n_default_params == 0 && n_dict_params == 0); +    emit_pre(emit); +    emit_call_with_i32_arg(emit, RT_F_MAKE_FUNCTION_FROM_ID, scope->unique_code_id, REG_ARG_1); +    emit_post_push_reg(emit, REG_RET); +} + +void 
emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { +    assert(0); +} + +void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { +    assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg); +    if (n_positional == 0) { +        emit_pre_pop_reg(emit, REG_ARG_1); // the function +        emit_call(emit, RT_F_CALL_FUNCTION_0); +    } else if (n_positional == 1) { +        emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1); // the single argument, the function +        emit_call(emit, RT_F_CALL_FUNCTION_1); +    } else if (n_positional == 2) { +        emit_pre_pop_reg_reg_reg(emit, REG_ARG_3, REG_ARG_2, REG_ARG_1); // the second argument, the first argument, the function +        emit_call(emit, RT_F_CALL_FUNCTION_2); +    } else { +        assert(0); +    } +    emit_post_push_reg(emit, REG_RET); +} + +void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { +    assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg); +    if (n_positional == 0) { +        emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1); // the self object (or NULL), the method +        emit_call(emit, RT_F_CALL_METHOD_1); +    } else if (n_positional == 1) { +        emit_pre_pop_reg_reg_reg(emit, REG_ARG_3, REG_ARG_2, REG_ARG_1); // the first argument, the self object (or NULL), the method +        emit_call(emit, RT_F_CALL_METHOD_2); +    } else { +        assert(0); +    } +    emit_post_push_reg(emit, REG_RET); +} + +void emit_pop_block(emitter_t *emit) { +    emit_pre(emit); +    emit_post(emit); +} + +void emit_binary_op(emitter_t *emit, rt_binary_op_t op) { +    if (emit->do_native_types) { +        emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1); +        asm_thumb_add_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, REG_ARG_2); +        emit_post_push_reg(emit, REG_RET); +    } else { +        
emit_pre_pop_reg_reg(emit, REG_ARG_3, REG_ARG_2); +        emit_call_with_i32_arg(emit, RT_F_BINARY_OP, op, REG_ARG_1); +        emit_post_push_reg(emit, REG_RET); +    } +} + +void emit_compare_op(emitter_t *emit, rt_compare_op_t op) { +    if (emit->do_native_types) { +        emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1); +        asm_thumb_cmp_reg_reg(emit->as, REG_ARG_1, REG_ARG_2); +        asm_thumb_ite_ge(emit->as); +        asm_thumb_mov_reg_i8(emit->as, REG_RET, 0); // if r0 >= r1 +        asm_thumb_mov_reg_i8(emit->as, REG_RET, 1); // if r0 < r1 +        emit_post_push_reg(emit, REG_RET); +    } else { +        emit_pre_pop_reg_reg(emit, REG_ARG_3, REG_ARG_2); +        emit_call_with_i32_arg(emit, RT_F_COMPARE_OP, op, REG_ARG_1); +        emit_post_push_reg(emit, REG_RET); +    } +} + +void emit_return_value(emitter_t *emit) { +    emit_pre_pop_reg(emit, REG_RET); +    emit->last_emit_was_return_value = true; +    //asm_thumb_call_ind(emit->as, 0, REG_R0); to seg fault for debugging with gdb +    asm_thumb_exit(emit->as); +} + +void emit_raise_varargs(emitter_t *emit, int n_args) { +    assert(0); +} +void emit_yield_value(emitter_t *emit) { +    assert(0); +} +void emit_yield_from(emitter_t *emit) { +    assert(0); +} + +#endif // EMIT_DO_THUMB diff --git a/py/emitx64.c b/py/emitx64.c new file mode 100644 index 000000000..da4c7e333 --- /dev/null +++ b/py/emitx64.c @@ -0,0 +1,680 @@ +/* This code is equivalent to emitx64.c but pre-allocates stack + * space and uses mov instead of push/pop instructions to access + * the temporary stack.  It runs in similar time, but uses 3*n + * more bytes, where n is number of push/pop instructions. + * + * This code is preferred because it keeps the stack aligned on a + * 16 byte boundary. + * + * Improvements: + *  Doesn't call stub functions, does all the work inline. + *  Has optimisations for loading i64s to stack. 
+ */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> + +#include "misc.h" +#include "lexer.h" +#include "machine.h" +#include "parse.h" +#include "scope.h" +#include "runtime.h" +#include "emit.h" +#include "asmx64.h" + +#ifdef EMIT_DO_X64 + +#define REG_LOCAL_1 (REG_RBX) +#define REG_LOCAL_NUM (1) + +typedef enum { +    NEED_TO_PUSH_NOTHING, +    NEED_TO_PUSH_R64, +    NEED_TO_PUSH_I64, +} need_to_push_t; + +struct _emitter_t { +    int pass; +    int stack_start; +    int stack_size; +    bool last_emit_was_return_value; +    need_to_push_t need_to_push; +    int last_r64; +    int64_t last_i64; + +    scope_t *scope; + +    asm_x64_t *as; +    bool do_native_types; +}; + +emitter_t *emit_new() { +    emitter_t *emit = m_new(emitter_t, 1); +    emit->as = asm_x64_new(); +    emit->do_native_types = false; +    return emit; +} + +void emit_set_native_types(emitter_t *emit, bool do_native_types) { +    emit->do_native_types = do_native_types; +} + +void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) { +    emit->pass = pass; +    emit->stack_start = 0; +    emit->stack_size = 0; +    emit->last_emit_was_return_value = false; +    emit->need_to_push = NEED_TO_PUSH_NOTHING; +    emit->scope = scope; +    if (pass == PASS_1) { +        scope->unique_code_id = rt_get_new_unique_code_id(); +    } + +    asm_x64_start_pass(emit->as, pass); + +    // entry to function +    int num_locals = 0; +    if (pass > PASS_1) { +        num_locals = scope->num_locals - REG_LOCAL_NUM; +        if (num_locals < 0) { +            num_locals = 0; +        } +        emit->stack_start = num_locals; +        num_locals += scope->stack_size; +    } +    asm_x64_entry(emit->as, num_locals); + +    // initialise locals from parameters +    for (int i = 0; i < scope->num_params; i++) { +        if (i == 0) { +            asm_x64_mov_r64_to_r64(emit->as, REG_ARG_1, REG_LOCAL_1); +        } else 
if (i == 1) { +            asm_x64_mov_r64_to_local(emit->as, REG_ARG_2, i - 1); +        } else if (i == 2) { +            asm_x64_mov_r64_to_local(emit->as, REG_ARG_3, i - 1); +        } else { +            // TODO not implemented +            assert(0); +        } +    } +} + +void emit_end_pass(emitter_t *emit) { +    if (!emit->last_emit_was_return_value) { +        asm_x64_exit(emit->as); +    } +    asm_x64_end_pass(emit->as); + +    // check stack is back to zero size +    if (emit->stack_size != 0) { +        printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size); +    } + +    if (emit->pass == PASS_3) { +        py_fun_t f = asm_x64_get_code(emit->as); +        rt_assign_native_code(emit->scope->unique_code_id, f, asm_x64_get_code_size(emit->as), emit->scope->num_params); +    } +} + +bool emit_last_emit_was_return_value(emitter_t *emit) { +    return emit->last_emit_was_return_value; +} + +int emit_get_stack_size(emitter_t *emit) { +    return emit->stack_size; +} + +void emit_set_stack_size(emitter_t *emit, int size) { +    emit->stack_size = size; +} + +static void adjust_stack(emitter_t *emit, int stack_size_delta) { +    emit->stack_size += stack_size_delta; +    assert(emit->stack_size >= 0); +    if (emit->pass > PASS_1 && emit->stack_size > emit->scope->stack_size) { +        emit->scope->stack_size = emit->stack_size; +    } +} + +static void stack_settle(emitter_t *emit) { +    switch (emit->need_to_push) { +        case NEED_TO_PUSH_NOTHING: +            break; + +        case NEED_TO_PUSH_R64: +            asm_x64_mov_r64_to_local(emit->as, emit->last_r64, emit->stack_start + emit->stack_size); +            adjust_stack(emit, 1); +            break; + +        case NEED_TO_PUSH_I64: +            asm_x64_mov_i64_to_r64_optimised(emit->as, emit->last_i64, REG_RAX); +            asm_x64_mov_r64_to_local(emit->as, REG_RAX, emit->stack_start + emit->stack_size); +            adjust_stack(emit, 1); +            break; +    } +    
emit->need_to_push = NEED_TO_PUSH_NOTHING; +} + +static void emit_pre_raw(emitter_t *emit, int stack_size_delta) { +    adjust_stack(emit, stack_size_delta); +    emit->last_emit_was_return_value = false; +} + +static void emit_pre(emitter_t *emit) { +    stack_settle(emit); +    emit_pre_raw(emit, 0); +} + +static void emit_pre_pop_r64(emitter_t *emit, int r64) { +    switch (emit->need_to_push) { +        case NEED_TO_PUSH_NOTHING: +            asm_x64_mov_local_to_r64(emit->as, emit->stack_start + emit->stack_size - 1, r64); +            emit_pre_raw(emit, -1); +            break; + +        case NEED_TO_PUSH_R64: +            emit_pre_raw(emit, 0); +            if (emit->last_r64 != r64) { +                asm_x64_mov_r64_to_r64(emit->as, emit->last_r64, r64); +            } +            break; + +        case NEED_TO_PUSH_I64: +            emit_pre_raw(emit, 0); +            asm_x64_mov_i64_to_r64_optimised(emit->as, emit->last_i64, r64); +            break; +    } +    emit->need_to_push = NEED_TO_PUSH_NOTHING; +} + +static void emit_pre_pop_r64_r64(emitter_t *emit, int r64a, int r64b) { +    emit_pre_pop_r64(emit, r64a); +    asm_x64_mov_local_to_r64(emit->as, emit->stack_start + emit->stack_size - 1, r64b); +    adjust_stack(emit, -1); +} + +static void emit_pre_pop_r64_r64_r64(emitter_t *emit, int r64a, int r64b, int r64c) { +    emit_pre_pop_r64(emit, r64a); +    asm_x64_mov_local_to_r64(emit->as, emit->stack_start + emit->stack_size - 1, r64b); +    asm_x64_mov_local_to_r64(emit->as, emit->stack_start + emit->stack_size - 2, r64c); +    adjust_stack(emit, -2); +} + +static void emit_post(emitter_t *emit) { +} + +static void emit_post_push_r64(emitter_t *emit, int r64) { +    emit->need_to_push = NEED_TO_PUSH_R64; +    emit->last_r64 = r64; +} + +static void emit_post_push_i64(emitter_t *emit, int64_t i64) { +    emit->need_to_push = NEED_TO_PUSH_I64; +    emit->last_i64 = i64; +} + +static void emit_post_push_r64_r64(emitter_t *emit, int r64a, int r64b) 
{ +    asm_x64_mov_r64_to_local(emit->as, r64a, emit->stack_start + emit->stack_size); +    emit->need_to_push = NEED_TO_PUSH_R64; +    emit->last_r64 = r64b; +    adjust_stack(emit, 1); +} + +static void emit_post_push_r64_r64_r64(emitter_t *emit, int r64a, int r64b, int r64c) { +    asm_x64_mov_r64_to_local(emit->as, r64a, emit->stack_start + emit->stack_size); +    asm_x64_mov_r64_to_local(emit->as, r64b, emit->stack_start + emit->stack_size + 1); +    asm_x64_mov_r64_to_local(emit->as, r64c, emit->stack_start + emit->stack_size + 2); +    adjust_stack(emit, 3); +} + +static void emit_post_push_r64_r64_r64_r64(emitter_t *emit, int r64a, int r64b, int r64c, int r64d) { +    asm_x64_mov_r64_to_local(emit->as, r64a, emit->stack_start + emit->stack_size); +    asm_x64_mov_r64_to_local(emit->as, r64b, emit->stack_start + emit->stack_size + 1); +    asm_x64_mov_r64_to_local(emit->as, r64c, emit->stack_start + emit->stack_size + 2); +    asm_x64_mov_r64_to_local(emit->as, r64d, emit->stack_start + emit->stack_size + 3); +    adjust_stack(emit, 4); +} + +static void emit_get_stack_pointer_to_r64_for_pop(emitter_t *emit, int r64, int n_pop) { +    asm_x64_mov_local_addr_to_r64(emit->as, emit->stack_start + emit->stack_size - 1, r64); +    adjust_stack(emit, -n_pop); +} + +static void emit_get_stack_pointer_to_r64_for_push(emitter_t *emit, int r64, int n_push) { +    asm_x64_mov_local_addr_to_r64(emit->as, emit->stack_start + emit->stack_size + n_push - 1, r64); +    adjust_stack(emit, n_push); +} + +static void emit_call(emitter_t *emit, void *fun) { +    asm_x64_call_ind(emit->as, fun, REG_RAX); +} + +static void emit_call_with_i64_arg(emitter_t *emit, void *fun, int64_t arg_val, int arg_r64) { +    asm_x64_mov_i64_to_r64_optimised(emit->as, arg_val, arg_r64); +    asm_x64_call_ind(emit->as, fun, REG_RAX); +} + +int emit_label_new(emitter_t *emit) { +    return asm_x64_label_new(emit->as); +} + +void emit_label_assign(emitter_t *emit, int l) { +    
asm_x64_label_assign(emit->as, l); +} + +void emit_import_name(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_import_from(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_import_star(emitter_t *emit) { +    assert(0); +} + +void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) { +    emit_pre(emit); +    py_obj_t o; +    switch (tok) { +        case PY_TOKEN_KW_NONE: o = py_const_none; break; +        case PY_TOKEN_KW_FALSE: o = py_const_false; break; +        case PY_TOKEN_KW_TRUE: o = py_const_true; break; +        default: assert(0); // shouldn't happen +    } +    emit_post_push_i64(emit, (uint64_t)o); +} + +void emit_load_const_small_int(emitter_t *emit, int arg) { +    emit_pre(emit); +    if (emit->do_native_types) { +        emit_post_push_i64(emit, arg); +    } else { +        emit_post_push_i64(emit, (arg << 1) | 1); +    } +} + +void emit_load_const_int(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_load_const_dec(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_load_const_id(emitter_t *emit, qstr qstr) { +    assert(0); +} + +void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) { +    emit_pre(emit); +    emit_call_with_i64_arg(emit, rt_load_const_str, qstr, REG_ARG_1); +    emit_post_push_r64(emit, REG_RET); +} + +void emit_load_const_verbatim_start(emitter_t *emit) { +    assert(0); +} +void emit_load_const_verbatim_int(emitter_t *emit, int val) { +    assert(0); +} +void emit_load_const_verbatim_str(emitter_t *emit, const char *str) { +    assert(0); +} +void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) { +    assert(0); +} +void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) { +    assert(0); +} +void emit_load_const_verbatim_end(emitter_t *emit) { +    assert(0); +} + +void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) { +    if (local_num == 0) { +        emit_pre(emit); +        emit_post_push_r64(emit, 
REG_LOCAL_1); +    } else { +        emit_pre(emit); +        asm_x64_mov_local_to_r64(emit->as, local_num - 1, REG_RAX); +        emit_post_push_r64(emit, REG_RAX); +    } +} + +void emit_load_name(emitter_t *emit, qstr qstr) { +    emit_pre(emit); +    emit_call_with_i64_arg(emit, rt_load_name, qstr, REG_ARG_1); +    emit_post_push_r64(emit, REG_RET); +} + +void emit_load_global(emitter_t *emit, qstr qstr) { +    emit_pre(emit); +    emit_call_with_i64_arg(emit, rt_load_global, qstr, REG_ARG_1); +    emit_post_push_r64(emit, REG_RET); +} + +void emit_load_deref(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_load_closure(emitter_t *emit, qstr qstr) { +    assert(0); +} + +void emit_load_attr(emitter_t *emit, qstr qstr) { +    emit_pre_pop_r64(emit, REG_ARG_1); // arg1 = base +    emit_call_with_i64_arg(emit, rt_load_attr, qstr, REG_ARG_2); // arg2 = attribute name +    emit_post_push_r64(emit, REG_RET); +} + +void emit_load_method(emitter_t *emit, qstr qstr) { +    emit_pre_pop_r64(emit, REG_ARG_1); // arg1 = base +    emit_get_stack_pointer_to_r64_for_push(emit, REG_ARG_3, 2); // arg3 = dest ptr +    emit_call_with_i64_arg(emit, rt_load_method, qstr, REG_ARG_2); // arg2 = method name +} + +void emit_load_build_class(emitter_t *emit) { +   assert(0); +} // basically load __build_class__ from builtins + +void emit_store_fast(emitter_t *emit, qstr qstr, int local_num) { +    if (local_num == 0) { +        emit_pre_pop_r64(emit, REG_LOCAL_1); +        emit_post(emit); +    } else { +        emit_pre_pop_r64(emit, REG_RAX); +        asm_x64_mov_r64_to_local(emit->as, REG_RAX, local_num - 1); +        emit_post(emit); +    } +} + +void emit_store_name(emitter_t *emit, qstr qstr) { +    emit_pre_pop_r64(emit, REG_ARG_2); +    emit_call_with_i64_arg(emit, rt_store_name, qstr, REG_ARG_1); // arg1 = name +    emit_post(emit); +} + +void emit_store_global(emitter_t *emit, qstr qstr) { +    assert(0); +} + +void emit_store_deref(emitter_t *emit, qstr qstr) { +   
 assert(0); +} +void emit_store_attr(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_store_locals(emitter_t *emit) { +    assert(0); +} + +void emit_store_subscr(emitter_t *emit) { +    emit_pre_pop_r64_r64_r64(emit, REG_ARG_2, REG_ARG_1, REG_ARG_3); // index, base, value to store +    emit_call(emit, rt_store_subscr); +} + +void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) { +    assert(0); +} +void emit_delete_name(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_delete_global(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_delete_deref(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_delete_attr(emitter_t *emit, qstr qstr) { +    assert(0); +} +void emit_delete_subscr(emitter_t *emit) { +    assert(0); +} + +void emit_dup_top(emitter_t *emit) { +    emit_pre_pop_r64(emit, REG_RAX); +    emit_post_push_r64_r64(emit, REG_RAX, REG_RAX); +} + +void emit_dup_top_two(emitter_t *emit) { +    emit_pre_pop_r64_r64(emit, REG_RAX, REG_RDI); +    emit_post_push_r64_r64_r64_r64(emit, REG_RDI, REG_RAX, REG_RDI, REG_RAX); +} + +void emit_pop_top(emitter_t *emit) { +    emit_pre_pop_r64(emit, REG_RAX); +    emit_post(emit); +} + +void emit_rot_two(emitter_t *emit) { +    assert(0); +} + +void emit_rot_three(emitter_t *emit) { +    emit_pre_pop_r64_r64_r64(emit, REG_RAX, REG_RDI, REG_RSI); +    emit_post_push_r64_r64_r64(emit, REG_RAX, REG_RSI, REG_RDI); +} + +void emit_jump(emitter_t *emit, int label) { +    emit_pre(emit); +    asm_x64_jmp_label(emit->as, label); +    emit_post(emit); +} + +void emit_pop_jump_if_false(emitter_t *emit, int label) { +    if (emit->do_native_types) { +        emit_pre_pop_r64(emit, REG_RET); +        asm_x64_test_r8_with_r8(emit->as, REG_RET, REG_RET); +        asm_x64_jcc_label(emit->as, JCC_JZ, label); +        emit_post(emit); +    } else { +        emit_pre_pop_r64(emit, REG_ARG_1); +        emit_call(emit, rt_is_true); +        asm_x64_test_r8_with_r8(emit->as, REG_RET, REG_RET); + 
       asm_x64_jcc_label(emit->as, JCC_JZ, label); +        emit_post(emit); +    } +} + +void emit_pop_jump_if_true(emitter_t *emit, int label) { +    assert(0); +} +void emit_jump_if_true_or_pop(emitter_t *emit, int label) { +    assert(0); +} +void emit_jump_if_false_or_pop(emitter_t *emit, int label) { +    assert(0); +} + +void emit_setup_loop(emitter_t *emit, int label) { +    emit_pre(emit); +    emit_post(emit); +} + +void emit_break_loop(emitter_t *emit, int label) { +    assert(0); +} +void emit_continue_loop(emitter_t *emit, int label) { +    assert(0); +} +void emit_setup_with(emitter_t *emit, int label) { +    assert(0); +} +void emit_with_cleanup(emitter_t *emit) { +    assert(0); +} +void emit_setup_except(emitter_t *emit, int label) { +    assert(0); +} +void emit_setup_finally(emitter_t *emit, int label) { +    assert(0); +} +void emit_end_finally(emitter_t *emit) { +    assert(0); +} +void emit_get_iter(emitter_t *emit) { +    assert(0); +} // tos = getiter(tos) +void emit_for_iter(emitter_t *emit, int label) { +    assert(0); +} +void emit_for_iter_end(emitter_t *emit) { +    assert(0); +} +void emit_pop_except(emitter_t *emit) { +    assert(0); +} + +void emit_unary_op(emitter_t *emit, rt_unary_op_t op) { +    emit_pre_pop_r64(emit, REG_ARG_2); +    emit_call_with_i64_arg(emit, rt_unary_op, op, REG_ARG_1); +    emit_post_push_r64(emit, REG_RET); +} + +void emit_build_tuple(emitter_t *emit, int n_args) { +    assert(0); +} + +void emit_build_list(emitter_t *emit, int n_args) { +    emit_pre(emit); +    emit_get_stack_pointer_to_r64_for_pop(emit, REG_ARG_2, n_args); // pointer to items in reverse order +    emit_call_with_i64_arg(emit, rt_build_list, n_args, REG_ARG_1); +    emit_post_push_r64(emit, REG_RET); // new list +} + +void emit_list_append(emitter_t *emit, int list_index) { +    assert(0); +} + +void emit_build_map(emitter_t *emit, int n_args) { +    emit_pre(emit); +    emit_call_with_i64_arg(emit, rt_build_map, n_args, REG_ARG_1); +    
emit_post_push_r64(emit, REG_RET); // new map +} + +void emit_store_map(emitter_t *emit) { +    emit_pre_pop_r64_r64_r64(emit, REG_ARG_2, REG_ARG_3, REG_ARG_1); // key, value, map +    emit_call(emit, rt_store_map); +    emit_post_push_r64(emit, REG_RET); // map +} + +void emit_map_add(emitter_t *emit, int map_index) { +    assert(0); +} + +void emit_build_set(emitter_t *emit, int n_args) { +    emit_pre(emit); +    emit_get_stack_pointer_to_r64_for_pop(emit, REG_ARG_2, n_args); // pointer to items in reverse order +    emit_call_with_i64_arg(emit, rt_build_set, n_args, REG_ARG_1); +    emit_post_push_r64(emit, REG_RET); // new set +} + +void emit_set_add(emitter_t *emit, int set_index) { +    assert(0); +} +void emit_build_slice(emitter_t *emit, int n_args) { +    assert(0); +} +void emit_unpack_sequence(emitter_t *emit, int n_args) { +    assert(0); +} +void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) { +    assert(0); +} + +void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { +    assert(n_default_params == 0 && n_dict_params == 0); +    emit_pre(emit); +    emit_call_with_i64_arg(emit, rt_make_function_from_id, scope->unique_code_id, REG_ARG_1); +    emit_post_push_r64(emit, REG_RET); +} + +void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { +    assert(0); +} + +void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { +    assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg); +    if (n_positional == 0) { +        emit_pre_pop_r64(emit, REG_ARG_1); // the function +        emit_call(emit, rt_call_function_0); +    } else if (n_positional == 1) { +        emit_pre_pop_r64_r64(emit, REG_ARG_2, REG_ARG_1); // the single argument, the function +        emit_call(emit, rt_call_function_1); +    } else if (n_positional == 2) { +        emit_pre_pop_r64_r64_r64(emit, REG_ARG_3, REG_ARG_2, 
REG_ARG_1); // the second argument, the first argument, the function +        emit_call(emit, rt_call_function_2); +    } else { +        assert(0); +    } +    emit_post_push_r64(emit, REG_RET); +} + +void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { +    assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg); +    if (n_positional == 0) { +        emit_pre_pop_r64_r64(emit, REG_ARG_2, REG_ARG_1); // the self object (or NULL), the method +        emit_call(emit, rt_call_method_1); +    } else if (n_positional == 1) { +        emit_pre_pop_r64_r64_r64(emit, REG_ARG_3, REG_ARG_2, REG_ARG_1); // the first argument, the self object (or NULL), the method +        emit_call(emit, rt_call_method_2); +    } else { +        assert(0); +    } +    emit_post_push_r64(emit, REG_RET); +} + +void emit_pop_block(emitter_t *emit) { +    emit_pre(emit); +    emit_post(emit); +} + +void emit_binary_op(emitter_t *emit, rt_binary_op_t op) { +    if (emit->do_native_types) { +        assert(op == RT_BINARY_OP_ADD); +        emit_pre_pop_r64_r64(emit, REG_ARG_2, REG_RET); +        asm_x64_add_r64_to_r64(emit->as, REG_ARG_2, REG_RET); +        emit_post_push_r64(emit, REG_RET); +    } else { +        emit_pre_pop_r64_r64(emit, REG_ARG_3, REG_ARG_2); +        emit_call_with_i64_arg(emit, rt_binary_op, op, REG_ARG_1); +        emit_post_push_r64(emit, REG_RET); +    } +} + +void emit_compare_op(emitter_t *emit, rt_compare_op_t op) { +    if (emit->do_native_types) { +        assert(op == RT_COMPARE_OP_LESS); +        emit_pre_pop_r64_r64(emit, REG_ARG_3, REG_ARG_2); +        asm_x64_xor_r64_to_r64(emit->as, REG_RET, REG_RET); +        asm_x64_cmp_r64_with_r64(emit->as, REG_ARG_3, REG_ARG_2); +        asm_x64_setcc_r8(emit->as, JCC_JL, REG_RET); +        emit_post_push_r64(emit, REG_RET); +    } else { +        emit_pre_pop_r64_r64(emit, REG_ARG_3, REG_ARG_2); +        emit_call_with_i64_arg(emit, rt_compare_op, 
op, REG_ARG_1); +        emit_post_push_r64(emit, REG_RET); +    } +} + +void emit_return_value(emitter_t *emit) { +    emit_pre_pop_r64(emit, REG_RAX); +    emit->last_emit_was_return_value = true; +    //asm_x64_call_ind(emit->as, 0, REG_RAX); to seg fault for debugging with gdb +    asm_x64_exit(emit->as); +} + +void emit_raise_varargs(emitter_t *emit, int n_args) { +    assert(0); +} +void emit_yield_value(emitter_t *emit) { +    assert(0); +} +void emit_yield_from(emitter_t *emit) { +    assert(0); +} + +#endif // EMIT_DO_X64 diff --git a/py/grammar.h b/py/grammar.h new file mode 100644 index 000000000..05bb237a5 --- /dev/null +++ b/py/grammar.h @@ -0,0 +1,300 @@ +// rules for writing rules: +// - zero_or_more is implemented using opt_rule around a one_or_more rule +// - don't put opt_rule in arguments of or rule; instead, wrap the call to this or rule in opt_rule + +// # Start symbols for the grammar: +// #       single_input is a single interactive statement; +// #       file_input is a module or sequence of commands read from an input file; +// #       eval_input is the input for the eval() functions. +// # NB: compound_stmt in single_input is followed by extra NEWLINE! 
+// single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +// file_input: (NEWLINE | stmt)* ENDMARKER +// eval_input: testlist NEWLINE* ENDMARKER + +DEF_RULE(file_input, nc, and(1), opt_rule(file_input_2)) +DEF_RULE(file_input_2, c(generic_all_nodes), one_or_more, rule(file_input_3)) +DEF_RULE(file_input_3, nc, or(2), tok(NEWLINE), rule(stmt)) + +// decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +// decorators: decorator+ +// decorated: decorators (classdef | funcdef) +// funcdef: 'def' NAME parameters ['->' test] ':' suite +// parameters: '(' [typedargslist] ')' +// typedargslist: tfpdef ['=' test] (',' tfpdef ['=' test])* [',' ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef +// tfpdef: NAME [':' test] +// varargslist: vfpdef ['=' test] (',' vfpdef ['=' test])* [',' ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] |  '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef +// vfpdef: NAME + +DEF_RULE(decorator, nc, and(4), tok(DEL_AT), rule(dotted_name), opt_rule(trailer_paren), tok(NEWLINE)) +//DEF_RULE(decorator_2, nc, and(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE)) +DEF_RULE(decorators, nc, one_or_more, rule(decorator)) +DEF_RULE(decorated, c(decorated), and(2), rule(decorators), rule(decorated_body)) +DEF_RULE(decorated_body, nc, or(2), rule(classdef), rule(funcdef)) +DEF_RULE(funcdef, c(funcdef), and(8), tok(KW_DEF), tok(NAME), tok(DEL_PAREN_OPEN), opt_rule(typedargslist), tok(DEL_PAREN_CLOSE), opt_rule(funcdef_2), tok(DEL_COLON), rule(suite)) +DEF_RULE(funcdef_2, nc, and(2), tok(DEL_MINUS_MORE), rule(test)) +// TODO typedargslist lets through more than is allowed +DEF_RULE(typedargslist, nc, list_with_end, rule(typedargslist_item), tok(DEL_COMMA)) +DEF_RULE(typedargslist_item, nc, or(3), rule(typedargslist_name), rule(typedargslist_star), rule(typedargslist_dbl_star)) 
+DEF_RULE(typedargslist_name, nc, and(3), tok(NAME), opt_rule(typedargslist_colon), opt_rule(typedargslist_equal)) +DEF_RULE(typedargslist_star, nc, and(2), tok(OP_STAR), opt_rule(tfpdef)) +DEF_RULE(typedargslist_dbl_star, nc, and(3), tok(OP_DBL_STAR), tok(NAME), opt_rule(typedargslist_colon)) +DEF_RULE(typedargslist_colon, nc, and(2), tok(DEL_COLON), rule(test)) +DEF_RULE(typedargslist_equal, nc, and(2), tok(DEL_EQUAL), rule(test)) +DEF_RULE(tfpdef, nc, and(2), tok(NAME), opt_rule(typedargslist_colon)) +// TODO varargslist lets through more than is allowed +DEF_RULE(varargslist, nc, list_with_end, rule(varargslist_item), tok(DEL_COMMA)) +DEF_RULE(varargslist_item, nc, or(3), rule(varargslist_name), rule(varargslist_star), rule(varargslist_dbl_star)) +DEF_RULE(varargslist_name, nc, and(2), tok(NAME), opt_rule(varargslist_equal)) +DEF_RULE(varargslist_star, nc, and(2), tok(OP_STAR), opt_rule(vfpdef)) +DEF_RULE(varargslist_dbl_star, nc, and(2), tok(OP_DBL_STAR), tok(NAME)) +DEF_RULE(varargslist_equal, nc, and(2), tok(DEL_EQUAL), rule(test)) +DEF_RULE(vfpdef, nc, and(1), tok(NAME)) + +// stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | simple_stmt + +DEF_RULE(stmt, nc, or(9), rule(if_stmt), rule(while_stmt), rule(for_stmt), rule(try_stmt), rule(with_stmt), rule(funcdef), rule(classdef), rule(decorated), rule(simple_stmt)) + +// simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE + +DEF_RULE(simple_stmt, nc, and(2), rule(simple_stmt_2), tok(NEWLINE)) +DEF_RULE(simple_stmt_2, c(generic_all_nodes), list_with_end, rule(small_stmt), tok(DEL_SEMICOLON)) + +// small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt +// expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | ('=' (yield_expr|testlist_star_expr))*) +// testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +// augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' 
| '>>=' | '**=' | '//=' +// # For normal assignments, additional restrictions enforced by the interpreter + +DEF_RULE(small_stmt, nc, or(8), rule(del_stmt), rule(pass_stmt), rule(flow_stmt), rule(import_stmt), rule(global_stmt), rule(nonlocal_stmt), rule(assert_stmt), rule(expr_stmt)) +DEF_RULE(expr_stmt, c(expr_stmt), and(2), rule(testlist_star_expr), opt_rule(expr_stmt_2)) +DEF_RULE(expr_stmt_2, nc, or(2), rule(expr_stmt_augassign), rule(expr_stmt_assign_list)) +DEF_RULE(expr_stmt_augassign, nc, and(2), rule(augassign), rule(expr_stmt_6)) +DEF_RULE(expr_stmt_assign_list, nc, one_or_more, rule(expr_stmt_assign)) +DEF_RULE(expr_stmt_assign, nc, and(2), tok(DEL_EQUAL), rule(expr_stmt_6)) +DEF_RULE(expr_stmt_6, nc, or(2), rule(yield_expr), rule(testlist_star_expr)) +DEF_RULE(testlist_star_expr, c(generic_tuple), list_with_end, rule(testlist_star_expr_2), tok(DEL_COMMA)) +DEF_RULE(testlist_star_expr_2, nc, or(2), rule(star_expr), rule(test)) +DEF_RULE(augassign, nc, or(12), tok(DEL_PLUS_EQUAL), tok(DEL_MINUS_EQUAL), tok(DEL_STAR_EQUAL), tok(DEL_SLASH_EQUAL), tok(DEL_PERCENT_EQUAL), tok(DEL_AMPERSAND_EQUAL), tok(DEL_PIPE_EQUAL), tok(DEL_CARET_EQUAL), tok(DEL_DBL_LESS_EQUAL), tok(DEL_DBL_MORE_EQUAL), tok(DEL_DBL_STAR_EQUAL), tok(DEL_DBL_SLASH_EQUAL)) + +// del_stmt: 'del' exprlist +// pass_stmt: 'pass' +// flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +// break_stmt: 'break' +// continue_stmt: 'continue' +// return_stmt: 'return' [testlist] +// yield_stmt: yield_expr +// raise_stmt: 'raise' [test ['from' test]] + +DEF_RULE(del_stmt, c(del_stmt), and(2), tok(KW_DEL), rule(exprlist)) +DEF_RULE(pass_stmt, c(generic_all_nodes), and(1), tok(KW_PASS)) +DEF_RULE(flow_stmt, nc, or(5), rule(break_stmt), rule(continue_stmt), rule(return_stmt), rule(raise_stmt), rule(yield_stmt)) +DEF_RULE(break_stmt, c(break_stmt), and(1), tok(KW_BREAK)) +DEF_RULE(continue_stmt, c(continue_stmt), and(1), tok(KW_CONTINUE)) +DEF_RULE(return_stmt, c(return_stmt), 
and(2), tok(KW_RETURN), opt_rule(testlist)) +DEF_RULE(yield_stmt, c(yield_stmt), and(1), rule(yield_expr)) +DEF_RULE(raise_stmt, c(raise_stmt), and(2), tok(KW_RAISE), opt_rule(raise_stmt_arg)) +DEF_RULE(raise_stmt_arg, nc, and(2), rule(test), opt_rule(raise_stmt_from)) +DEF_RULE(raise_stmt_from, nc, and(2), tok(KW_FROM), rule(test)) + +// import_stmt: import_name | import_from +// import_name: 'import' dotted_as_names +// import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+) 'import' ('*' | '(' import_as_names ')' | import_as_names) +// import_as_name: NAME ['as' NAME] +// dotted_as_name: dotted_name ['as' NAME] +// import_as_names: import_as_name (',' import_as_name)* [','] +// dotted_as_names: dotted_as_name (',' dotted_as_name)* +// dotted_name: NAME ('.' NAME)* +// global_stmt: 'global' NAME (',' NAME)* +// nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +// assert_stmt: 'assert' test [',' test] + +DEF_RULE(import_stmt, nc, or(2), rule(import_name), rule(import_from)) +DEF_RULE(import_name, c(import_name), and(2), tok(KW_IMPORT), rule(dotted_as_names)) +DEF_RULE(import_from, c(import_from), and(4), tok(KW_FROM), rule(import_from_2), tok(KW_IMPORT), rule(import_from_3)) +DEF_RULE(import_from_2, nc, or(2), rule(dotted_name), rule(import_from_2b)) +DEF_RULE(import_from_2b, nc, and(2), rule(one_or_more_period_or_ellipses), opt_rule(dotted_name)) +DEF_RULE(import_from_3, nc, or(3), tok(OP_STAR), rule(import_as_names_paren), rule(import_as_names)) +DEF_RULE(import_as_names_paren, nc, and(3), tok(DEL_PAREN_OPEN), rule(import_as_names), tok(DEL_PAREN_CLOSE)) +DEF_RULE(one_or_more_period_or_ellipses, nc, one_or_more, rule(period_or_ellipses)) +DEF_RULE(period_or_ellipses, nc, or(2), tok(DEL_PERIOD), tok(ELLIPSES)) +DEF_RULE(import_as_name, nc, and(2), tok(NAME), opt_rule(as_name)) +DEF_RULE(dotted_as_name, nc, and(2), rule(dotted_name), opt_rule(as_name)) +DEF_RULE(as_name, nc, and(2), tok(KW_AS), tok(NAME)) +DEF_RULE(import_as_names, nc, list_with_end, 
rule(import_as_name), tok(DEL_COMMA)) +DEF_RULE(dotted_as_names, nc, list, rule(dotted_as_name), tok(DEL_COMMA)) +DEF_RULE(dotted_name, nc, list, tok(NAME), tok(DEL_PERIOD)) +DEF_RULE(global_stmt, c(global_stmt), and(2), tok(KW_GLOBAL), rule(name_list)) +DEF_RULE(nonlocal_stmt, c(nonlocal_stmt), and(2), tok(KW_NONLOCAL), rule(name_list)) +DEF_RULE(name_list, nc, list, tok(NAME), tok(DEL_COMMA)) +DEF_RULE(assert_stmt, c(assert_stmt), and(3), tok(KW_ASSERT), rule(test), opt_rule(assert_stmt_extra)) +DEF_RULE(assert_stmt_extra, nc, and(2), tok(DEL_COMMA), rule(test)) + +// if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +// while_stmt: 'while' test ':' suite ['else' ':' suite] +// for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +// try_stmt: 'try' ':' suite ((except_clause ':' suite)+ ['else' ':' suite] ['finally' ':' suite] | 'finally' ':' suite) +// # NB compile.c makes sure that the default except clause is last +// except_clause: 'except' [test ['as' NAME]] +// with_stmt: 'with' with_item (',' with_item)* ':' suite +// with_item: test ['as' expr] +// suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +DEF_RULE(if_stmt, c(if_stmt), and(6), tok(KW_IF), rule(test), tok(DEL_COLON), rule(suite), opt_rule(if_stmt_elif_list), opt_rule(else_stmt)) +DEF_RULE(if_stmt_elif_list, nc, one_or_more, rule(if_stmt_elif)) +DEF_RULE(if_stmt_elif, nc, and(4), tok(KW_ELIF), rule(test), tok(DEL_COLON), rule(suite)) +DEF_RULE(while_stmt, c(while_stmt), and(5), tok(KW_WHILE), rule(test), tok(DEL_COLON), rule(suite), opt_rule(else_stmt)) +DEF_RULE(for_stmt, c(for_stmt), and(7), tok(KW_FOR), rule(exprlist), tok(KW_IN), rule(testlist), tok(DEL_COLON), rule(suite), opt_rule(else_stmt)) +DEF_RULE(try_stmt, c(try_stmt), and(4), tok(KW_TRY), tok(DEL_COLON), rule(suite), rule(try_stmt_2)) +DEF_RULE(try_stmt_2, nc, or(2), rule(try_stmt_except_and_more), rule(try_stmt_finally)) +DEF_RULE(try_stmt_except_and_more, nc, and(3), rule(try_stmt_except_list), 
opt_rule(else_stmt), opt_rule(try_stmt_finally)) +DEF_RULE(try_stmt_except, nc, and(4), tok(KW_EXCEPT), opt_rule(try_stmt_as_name), tok(DEL_COLON), rule(suite)) +DEF_RULE(try_stmt_as_name, nc, and(2), rule(test), opt_rule(as_name)) +DEF_RULE(try_stmt_except_list, nc, one_or_more, rule(try_stmt_except)) +DEF_RULE(try_stmt_finally, nc, and(3), tok(KW_FINALLY), tok(DEL_COLON), rule(suite)) +DEF_RULE(else_stmt, nc, and(3), tok(KW_ELSE), tok(DEL_COLON), rule(suite)) +DEF_RULE(with_stmt, c(with_stmt), and(4), tok(KW_WITH), rule(with_stmt_list), tok(DEL_COLON), rule(suite)) +DEF_RULE(with_stmt_list, nc, list, rule(with_item), tok(DEL_COMMA)) +DEF_RULE(with_item, nc, and(2), rule(test), opt_rule(with_item_as)) +DEF_RULE(with_item_as, nc, and(2), tok(KW_AS), rule(expr)) +DEF_RULE(suite, nc, or(2), rule(suite_block), rule(simple_stmt)) +DEF_RULE(suite_block, nc, and(4), tok(NEWLINE), tok(INDENT), rule(suite_block_stmts), tok(DEDENT)) +DEF_RULE(suite_block_stmts, c(generic_all_nodes), one_or_more, rule(stmt)) + +// test: or_test ['if' or_test 'else' test] | lambdef +// test_nocond: or_test | lambdef_nocond +// lambdef: 'lambda' [varargslist] ':' test +// lambdef_nocond: 'lambda' [varargslist] ':' test_nocond + +DEF_RULE(test, nc, or(2), rule(lambdef), rule(test_if_expr)) +DEF_RULE(test_if_expr, c(test_if_expr), and(2), rule(or_test), opt_rule(test_if_else)) +DEF_RULE(test_if_else, nc, and(4), tok(KW_IF), rule(or_test), tok(KW_ELSE), rule(test)) +DEF_RULE(test_nocond, nc, or(2), rule(lambdef_nocond), rule(or_test)) +DEF_RULE(lambdef, c(lambdef), and(4), tok(KW_LAMBDA), opt_rule(varargslist), tok(DEL_COLON), rule(test)) +DEF_RULE(lambdef_nocond, c(lambdef), and(4), tok(KW_LAMBDA), opt_rule(varargslist), tok(DEL_COLON), rule(test_nocond)) + +// or_test: and_test ('or' and_test)* +// and_test: not_test ('and' not_test)* +// not_test: 'not' not_test | comparison +// comparison: expr (comp_op expr)* +// comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'|'is' 'not' +// 
star_expr: '*' expr +// expr: xor_expr ('|' xor_expr)* +// xor_expr: and_expr ('^' and_expr)* +// and_expr: shift_expr ('&' shift_expr)* +// shift_expr: arith_expr (('<<'|'>>') arith_expr)* +// arith_expr: term (('+'|'-') term)* +// term: factor (('*'|'/'|'%'|'//') factor)* +// factor: ('+'|'-'|'~') factor | power +// power: atom trailer* ['**' factor] + +DEF_RULE(or_test, c(or_test), list, rule(and_test), tok(KW_OR)) +DEF_RULE(and_test, c(and_test), list, rule(not_test), tok(KW_AND)) +DEF_RULE(not_test, nc, or(2), rule(not_test_2), rule(comparison)) +DEF_RULE(not_test_2, c(not_test_2), and(2), tok(KW_NOT), rule(not_test)) +DEF_RULE(comparison, c(comparison), list, rule(expr), rule(comp_op)) +DEF_RULE(comp_op, nc, or(9), tok(OP_LESS), tok(OP_MORE), tok(OP_DBL_EQUAL), tok(OP_LESS_EQUAL), tok(OP_MORE_EQUAL), tok(OP_NOT_EQUAL), tok(KW_IN), rule(comp_op_not_in), rule(comp_op_is)) +DEF_RULE(comp_op_not_in, nc, and(2), tok(KW_NOT), tok(KW_IN)) +DEF_RULE(comp_op_is, nc, and(2), tok(KW_IS), opt_rule(comp_op_is_not)) +DEF_RULE(comp_op_is_not, nc, and(1), tok(KW_NOT)) +DEF_RULE(star_expr, c(star_expr), and(2), tok(OP_STAR), rule(expr)) +DEF_RULE(expr, c(expr), list, rule(xor_expr), tok(OP_PIPE)) +DEF_RULE(xor_expr, c(xor_expr), list, rule(and_expr), tok(OP_CARET)) +DEF_RULE(and_expr, c(and_expr), list, rule(shift_expr), tok(OP_AMPERSAND)) +DEF_RULE(shift_expr, c(shift_expr), list, rule(arith_expr), rule(shift_op)) +DEF_RULE(shift_op, nc, or(2), tok(OP_DBL_LESS), tok(OP_DBL_MORE)) +DEF_RULE(arith_expr, c(arith_expr), list, rule(term), rule(arith_op)) +DEF_RULE(arith_op, nc, or(2), tok(OP_PLUS), tok(OP_MINUS)) +DEF_RULE(term, c(term), list, rule(factor), rule(term_op)) +DEF_RULE(term_op, nc, or(4), tok(OP_STAR), tok(OP_SLASH), tok(OP_PERCENT), tok(OP_DBL_SLASH)) +DEF_RULE(factor, nc, or(2), rule(factor_2), rule(power)) +DEF_RULE(factor_2, c(factor_2), and(2), rule(factor_op), rule(factor)) +DEF_RULE(factor_op, nc, or(3), tok(OP_PLUS), tok(OP_MINUS), tok(OP_TILDE)) 
+DEF_RULE(power, c(generic_all_nodes), and(3), rule(atom), opt_rule(power_trailers), opt_rule(power_dbl_star)) +DEF_RULE(power_trailers, c(power_trailers), one_or_more, rule(trailer)) +DEF_RULE(power_dbl_star, c(power_dbl_star), and(2), tok(OP_DBL_STAR), rule(factor)) + +// atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']' | '{' [dictorsetmaker] '}' | NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False' +// testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) +// trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME + +DEF_RULE(atom, nc, or(10), tok(NAME), tok(NUMBER), rule(atom_string), tok(ELLIPSES), tok(KW_NONE), tok(KW_TRUE), tok(KW_FALSE), rule(atom_paren), rule(atom_bracket), rule(atom_brace)) +DEF_RULE(atom_string, c(atom_string), one_or_more, rule(string_or_bytes)) +DEF_RULE(string_or_bytes, nc, or(2), tok(STRING), tok(BYTES)) +DEF_RULE(atom_paren, c(atom_paren), and(3), tok(DEL_PAREN_OPEN), opt_rule(atom_2b), tok(DEL_PAREN_CLOSE)) +DEF_RULE(atom_2b, nc, or(2), rule(yield_expr), rule(testlist_comp)) +DEF_RULE(atom_bracket, c(atom_bracket), and(3), tok(DEL_BRACKET_OPEN), opt_rule(testlist_comp), tok(DEL_BRACKET_CLOSE)) +DEF_RULE(atom_brace, c(atom_brace), and(3), tok(DEL_BRACE_OPEN), opt_rule(dictorsetmaker), tok(DEL_BRACE_CLOSE)) +DEF_RULE(testlist_comp, nc, and(2), rule(testlist_comp_2), opt_rule(testlist_comp_3)) +DEF_RULE(testlist_comp_2, nc, or(2), rule(star_expr), rule(test)) +DEF_RULE(testlist_comp_3, nc, or(2), rule(comp_for), rule(testlist_comp_3b)) +DEF_RULE(testlist_comp_3b, nc, and(2), tok(DEL_COMMA), opt_rule(testlist_comp_3c)) +DEF_RULE(testlist_comp_3c, nc, list_with_end, rule(testlist_comp_2), tok(DEL_COMMA)) +DEF_RULE(trailer, nc, or(3), rule(trailer_paren), rule(trailer_bracket), rule(trailer_period)) +DEF_RULE(trailer_paren, c(trailer_paren), and(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE)) +DEF_RULE(trailer_bracket, c(trailer_bracket), and(3), 
tok(DEL_BRACKET_OPEN), rule(subscriptlist), tok(DEL_BRACKET_CLOSE)) +DEF_RULE(trailer_period, c(trailer_period), and(2), tok(DEL_PERIOD), tok(NAME)) + +// subscriptlist: subscript (',' subscript)* [','] +// subscript: test | [test] ':' [test] [sliceop] +// sliceop: ':' [test] + +DEF_RULE(subscriptlist, c(generic_tuple), list_with_end, rule(subscript), tok(DEL_COMMA)) +DEF_RULE(subscript, nc, or(2), rule(subscript_3), rule(subscript_2)) +DEF_RULE(subscript_2, c(subscript_2), and(2), rule(test), opt_rule(subscript_3)) +DEF_RULE(subscript_3, c(subscript_3), and(2), tok(DEL_COLON), opt_rule(subscript_3b)) +DEF_RULE(subscript_3b, nc, or(2), rule(subscript_3c), rule(subscript_3d)) +DEF_RULE(subscript_3c, nc, and(2), tok(DEL_COLON), opt_rule(test)) +DEF_RULE(subscript_3d, nc, and(2), rule(test), opt_rule(sliceop)) +DEF_RULE(sliceop, nc, and(2), tok(DEL_COLON), opt_rule(test)) + +// exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +// testlist: test (',' test)* [','] +// dictorsetmaker: (test ':' test (comp_for | (',' test ':' test)* [','])) | (test (comp_for | (',' test)* [','])) + +DEF_RULE(exprlist, nc, list_with_end, rule(exprlist_2), tok(DEL_COMMA)) +DEF_RULE(exprlist_2, nc, or(2), rule(star_expr), rule(expr)) +DEF_RULE(testlist, c(generic_tuple), list_with_end, rule(test), tok(DEL_COMMA)) +// TODO dictorsetmaker lets through more than is allowed +DEF_RULE(dictorsetmaker, nc, and(2), rule(dictorsetmaker_item), opt_rule(dictorsetmaker_tail)) +DEF_RULE(dictorsetmaker_item, c(dictorsetmaker_item), and(2), rule(test), opt_rule(dictorsetmaker_colon)) +DEF_RULE(dictorsetmaker_colon, nc, and(2), tok(DEL_COLON), rule(test)) +DEF_RULE(dictorsetmaker_tail, nc, or(2), rule(comp_for), rule(dictorsetmaker_list)) +DEF_RULE(dictorsetmaker_list, nc, and(2), tok(DEL_COMMA), opt_rule(dictorsetmaker_list2)) +DEF_RULE(dictorsetmaker_list2, nc, list_with_end, rule(dictorsetmaker_item), tok(DEL_COMMA)) + +// classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +DEF_RULE(classdef, 
c(classdef), and(5), tok(KW_CLASS), tok(NAME), opt_rule(classdef_2), tok(DEL_COLON), rule(suite)) +DEF_RULE(classdef_2, nc, and(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE)) + +// arglist: (argument ',')* (argument [','] | '*' test (',' argument)* [',' '**' test] | '**' test) + +// TODO arglist lets through more than is allowed, compiler needs to do further verification +DEF_RULE(arglist, c(generic_all_nodes), list_with_end, rule(arglist_2), tok(DEL_COMMA)) +DEF_RULE(arglist_2, nc, or(3), rule(arglist_star), rule(arglist_dbl_star), rule(argument)) +DEF_RULE(arglist_star, c(arglist_star), and(2), tok(OP_STAR), rule(test)) +DEF_RULE(arglist_dbl_star, c(arglist_dbl_star), and(2), tok(OP_DBL_STAR), rule(test)) + +// # The reason that keywords are test nodes instead of NAME is that using NAME +// # results in an ambiguity. ast.c makes sure it's a NAME. +// argument: test [comp_for] | test '=' test  # Really [keyword '='] test +// comp_iter: comp_for | comp_if +// comp_for: 'for' exprlist 'in' or_test [comp_iter] +// comp_if: 'if' test_nocond [comp_iter] + +DEF_RULE(argument, c(argument), and(2), rule(test), opt_rule(argument_2)) +DEF_RULE(argument_2, nc, or(2), rule(comp_for), rule(argument_3)) +DEF_RULE(argument_3, nc, and(2), tok(DEL_EQUAL), rule(test)) +DEF_RULE(comp_iter, nc, or(2), rule(comp_for), rule(comp_if)) +DEF_RULE(comp_for, nc, and(5), tok(KW_FOR), rule(exprlist), tok(KW_IN), rule(or_test), opt_rule(comp_iter)) +DEF_RULE(comp_if, nc, and(3), tok(KW_IF), rule(test_nocond), opt_rule(comp_iter)) + +// # not used in grammar, but may appear in "node" passed from Parser to Compiler +// encoding_decl: NAME + +// yield_expr: 'yield' [yield_arg] +// yield_arg: 'from' test | testlist + +DEF_RULE(yield_expr, c(yield_expr), and(2), tok(KW_YIELD), opt_rule(yield_arg)) +DEF_RULE(yield_arg, nc, or(2), rule(yield_arg_from), rule(testlist)) +DEF_RULE(yield_arg_from, nc, and(2), tok(KW_FROM), rule(test)) diff --git a/py/lexer.c b/py/lexer.c new file mode 
100644 index 000000000..9c2195ef5 --- /dev/null +++ b/py/lexer.c @@ -0,0 +1,677 @@ +/* lexer.c -- simple tokeniser for Python implementation + */ + +#include <stdint.h> +#include <stdio.h> +#include <assert.h> + +#include "misc.h" +#include "lexer.h" + +#define TAB_SIZE (8) +#define CHR_EOF (-1) + +struct _py_lexer_t { +    const char *name;           // (file) name of source +    bool free;                  // free source when done with it + +    const char *src_beg;        // beginning of source +    const char *src_cur;        // current location in source; points to chr0 +    const char *src_end;        // end (exclusive) of source +    unichar chr0, chr1, chr2;   // current characters from source + +    uint line;                  // source line +    uint column;                // source column + +    uint cont_line;             // continued line + +    int emit_dent; +    int nested_bracket_level; + +    uint alloc_indent_level; +    uint num_indent_level; +    uint16_t *indent_level; + +    py_token_t tok_cur; +    py_token_t tok_next; +}; + +static bool py_token_is_str(const py_token_t *tok, const char *str) { +    uint i = 0; +    const char *tstr = tok->str; + +    while (i < tok->len && *tstr == *str) { +        ++i; +        ++tstr; +        ++str; +    } + +    return i == tok->len && *str == 0; +} + +void py_token_show(const py_token_t *tok) { +    printf("(%s:%d:%d) kind:%d cont_line:%d str:%p len:%d", tok->src_name, tok->src_line, tok->src_column, tok->kind, tok->cont_line, tok->str, tok->len); +    if (tok->str != NULL && tok->len > 0) { +        const char *i = tok->str; +        const char *j = i + tok->len; +        printf(" "); +        while (i < j) { +            unichar c = g_utf8_get_char(i); +            i = g_utf8_next_char(i); +            if (g_unichar_isprint(c)) { +                printf("%c", c); +            } else { +                printf("?"); +            } +        } +    } +    printf("\n"); +} + +void 
py_token_show_error_prefix(const py_token_t *tok) { +    printf("(%s:%d:%d) ", tok->src_name, tok->src_line, tok->src_column); +} + +bool py_token_show_error(const py_token_t *tok, const char *msg) { +    printf("(%s:%d:%d) %s\n", tok->src_name, tok->src_line, tok->src_column, msg); +    return false; +} + +static bool is_end(py_lexer_t *lex) { +    return lex->chr0 == CHR_EOF; +} + +static bool is_physical_newline(py_lexer_t *lex) { +    return lex->chr0 == '\n' || lex->chr0 == '\r'; +} + +static bool is_char(py_lexer_t *lex, char c) { +    return lex->chr0 == c; +} + +static bool is_char_or(py_lexer_t *lex, char c1, char c2) { +    return lex->chr0 == c1 || lex->chr0 == c2; +} + +static bool is_char_or3(py_lexer_t *lex, char c1, char c2, char c3) { +    return lex->chr0 == c1 || lex->chr0 == c2 || lex->chr0 == c3; +} + +/* +static bool is_char_following(py_lexer_t *lex, char c) { +    return lex->chr1 == c; +} +*/ + +static bool is_char_following_or(py_lexer_t *lex, char c1, char c2) { +    return lex->chr1 == c1 || lex->chr1 == c2; +} + +static bool is_char_following_following_or(py_lexer_t *lex, char c1, char c2) { +    return lex->chr2 == c1 || lex->chr2 == c2; +} + +static bool is_char_and(py_lexer_t *lex, char c1, char c2) { +    return lex->chr0 == c1 && lex->chr1 == c2; +} + +static bool is_whitespace(py_lexer_t *lex) { +    return g_unichar_isspace(lex->chr0); +} + +static bool is_letter(py_lexer_t *lex) { +    return g_unichar_isalpha(lex->chr0); +} + +static bool is_digit(py_lexer_t *lex) { +    return g_unichar_isdigit(lex->chr0); +} + +static bool is_following_digit(py_lexer_t *lex) { +    return g_unichar_isdigit(lex->chr1); +} + +// TODO UNICODE include unicode characters in definition of identifiers +static bool is_head_of_identifier(py_lexer_t *lex) { +    return is_letter(lex) || lex->chr0 == '_'; +} + +// TODO UNICODE include unicode characters in definition of identifiers +static bool is_tail_of_identifier(py_lexer_t *lex) { +    return 
is_head_of_identifier(lex) || is_digit(lex); +} + +static void next_char(py_lexer_t *lex) { +    if (lex->chr0 == CHR_EOF) { +        return; +    } + +    int advance = 1; + +    if (lex->chr0 == '\n') { +        // LF is a new line +        ++lex->line; +        lex->column = 1; +        lex->cont_line = lex->line; +    } else if (lex->chr0 == '\r') { +        // CR is a new line +        ++lex->line; +        lex->column = 1; +        lex->cont_line = lex->line; +        if (lex->chr1 == '\n') { +            // CR LF is a single new line +            advance = 2; +        } +    } else if (lex->chr0 == '\t') { +        // a tab +        lex->column = (((lex->column - 1 + TAB_SIZE) / TAB_SIZE) * TAB_SIZE) + 1; +    } else { +        // a character worth one column +        ++lex->column; +    } + +    for (; advance > 0; advance--) { +        lex->chr0 = lex->chr1; +        lex->chr1 = lex->chr2; +        lex->src_cur++; +        if (lex->src_cur + 2 < lex->src_end) { +            lex->chr2 = lex->src_cur[2]; +        } else { +            // EOF +            if (lex->chr1 != '\n' && lex->chr1 != '\r') { +                lex->chr2 = '\n'; // insert newline at end of file +            } else { +                lex->chr2 = CHR_EOF; +            } +        } +    } +} + +void indent_push(py_lexer_t *lex, uint indent) { +    if (lex->num_indent_level >= lex->alloc_indent_level) { +        lex->alloc_indent_level *= 2; +        lex->indent_level = m_renew(uint16_t, lex->indent_level, lex->alloc_indent_level); +    } +    lex->indent_level[lex->num_indent_level++] = indent; +} + +uint indent_top(py_lexer_t *lex) { +    return lex->indent_level[lex->num_indent_level - 1]; +} + +void indent_pop(py_lexer_t *lex) { +    lex->num_indent_level -= 1; +} + +// some tricky operator encoding: +//     <op>  = begin with <op>, if this opchar matches then begin here +//     e<op> = end with <op>, if this opchar matches then end +//     E<op> = mandatory end with <op>, this opchar 
must match, then end +//     c<op> = continue with <op>, if this opchar matches then continue matching +// this means if the start of two ops are the same then they are equal til the last char + +static const char *tok_enc = +    "()[]{},:;@~" // singles +    "<e=c<e="     // < <= << <<= +    ">e=c>e="     // > >= >> >>= +    "*e=c*e="     // * *= ** **= +    "+e="         // + += +    "-e=e>"       // - -= -> +    "&e="         // & &= +    "|e="         // | |= +    "/e=c/e="     // / /= // //= +    "%e="         // % %= +    "^e="         // ^ ^= +    "=e="         // = == +    "!E="         // != +    ".c.E.";      // . ... + +// TODO static assert that number of tokens is less than 256 so we can safely make this table with byte sized entries +static const uint8_t tok_enc_kind[] = { +    PY_TOKEN_DEL_PAREN_OPEN, PY_TOKEN_DEL_PAREN_CLOSE, +    PY_TOKEN_DEL_BRACKET_OPEN, PY_TOKEN_DEL_BRACKET_CLOSE, +    PY_TOKEN_DEL_BRACE_OPEN, PY_TOKEN_DEL_BRACE_CLOSE, +    PY_TOKEN_DEL_COMMA, PY_TOKEN_DEL_COLON, PY_TOKEN_DEL_SEMICOLON, PY_TOKEN_DEL_AT, PY_TOKEN_OP_TILDE, + +    PY_TOKEN_OP_LESS, PY_TOKEN_OP_LESS_EQUAL, PY_TOKEN_OP_DBL_LESS, PY_TOKEN_DEL_DBL_LESS_EQUAL, +    PY_TOKEN_OP_MORE, PY_TOKEN_OP_MORE_EQUAL, PY_TOKEN_OP_DBL_MORE, PY_TOKEN_DEL_DBL_MORE_EQUAL, +    PY_TOKEN_OP_STAR, PY_TOKEN_DEL_STAR_EQUAL, PY_TOKEN_OP_DBL_STAR, PY_TOKEN_DEL_DBL_STAR_EQUAL, +    PY_TOKEN_OP_PLUS, PY_TOKEN_DEL_PLUS_EQUAL, +    PY_TOKEN_OP_MINUS, PY_TOKEN_DEL_MINUS_EQUAL, PY_TOKEN_DEL_MINUS_MORE, +    PY_TOKEN_OP_AMPERSAND, PY_TOKEN_DEL_AMPERSAND_EQUAL, +    PY_TOKEN_OP_PIPE, PY_TOKEN_DEL_PIPE_EQUAL, +    PY_TOKEN_OP_SLASH, PY_TOKEN_DEL_SLASH_EQUAL, PY_TOKEN_OP_DBL_SLASH, PY_TOKEN_DEL_DBL_SLASH_EQUAL, +    PY_TOKEN_OP_PERCENT, PY_TOKEN_DEL_PERCENT_EQUAL, +    PY_TOKEN_OP_CARET, PY_TOKEN_DEL_CARET_EQUAL, +    PY_TOKEN_DEL_EQUAL, PY_TOKEN_OP_DBL_EQUAL, +    PY_TOKEN_OP_NOT_EQUAL, +    PY_TOKEN_DEL_PERIOD, PY_TOKEN_ELLIPSES, +}; + +// must have the same order as enum in lexer.h +static const 
char *tok_kw[] = { +    "False", +    "None", +    "True", +    "and", +    "as", +    "assert", +    "break", +    "class", +    "continue", +    "def", +    "del", +    "elif", +    "else", +    "except", +    "finally", +    "for", +    "from", +    "global", +    "if", +    "import", +    "in", +    "is", +    "lambda", +    "nonlocal", +    "not", +    "or", +    "pass", +    "raise", +    "return", +    "try", +    "while", +    "with", +    "yield", +    NULL, +}; + +static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok) { +    bool had_physical_newline = false; + +    while (!is_end(lex)) { +        if (is_physical_newline(lex)) { +            had_physical_newline = true; +            next_char(lex); +        } else if (is_whitespace(lex)) { +            next_char(lex); +        } else if (is_char(lex, '#')) { +            next_char(lex); +            while (!is_end(lex) && !is_physical_newline(lex)) { +                next_char(lex); +            } +            // had_physical_newline will be set on next loop +        } else if (is_char(lex, '\\')) { +            // backslash (outside string literals) must appear just before a physical newline +            next_char(lex); +            if (!is_physical_newline(lex)) { +                // TODO SyntaxError +                assert(0); +            } else { +                next_char(lex); +            } +        } else { +            break; +        } +    } + +    tok->src_name = lex->name; +    tok->src_line = lex->line; +    tok->src_column = lex->column; +    tok->kind = PY_TOKEN_INVALID; +    tok->cont_line = lex->cont_line; +    tok->str = lex->src_cur; +    tok->len = 0; + +    if (lex->emit_dent < 0) { +        tok->kind = PY_TOKEN_DEDENT; +        lex->emit_dent += 1; + +    } else if (lex->emit_dent > 0) { +        tok->kind = PY_TOKEN_INDENT; +        lex->emit_dent -= 1; + +    } else if (had_physical_newline && lex->nested_bracket_level == 0 +                   && tok != 
&lex->tok_cur // so that we don't emit a newline if file starts with a comment +               ) { +        tok->kind = PY_TOKEN_NEWLINE; + +        uint num_spaces = lex->column - 1; +        lex->emit_dent = 0; +        if (num_spaces == indent_top(lex)) { +        } else if (num_spaces > indent_top(lex)) { +            indent_push(lex, num_spaces); +            lex->emit_dent += 1; +        } else { +            while (num_spaces < indent_top(lex)) { +                indent_pop(lex); +                lex->emit_dent -= 1; +            } +            if (num_spaces != indent_top(lex)) { +                //SyntaxError +            } +        } + +    } else if (is_end(lex)) { +        // TODO emit a newline if file does not end in one +        if (indent_top(lex) > 0) { +            tok->kind = PY_TOKEN_NEWLINE; +            lex->emit_dent = 0; +            while (indent_top(lex) > 0) { +                indent_pop(lex); +                lex->emit_dent -= 1; +            } +        } else { +            tok->kind = PY_TOKEN_END; +        } + +    } else if (is_char_or(lex, '\'', '\"') +               || (is_char_or3(lex, 'r', 'u', 'b') && is_char_following_or(lex, '\'', '\"')) +               || ((is_char_and(lex, 'r', 'b') || is_char_and(lex, 'b', 'r')) && is_char_following_following_or(lex, '\'', '\"'))) { +        // a string or bytes literal + +        // parse type codes +        bool is_raw = false; +        bool is_bytes = false; +        if (is_char(lex, 'u')) { +            next_char(lex); +        } else if (is_char(lex, 'b')) { +            is_bytes = true; +            next_char(lex); +            if (is_char(lex, 'r')) { +                is_raw = true; +                next_char(lex); +            } +        } else if (is_char(lex, 'r')) { +            is_raw = true; +            next_char(lex); +            if (is_char(lex, 'b')) { +                is_bytes = true; +                next_char(lex); +            } +        } + +        // set token kind 
+        if (is_bytes) { +            tok->kind = PY_TOKEN_BYTES; +        } else { +            tok->kind = PY_TOKEN_STRING; +        } + +        // get first quoting character +        char quote_char = '\''; +        if (is_char(lex, '\"')) { +            quote_char = '\"'; +        } +        next_char(lex); + +        // work out if it's a single or triple quoted literal +        int num_quotes; +        if (is_char_and(lex, quote_char, quote_char)) { +            // triple quotes +            next_char(lex); +            next_char(lex); +            num_quotes = 3; +        } else { +            // single quotes +            num_quotes = 1; +        } + +        // set start of token +        tok->str = lex->src_cur; + +        // parse the literal +        // TODO proper escaping +        int n_closing = 0; +        while (!is_end(lex) && (num_quotes > 1 || !is_char(lex, '\n')) && n_closing < num_quotes) { +            if (is_char(lex, quote_char)) { +                n_closing += 1; +            } else { +                n_closing = 0; +                if (!is_raw && is_char(lex, '\\')) { +                    next_char(lex); +                } +            } +            next_char(lex); +        } + +        // check we got the required end quotes +        if (n_closing < num_quotes) { +            tok->kind = PY_TOKEN_LONELY_STRING_OPEN; +        } + +        // set token string (byte) length +        tok->len = lex->src_cur - tok->str - n_closing; + +        // we set the length, return now so it's not set incorrectly below +        return; + +    } else if (is_head_of_identifier(lex)) { +        tok->kind = PY_TOKEN_NAME; + +        next_char(lex); + +        while (!is_end(lex) && is_tail_of_identifier(lex)) { +            next_char(lex); +        } + +    } else if (is_digit(lex) || (is_char(lex, '.') && is_following_digit(lex))) { +        tok->kind = PY_TOKEN_NUMBER; + +        next_char(lex); + +        while (!is_end(lex)) { +            if 
(is_char_or(lex, 'e', 'E')) { +                next_char(lex); +                if (is_char(lex, '+') || is_char(lex, '-')) { +                    next_char(lex); +                } +            } else if (is_letter(lex) || is_digit(lex) || is_char_or(lex, '_', '.')) { +                next_char(lex); +            } else { +                break; +            } +        } + +    } else { +        // search for encoded delimiter or operator + +        const char *t = tok_enc; +        uint tok_enc_index = 0; +        for (; *t != 0 && !is_char(lex, *t); t += 1) { +            if (*t == 'e' || *t == 'c') { +                t += 1; +            } else if (*t == 'E') { +                tok_enc_index -= 1; +                t += 1; +            } +            tok_enc_index += 1; +        } + +        next_char(lex); + +        if (*t == 0) { +            // didn't match any delimiter or operator characters +            tok->kind = PY_TOKEN_INVALID; + +        } else { +            // matched a delimiter or operator character + +            // get the maximum characters for a valid token +            t += 1; +            uint t_index = tok_enc_index; +            for (;;) { +                for (; *t == 'e'; t += 1) { +                    t += 1; +                    t_index += 1; +                    if (is_char(lex, *t)) { +                        next_char(lex); +                        tok_enc_index = t_index; +                        break; +                    } +                } + +                if (*t == 'E') { +                    t += 1; +                    if (is_char(lex, *t)) { +                        next_char(lex); +                        tok_enc_index = t_index; +                    } else { +                        tok->kind = PY_TOKEN_INVALID; +                    } +                    break; +                } + +                if (*t == 'c') { +                    t += 1; +                    t_index += 1; +                    if (is_char(lex, 
*t)) { +                        next_char(lex); +                        tok_enc_index = t_index; +                        t += 1; +                    } else { +                        break; +                    } +                } else { +                    break; +                } +            } + +            // set token kind +            tok->kind = tok_enc_kind[tok_enc_index]; + +            // compute bracket level for implicit line joining +            if (tok->kind == PY_TOKEN_DEL_PAREN_OPEN || tok->kind == PY_TOKEN_DEL_BRACKET_OPEN || tok->kind == PY_TOKEN_DEL_BRACE_OPEN) { +                lex->nested_bracket_level += 1; +            } else if (tok->kind == PY_TOKEN_DEL_PAREN_CLOSE || tok->kind == PY_TOKEN_DEL_BRACKET_CLOSE || tok->kind == PY_TOKEN_DEL_BRACE_CLOSE) { +                lex->nested_bracket_level -= 1; +            } +        } +    } + +    // set token string (byte) length +    tok->len = lex->src_cur - tok->str; + +    // check for keywords (must be done after setting token string length) +    if (tok->kind == PY_TOKEN_NAME) { +        for (int i = 0; tok_kw[i] != NULL; i++) { +            if (py_token_is_str(tok, tok_kw[i])) { +                tok->kind = PY_TOKEN_KW_FALSE + i; +                break; +            } +        } +    } +} + +py_lexer_t *py_lexer_from_str_len(const char *src_name, const char *str, uint len, bool free_str) { +    py_lexer_t *lex; + +    lex = m_new(py_lexer_t, 1); + +    //lex->name = g_strdup(src_name); // TODO +    lex->name = src_name; +    lex->free = free_str; +    lex->src_beg = str; +    lex->src_cur = str; +    lex->src_end = str + len; +    lex->line = 1; +    lex->column = 1; +    lex->cont_line = lex->line; +    lex->emit_dent = 0; +    lex->nested_bracket_level = 0; +    lex->alloc_indent_level = 16; +    lex->num_indent_level = 1; +    lex->indent_level = m_new(uint16_t, lex->alloc_indent_level); +    lex->indent_level[0] = 0; + +    // preload characters +    // TODO unicode +    if (len 
== 0) { +        lex->chr0 = '\n'; // insert newline at end of file +        lex->chr1 = CHR_EOF; +        lex->chr2 = CHR_EOF; +    } else if (len == 1) { +        lex->chr0 = str[0]; +        if (lex->chr0 != '\n' && lex->chr0 != '\r') { +            lex->chr1 = '\n'; // insert newline at end of file +        } else { +            lex->chr1 = CHR_EOF; +        } +        lex->chr2 = CHR_EOF; +    } else if (len == 2) { +        lex->chr0 = str[0]; +        lex->chr1 = str[1]; +        if (lex->chr1 != '\n' && lex->chr1 != '\r') { +            lex->chr2 = '\n'; // insert newline at end of file +        } else { +            lex->chr2 = CHR_EOF; +        } +    } else { +        lex->chr0 = str[0]; +        lex->chr1 = str[1]; +        lex->chr2 = str[2]; +    } + +    py_lexer_next_token_into(lex, &lex->tok_cur); +    py_lexer_next_token_into(lex, &lex->tok_next); + +    return lex; +} + +void py_lexer_free(py_lexer_t *lex) { +    if (lex == NULL) { +        return; +    } +    //m_free(lex->name); +    if (lex->free) { +        m_free((char*)lex->src_beg); +    } +    m_free(lex); +} + +void py_lexer_to_next(py_lexer_t *lex) { +    lex->tok_cur = lex->tok_next; +    py_lexer_next_token_into(lex, &lex->tok_next); +} + +const py_token_t *py_lexer_cur(const py_lexer_t *lex) { +    return &lex->tok_cur; +} + +bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind) { +    return lex->tok_cur.kind == kind; +} + +/* +bool py_lexer_is_str(py_lexer_t *lex, const char *str) { +    return py_token_is_str(&lex->tok_cur, str); +} + +bool py_lexer_is_next_kind(py_lexer_t *lex, py_token_kind_t kind) { +    return lex->tok_next.kind == kind; +} + +bool py_lexer_is_next_str(py_lexer_t *lex, const char *str) { +    return py_token_is_str(&lex->tok_next, str); +} + +bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind) { +    if (py_lexer_is_kind(lex, kind)) { +        py_lexer_to_next(lex); +        return true; +    } +    return false; +} + +bool 
py_lexer_opt_str(py_lexer_t *lex, const char *str) { +    if (py_lexer_is_str(lex, str)) { +        py_lexer_to_next(lex); +        return true; +    } +    return false; +} +*/ + +bool py_lexer_show_error(py_lexer_t *lex, const char *msg) { +    return py_token_show_error(&lex->tok_cur, msg); +} diff --git a/py/lexer.h b/py/lexer.h new file mode 100644 index 000000000..32ab48a08 --- /dev/null +++ b/py/lexer.h @@ -0,0 +1,141 @@ +/* lexer.h -- simple tokeniser for Python implementation + */ + +#ifndef INCLUDED_LEXER_H +#define INCLUDED_LEXER_H + +/* uses (byte) length instead of null termination + * tokens are the same - UTF-8 with (byte) length + */ + +typedef enum _py_token_kind_t { +    PY_TOKEN_END,                   // 0 + +    PY_TOKEN_INVALID, +    PY_TOKEN_LONELY_STRING_OPEN, + +    PY_TOKEN_NEWLINE,               // 3 +    PY_TOKEN_INDENT,                // 4 +    PY_TOKEN_DEDENT,                // 5 + +    PY_TOKEN_NAME,                  // 6 +    PY_TOKEN_NUMBER, +    PY_TOKEN_STRING, +    PY_TOKEN_BYTES, + +    PY_TOKEN_ELLIPSES, + +    PY_TOKEN_KW_FALSE,              // 11 +    PY_TOKEN_KW_NONE, +    PY_TOKEN_KW_TRUE, +    PY_TOKEN_KW_AND, +    PY_TOKEN_KW_AS, +    PY_TOKEN_KW_ASSERT, +    PY_TOKEN_KW_BREAK, +    PY_TOKEN_KW_CLASS, +    PY_TOKEN_KW_CONTINUE, +    PY_TOKEN_KW_DEF,                // 20 +    PY_TOKEN_KW_DEL, +    PY_TOKEN_KW_ELIF, +    PY_TOKEN_KW_ELSE, +    PY_TOKEN_KW_EXCEPT, +    PY_TOKEN_KW_FINALLY, +    PY_TOKEN_KW_FOR, +    PY_TOKEN_KW_FROM, +    PY_TOKEN_KW_GLOBAL, +    PY_TOKEN_KW_IF, +    PY_TOKEN_KW_IMPORT,             // 30 +    PY_TOKEN_KW_IN, +    PY_TOKEN_KW_IS, +    PY_TOKEN_KW_LAMBDA, +    PY_TOKEN_KW_NONLOCAL, +    PY_TOKEN_KW_NOT, +    PY_TOKEN_KW_OR, +    PY_TOKEN_KW_PASS, +    PY_TOKEN_KW_RAISE, +    PY_TOKEN_KW_RETURN, +    PY_TOKEN_KW_TRY,                // 40 +    PY_TOKEN_KW_WHILE, +    PY_TOKEN_KW_WITH, +    PY_TOKEN_KW_YIELD, + +    PY_TOKEN_OP_PLUS,               // 44 +    PY_TOKEN_OP_MINUS, +    
PY_TOKEN_OP_STAR, +    PY_TOKEN_OP_DBL_STAR, +    PY_TOKEN_OP_SLASH, +    PY_TOKEN_OP_DBL_SLASH, +    PY_TOKEN_OP_PERCENT, +    PY_TOKEN_OP_LESS, +    PY_TOKEN_OP_DBL_LESS, +    PY_TOKEN_OP_MORE, +    PY_TOKEN_OP_DBL_MORE,           // 54 +    PY_TOKEN_OP_AMPERSAND, +    PY_TOKEN_OP_PIPE, +    PY_TOKEN_OP_CARET, +    PY_TOKEN_OP_TILDE, +    PY_TOKEN_OP_LESS_EQUAL, +    PY_TOKEN_OP_MORE_EQUAL, +    PY_TOKEN_OP_DBL_EQUAL, +    PY_TOKEN_OP_NOT_EQUAL, + +    PY_TOKEN_DEL_PAREN_OPEN,        // 63 +    PY_TOKEN_DEL_PAREN_CLOSE, +    PY_TOKEN_DEL_BRACKET_OPEN, +    PY_TOKEN_DEL_BRACKET_CLOSE, +    PY_TOKEN_DEL_BRACE_OPEN, +    PY_TOKEN_DEL_BRACE_CLOSE, +    PY_TOKEN_DEL_COMMA, +    PY_TOKEN_DEL_COLON, +    PY_TOKEN_DEL_PERIOD, +    PY_TOKEN_DEL_SEMICOLON, +    PY_TOKEN_DEL_AT,                // 73 +    PY_TOKEN_DEL_EQUAL, +    PY_TOKEN_DEL_PLUS_EQUAL, +    PY_TOKEN_DEL_MINUS_EQUAL, +    PY_TOKEN_DEL_STAR_EQUAL, +    PY_TOKEN_DEL_SLASH_EQUAL, +    PY_TOKEN_DEL_DBL_SLASH_EQUAL, +    PY_TOKEN_DEL_PERCENT_EQUAL, +    PY_TOKEN_DEL_AMPERSAND_EQUAL, +    PY_TOKEN_DEL_PIPE_EQUAL, +    PY_TOKEN_DEL_CARET_EQUAL,       // 83 +    PY_TOKEN_DEL_DBL_MORE_EQUAL, +    PY_TOKEN_DEL_DBL_LESS_EQUAL, +    PY_TOKEN_DEL_DBL_STAR_EQUAL, +    PY_TOKEN_DEL_MINUS_MORE, +} py_token_kind_t; + +typedef struct _py_token_t { +    const char *src_name;       // (file) name of source +    uint src_line;              // actual source line +    uint src_column;            // actual source column + +    py_token_kind_t kind;       // kind of token +    uint cont_line;             // token belongs to this line in a continued line +    const char *str;            // string of token +    uint len;                   // (byte) length of string of token +} py_token_t; + +typedef struct _py_lexer_t py_lexer_t; + +void py_token_show(const py_token_t *tok); +void py_token_show_error_prefix(const py_token_t *tok); +bool py_token_show_error(const py_token_t *tok, const char *msg); + +py_lexer_t 
*py_lexer_from_file(const char *filename); +py_lexer_t *py_lexer_from_str_len(const char *src_name, const char *str, uint len, bool free_str); +void py_lexer_free(py_lexer_t *lex); +void py_lexer_to_next(py_lexer_t *lex); +const py_token_t *py_lexer_cur(const py_lexer_t *lex); +bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind); +/* unused +bool py_lexer_is_str(py_lexer_t *lex, const char *str); +bool py_lexer_is_next_kind(py_lexer_t *lex, py_token_kind_t kind); +bool py_lexer_is_next_str(py_lexer_t *lex, const char *str); +bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind); +bool py_lexer_opt_str(py_lexer_t *lex, const char *str); +*/ +bool py_lexer_show_error(py_lexer_t *lex, const char *msg); + +#endif /* INCLUDED_LEXER_H */ diff --git a/py/lexerfile.c b/py/lexerfile.c new file mode 100644 index 000000000..74bb5a061 --- /dev/null +++ b/py/lexerfile.c @@ -0,0 +1,23 @@ +#include <stdint.h> +#include <stdio.h> +#include <unistd.h> +#include <fcntl.h> + +#include "misc.h" +#include "lexer.h" + +py_lexer_t *py_lexer_from_file(const char *filename) { +    // TODO abstract away file functionality +    int fd = open(filename, O_RDONLY); +    if (fd < 0) { +        printf("cannot open file %s\n", filename); +        return NULL; +    } +    uint size = lseek(fd, 0, SEEK_END); +    lseek(fd, 0, SEEK_SET); +    char *data = m_new(char, size); +    read(fd, data, size); +    close(fd); + +    return py_lexer_from_str_len(filename, data, size, true); +} diff --git a/py/machine.h b/py/machine.h new file mode 100644 index 000000000..fa39c8f2d --- /dev/null +++ b/py/machine.h @@ -0,0 +1,4 @@ +typedef int64_t machine_int_t; // must be pointer size +typedef uint64_t machine_uint_t; // must be pointer size +typedef void *machine_ptr_t; // must be of pointer size +typedef double machine_float_t; diff --git a/py/main.c b/py/main.c new file mode 100644 index 000000000..7b17c38a8 --- /dev/null +++ b/py/main.c @@ -0,0 +1,58 @@ +#include <stdint.h> +#include 
<stdio.h> +#include <string.h> + +#include "misc.h" +#include "lexer.h" +#include "machine.h" +#include "parse.h" +#include "compile.h" +#include "runtime.h" + +int main(int argc, char **argv) { +    qstr_init(); +    rt_init(); + +    if (argc != 2) { +        printf("usage: py <file>\n"); +        return 1; +    } +    py_lexer_t *lex = py_lexer_from_file(argv[1]); +    //const char *pysrc = "def f():\n  x=x+1\n  print(42)\n"; +    //py_lexer_t *lex = py_lexer_from_str_len("<>", pysrc, strlen(pysrc), false); +    if (lex == NULL) { +        return 1; +    } + +    if (0) { +        while (!py_lexer_is_kind(lex, PY_TOKEN_END)) { +            py_token_show(py_lexer_cur(lex)); +            py_lexer_to_next(lex); +        } +    } else { +        py_parse_node_t pn = py_parse(lex, 0); +        //printf("----------------\n"); +        //parse_node_show(pn, 0); +        //printf("----------------\n"); +        py_compile(pn); +        //printf("----------------\n"); +    } + +    py_lexer_free(lex); + +    if (1) { +        // execute it +        py_obj_t module_fun = rt_make_function_from_id(1); +        if (module_fun != py_const_none) { +            py_obj_t ret = rt_call_function_0(module_fun); +            printf("done! 
// Running sum of the byte counts of all successful m_malloc, m_malloc0 and
// m_realloc requests.  It only ever grows: freeing does not reduce it.
static int total_bytes_allocated = 0;

// Free memory obtained from the m_* allocators; NULL is ignored.
void m_free(void *ptr) {
    if (ptr == NULL) {
        return;
    }
    free(ptr);
}

// Allocate num_bytes of uninitialised memory.
// Returns NULL for a zero-sized request, and NULL (after printing a message)
// if the allocation fails.
void *m_malloc(int num_bytes) {
    void *mem = NULL;
    if (num_bytes != 0) {
        mem = malloc(num_bytes);
        if (mem != NULL) {
            total_bytes_allocated += num_bytes;
        } else {
            printf("could not allocate memory, allocating %d bytes\n", num_bytes);
        }
    }
    return mem;
}

// Same as m_malloc, but the returned memory is zero-filled.
void *m_malloc0(int num_bytes) {
    void *mem = NULL;
    if (num_bytes != 0) {
        mem = calloc(1, num_bytes);
        if (mem != NULL) {
            total_bytes_allocated += num_bytes;
        } else {
            printf("could not allocate memory, allocating %d bytes\n", num_bytes);
        }
    }
    return mem;
}

// Resize the allocation at ptr to num_bytes.
// A size of zero frees ptr and returns NULL.  On failure a message is printed
// and NULL is returned.  On success the full new size is added to the total.
void *m_realloc(void *ptr, int num_bytes) {
    if (num_bytes == 0) {
        free(ptr);
        return NULL;
    }

    void *resized = realloc(ptr, num_bytes);
    if (resized != NULL) {
        total_bytes_allocated += num_bytes;
        return resized;
    }

    printf("could not allocate memory, reallocating %d bytes\n", num_bytes);
    return NULL;
}

// Report the running total maintained by the allocators above.
int m_get_total_bytes_allocated() {
    return total_bytes_allocated;
}
+#define AT_PR (FL_PRINT) +#define AT_SP (FL_SPACE | FL_PRINT) +#define AT_DI (FL_DIGIT | FL_PRINT) +#define AT_AL (FL_ALPHA | FL_PRINT) +#define AT_UP (FL_UPPER | FL_ALPHA | FL_PRINT) +#define AT_LO (FL_LOWER | FL_ALPHA | FL_PRINT) + +// table of attributes for ascii characters +static const uint8_t attr[] = { +    0, 0, 0, 0, 0, 0, 0, 0, +    0, AT_SP, AT_SP, AT_SP, 0, AT_SP, 0, 0, +    0, 0, 0, 0, 0, 0, 0, 0, +    0, 0, 0, 0, 0, 0, 0, 0, +    AT_SP, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, +    AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, +    AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, +    AT_DI, AT_DI, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, +    AT_PR, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, +    AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, +    AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, +    AT_UP, AT_UP, AT_UP, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, +    AT_PR, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, +    AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, +    AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, +    AT_LO, AT_LO, AT_LO, AT_PR, AT_PR, AT_PR, AT_PR, 0 +}; + +unichar g_utf8_get_char(const char *s) { +    return *s; +} + +char *g_utf8_next_char(const char *s) { +    return (char*)(s + 1); +} + +bool g_unichar_isspace(unichar c) { +    return c < 128 && (attr[c] & FL_SPACE) != 0; +} + +bool g_unichar_isalpha(unichar c) { +    return c < 128 && (attr[c] & FL_ALPHA) != 0; +} + +bool g_unichar_isprint(unichar c) { +    return c < 128 && (attr[c] & FL_PRINT) != 0; +} + +bool g_unichar_isdigit(unichar c) { +    return c < 128 && (attr[c] & FL_DIGIT) != 0; +} + +/* +bool char_is_alpha_or_digit(unichar c) { +    return c < 128 && (attr[c] & (FL_ALPHA | FL_DIGIT)) != 0; +} + +bool char_is_upper(unichar c) { +    return c < 128 && (attr[c] & FL_UPPER) != 0; +} + +bool char_is_lower(unichar c) { +    return c < 128 && (attr[c] & FL_LOWER) != 0; +} +*/ + +/* +char 
*g_strdup(const char *s) { +    return strdup(s); +} +*/ diff --git a/py/misc.h b/py/misc.h new file mode 100644 index 000000000..9ba80a5c3 --- /dev/null +++ b/py/misc.h @@ -0,0 +1,91 @@ +// a mini library of useful types and functions + +#ifndef _INCLUDED_MINILIB_H +#define _INCLUDED_MINILIB_H + +/** types *******************************************************/ + +typedef int bool; +enum { +    false = 0, +    true = 1 +}; + +typedef unsigned char byte; +typedef unsigned int uint; + +/** memomry allocation ******************************************/ + +#define m_new(type, num) ((type*)(m_malloc(sizeof(type) * (num)))) +#define m_new0(type, num) ((type*)(m_malloc0(sizeof(type) * (num)))) +#define m_renew(type, ptr, num) ((type*)(m_realloc((ptr), sizeof(type) * (num)))) + +void m_free(void *ptr); +void *m_malloc(int num_bytes); +void *m_malloc0(int num_bytes); +void *m_realloc(void *ptr, int num_bytes); + +int m_get_total_bytes_allocated(); + +/** unichar / UTF-8 *********************************************/ + +typedef int unichar; // TODO + +unichar g_utf8_get_char(const char *s); +char *g_utf8_next_char(const char *s); + +bool g_unichar_isspace(unichar c); +bool g_unichar_isalpha(unichar c); +bool g_unichar_isprint(unichar c); +bool g_unichar_isdigit(unichar c); + +//char *g_strdup(const char *s); + +/** blob ********************************************************/ + +/* +unsigned short decode_le16(byte *buf); +unsigned int decode_le32(byte *buf); +void encode_le16(byte *buf, unsigned short i); +void encode_le32(byte *buf, unsigned int i); +*/ + +/** string ******************************************************/ + +/* +#define streq(s1, s2) (strcmp((s1), (s2)) == 0) +*/ + +/** variable string *********************************************/ + +/* +typedef struct _vstr_t vstr_t; + +vstr_t *vstr_new(); +void vstr_free(vstr_t *vstr); +void vstr_reset(vstr_t *vstr); +bool vstr_had_error(vstr_t *vstr); +char *vstr_str(vstr_t *vstr); +int vstr_len(vstr_t *vstr); +void 
vstr_hint_size(vstr_t *vstr, int size); +char *vstr_add_len(vstr_t *vstr, int len); +void vstr_add_str(vstr_t *vstr, const char *str); +void vstr_add_strn(vstr_t *vstr, const char *str, int len); +void vstr_add_byte(vstr_t *vstr, byte v); +void vstr_add_le16(vstr_t *vstr, unsigned short v); +void vstr_add_le32(vstr_t *vstr, unsigned int v); +void vstr_cut_tail(vstr_t *vstr, int len); +void vstr_printf(vstr_t *vstr, const char *fmt, ...); +*/ + +/** unique string ***********************************************/ + +typedef unsigned int qstr; + +void qstr_init(); +qstr qstr_from_str_static(const char *str); +qstr qstr_from_str_take(char *str); +qstr qstr_from_strn_copy(const char *str, int len); +const char* qstr_str(qstr qstr); + +#endif // _INCLUDED_MINILIB_H diff --git a/py/parse.c b/py/parse.c new file mode 100644 index 000000000..94a5a5d9c --- /dev/null +++ b/py/parse.c @@ -0,0 +1,565 @@ +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <ctype.h> +#include <string.h> +#include <assert.h> + +#include "misc.h" +#include "lexer.h" +#include "machine.h" +#include "parse.h" + +#define RULE_ACT_KIND_MASK      (0xf0) +#define RULE_ACT_ARG_MASK       (0x0f) +#define RULE_ACT_OR             (0x10) +#define RULE_ACT_AND            (0x20) +#define RULE_ACT_LIST           (0x30) + +#define RULE_ARG_BLANK          (0x0000) +#define RULE_ARG_KIND_MASK      (0xf000) +#define RULE_ARG_ARG_MASK       (0x0fff) +#define RULE_ARG_TOK            (0x1000) +#define RULE_ARG_RULE           (0x2000) +#define RULE_ARG_OPT_TOK        (0x3000) +#define RULE_ARG_OPT_RULE       (0x4000) + +// (un)comment to use rule names; for debugging +//#define USE_RULE_NAME (1) + +typedef struct _rule_t { +    byte rule_id; +    byte act; +#ifdef USE_RULE_NAME +    const char *rule_name; +#endif +    uint16_t arg[]; +} rule_t; + +enum { +    RULE_none = 0, +#define DEF_RULE(rule, comp, kind, arg...) 
RULE_##rule, +#include "grammar.h" +#undef DEF_RULE +    RULE_maximum_number_of, +}; + +#define or(n)                   (RULE_ACT_OR | n) +#define and(n)                  (RULE_ACT_AND | n) +#define one_or_more             (RULE_ACT_LIST | 2) +#define list                    (RULE_ACT_LIST | 1) +#define list_with_end           (RULE_ACT_LIST | 3) +#define tok(t)                  (RULE_ARG_TOK | PY_TOKEN_##t) +#define rule(r)                 (RULE_ARG_RULE | RULE_##r) +#define opt_tok(t)              (RULE_ARG_OPT_TOK | PY_TOKEN_##t) +#define opt_rule(r)             (RULE_ARG_OPT_RULE | RULE_##r) +#ifdef USE_RULE_NAME +#define DEF_RULE(rule, comp, kind, arg...) static rule_t rule_##rule = { RULE_##rule, kind, #rule, { arg } }; +#else +#define DEF_RULE(rule, comp, kind, arg...) static rule_t rule_##rule = { RULE_##rule, kind, { arg } }; +#endif +#include "grammar.h" +#undef or +#undef and +#undef list +#undef list_with_end +#undef tok +#undef rule +#undef opt_tok +#undef opt_rule +#undef one_or_more +#undef DEF_RULE + +static rule_t *rules[] = { +    NULL, +#define DEF_RULE(rule, comp, kind, arg...) &rule_##rule, +#include "grammar.h" +#undef DEF_RULE +}; + +typedef struct _rule_stack_t { +    byte rule_id; +    int32_t arg_i; // what should be the size and signedness? 
+} rule_stack_t; + +typedef struct _parser_t { +    uint rule_stack_alloc; +    uint rule_stack_top; +    rule_stack_t *rule_stack; + +    uint result_stack_top; +    py_parse_node_t *result_stack; +} parser_t; + +static void push_rule(parser_t *parser, rule_t *rule, int arg_i) { +    if (parser->rule_stack_top >= parser->rule_stack_alloc) { +        parser->rule_stack_alloc *= 2; +        parser->rule_stack = m_renew(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc); +    } +    parser->rule_stack[parser->rule_stack_top].rule_id = rule->rule_id; +    parser->rule_stack[parser->rule_stack_top].arg_i = arg_i; +    parser->rule_stack_top += 1; +} + +static void push_rule_from_arg(parser_t *parser, uint arg) { +    assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE); +    uint rule_id = arg & RULE_ARG_ARG_MASK; +    assert(rule_id < RULE_maximum_number_of); +    push_rule(parser, rules[rule_id], 0); +} + +static void pop_rule(parser_t *parser, rule_t **rule, uint *arg_i) { +    parser->rule_stack_top -= 1; +    *rule = rules[parser->rule_stack[parser->rule_stack_top].rule_id]; +    *arg_i = parser->rule_stack[parser->rule_stack_top].arg_i; +} + +py_parse_node_t py_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) { +    return (py_parse_node_t)(kind | (arg << 4)); +} + +int num_parse_nodes_allocated = 0; +py_parse_node_struct_t *parse_node_new_struct(int rule_id, int num_args) { +    py_parse_node_struct_t *pn = m_malloc(sizeof(py_parse_node_struct_t) + num_args * sizeof(py_parse_node_t)); +    pn->source = 0; // TODO +    pn->kind_num_nodes = (rule_id & 0xff) | (num_args << 8); +    num_parse_nodes_allocated += 1; +    return pn; +} + +void parse_node_show(py_parse_node_t pn, int indent) { +    for (int i = 0; i < indent; i++) { +        printf(" "); +    } +    if (PY_PARSE_NODE_IS_NULL(pn)) { +        printf("NULL\n"); +    } else if (PY_PARSE_NODE_IS_LEAF(pn)) { +        int arg = 
PY_PARSE_NODE_LEAF_ARG(pn); +        switch (PY_PARSE_NODE_LEAF_KIND(pn)) { +            case PY_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break; +            case PY_PARSE_NODE_SMALL_INT: printf("int(%d)\n", arg); break; +            case PY_PARSE_NODE_INTEGER: printf("int(%s)\n", qstr_str(arg)); break; +            case PY_PARSE_NODE_DECIMAL: printf("dec(%s)\n", qstr_str(arg)); break; +            case PY_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break; +            case PY_PARSE_NODE_BYTES: printf("bytes(%s)\n", qstr_str(arg)); break; +            case PY_PARSE_NODE_TOKEN: printf("tok(%d)\n", arg); break; +            default: assert(0); +        } +    } else { +        py_parse_node_struct_t *pns2 = (py_parse_node_struct_t*)pn; +        int n = pns2->kind_num_nodes >> 8; +#ifdef USE_RULE_NAME +        printf("%s(%d) (n=%d)\n", rules[PY_PARSE_NODE_STRUCT_KIND(pns2)]->rule_name, PY_PARSE_NODE_STRUCT_KIND(pns2), n); +#else +        printf("rule(%u) (n=%d)\n", (uint)PY_PARSE_NODE_STRUCT_KIND(pns2), n); +#endif +        for (int i = 0; i < n; i++) { +            parse_node_show(pns2->nodes[i], indent + 2); +        } +    } +} + +/* +static void result_stack_show(parser_t *parser) { +    printf("result stack, most recent first\n"); +    for (int i = parser->result_stack_top - 1; i >= 0; i--) { +        parse_node_show(parser->result_stack[i], 0); +    } +} +*/ + +static py_parse_node_t pop_result(parser_t *parser) { +    assert(parser->result_stack_top > 0); +    return parser->result_stack[--parser->result_stack_top]; +} + +static py_parse_node_t peek_result(parser_t *parser, int pos) { +    assert(parser->result_stack_top > pos); +    return parser->result_stack[parser->result_stack_top - 1 - pos]; +} + +static void push_result_node(parser_t *parser, py_parse_node_t pn) { +    parser->result_stack[parser->result_stack_top++] = pn; +} + +static void push_result_token(parser_t *parser, const py_lexer_t *lex) { +    const py_token_t *tok = 
py_lexer_cur(lex); +    py_parse_node_t pn; +    if (tok->kind == PY_TOKEN_NAME) { +        pn = py_parse_node_new_leaf(PY_PARSE_NODE_ID, qstr_from_strn_copy(tok->str, tok->len)); +    } else if (tok->kind == PY_TOKEN_NUMBER) { +        bool dec = false; +        bool small_int = true; +        int int_val = 0; +        int len = tok->len; +        const char *str = tok->str; +        int base = 10; +        int i = 0; +        if (len >= 3 && str[0] == '0') { +            if (str[1] == 'o' || str[1] == 'O') { +                // octal +                base = 8; +                i = 2; +            } else if (str[1] == 'x' || str[1] == 'X') { +                // hexadecimal +                base = 16; +                i = 2; +            } else if (str[1] == 'b' || str[1] == 'B') { +                // binary +                base = 2; +                i = 2; +            } +        } +        for (; i < len; i++) { +            if (int_val > 0xffff) { +                // already too big for a small int: clamp so that further digits cannot overflow int_val +                // (the range check after the loop still rejects the clamped value) +                int_val = 0x10000; +            } +            if (g_unichar_isdigit(str[i]) && str[i] - '0' < base) { +                int_val = base * int_val + str[i] - '0'; +            } else if (base == 16 && 'a' <= str[i] && str[i] <= 'f') { +                int_val = base * int_val + str[i] - 'a' + 10; +            } else if (base == 16 && 'A' <= str[i] && str[i] <= 'F') { +                // upper-case hex digit; must be tested before the 'E' exponent check below +                int_val = base * int_val + str[i] - 'A' + 10; +            } else if (str[i] == '.' 
|| str[i] == 'e' || str[i] == 'E') { +                dec = true; +                break; +            } else { +                small_int = false; +                break; +            } +        } +        if (dec) { +            pn = py_parse_node_new_leaf(PY_PARSE_NODE_DECIMAL, qstr_from_strn_copy(str, len)); +        } else if (small_int && -0x10000 <= int_val && int_val <= 0xffff) { +            pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, int_val); +        } else { +            pn = py_parse_node_new_leaf(PY_PARSE_NODE_INTEGER, qstr_from_strn_copy(str, len)); +        } +    } else if (tok->kind == PY_TOKEN_STRING) { +        pn = py_parse_node_new_leaf(PY_PARSE_NODE_STRING, qstr_from_strn_copy(tok->str, tok->len)); +    } else if (tok->kind == PY_TOKEN_BYTES) { +        pn = py_parse_node_new_leaf(PY_PARSE_NODE_BYTES, qstr_from_strn_copy(tok->str, tok->len)); +    } else { +        pn = py_parse_node_new_leaf(PY_PARSE_NODE_TOKEN, tok->kind); +    } +    push_result_node(parser, pn); +} + +static void push_result_rule(parser_t *parser, rule_t *rule, int num_args) { +    py_parse_node_struct_t *pn = parse_node_new_struct(rule->rule_id, num_args); +    for (int i = num_args; i > 0; i--) { +        pn->nodes[i - 1] = pop_result(parser); +    } +    push_result_node(parser, (py_parse_node_t)pn); +} + +py_parse_node_t py_parse(py_lexer_t *lex, int wanted_rule) { +    wanted_rule = RULE_file_input; +    parser_t *parser = m_new(parser_t, 1); +    parser->rule_stack_alloc = 64; +    parser->rule_stack_top = 0; +    parser->rule_stack = m_new(rule_stack_t, parser->rule_stack_alloc); + +    parser->result_stack = m_new(py_parse_node_t, 1000); +    parser->result_stack_top = 0; + +    push_rule(parser, rules[wanted_rule], 0); + +    uint n, i; +    bool backtrack = false; +    rule_t *rule; +    py_token_kind_t tok_kind; +    bool emit_rule; +    bool had_trailing_sep; + +    for (;;) { +        next_rule: +        if (parser->rule_stack_top == 0) { +          
  break; +        } + +        pop_rule(parser, &rule, &i); +        n = rule->act & RULE_ACT_ARG_MASK; + +        /* +        // debugging +        printf("depth=%d ", parser->rule_stack_top); +        for (int j = 0; j < parser->rule_stack_top; ++j) { +            printf(" "); +        } +        printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack); +        */ + +        switch (rule->act & RULE_ACT_KIND_MASK) { +            case RULE_ACT_OR: +                if (i > 0 && !backtrack) { +                    goto next_rule; +                } else { +                    backtrack = false; +                } +                for (; i < n - 1; ++i) { +                    switch (rule->arg[i] & RULE_ARG_KIND_MASK) { +                        case RULE_ARG_TOK: +                            if (py_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) { +                                push_result_token(parser, lex); +                                py_lexer_to_next(lex); +                                goto next_rule; +                            } +                            break; +                        case RULE_ARG_RULE: +                            push_rule(parser, rule, i + 1); +                            push_rule_from_arg(parser, rule->arg[i]); +                            goto next_rule; +                        default: +                            assert(0); +                    } +                } +                if ((rule->arg[i] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) { +                    if (py_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) { +                        push_result_token(parser, lex); +                        py_lexer_to_next(lex); +                    } else { +                        backtrack = true; +                        goto next_rule; +                    } +                } else { +                    push_rule_from_arg(parser, rule->arg[i]); +                } +                break; + +            
case RULE_ACT_AND: + +                // failed, backtrack if we can, else syntax error +                if (backtrack) { +                    assert(i > 0); +                    if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) { +                        // an optional rule that failed, so continue with next arg +                        push_result_node(parser, PY_PARSE_NODE_NULL); +                        backtrack = false; +                    } else { +                        // a mandatory rule that failed, so propagate backtrack +                        if (i > 1) { +                            // already eaten tokens so can't backtrack +                            goto syntax_error; +                        } else { +                            goto next_rule; +                        } +                    } +                } + +                // progress through the rule +                for (; i < n; ++i) { +                    switch (rule->arg[i] & RULE_ARG_KIND_MASK) { +                        case RULE_ARG_TOK: +                            // need to match a token +                            tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK; +                            if (py_lexer_is_kind(lex, tok_kind)) { +                                // matched token +                                if (tok_kind == PY_TOKEN_NAME) { +                                    push_result_token(parser, lex); +                                } +                                py_lexer_to_next(lex); +                            } else { +                                // failed to match token +                                if (i > 0) { +                                    // already eaten tokens so can't backtrack +                                    goto syntax_error; +                                } else { +                                    // this rule failed, so backtrack +                                    backtrack = true; +                      
              goto next_rule; +                                } +                            } +                            break; +                        case RULE_ARG_RULE: +                            //if (i + 1 < n) { +                                push_rule(parser, rule, i + 1); +                            //} +                            push_rule_from_arg(parser, rule->arg[i]); +                            goto next_rule; +                        case RULE_ARG_OPT_RULE: +                            push_rule(parser, rule, i + 1); +                            push_rule_from_arg(parser, rule->arg[i]); +                            goto next_rule; +                        default: +                            assert(0); +                    } +                } + +                assert(i == n); + +                // matched the rule, so now build the corresponding parse_node + +                // count number of arguments for the parse_node +                i = 0; +                emit_rule = false; +                for (int x = 0; x < n; ++x) { +                    if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) { +                        tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK; +                        if (tok_kind >= PY_TOKEN_NAME) { +                            emit_rule = true; +                        } +                        if (tok_kind == PY_TOKEN_NAME) { +                            // only tokens which were names are pushed to stack +                            i += 1; +                        } +                    } else { +                        // rules are always pushed +                        i += 1; +                    } +                } + +                // always emit these rules, even if they have only 1 argument +                if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) { +                    emit_rule = true; +                } + +                // never emit these rules if they 
have only 1 argument +                // NOTE: can't put atom_paren here because we need it to distinguish, for example, [a,b] from [(a,b)] +                if (rule->rule_id == RULE_else_stmt || rule->rule_id == RULE_testlist_comp_3b || rule->rule_id == RULE_import_as_names_paren || rule->rule_id == RULE_typedargslist_colon || rule->rule_id == RULE_typedargslist_equal || rule->rule_id == RULE_dictorsetmaker_colon || rule->rule_id == RULE_classdef_2 || rule->rule_id == RULE_with_item_as || rule->rule_id == RULE_assert_stmt_extra || rule->rule_id == RULE_as_name || rule->rule_id == RULE_raise_stmt_from || rule->rule_id == RULE_vfpdef) { +                    emit_rule = false; +                } + +                // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data) +                if (rule->rule_id == RULE_funcdef || rule->rule_id == RULE_classdef || rule->rule_id == RULE_comp_for || rule->rule_id == RULE_lambdef || rule->rule_id == RULE_lambdef_nocond) { +                    emit_rule = true; +                    push_result_node(parser, PY_PARSE_NODE_NULL); +                    i += 1; +                } + +                int num_not_nil = 0; +                for (int x = 0; x < i; ++x) { +                    if (peek_result(parser, x) != PY_PARSE_NODE_NULL) { +                        num_not_nil += 1; +                    } +                } +                //printf("done and %s n=%d i=%d notnil=%d\n", rule->rule_name, n, i, num_not_nil); +                if (emit_rule) { +                    push_result_rule(parser, rule, i); +                } else if (num_not_nil == 0) { +                    push_result_rule(parser, rule, i); // needed for, eg, atom_paren, testlist_comp_3b +                    //result_stack_show(parser); +                    //assert(0); +                } else if (num_not_nil == 1) { +                    // single result, leave it on stack +                    py_parse_node_t pn = 
PY_PARSE_NODE_NULL; +                    for (int x = 0; x < i; ++x) { +                        py_parse_node_t pn2 = pop_result(parser); +                        if (pn2 != PY_PARSE_NODE_NULL) { +                            pn = pn2; +                        } +                    } +                    push_result_node(parser, pn); +                } else { +                    push_result_rule(parser, rule, i); +                } +                break; + +            case RULE_ACT_LIST: +                // n=2 is: item item* +                // n=1 is: item (sep item)* +                // n=3 is: item (sep item)* [sep] +                if (backtrack) { +                    list_backtrack: +                    had_trailing_sep = false; +                    if (n == 2) { +                        if (i == 1) { +                            // fail on item, first time round; propagate backtrack +                            goto next_rule; +                        } else { +                            // fail on item, in later rounds; finish with this rule +                            backtrack = false; +                        } +                    } else { +                        if (i == 1) { +                            // fail on item, first time round; propagate backtrack +                            goto next_rule; +                        } else if ((i & 1) == 1) { +                            // fail on item, in later rounds; have eaten tokens so can't backtrack +                            if (n == 3) { +                                // list allows trailing separator; finish parsing list +                                had_trailing_sep = true; +                                backtrack = false; +                            } else { +                                // list doesn't allow a trailing separator; fail +                                goto syntax_error; +                            } +                        } else { +                        
    // fail on separator; finish parsing list +                            backtrack = false; +                        } +                    } +                } else { +                    for (;;) { +                        uint arg = rule->arg[i & 1 & n]; +                        switch (arg & RULE_ARG_KIND_MASK) { +                            case RULE_ARG_TOK: +                                if (py_lexer_is_kind(lex, arg & RULE_ARG_ARG_MASK)) { +                                    if (i & 1 & n) { +                                        // separators which are tokens are not pushed to result stack +                                    } else { +                                        push_result_token(parser, lex); +                                    } +                                    py_lexer_to_next(lex); +                                    // got element of list, so continue parsing list +                                    i += 1; +                                } else { +                                    // couldn't get element of list +                                    i += 1; +                                    backtrack = true; +                                    goto list_backtrack; +                                } +                                break; +                            case RULE_ARG_RULE: +                                push_rule(parser, rule, i + 1); +                                push_rule_from_arg(parser, arg); +                                goto next_rule; +                            default: +                                assert(0); +                        } +                    } +                } +                assert(i >= 1); + +                // compute number of elements in list, result in i +                i -= 1; +                if ((n & 1) && (rule->arg[1] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) { +                    // don't count separators when they are tokens +                    i = (i + 
1) / 2; +                } + +                if (i == 1) { +                    // list matched single item +                    if (had_trailing_sep) { +                        // if there was a trailing separator, make a list of a single item +                        push_result_rule(parser, rule, i); +                    } else { +                        // just leave single item on stack (ie don't wrap in a list) +                    } +                } else { +                    //printf("done list %s %d %d\n", rule->rule_name, n, i); +                    push_result_rule(parser, rule, i); +                } +                break; + +            default: +                assert(0); +        } +    } +    if (!py_lexer_is_kind(lex, PY_TOKEN_END)) { +        py_lexer_show_error(lex, "unexpected token at end:"); +        py_token_show(py_lexer_cur(lex)); +    } +    //printf("--------------\n"); +    //result_stack_show(parser); +    assert(parser->result_stack_top == 1); +    //printf("maximum depth: %d\n", parser->rule_stack_alloc); +    //printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated); +    return parser->result_stack[0]; + +syntax_error: +    py_lexer_show_error(lex, "syntax error:"); +#ifdef USE_RULE_NAME +    py_lexer_show_error(lex, rule->rule_name); +#endif +    py_token_show(py_lexer_cur(lex)); +    return PY_PARSE_NODE_NULL; +} diff --git a/py/parse.h b/py/parse.h new file mode 100644 index 000000000..07d553c14 --- /dev/null +++ b/py/parse.h @@ -0,0 +1,54 @@ +struct _py_lexer_t; + +// a py_parse_node_t is: +//  - 0000...0000: no node +//  - xxxx...0001: an identifier; bits 4 and above are the qstr +//  - xxxx...0011: a small integer; bits 4 and above are the signed value, 2's complement +//  - xxxx...0101: an integer; bits 4 and above are the qstr holding the value +//  - xxxx...0111: a decimal; bits 4 and above are the qstr holding the value +//  - xxxx...1001: a string; bits 4 and above are the qstr holding the value 
+//  - xxxx...1011: a bytes literal; bits 4 and above are the qstr holding the value +//  - xxxx...1101: a token; bits 4 and above are py_token_kind_t +//  - xxxx...xxx0: pointer to py_parse_node_struct_t + +#define PY_PARSE_NODE_NULL      (0) +#define PY_PARSE_NODE_ID        (0x1) +#define PY_PARSE_NODE_SMALL_INT (0x3) +#define PY_PARSE_NODE_INTEGER   (0x5) +#define PY_PARSE_NODE_DECIMAL   (0x7) +#define PY_PARSE_NODE_STRING    (0x9) +#define PY_PARSE_NODE_BYTES     (0xb) +#define PY_PARSE_NODE_TOKEN     (0xd) + +typedef machine_uint_t py_parse_node_t; // must be pointer size + +typedef struct _py_parse_node_struct_t { +    uint32_t source;            // file identifier, and line number +    uint32_t kind_num_nodes;    // parse node kind, and number of nodes +    py_parse_node_t nodes[];    // nodes +} py_parse_node_struct_t; + +// macros for py_parse_node_t usage +// some of these evaluate their argument more than once + +#define PY_PARSE_NODE_IS_NULL(pn) ((pn) == PY_PARSE_NODE_NULL) +#define PY_PARSE_NODE_IS_LEAF(pn) ((pn) & 1) +#define PY_PARSE_NODE_IS_STRUCT(pn) ((pn) != PY_PARSE_NODE_NULL && ((pn) & 1) == 0) +#define PY_PARSE_NODE_IS_STRUCT_KIND(pn, k) ((pn) != PY_PARSE_NODE_NULL && ((pn) & 1) == 0 && PY_PARSE_NODE_STRUCT_KIND((py_parse_node_struct_t*)(pn)) == (k)) + +#define PY_PARSE_NODE_IS_ID(pn) (((pn) & 0xf) == PY_PARSE_NODE_ID) +#define PY_PARSE_NODE_IS_SMALL_INT(pn) (((pn) & 0xf) == PY_PARSE_NODE_SMALL_INT) +#define PY_PARSE_NODE_IS_TOKEN(pn) (((pn) & 0xf) == PY_PARSE_NODE_TOKEN) +// k is parenthesised so that an expression argument (eg a | b) is shifted as a whole +#define PY_PARSE_NODE_IS_TOKEN_KIND(pn, k) ((pn) == (PY_PARSE_NODE_TOKEN | ((k) << 4))) + +#define PY_PARSE_NODE_LEAF_KIND(pn) ((pn) & 0xf) +// TODO should probably have int and uint versions of this macro +#define PY_PARSE_NODE_LEAF_ARG(pn) (((machine_int_t)(pn)) >> 4) +#define PY_PARSE_NODE_STRUCT_KIND(pns) ((pns)->kind_num_nodes & 0xff) +#define PY_PARSE_NODE_STRUCT_NUM_NODES(pns) ((pns)->kind_num_nodes >> 8) + +py_parse_node_t 
py_parse_node_new_leaf(machine_int_t kind, machine_int_t arg); + +void parse_node_show(py_parse_node_t pn, int indent); + +py_parse_node_t py_parse(struct _py_lexer_t *lex, int wanted_rule); diff --git a/py/qstr.c b/py/qstr.c new file mode 100644 index 000000000..33d15c7e7 --- /dev/null +++ b/py/qstr.c @@ -0,0 +1,56 @@ +#include <assert.h> +#include <string.h> + +#include "misc.h" + +static int qstrs_alloc; +static int qstrs_len; +static const char **qstrs; + +void qstr_init() { +    qstrs_alloc = 400; +    qstrs_len = 1; +    qstrs = m_new(const char*, qstrs_alloc); +    qstrs[0] = "nil"; +} + +static qstr qstr_add(const char *str) { +    if (qstrs_len >= qstrs_alloc) { +        qstrs_alloc *= 2; +        qstrs = m_renew(const char*, qstrs, qstrs_alloc); +    } +    qstrs[qstrs_len++] = str; +    return qstrs_len - 1; +} + +qstr qstr_from_str_static(const char *str) { +    for (int i = 0; i < qstrs_len; i++) { +        if (strcmp(qstrs[i], str) == 0) { +            return i; +        } +    } +    return qstr_add(str); +} + +qstr qstr_from_str_take(char *str) { +    for (int i = 0; i < qstrs_len; i++) { +        if (strcmp(qstrs[i], str) == 0) { +            m_free(str); +            return i; +        } +    } +    return qstr_add(str); +} + +qstr qstr_from_strn_copy(const char *str, int len) { +    for (int i = 0; i < qstrs_len; i++) { +        if (strncmp(qstrs[i], str, len) == 0 && qstrs[i][len] == '\0') { +            return i; +        } +    } +    return qstr_add(strndup(str, len)); +} + +const char *qstr_str(qstr qstr) { +    return qstrs[qstr]; +} diff --git a/py/runtime.c b/py/runtime.c new file mode 100644 index 000000000..bf2e2ee06 --- /dev/null +++ b/py/runtime.c @@ -0,0 +1,944 @@ +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> + +#include "misc.h" +#include "machine.h" +#include "runtime.h" +#include "bc.h" + +#define DEBUG_printf(args...) (void)0 +//#define DEBUG_printf(args...) 
printf(args) + +#define DEBUG_OP_printf(args...) (void)0 +//#define DEBUG_OP_printf(args...) printf(args) + +// enable/disable float support with this definition +#define PY_FLOAT (1) + +typedef machine_int_t py_small_int_t; + +#define IS_O(o, k) (((((py_small_int_t)(o)) & 1) == 0) && (((py_obj_base_t*)(o))->kind == (k))) +#define IS_SMALL_INT(o) (((py_small_int_t)(o)) & 1) +#define FROM_SMALL_INT(o) (((py_small_int_t)(o)) >> 1) +#define TO_SMALL_INT(o) ((py_obj_t)(((o) << 1) | 1)) + +#ifdef PY_FLOAT +typedef machine_float_t float_t; +#endif + +typedef enum { +    O_CONST, +    O_STR, +#ifdef PY_FLOAT +    O_FLOAT, +#endif +    O_FUN_0, +    O_FUN_1, +    O_FUN_2, +    O_FUN_N, +    O_FUN_BC, +    O_BOUND_METH, +    O_LIST, +    O_SET, +    O_MAP, +    O_CLASS, +} py_obj_kind_t; + +typedef enum { +    MAP_QSTR, +    MAP_PY_OBJ, +} py_map_kind_t; + +typedef struct _py_map_elem_t { +    py_obj_t key; +    py_obj_t value; +} py_map_elem_t; + +typedef struct _py_map_t { +    py_map_kind_t kind; +    machine_uint_t alloc; +    machine_uint_t used; +    py_map_elem_t *table; +} py_map_t; + +typedef struct _py_obj_base_t { +    py_obj_kind_t kind; +    union { +        const char *id; +        qstr u_str; +#ifdef PY_FLOAT +        float_t flt; +#endif +        struct { // for O_FUN_[012N] +            void *fun; +            int n_args; +        } u_fun; +        struct { // for O_FUN_BC +            byte *code; +            uint len; +            int n_args; +        } u_fun_bc; +        struct { // for O_BOUND_METH +            py_obj_t meth; +            py_obj_t self; +        } u_bound_meth; +        struct { // for O_LIST +            int alloc; +            int len; +            py_obj_t *items; +        } u_list; +        struct { // for O_SET +            int alloc; +            int used; +            py_obj_t *table; +        } u_set; +        py_map_t u_map; // for O_MAP +        /* +        struct { // for O_MAP +            int alloc; +            int used; + 
           py_map_elem_t *table; +        } u_map; +        */ +        struct { // for O_CLASS +            py_map_t *map; +        } u_class; +    }; +} py_obj_base_t; + +py_obj_t py_const_none; +py_obj_t py_const_false; +py_obj_t py_const_true; + +py_map_t map_name; +py_map_t map_builtins; + +// approximately doubling primes; made with Mathematica command: Table[Prime[Floor[(1.7)^n]], {n, 3, 24}] +static int doubling_primes[] = {7, 19, 43, 89, 179, 347, 647, 1229, 2297, 4243, 7829, 14347, 26017, 47149, 84947, 152443, 273253, 488399, 869927, 1547173, 2745121, 4861607}; + +int get_doubling_prime_greater_or_equal_to(int x) { +    for (int i = 0; i < sizeof(doubling_primes) / sizeof(int); i++) { +        if (doubling_primes[i] >= x) { +            return doubling_primes[i]; +        } +    } +    // ran out of primes in the table! +    // return something sensible, at least make it odd +    return x | 1; +} + +void py_map_init(py_map_t *map, py_map_kind_t kind, int n) { +    map->kind = kind; +    map->alloc = get_doubling_prime_greater_or_equal_to(n + 1); +    map->used = 0; +    map->table = m_new(py_map_elem_t, map->alloc); +    for (int i = 0; i < map->alloc; i++) { +        map->table[i].key = NULL; +        map->table[i].value = NULL; +    } +} + +py_map_t *py_map_new(py_map_kind_t kind, int n) { +    py_map_t *map = m_new(py_map_t, 1); +    py_map_init(map, kind, n); +    return map; +} + +int py_obj_hash(py_obj_t o_in) { +    if (IS_SMALL_INT(o_in)) { +        return FROM_SMALL_INT(o_in); +    } else if (IS_O(o_in, O_STR)) { +        return ((py_obj_base_t*)o_in)->u_str; +    } else { +        assert(0); +        return 0; +    } +} + +bool py_obj_equal(py_obj_t o1, py_obj_t o2) { +    if (o1 == o2) { +        return true; +    } else if (IS_SMALL_INT(o1) && IS_SMALL_INT(o2)) { +        return false; +    } else if (IS_O(o1, O_STR) && IS_O(o2, O_STR)) { +        return ((py_obj_base_t*)o1)->u_str == ((py_obj_base_t*)o2)->u_str; +    } else { +        
assert(0); +        return false; +    } +} + +py_map_elem_t* py_map_lookup_helper(py_map_t *map, py_obj_t index, bool add_if_not_found) { +    bool is_map_py_obj = (map->kind == MAP_PY_OBJ); +    machine_uint_t hash; +    if (is_map_py_obj) { +        hash = py_obj_hash(index); +    } else { +        hash = (machine_uint_t)index; +    } +    uint pos = hash % map->alloc; +    for (;;) { +        py_map_elem_t *elem = &map->table[pos]; +        if (elem->key == NULL) { +            // not in table +            if (add_if_not_found) { +                if (map->used + 1 >= map->alloc) { +                    // not enough room in table, rehash it +                    int old_alloc = map->alloc; +                    py_map_elem_t *old_table = map->table; +                    map->alloc = get_doubling_prime_greater_or_equal_to(map->alloc + 1); +                    map->used = 0; +                    map->table = m_new(py_map_elem_t, map->alloc); +                    for (int i = 0; i < old_alloc; i++) { +                        if (old_table[i].key != NULL) { +                            py_map_lookup_helper(map, old_table[i].key, true)->value = old_table[i].value; +                        } +                    } +                    m_free(old_table); +                    // restart the search for the new element +                    pos = hash % map->alloc; +                } else { +                    map->used += 1; +                    elem->key = index; +                    return elem; +                } +            } else { +                return NULL; +            } +        } else if (elem->key == index || (is_map_py_obj && py_obj_equal(elem->key, index))) { +            // found it +            if (add_if_not_found) { +                elem->key = index; +            } +            return elem; +        } else { +            // not yet found, keep searching in this table +            pos = (pos + 1) % map->alloc; +        } +    } +} + +py_map_elem_t* 
py_qstr_map_lookup(py_map_t *map, qstr index, bool add_if_not_found) { +    py_obj_t o = (py_obj_t)(machine_uint_t)index; +    return py_map_lookup_helper(map, o, add_if_not_found); +} + +py_map_elem_t* py_map_lookup(py_obj_t o, py_obj_t index, bool add_if_not_found) { +    assert(IS_O(o, O_MAP)); +    return py_map_lookup_helper(&((py_obj_base_t *)o)->u_map, index, add_if_not_found); +} + +static bool fit_small_int(py_small_int_t o) { +    return true; +} + +py_obj_t py_obj_new_const(const char *id) { +    py_obj_base_t *o = m_new(py_obj_base_t, 1); +    o->kind = O_CONST; +    o->id = id; +    return (py_obj_t)o; +} + +py_obj_t py_obj_new_str(qstr qstr) { +    py_obj_base_t *o = m_new(py_obj_base_t, 1); +    o->kind = O_STR; +    o->u_str = qstr; +    return (py_obj_t)o; +} + +#ifdef PY_FLOAT +py_obj_t py_obj_new_float(float_t val) { +    py_obj_base_t *o = m_new(py_obj_base_t, 1); +    o->kind = O_FLOAT; +    o->flt = val; +    return (py_obj_t)o; +} +#endif + +py_obj_t list_append(py_obj_t self_in, py_obj_t arg) { +    assert(IS_O(self_in, O_LIST)); +    py_obj_base_t *self = self_in; +    if (self->u_list.len >= self->u_list.alloc) { +        self->u_list.alloc *= 2; +        self->u_list.items = m_renew(py_obj_t, self->u_list.items, self->u_list.alloc); +    } +    self->u_list.items[self->u_list.len++] = arg; +    return arg; +} + +static qstr q_append; +static qstr q_print; +static qstr q_len; +static qstr q___build_class__; + +typedef enum { +    PY_CODE_NATIVE, +    PY_CODE_BYTE, +} py_code_kind_t; + +typedef struct _py_code_t { +    py_code_kind_t kind; +    int n_args; +    union { +        struct { +            py_fun_t fun; +        } u_native; +        struct { +            byte *code; +            uint len; +        } u_byte; +    }; +} py_code_t; + +static int next_unique_code_id; +static py_code_t *unique_codes; + +py_obj_t fun_list_append; + +py_obj_t py_builtin_print(py_obj_t o) { +    if (IS_O(o, O_STR)) { +        // special case, print string 
raw +        printf("%s\n", qstr_str(((py_obj_base_t*)o)->u_str)); +    } else { +        // print the object Python style +        py_obj_print(o); +        printf("\n"); +    } +    return py_const_none; +} + +py_obj_t py_builtin_len(py_obj_t o_in) { +    py_small_int_t len = 0; +    if (IS_O(o_in, O_LIST)) { +        py_obj_base_t *o = o_in; +        len = o->u_list.len; +    } else if (IS_O(o_in, O_MAP)) { +        py_obj_base_t *o = o_in; +        len = o->u_map.used; +    } else { +        assert(0); +    } +    return TO_SMALL_INT(len); +} + +py_obj_t py_builtin___build_class__(py_obj_t o1, py_obj_t o2) { +    py_obj_base_t *o = m_new(py_obj_base_t, 1); +    o->kind = O_CLASS; +    o->u_class.map = py_map_new(MAP_QSTR, 0); +    return o; +} + +FILE *fp_native = NULL; + +void rt_init() { +    q_append = qstr_from_str_static("append"); +    q_print = qstr_from_str_static("print"); +    q_len = qstr_from_str_static("len"); +    q___build_class__ = qstr_from_str_static("__build_class__"); + +    py_const_none = py_obj_new_const("None"); +    py_const_false = py_obj_new_const("False"); +    py_const_true = py_obj_new_const("True"); + +    py_map_init(&map_name, MAP_QSTR, 0); + +    py_map_init(&map_builtins, MAP_QSTR, 3); +    py_qstr_map_lookup(&map_builtins, q_print, true)->value = rt_make_function_1(py_builtin_print); +    py_qstr_map_lookup(&map_builtins, q_len, true)->value = rt_make_function_1(py_builtin_len); +    py_qstr_map_lookup(&map_builtins, q___build_class__, true)->value = rt_make_function_2(py_builtin___build_class__); + +    next_unique_code_id = 1; +    unique_codes = NULL; + +    fun_list_append = rt_make_function_2(list_append); + +    fp_native = fopen("out-native", "wb"); +} + +void rt_deinit() { +    if (fp_native != NULL) { +        fclose(fp_native); +    } +} + +int rt_get_new_unique_code_id() { +    return next_unique_code_id++; +} + +void rt_assign_native_code(int unique_code_id, py_fun_t fun, uint len, int n_args) { +    if 
(unique_codes == NULL) { +        unique_codes = m_new(py_code_t, next_unique_code_id); +    } +    assert(unique_code_id < next_unique_code_id); +    unique_codes[unique_code_id].kind = PY_CODE_NATIVE; +    unique_codes[unique_code_id].n_args = n_args; +    unique_codes[unique_code_id].u_native.fun = fun; + +    DEBUG_printf("assign native code: id=%d fun=%p len=%u n_args=%d\n", unique_code_id, fun, len, n_args); +    byte *fun_data = (byte*)(((machine_uint_t)fun) & (~1)); // need to clear lower bit in case it's thumb code +    for (int i = 0; i < 128 && i < len; i++) { +        if (i > 0 && i % 16 == 0) { +            DEBUG_printf("\n"); +        } +        DEBUG_printf(" %02x", fun_data[i]); +    } +    DEBUG_printf("\n"); + +    if (fp_native != NULL) { +        fwrite(fun_data, len, 1, fp_native); +    } +} + +void rt_assign_byte_code(int unique_code_id, byte *code, uint len, int n_args) { +    if (unique_codes == NULL) { +        unique_codes = m_new(py_code_t, next_unique_code_id); +    } +    assert(unique_code_id < next_unique_code_id); +    unique_codes[unique_code_id].kind = PY_CODE_BYTE; +    unique_codes[unique_code_id].n_args = n_args; +    unique_codes[unique_code_id].u_byte.code = code; +    unique_codes[unique_code_id].u_byte.len = len; + +    DEBUG_printf("assign byte code: id=%d code=%p len=%u n_args=%d\n", unique_code_id, code, len, n_args); +} + +const char *py_obj_get_type_str(py_obj_t o_in) { +    if (IS_SMALL_INT(o_in)) { +        return "int"; +    } else { +        py_obj_base_t *o = o_in; +        switch (o->kind) { +            case O_CONST: +                if (o == py_const_none) { +                    return "NoneType"; +                } else { +                    return "bool"; +                } +            case O_STR: +                return "str"; +#ifdef PY_FLOAT +            case O_FLOAT: +                return "float"; +#endif +            case O_LIST: +                return "list"; +            case O_SET: +               
 return "set"; +            case O_MAP: +                return "dict"; +            default: +                assert(0); +                return "UnknownType"; +        } +    } +} + +void py_obj_print(py_obj_t o_in) { +    if (IS_SMALL_INT(o_in)) { +        printf("%d", (int)FROM_SMALL_INT(o_in)); +    } else { +        py_obj_base_t *o = o_in; +        switch (o->kind) { +            case O_CONST: +                printf("%s", o->id); +                break; +            case O_STR: +                // TODO need to escape chars etc +                printf("'%s'", qstr_str(o->u_str)); +                break; +#ifdef PY_FLOAT +            case O_FLOAT: +                printf("%f", o->flt); +                break; +#endif +            case O_LIST: +                printf("["); +                for (int i = 0; i < o->u_list.len; i++) { +                    if (i > 0) { +                        printf(", "); +                    } +                    py_obj_print(o->u_list.items[i]); +                } +                printf("]"); +                break; +            case O_SET: +            { +                bool first = true; +                printf("{"); +                for (int i = 0; i < o->u_set.alloc; i++) { +                    if (o->u_set.table[i] != NULL) { +                        if (!first) { +                            printf(", "); +                        } +                        first = false; +                        py_obj_print(o->u_set.table[i]); +                    } +                } +                printf("}"); +                break; +            } +            case O_MAP: +            { +                bool first = true; +                printf("{"); +                for (int i = 0; i < o->u_map.alloc; i++) { +                    if (o->u_map.table[i].key != NULL) { +                        if (!first) { +                            printf(", "); +                        } +                        first = false; +                
        py_obj_print(o->u_map.table[i].key); +                        printf(": "); +                        py_obj_print(o->u_map.table[i].value); +                    } +                } +                printf("}"); +                break; +            } +            default: +                assert(0); +        } +    } +} + +int rt_is_true(py_obj_t arg) { +    DEBUG_OP_printf("is true %p\n", arg); +    if (IS_SMALL_INT(arg)) { +        if (FROM_SMALL_INT(arg) == 0) { +            return 0; +        } else { +            return 1; +        } +    } else if (arg == py_const_none) { +        return 0; +    } else if (arg == py_const_false) { +        return 0; +    } else if (arg == py_const_true) { +        return 1; +    } else { +        assert(0); +        return 0; +    } +} + +int rt_get_int(py_obj_t arg) { +    if (IS_SMALL_INT(arg)) { +        return FROM_SMALL_INT(arg); +    } else { +        assert(0); +        return 0; +    } +} + +py_obj_t rt_load_const_str(qstr qstr) { +    DEBUG_OP_printf("load '%s'\n", qstr_str(qstr)); +    return py_obj_new_str(qstr); +} + +py_obj_t rt_load_name(qstr qstr) { +    // logic: search locals, globals, builtins +    DEBUG_OP_printf("load %s\n", qstr_str(qstr)); +    py_map_elem_t *elem = py_qstr_map_lookup(&map_name, qstr, false); +    if (elem == NULL) { +        elem = py_qstr_map_lookup(&map_builtins, qstr, false); +        if (elem == NULL) { +            printf("name doesn't exist: %s\n", qstr_str(qstr)); +            assert(0); +        } +    } +    return elem->value; +} + +py_obj_t rt_load_global(qstr qstr) { +    return rt_load_name(qstr); // TODO +} + +py_obj_t rt_load_build_class() { +    DEBUG_OP_printf("load_build_class\n"); +    py_map_elem_t *elem = py_qstr_map_lookup(&map_builtins, q___build_class__, false); +    if (elem == NULL) { +        printf("name doesn't exist: __build_class__\n"); +        assert(0); +    } +    return elem->value; +} + +void rt_store_name(qstr qstr, py_obj_t obj) { +    
DEBUG_OP_printf("store %s <- %p\n", qstr_str(qstr), obj); +    py_qstr_map_lookup(&map_name, qstr, true)->value = obj; +} + +py_obj_t rt_unary_op(int op, py_obj_t arg) { +    assert(0); +    return py_const_none; +} + +py_obj_t rt_binary_op(int op, py_obj_t lhs, py_obj_t rhs) { +    DEBUG_OP_printf("binary %d %p %p\n", op, lhs, rhs); +    if (op == RT_BINARY_OP_SUBSCR) { +        if (IS_O(lhs, O_LIST) && IS_SMALL_INT(rhs)) { +            return ((py_obj_base_t*)lhs)->u_list.items[FROM_SMALL_INT(rhs)]; +        } else { +            assert(0); +        } +    } else if (IS_SMALL_INT(lhs) && IS_SMALL_INT(rhs)) { +        py_small_int_t val; +        switch (op) { +            case RT_BINARY_OP_ADD: +            case RT_BINARY_OP_INPLACE_ADD: val = FROM_SMALL_INT(lhs) + FROM_SMALL_INT(rhs); break; +            case RT_BINARY_OP_SUBTRACT: val = FROM_SMALL_INT(lhs) - FROM_SMALL_INT(rhs); break; +            case RT_BINARY_OP_MULTIPLY: val = FROM_SMALL_INT(lhs) * FROM_SMALL_INT(rhs); break; +            case RT_BINARY_OP_FLOOR_DIVIDE: val = FROM_SMALL_INT(lhs) / FROM_SMALL_INT(rhs); break; +#ifdef PY_FLOAT +            case RT_BINARY_OP_TRUE_DIVIDE: return py_obj_new_float((float_t)FROM_SMALL_INT(lhs) / (float_t)FROM_SMALL_INT(rhs)); +#endif +            default: printf("%d\n", op); assert(0); val = 0; +        } +        if (fit_small_int(val)) { +            return TO_SMALL_INT(val); +        } +    } else if (IS_O(lhs, O_STR) && IS_O(rhs, O_STR)) { +        const char *lhs_str = qstr_str(((py_obj_base_t*)lhs)->u_str); +        const char *rhs_str = qstr_str(((py_obj_base_t*)rhs)->u_str); +        char *val; +        switch (op) { +            case RT_BINARY_OP_ADD: +            case RT_BINARY_OP_INPLACE_ADD: val = m_new(char, strlen(lhs_str) + strlen(rhs_str) + 1); strcpy(val, lhs_str); strcat(val, rhs_str); break; +            default: printf("%d\n", op); assert(0); val = NULL; +        } +        return py_obj_new_str(qstr_from_str_take(val)); +    } +    assert(0); 
+    return py_const_none; +} + +py_obj_t rt_compare_op(int op, py_obj_t lhs, py_obj_t rhs) { +    DEBUG_OP_printf("compare %d %p %p\n", op, lhs, rhs); +    if (IS_SMALL_INT(lhs) && IS_SMALL_INT(rhs)) { +        int cmp; +        switch (op) { +            case RT_COMPARE_OP_LESS: cmp = FROM_SMALL_INT(lhs) < FROM_SMALL_INT(rhs); break; +            case RT_COMPARE_OP_MORE: cmp = FROM_SMALL_INT(lhs) > FROM_SMALL_INT(rhs); break; +            default: assert(0); cmp = 0; +        } +        if (cmp) { +            return py_const_true; +        } else { +            return py_const_false; +        } +    } +    assert(0); +    return py_const_none; +} + +py_obj_t rt_make_function_from_id(int unique_code_id) { +    if (unique_code_id >= next_unique_code_id) { +        // illegal code id +        return py_const_none; +    } +    py_code_t *c = &unique_codes[unique_code_id]; +    py_obj_base_t *o = m_new(py_obj_base_t, 1); +    switch (c->kind) { +        case PY_CODE_NATIVE: +            switch (c->n_args) { +                case 0: o->kind = O_FUN_0; break; +                case 1: o->kind = O_FUN_1; break; +                case 2: o->kind = O_FUN_2; break; +                default: assert(0); +            } +            o->u_fun.fun = c->u_native.fun; +            break; +        case PY_CODE_BYTE: +            o->kind = O_FUN_BC; +            o->u_fun_bc.code = c->u_byte.code; +            o->u_fun_bc.len = c->u_byte.len; +            o->u_fun_bc.n_args = c->n_args; +            break; +        default: +            assert(0); +    } +    return o; +} + +py_obj_t rt_make_function_0(py_fun_0_t fun) { +    py_obj_base_t *o = m_new(py_obj_base_t, 1); +    o->kind = O_FUN_0; +    o->u_fun.fun = fun; +    return o; +} + +py_obj_t rt_make_function_1(py_fun_1_t fun) { +    py_obj_base_t *o = m_new(py_obj_base_t, 1); +    o->kind = O_FUN_1; +    o->u_fun.fun = fun; +    return o; +} + +py_obj_t rt_make_function_2(py_fun_2_t fun) { +    py_obj_base_t *o = 
m_new(py_obj_base_t, 1); +    o->kind = O_FUN_2; +    o->u_fun.fun = fun; +    return o; +} + +py_obj_t rt_make_function(int n_args, py_fun_t code) { +    // assumes code is a pointer to a py_fun_t (i think this is safe...) +    py_obj_base_t *o = m_new(py_obj_base_t, 1); +    o->kind = O_FUN_N; +    o->u_fun.fun = code; +    o->u_fun.n_args = n_args; +    return o; +} + +py_obj_t rt_call_function_0(py_obj_t fun) { +    if (IS_O(fun, O_FUN_0)) { +        py_obj_base_t *o = fun; +        DEBUG_OP_printf("calling native %p...\n", o->u_fun.fun); +        return ((py_fun_0_t)o->u_fun.fun)(); +    } else if (IS_O(fun, O_FUN_BC)) { +        py_obj_base_t *o = fun; +        assert(o->u_fun_bc.n_args == 0); +        DEBUG_OP_printf("calling byte code %p...\n", o->u_fun_bc.code); +        return py_execute_byte_code(o->u_fun_bc.code, o->u_fun_bc.len, NULL, 0); +    } else { +        printf("fun0:%p\n", fun); +        assert(0); +        return py_const_none; +    } +} + +py_obj_t rt_call_function_1(py_obj_t fun, py_obj_t arg) { +    if (IS_O(fun, O_FUN_1)) { +        py_obj_base_t *o = fun; +        DEBUG_OP_printf("calling native %p...\n", o->u_fun.fun); +        return ((py_fun_1_t)o->u_fun.fun)(arg); +    } else if (IS_O(fun, O_FUN_BC)) { +        py_obj_base_t *o = fun; +        assert(o->u_fun_bc.n_args == 1); +        DEBUG_OP_printf("calling byte code %p...\n", o->u_fun_bc.code); +        return py_execute_byte_code(o->u_fun_bc.code, o->u_fun_bc.len, &arg, 1); +    } else if (IS_O(fun, O_BOUND_METH)) { +        py_obj_base_t *o = fun; +        return rt_call_function_2(o->u_bound_meth.meth, o->u_bound_meth.self, arg); +    } else { +        printf("fun1:%p\n", fun); +        assert(0); +        return py_const_none; +    } +} + +py_obj_t rt_call_function_2(py_obj_t fun, py_obj_t arg1, py_obj_t arg2) { +    if (IS_O(fun, O_FUN_2)) { +        py_obj_base_t *o = fun; +        DEBUG_OP_printf("calling native %p...\n", o->u_fun.fun); +        return 
((py_fun_2_t)o->u_fun.fun)(arg1, arg2); +    } else if (IS_O(fun, O_FUN_BC)) { +        py_obj_base_t *o = fun; +        assert(o->u_fun_bc.n_args == 2); +        DEBUG_OP_printf("calling byte code %p...\n", o->u_fun_bc.code); +        py_obj_t args[2]; +        args[0] = arg1; +        args[1] = arg2; +        return py_execute_byte_code(o->u_fun_bc.code, o->u_fun_bc.len, &args[0], 2); +    } else { +        assert(0); +        return py_const_none; +    } +} + +py_obj_t rt_call_method_1(py_obj_t fun, py_obj_t self) { +    DEBUG_OP_printf("call method %p %p\n", fun, self); +    if (self == NULL) { +        return rt_call_function_0(fun); +    } else { +        return rt_call_function_1(fun, self); +    } +} + +py_obj_t rt_call_method_2(py_obj_t fun, py_obj_t self, py_obj_t arg) { +    DEBUG_OP_printf("call method %p %p %p\n", fun, self, arg); +    if (self == NULL) { +        return rt_call_function_1(fun, arg); +    } else { +        return rt_call_function_2(fun, self, arg); +    } +} + +// items are in reverse order +py_obj_t rt_build_list(int n_args, py_obj_t *items) { +    py_obj_base_t *o = m_new(py_obj_base_t, 1); +    o->kind = O_LIST; +    o->u_list.alloc = n_args; +    if (o->u_list.alloc < 4) { +        o->u_list.alloc = 4; +    } +    o->u_list.len = n_args; +    o->u_list.items = m_new(py_obj_t, o->u_list.alloc); +    for (int i = 0; i < n_args; i++) { +        o->u_list.items[i] = items[n_args - i - 1]; +    } +    return o; +} + +py_obj_t py_set_lookup(py_obj_t o_in, py_obj_t index, bool add_if_not_found) { +    assert(IS_O(o_in, O_SET)); +    py_obj_base_t *o = o_in; +    int hash = py_obj_hash(index); +    int pos = hash % o->u_set.alloc; +    for (;;) { +        py_obj_t elem = o->u_set.table[pos]; +        if (elem == NULL) { +            // not in table +            if (add_if_not_found) { +                if (o->u_set.used + 1 >= o->u_set.alloc) { +                    // not enough room in table, rehash it +                    int old_alloc = 
o->u_set.alloc; +                    py_obj_t *old_table = o->u_set.table; +                    o->u_set.alloc = get_doubling_prime_greater_or_equal_to(o->u_set.alloc + 1); +                    o->u_set.used = 0; +                    o->u_set.table = m_new(py_obj_t, o->u_set.alloc); +                    for (int i = 0; i < old_alloc; i++) { +                        if (old_table[i] != NULL) { +                            py_set_lookup(o, old_table[i], true); +                        } +                    } +                    m_free(old_table); +                    // restart the search for the new element +                    pos = hash % o->u_set.alloc; +                } else { +                    o->u_set.used += 1; +                    o->u_set.table[pos] = index; +                    return index; +                } +            } else { +                return NULL; +            } +        } else if (py_obj_equal(elem, index)) { +            // found it +            return elem; +        } else { +            // not yet found, keep searching in this table +            pos = (pos + 1) % o->u_set.alloc; +        } +    } +} + +py_obj_t rt_build_set(int n_args, py_obj_t *items) { +    py_obj_base_t *o = m_new(py_obj_base_t, 1); +    o->kind = O_SET; +    o->u_set.alloc = get_doubling_prime_greater_or_equal_to(n_args + 1); +    o->u_set.used = 0; +    o->u_set.table = m_new(py_obj_t, o->u_set.alloc); +    for (int i = 0; i < o->u_set.alloc; i++) { +        o->u_set.table[i] = NULL; +    } +    for (int i = 0; i < n_args; i++) { +        py_set_lookup(o, items[i], true); +    } +    return o; +} + +py_obj_t rt_build_map(int n_args) { +    py_obj_base_t *o = m_new(py_obj_base_t, 1); +    o->kind = O_MAP; +    py_map_init(&o->u_map, MAP_PY_OBJ, n_args); +    return o; +} + +py_obj_t rt_store_map(py_obj_t map, py_obj_t key, py_obj_t value) { +    assert(IS_O(map, O_MAP)); // should always be +    py_map_lookup(map, key, true)->value = value; +    return map; +} + 
+void rt_store_subscr(py_obj_t base, py_obj_t index, py_obj_t value) { +    if (IS_O(base, O_LIST) && IS_SMALL_INT(index)) { +        // list store +        py_obj_base_t *o = base; +        int idx = FROM_SMALL_INT(index); +        if (idx < 0) { +            idx += o->u_list.len; +        } +        if (0 <= idx && idx < o->u_list.len) { +            o->u_list.items[idx] = value; +        } else { +            assert(0); +        } +    } else if (IS_O(base, O_MAP)) { +        // map store +        py_map_lookup(base, index, true)->value = value; +    } else { +        assert(0); +    } +} + +py_obj_t build_bound_method(py_obj_t self, py_obj_t meth) { +    py_obj_base_t *o = m_new(py_obj_base_t, 1); +    o->kind = O_BOUND_METH; +    o->u_bound_meth.meth = meth; +    o->u_bound_meth.self = self; +    return o; +} + +py_obj_t rt_load_attr(py_obj_t base, qstr attr) { +    DEBUG_OP_printf("load %s\n", qstr_str(attr)); +    if (IS_O(base, O_LIST) && attr == q_append) { +        return build_bound_method(base, fun_list_append); +    } else if (IS_O(base, O_CLASS)) { +        py_obj_base_t *o = base; +        py_map_elem_t *elem = py_qstr_map_lookup(o->u_class.map, attr, false); +        if (elem == NULL) { +            printf("Nope! 
%s\n", qstr_str(attr)); +            assert(0); +        } +        return elem->value; +    } else { +        printf("AttributeError: '%s' object has no attribute '%s'\n", py_obj_get_type_str(base), qstr_str(attr)); +        assert(0); +        return py_const_none; +    } +} + +void rt_load_method(py_obj_t base, qstr attr, py_obj_t *dest) { +    DEBUG_OP_printf("load method %s\n", qstr_str(attr)); +    if (IS_O(base, O_LIST) && attr == q_append) { +        dest[1] = fun_list_append; +        dest[0] = base; +    } else { +        dest[1] = rt_load_attr(base, attr); +        dest[0] = NULL; +    } +} + +void *rt_fun_table[RT_F_NUMBER_OF] = { +    rt_load_const_str, +    rt_load_name, +    rt_load_global, +    rt_load_attr, +    rt_load_method, +    rt_store_name, +    rt_store_subscr, +    rt_is_true, +    rt_unary_op, +    rt_build_list, +    rt_build_map, +    rt_store_map, +    rt_build_set, +    rt_make_function_from_id, +    rt_call_function_0, +    rt_call_function_1, +    rt_call_function_2, +    rt_call_method_1, +    rt_call_method_2, +    rt_binary_op, +    rt_compare_op, +}; + +/* +void rt_f_vector(rt_fun_kind_t fun_kind) { +    (rt_f_table[fun_kind])(); +} +*/ diff --git a/py/runtime.h b/py/runtime.h new file mode 100644 index 000000000..4c842b235 --- /dev/null +++ b/py/runtime.h @@ -0,0 +1,121 @@ +typedef enum { +    RT_UNARY_OP_NOT, +    RT_UNARY_OP_POSITIVE, +    RT_UNARY_OP_NEGATIVE, +    RT_UNARY_OP_INVERT, +} rt_unary_op_t; + +typedef enum { +    RT_BINARY_OP_SUBSCR, +    RT_BINARY_OP_OR, +    RT_BINARY_OP_XOR, +    RT_BINARY_OP_AND, +    RT_BINARY_OP_LSHIFT, +    RT_BINARY_OP_RSHIFT, +    RT_BINARY_OP_ADD, +    RT_BINARY_OP_SUBTRACT, +    RT_BINARY_OP_MULTIPLY, +    RT_BINARY_OP_FLOOR_DIVIDE, +    RT_BINARY_OP_TRUE_DIVIDE, +    RT_BINARY_OP_MODULO, +    RT_BINARY_OP_POWER, +    RT_BINARY_OP_INPLACE_OR, +    RT_BINARY_OP_INPLACE_XOR, +    RT_BINARY_OP_INPLACE_AND, +    RT_BINARY_OP_INPLACE_LSHIFT, +    RT_BINARY_OP_INPLACE_RSHIFT, +    
RT_BINARY_OP_INPLACE_ADD, +    RT_BINARY_OP_INPLACE_SUBTRACT, +    RT_BINARY_OP_INPLACE_MULTIPLY, +    RT_BINARY_OP_INPLACE_FLOOR_DIVIDE, +    RT_BINARY_OP_INPLACE_TRUE_DIVIDE, +    RT_BINARY_OP_INPLACE_MODULO, +    RT_BINARY_OP_INPLACE_POWER, +} rt_binary_op_t; + +typedef enum { +    RT_COMPARE_OP_LESS, +    RT_COMPARE_OP_MORE, +    RT_COMPARE_OP_EQUAL, +    RT_COMPARE_OP_LESS_EQUAL, +    RT_COMPARE_OP_MORE_EQUAL, +    RT_COMPARE_OP_NOT_EQUAL, +    RT_COMPARE_OP_IN, +    RT_COMPARE_OP_NOT_IN, +    RT_COMPARE_OP_IS, +    RT_COMPARE_OP_IS_NOT, +    RT_COMPARE_OP_EXCEPTION_MATCH, +} rt_compare_op_t; + +typedef enum { +    RT_F_LOAD_CONST_STR = 0, +    RT_F_LOAD_NAME, +    RT_F_LOAD_GLOBAL, +    RT_F_LOAD_ATTR, +    RT_F_LOAD_METHOD, +    RT_F_STORE_NAME, +    RT_F_STORE_SUBSCR, +    RT_F_IS_TRUE, +    RT_F_UNARY_OP, +    RT_F_BUILD_LIST, +    RT_F_BUILD_MAP, +    RT_F_STORE_MAP, +    RT_F_BUILD_SET, +    RT_F_MAKE_FUNCTION_FROM_ID, +    RT_F_CALL_FUNCTION_0, +    RT_F_CALL_FUNCTION_1, +    RT_F_CALL_FUNCTION_2, +    RT_F_CALL_METHOD_1, +    RT_F_CALL_METHOD_2, +    RT_F_BINARY_OP, +    RT_F_COMPARE_OP, +    RT_F_NUMBER_OF, +} rt_fun_kind_t; + +extern void *rt_fun_table[RT_F_NUMBER_OF]; + +typedef machine_ptr_t py_obj_t; // must be of pointer size +typedef py_obj_t (*py_fun_0_t)(); +typedef py_obj_t (*py_fun_1_t)(py_obj_t); +typedef py_obj_t (*py_fun_2_t)(py_obj_t, py_obj_t); +typedef py_obj_t (*py_fun_t)(); + +extern py_obj_t py_const_none; +extern py_obj_t py_const_false; +extern py_obj_t py_const_true; + +void rt_init(); +void rt_deinit(); +int rt_get_new_unique_code_id(); +void rt_assign_native_code(int unique_code_id, py_fun_t f, uint len, int n_args); +void rt_assign_byte_code(int unique_code_id, byte *code, uint len, int n_args); +py_fun_t rt_get_code(qstr id); +void py_obj_print(py_obj_t o); +int rt_is_true(py_obj_t arg); +int rt_get_int(py_obj_t arg); +py_obj_t rt_load_const_str(qstr qstr); +//py_obj_t rt_load_const_code(qstr qstr); +py_obj_t 
rt_load_name(qstr qstr); +py_obj_t rt_load_global(qstr qstr); +py_obj_t rt_load_build_class(); +void rt_store_name(qstr qstr, py_obj_t obj); +py_obj_t rt_unary_op(int op, py_obj_t arg); +py_obj_t rt_binary_op(int op, py_obj_t lhs, py_obj_t rhs); +py_obj_t rt_compare_op(int op, py_obj_t lhs, py_obj_t rhs); +py_obj_t rt_make_function_from_id(int unique_code_id); +py_obj_t rt_make_function_0(py_fun_0_t f); +py_obj_t rt_make_function_1(py_fun_1_t f); +py_obj_t rt_make_function_2(py_fun_2_t f); +py_obj_t rt_make_function(int n_args, py_fun_t code); +py_obj_t rt_call_function_0(py_obj_t fun); +py_obj_t rt_call_function_1(py_obj_t fun, py_obj_t arg); +py_obj_t rt_call_function_2(py_obj_t fun, py_obj_t arg1, py_obj_t arg2); +py_obj_t rt_call_method_1(py_obj_t fun, py_obj_t self); +py_obj_t rt_call_method_2(py_obj_t fun, py_obj_t self, py_obj_t arg); +py_obj_t rt_build_list(int n_args, py_obj_t *items); +py_obj_t rt_build_map(int n_args); +py_obj_t rt_store_map(py_obj_t map, py_obj_t key, py_obj_t value); +py_obj_t rt_build_set(int n_args, py_obj_t *items); +void rt_store_subscr(py_obj_t base, py_obj_t index, py_obj_t val); +py_obj_t rt_load_attr(py_obj_t base, qstr attr); +void rt_load_method(py_obj_t base, qstr attr, py_obj_t *dest); diff --git a/py/scope.c b/py/scope.c new file mode 100644 index 000000000..a715b2b50 --- /dev/null +++ b/py/scope.c @@ -0,0 +1,218 @@ +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> + +#include "misc.h" +#include "machine.h" +#include "parse.h" +#include "scope.h" + +scope_t *scope_new(scope_kind_t kind, py_parse_node_t pn) { +    scope_t *scope = m_new(scope_t, 1); +    scope->kind = kind; +    scope->parent = NULL; +    scope->next = NULL; +    scope->pn = pn; +    switch (kind) { +        case SCOPE_MODULE: +            scope->simple_name = 0; +            break; +        case SCOPE_FUNCTION: +        case SCOPE_CLASS: +            assert(PY_PARSE_NODE_IS_STRUCT(pn)); +            scope->simple_name = 
PY_PARSE_NODE_LEAF_ARG(((py_parse_node_struct_t*)pn)->nodes[0]); +            break; +        case SCOPE_LAMBDA: +            scope->simple_name = qstr_from_str_static("<lambda>"); +            break; +        case SCOPE_LIST_COMP: +            scope->simple_name = qstr_from_str_static("<listcomp>"); +            break; +        case SCOPE_DICT_COMP: +            scope->simple_name = qstr_from_str_static("<dictcomp>"); +            break; +        case SCOPE_SET_COMP: +            scope->simple_name = qstr_from_str_static("<setcomp>"); +            break; +        case SCOPE_GEN_EXPR: +            scope->simple_name = qstr_from_str_static("<genexpr>"); +            break; +        default: +            assert(0); +    } +    scope->id_info_alloc = 8; +    scope->id_info_len = 0; +    scope->id_info = m_new(id_info_t, scope->id_info_alloc); + +    scope->flags = 0; +    scope->num_params = 0; +    /* not needed +    scope->num_default_params = 0; +    scope->num_dict_params = 0; +    */ +    scope->num_locals = 0; +    scope->unique_code_id = 0; + +    return scope; +} + +id_info_t *scope_find_or_add_id(scope_t *scope, qstr qstr, bool *added) { +    for (int i = 0; i < scope->id_info_len; i++) { +        if (scope->id_info[i].qstr == qstr) { +            *added = false; +            return &scope->id_info[i]; +        } +    } + +    // make sure we have enough memory +    if (scope->id_info_len >= scope->id_info_alloc) { +        scope->id_info_alloc *= 2; +        scope->id_info = m_renew(id_info_t, scope->id_info, scope->id_info_alloc); +    } + +    id_info_t *id_info; + +    { +    /* +    // just pick next slot in array +    id_info = &scope->id_info[scope->id_info_len++]; +    */ +    } + +    { +    // sort insert into id_info array, so we are equivalent to CPython (no other reason to do it) +    scope->id_info_len += 1; +    for (int i = scope->id_info_len - 1;; i--) { +        if (i == 0 || strcmp(qstr_str(scope->id_info[i - 1].qstr), qstr_str(qstr)) < 0) 
{ +            id_info = &scope->id_info[i]; +            break; +        } else { +            scope->id_info[i] = scope->id_info[i - 1]; +        } +    } +    } + +    id_info->param = false; +    id_info->kind = 0; +    id_info->qstr = qstr; +    *added = true; +    return id_info; +} + +id_info_t *scope_find(scope_t *scope, qstr qstr) { +    for (int i = 0; i < scope->id_info_len; i++) { +        if (scope->id_info[i].qstr == qstr) { +            return &scope->id_info[i]; +        } +    } +    return NULL; +} + +id_info_t *scope_find_global(scope_t *scope, qstr qstr) { +    while (scope->parent != NULL) { +        scope = scope->parent; +    } +    for (int i = 0; i < scope->id_info_len; i++) { +        if (scope->id_info[i].qstr == qstr) { +            return &scope->id_info[i]; +        } +    } +    return NULL; +} + +id_info_t *scope_find_local_in_parent(scope_t *scope, qstr qstr) { +    if (scope->parent == NULL) { +        return NULL; +    } +    for (scope_t *s = scope->parent; s->parent != NULL; s = s->parent) { +        for (int i = 0; i < s->id_info_len; i++) { +            if (s->id_info[i].qstr == qstr) { +                return &s->id_info[i]; +            } +        } +    } +    return NULL; +} + +void scope_close_over_in_parents(scope_t *scope, qstr qstr) { +    assert(scope->parent != NULL); // we should have at least 1 parent +    for (scope_t *s = scope->parent; s->parent != NULL; s = s->parent) { +        id_info_t *id = NULL; +        for (int i = 0; i < s->id_info_len; i++) { +            if (s->id_info[i].qstr == qstr) { +                id = &s->id_info[i]; +                break; +            } +        } +        if (id == NULL) { +            // variable not declared in this scope, so declare it as free and keep searching parents +            bool added; +            id = scope_find_or_add_id(s, qstr, &added); +            assert(added); +            id->kind = ID_INFO_KIND_FREE; +        } else { +            // variable is 
declared in this scope, so finish +            switch (id->kind) { +                case ID_INFO_KIND_LOCAL: id->kind = ID_INFO_KIND_CELL; break; // variable local to this scope, close it over +                case ID_INFO_KIND_FREE: break; // variable already closed over in a parent scope +                case ID_INFO_KIND_CELL: break; // variable already closed over in this scope +                default: assert(0); // TODO +            } +            return; +        } +    } +    assert(0); // we should have found the variable in one of the parents +} + +void scope_print_info(scope_t *s) { +    if (s->kind == SCOPE_MODULE) { +        printf("code <module>\n"); +    } else if (s->kind == SCOPE_LAMBDA) { +        printf("code <lambda>\n"); +    } else if (s->kind == SCOPE_LIST_COMP) { +        printf("code <listcomp>\n"); +    } else if (s->kind == SCOPE_DICT_COMP) { +        printf("code <dictcomp>\n"); +    } else if (s->kind == SCOPE_SET_COMP) { +        printf("code <setcomp>\n"); +    } else if (s->kind == SCOPE_GEN_EXPR) { +        printf("code <genexpr>\n"); +    } else { +        printf("code %s\n", qstr_str(s->simple_name)); +    } +    /* +    printf("var global:"); +    for (int i = 0; i < s->id_info_len; i++) { +        if (s->id_info[i].kind == ID_INFO_KIND_GLOBAL_EXPLICIT) { +            printf(" %s", qstr_str(s->id_info[i].qstr)); +        } +    } +    printf("\n"); +    printf("var name:"); +    for (int i = 0; i < s->id_info_len; i++) { +        if (s->id_info[i].kind == ID_INFO_KIND_GLOBAL_IMPLICIT) { +            printf(" %s", qstr_str(s->id_info[i].qstr)); +        } +    } +    printf("\n"); +    printf("var local:"); +    for (int i = 0; i < s->id_info_len; i++) { +        if (s->id_info[i].kind == ID_INFO_KIND_LOCAL) { +            printf(" %s", qstr_str(s->id_info[i].qstr)); +        } +    } +    printf("\n"); +    printf("var free:"); +    for (int i = 0; i < s->id_info_len; i++) { +        if (s->id_info[i].kind == ID_INFO_KIND_FREE) { 
+            printf(" %s", qstr_str(s->id_info[i].qstr)); +        } +    } +    printf("\n"); +    */ +    printf("     flags %04x\n", s->flags); +    printf("     argcount %d\n", s->num_params); +    printf("     nlocals %d\n", s->num_locals); +    printf("     stacksize %d\n", s->stack_size); +} diff --git a/py/scope.h b/py/scope.h new file mode 100644 index 000000000..2d620fb4f --- /dev/null +++ b/py/scope.h @@ -0,0 +1,58 @@ +enum { +    ID_INFO_KIND_GLOBAL_IMPLICIT, +    ID_INFO_KIND_GLOBAL_EXPLICIT, +    ID_INFO_KIND_LOCAL, // in a function f, written and only referenced by f +    ID_INFO_KIND_CELL,  // in a function f, read/written by children of f +    ID_INFO_KIND_FREE,  // in a function f, belongs to the parent of f +}; + +typedef struct _id_info_t { +    bool param; +    int kind; +    qstr qstr; +    int local_num; // when it's an ID_INFO_KIND_LOCAL this is the unique number of the local +} id_info_t; + +// taken from python source, Include/code.h +#define SCOPE_FLAG_OPTIMISED    0x0001 +#define SCOPE_FLAG_NEWLOCALS    0x0002 +#define SCOPE_FLAG_VARARGS      0x0004 +#define SCOPE_FLAG_VARKEYWORDS  0x0008 +#define SCOPE_FLAG_NESTED       0x0010 +#define SCOPE_FLAG_GENERATOR    0x0020 +/* The SCOPE_FLAG_NOFREE flag is set if there are no free or cell variables. +   This information is redundant, but it allows a single flag test +   to determine whether there is any extra work to be done when the +   call frame is setup. 
+*/ +#define SCOPE_FLAG_NOFREE       0x0040 + +// scope is a "block" in Python parlance +typedef enum { SCOPE_MODULE, SCOPE_FUNCTION, SCOPE_LAMBDA, SCOPE_LIST_COMP, SCOPE_DICT_COMP, SCOPE_SET_COMP, SCOPE_GEN_EXPR, SCOPE_CLASS } scope_kind_t; +typedef struct _scope_t { +    scope_kind_t kind; +    struct _scope_t *parent; +    struct _scope_t *next; +    py_parse_node_t pn; +    qstr simple_name; +    int id_info_alloc; +    int id_info_len; +    id_info_t *id_info; +    int flags; +    int num_params; +    /* not needed +    int num_default_params; +    int num_dict_params; +    */ +    int num_locals; +    int stack_size; +    int unique_code_id; +} scope_t; + +scope_t *scope_new(scope_kind_t kind, py_parse_node_t pn); +id_info_t *scope_find_or_add_id(scope_t *scope, qstr qstr, bool *added); +id_info_t *scope_find(scope_t *scope, qstr qstr); +id_info_t *scope_find_global(scope_t *scope, qstr qstr); +id_info_t *scope_find_local_in_parent(scope_t *scope, qstr qstr); +void scope_close_over_in_parents(scope_t *scope, qstr qstr); +void scope_print_info(scope_t *s); | 
