Initial commit.

This commit is contained in:
Damien 2013-10-04 19:53:11 +01:00
commit 429d71943d
32 changed files with 10729 additions and 0 deletions

1
py/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*.o

50
py/Makefile Normal file
View File

@ -0,0 +1,50 @@
# Build rules for the program named by PROG.
# Object files are derived from the C sources in SRC and the assembly
# sources in SRC_ASM; `make clean` removes them again.
CC = gcc
CFLAGS = -Wall -ansi -std=gnu99 -Os #-DNDEBUG
LDFLAGS =
SRC = \
malloc.c \
misc.c \
qstr.c \
lexer.c \
lexerfile.c \
parse.c \
scope.c \
compile.c \
emitcommon.c \
emitcpy.c \
emitbc.c \
asmx64.c \
emitx64v2.c \
emitthumb.c \
asmthumb.c \
runtime.c \
bc.c \
main.c \
SRC_ASM = \
runtime1.s \
OBJ = $(SRC:.c=.o) $(SRC_ASM:.s=.o)
LIB =
PROG = py
# Link every object file into the final executable.
$(PROG): $(OBJ)
$(CC) -o $@ $(OBJ) $(LIB) $(LDFLAGS)
# runtime.c and bc.c get -O3 appended after the default CFLAGS.
runtime.o: runtime.c
$(CC) $(CFLAGS) -O3 -c -o $@ $<
bc.o: bc.c
$(CC) $(CFLAGS) -O3 -c -o $@ $<
# Extra header dependencies so these objects rebuild when the header changes.
parse.o: grammar.h
compile.o: grammar.h
emitcpy.o: emit.h
emitbc.o: emit.h
emitx64.o: emit.h
emitx64v2.o: emit.h
emitthumb.o: emit.h
# Remove the generated object files (the executable itself is not removed).
clean:
/bin/rm $(OBJ)

421
py/asmthumb.c Normal file
View File

@ -0,0 +1,421 @@
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include "misc.h"
#include "machine.h"
#include "asmthumb.h"
// Range checks on the low 32 bits of an immediate.
// UNSIGNED_FITn(x): x is representable as an unsigned n-bit value.
// SIGNED_FITn(x):   x is representable as a two's-complement n-bit value.
// Every expansion is wrapped in an outer pair of parentheses so the macros
// compose correctly with other operators (`!SIGNED_FIT8(x)`,
// `cond && SIGNED_FIT8(x)`).  Note that x is evaluated more than once by the
// SIGNED_* forms, so it must not have side effects.
#define UNSIGNED_FIT8(x) (((x) & 0xffffff00) == 0)
#define UNSIGNED_FIT16(x) (((x) & 0xffff0000) == 0)
#define SIGNED_FIT8(x) ((((x) & 0xffffff80) == 0) || (((x) & 0xffffff80) == 0xffffff80))
#define SIGNED_FIT9(x) ((((x) & 0xffffff00) == 0) || (((x) & 0xffffff00) == 0xffffff00))
#define SIGNED_FIT12(x) ((((x) & 0xfffff800) == 0) || (((x) & 0xfffff800) == 0xfffff800))
// State of one Thumb assembler instance.  The code is assembled in several
// passes (see ASM_THUMB_PASS_*); the fields below carry information from one
// pass to the next.
struct _asm_thumb_t {
// pass currently being run, as given to asm_thumb_start_pass
int pass;
// number of bytes emitted so far in the current pass
uint code_offset;
// total code size in bytes, recorded at the end of pass 2
uint code_size;
// buffer receiving the machine code, allocated at the end of pass 2
byte *code_base;
// scratch area that absorbs writes in passes before pass 3
byte dummy_data[8];
// next label id to hand out; reset to 1 at the start of every pass
int next_label;
// number of entries in label_offsets, computed at the end of pass 1
int max_num_labels;
// code offset of each label, filled in during pass 2 (-1 = not yet assigned)
int *label_offsets;
// number of locals of the function being assembled, set by asm_thumb_entry
int num_locals;
// register list pushed by asm_thumb_entry, popped again by asm_thumb_exit
uint push_reglist;
// extra stack words reserved by asm_thumb_entry, released by asm_thumb_exit
uint stack_adjust;
};
asm_thumb_t *asm_thumb_new() {
asm_thumb_t *as;
as = m_new(asm_thumb_t, 1);
as->pass = 0;
as->code_offset = 0;
as->code_size = 0;
as->code_base = NULL;
as->label_offsets = NULL;
as->num_locals = 0;
return as;
}
// Release an assembler created by asm_thumb_new.
//   as        - assembler to destroy; NULL is accepted and ignored
//   free_code - when true the generated code buffer is freed as well; pass
//               false if the caller keeps using the pointer obtained from
//               asm_thumb_get_code
// The label offset table allocated at the end of pass 1 is always released;
// it was previously leaked.
void asm_thumb_free(asm_thumb_t *as, bool free_code) {
    if (as == NULL) {
        return;
    }
    if (free_code) {
        m_free(as->code_base);
    }
    if (as->label_offsets != NULL) {
        m_free(as->label_offsets);
    }
    m_free(as);
}
// Begin assembler pass `pass` (one of ASM_THUMB_PASS_*).
// The emit position and the label counter restart on every pass.  Pass 1
// additionally resets the label count; pass 2 marks every label offset as
// unassigned (-1) before labels get their positions.
void asm_thumb_start_pass(asm_thumb_t *as, int pass) {
    as->pass = pass;
    as->code_offset = 0;
    as->next_label = 1;

    switch (pass) {
        case ASM_THUMB_PASS_1:
            as->max_num_labels = 0;
            break;
        case ASM_THUMB_PASS_2:
            // all bytes 0xff makes every int entry equal to -1
            memset(as->label_offsets, -1, as->max_num_labels * sizeof(int));
            break;
        default:
            break;
    }
}
// Finish the current pass.
// After pass 1 the label table is allocated, sized by the number of labels
// that were created.  After pass 2 the final code size is known, so the
// code buffer is allocated and the size is printed.
void asm_thumb_end_pass(asm_thumb_t *as) {
    switch (as->pass) {
        case ASM_THUMB_PASS_1:
            // work out how many labels are needed
            if (as->max_num_labels < as->next_label) {
                as->max_num_labels = as->next_label;
            }
            as->label_offsets = m_new(int, as->max_num_labels);
            break;

        case ASM_THUMB_PASS_2:
            // the emit position at the end of the pass is the code size in bytes
            as->code_size = as->code_offset;
            as->code_base = m_new(byte, as->code_size);
            printf("code_size: %u\n", as->code_size);
            break;

        default:
            break;
    }
}
// Reserve num_bytes_to_write bytes at the current emit position and return
// where they should be written.  Every emitter must go through this
// function.  Before pass 3 only the size is accounted for and the returned
// pointer refers to a small scratch area; in pass 3 it points into the real
// code buffer.
static byte *asm_thumb_get_cur_to_write_bytes(asm_thumb_t *as, int num_bytes_to_write) {
    byte *dest;
    if (as->pass >= ASM_THUMB_PASS_3) {
        assert(as->code_offset + num_bytes_to_write <= as->code_size);
        dest = as->code_base + as->code_offset;
    } else {
        dest = as->dummy_data;
    }
    as->code_offset += num_bytes_to_write;
    return dest;
}
// Size in bytes of the generated code, as recorded at the end of pass 2.
uint asm_thumb_get_code_size(asm_thumb_t *as) {
    uint num_bytes = as->code_size;
    return num_bytes;
}
// Return the entry address of the generated code.  The low bit of the
// address is set to mark the target as Thumb code.
void *asm_thumb_get_code(asm_thumb_t *as) {
    machine_uint_t entry = (machine_uint_t)as->code_base;
    entry |= 1;
    return (void *)entry;
}
/*
static void asm_thumb_write_byte_1(asm_thumb_t *as, byte b1) {
byte *c = asm_thumb_get_cur_to_write_bytes(as, 1);
c[0] = b1;
}
*/
// Emit one 16-bit instruction, low byte first (little endian).
static void asm_thumb_write_op16(asm_thumb_t *as, uint op) {
    byte *out = asm_thumb_get_cur_to_write_bytes(as, 2);
    out[1] = op >> 8;
    out[0] = op;
}
// Emit a 32-bit instruction made of two halfwords: op1 first, then op2,
// each stored low byte first (little endian).
static void asm_thumb_write_op32(asm_thumb_t *as, uint op1, uint op2) {
    byte *out = asm_thumb_get_cur_to_write_bytes(as, 4);
    // first halfword
    out[0] = op1;
    out[1] = op1 >> 8;
    // second halfword
    out += 2;
    out[0] = op2;
    out[1] = op2 >> 8;
}
/*
#define IMM32_L0(x) ((x) & 0xff)
#define IMM32_L1(x) (((x) >> 8) & 0xff)
#define IMM32_L2(x) (((x) >> 16) & 0xff)
#define IMM32_L3(x) (((x) >> 24) & 0xff)
static void asm_thumb_write_word32(asm_thumb_t *as, int w32) {
byte *c = asm_thumb_get_cur_to_write_bytes(as, 4);
c[0] = IMM32_L0(w32);
c[1] = IMM32_L1(w32);
c[2] = IMM32_L2(w32);
c[3] = IMM32_L3(w32);
}
*/
// 16-bit stack opcodes.
// rlolist is a bit map selecting the lo-registers to push or pop; the _LR and
// _PC variants additionally set bit 8 (0x0100) of the opcode.
// num_words is OR-ed directly into the low bits of the ADD/SUB SP opcodes.
#define OP_PUSH_RLIST(rlolist) ((rlolist) | 0xb400)
#define OP_PUSH_RLIST_LR(rlolist) ((rlolist) | 0x0100 | 0xb400)
#define OP_POP_RLIST(rlolist) ((rlolist) | 0xbc00)
#define OP_POP_RLIST_PC(rlolist) ((rlolist) | 0x0100 | 0xbc00)
#define OP_ADD_SP(num_words) ((num_words) | 0xb000)
#define OP_SUB_SP(num_words) ((num_words) | 0xb080)
// Emit the function prologue.
// Chooses which registers to push and how many extra stack words to reserve
// so that there is room for num_locals locals and the stack stays aligned on
// an 8-byte boundary.  r0 is never part of the register list because it is
// used for the return value.  The chosen values are remembered so that
// asm_thumb_exit can undo them.
void asm_thumb_entry(asm_thumb_t *as, int num_locals) {
    if (num_locals < 0) {
        num_locals = 0;
    }

    // up to 1 local: register list 0xf2; otherwise register list 0xfe
    uint reglist = (num_locals <= 1) ? 0xf2 : 0xfe;

    // more than 3 locals: reserve the remainder, rounded up to an even
    // number of words
    uint stack_adjust = 0;
    if (num_locals > 3) {
        stack_adjust = ((num_locals - 3) + 1) & (~1);
    }

    asm_thumb_write_op16(as, OP_PUSH_RLIST_LR(reglist));
    if (stack_adjust > 0) {
        asm_thumb_write_op16(as, OP_SUB_SP(stack_adjust));
    }

    as->num_locals = num_locals;
    as->stack_adjust = stack_adjust;
    as->push_reglist = reglist;
}
// Emit the function epilogue: release the stack words reserved by
// asm_thumb_entry (if any), then pop the saved registers together with PC.
void asm_thumb_exit(asm_thumb_t *as) {
    if (as->stack_adjust != 0) {
        asm_thumb_write_op16(as, OP_ADD_SP(as->stack_adjust));
    }
    asm_thumb_write_op16(as, OP_POP_RLIST_PC(as->push_reglist));
}
// Hand out the next unused label id and advance the label counter.
int asm_thumb_label_new(asm_thumb_t *as) {
    int label = as->next_label;
    as->next_label = label + 1;
    return label;
}
// Bind `label` to the current emit position.
// Nothing happens in pass 1.  In pass 2 the offset is recorded (each label
// may be assigned only once); in pass 3 the offset is checked to be the same
// as the one recorded in pass 2.
void asm_thumb_label_assign(asm_thumb_t *as, int label) {
    if (as->pass <= ASM_THUMB_PASS_1) {
        return;
    }
    assert(label < as->max_num_labels);

    switch (as->pass) {
        case ASM_THUMB_PASS_2:
            // record the label offset
            assert(as->label_offsets[label] == -1);
            as->label_offsets[label] = as->code_offset;
            break;
        case ASM_THUMB_PASS_3:
            // the offset must not have moved between pass 2 and pass 3
            assert(as->label_offsets[label] == as->code_offset);
            break;
        default:
            break;
    }
}
// movs rlo_dest, #i8
// rlo_dest must be a lo-register.  The i8 value is zero extended into the
// full 32-bit register.
void asm_thumb_mov_reg_i8(asm_thumb_t *as, uint rlo_dest, int i8) {
    assert(rlo_dest < REG_R8);
    uint op = 0x2000 | (rlo_dest << 8) | i8;
    asm_thumb_write_op16(as, op);
}
// Load a 16-bit immediate into one half of reg_dest.
//   load_hi_half == false: movw reg_dest, #i16 (value is zero extended into
//                          the 32-bit register)
//   load_hi_half == true:  movt reg_dest, #i16
// The 16 immediate bits are scattered over both halfwords of the opcode.
void asm_thumb_mov_i16_to_reg(asm_thumb_t *as, int i16, uint reg_dest, bool load_hi_half) {
    assert(reg_dest < REG_R15);
    uint op = load_hi_half ? 0xf2c0 : 0xf240;

    // first halfword: opcode, immediate bit 11 and immediate bits 15..12
    uint hw1 = op | ((i16 >> 1) & 0x0400) | ((i16 >> 12) & 0xf);
    // second halfword: immediate bits 10..8, destination, immediate bits 7..0
    uint hw2 = ((i16 << 4) & 0x7000) | (reg_dest << 8) | (i16 & 0xff);

    asm_thumb_write_op32(as, hw1, hw2);
}
// Load a full 32-bit immediate into reg_dest using a movw/movt pair
// (8 bytes of code).  An ldr from a pc-relative literal would take 6 bytes,
// but the literal placed at the end of the code might be out of reach.
void asm_thumb_mov_reg_i32(asm_thumb_t *as, uint reg_dest, machine_uint_t i32) {
    machine_uint_t lo_half = i32;
    machine_uint_t hi_half = i32 >> 16;
    asm_thumb_mov_i16_to_reg(as, lo_half, reg_dest, false);
    asm_thumb_mov_i16_to_reg(as, hi_half, reg_dest, true);
}
// Load an immediate into reg_dest using the shortest encoding available:
// a 16-bit movs for 8-bit values into a lo-register, a single movw for
// 16-bit values, and the movw/movt pair otherwise.
void asm_thumb_mov_reg_i32_optimised(asm_thumb_t *as, uint reg_dest, int i32) {
    if (reg_dest < 8 && UNSIGNED_FIT8(i32)) {
        asm_thumb_mov_reg_i8(as, reg_dest, i32);
        return;
    }
    if (UNSIGNED_FIT16(i32)) {
        asm_thumb_mov_i16_to_reg(as, i32, reg_dest, false);
        return;
    }
    asm_thumb_mov_reg_i32(as, reg_dest, i32);
}
// mov reg_dest, reg_src
// Registers numbered 8 and above are encoded with an extra flag bit (0x40
// for the source, 0x80 for the destination) plus the register number minus 8.
void asm_thumb_mov_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_src) {
    uint src_bits = (reg_src < 8) ? (reg_src << 3) : (0x40 | ((reg_src - 8) << 3));
    uint dest_bits = (reg_dest < 8) ? reg_dest : (0x80 | (reg_dest - 8));
    asm_thumb_write_op16(as, 0x4600 | (src_bits | dest_bits));
}
// Store to / load from a stack slot.  The register number goes into bits
// 8 and up, the low 8 bits of word_offset into bits 7..0.
#define OP_STR_TO_SP_OFFSET(rlo_dest, word_offset) (((word_offset) & 0x00ff) | ((rlo_dest) << 8) | 0x9000)
#define OP_LDR_FROM_SP_OFFSET(rlo_dest, word_offset) (((word_offset) & 0x00ff) | ((rlo_dest) << 8) | 0x9800)
// Store lo-register rlo_src into local variable local_num.
// Locals are addressed relative to SP; local 0 has the largest word offset.
void asm_thumb_mov_local_reg(asm_thumb_t *as, int local_num, uint rlo_src) {
    assert(rlo_src < REG_R8);
    int word_offset = (as->num_locals - 1) - local_num;
    // the offset is only required to be valid once real code is written
    assert(as->pass < ASM_THUMB_PASS_3 || word_offset >= 0);
    asm_thumb_write_op16(as, OP_STR_TO_SP_OFFSET(rlo_src, word_offset));
}
// Load local variable local_num into lo-register rlo_dest.
// Locals are addressed relative to SP; local 0 has the largest word offset.
void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num) {
    assert(rlo_dest < REG_R8);
    int word_offset = (as->num_locals - 1) - local_num;
    // the offset is only required to be valid once real code is written
    assert(as->pass < ASM_THUMB_PASS_3 || word_offset >= 0);
    asm_thumb_write_op16(as, OP_LDR_FROM_SP_OFFSET(rlo_dest, word_offset));
}
// Load the address of local variable local_num into reg_dest.
// Emits `add reg_dest, sp, #(word_offset * 4)` (Thumb format 12, load
// address), using the same SP-relative slot numbering as
// asm_thumb_mov_local_reg / asm_thumb_mov_reg_local.
//   reg_dest  - destination; must be a lo-register (r0-r7)
//   local_num - index of the local whose address is wanted
// This used to be an assert(0) stub that emitted opcode 0x0000 when
// assertions were disabled.
void asm_thumb_mov_reg_local_addr(asm_thumb_t *as, uint reg_dest, int local_num) {
    assert(reg_dest < REG_R8);
    int word_offset = as->num_locals - local_num - 1;
    // the offset is only required to be valid once real code is written
    assert(as->pass < ASM_THUMB_PASS_3 || word_offset >= 0);
    asm_thumb_write_op16(as, 0xa800 | (reg_dest << 8) | (word_offset & 0x00ff));
}
// Three-register add: opcode 0x1800 with the destination in bits 2..0,
// the first source in bits 5..3 and the second source from bit 6 up.
#define OP_ADD_REG_REG_REG(rlo_dest, rlo_src_a, rlo_src_b) ((rlo_dest) | ((rlo_src_a) << 3) | ((rlo_src_b) << 6) | 0x1800)

// rlo_dest = rlo_src_a + rlo_src_b
void asm_thumb_add_reg_reg_reg(asm_thumb_t *as, uint rlo_dest, uint rlo_src_a, uint rlo_src_b) {
    uint op = OP_ADD_REG_REG_REG(rlo_dest, rlo_src_a, rlo_src_b);
    asm_thumb_write_op16(as, op);
}
// Register compare: opcode 0x4280 with rlo_a in bits 2..0 and rlo_b from
// bit 3 up.
#define OP_CMP_REG_REG(rlo_a, rlo_b) ((rlo_a) | ((rlo_b) << 3) | 0x4280)

// Compare rlo_a with rlo_b.
void asm_thumb_cmp_reg_reg(asm_thumb_t *as, uint rlo_a, uint rlo_b) {
    uint op = OP_CMP_REG_REG(rlo_a, rlo_b);
    asm_thumb_write_op16(as, op);
}
// Emit the fixed 16-bit opcode 0xbfac (named "ite ge" by this function).
void asm_thumb_ite_ge(asm_thumb_t *as) {
    enum { OP_ITE_GE = 0xbfac };
    asm_thumb_write_op16(as, OP_ITE_GE);
}
// Unconditional branch opcodes, built from a byte offset.
// OP_B is the 16-bit form carrying offset bits 11..1.
// OP_BW_HI / OP_BW_LO are the two halfwords of the 32-bit form.
// NOTE(review): the original author flagged the wide form as possibly wrong
// because it should have a range of +/- 16MiB.
#define OP_B(byte_offset) ((((byte_offset) >> 1) & 0x07ff) | 0xe000)
#define OP_BW_HI(byte_offset) ((((byte_offset) >> 12) & 0x07ff) | 0xf000)
#define OP_BW_LO(byte_offset) ((((byte_offset) >> 1) & 0x07ff) | 0xb800)
// Emit an unconditional branch to `label`.
// Nothing is emitted in pass 1.  A backwards branch whose target is already
// known and in range uses the 16-bit form; everything else (forward
// branches, whose distance is not yet known, and far backward branches)
// uses the 32-bit form so that the size stays the same in later passes.
void asm_thumb_b_label(asm_thumb_t *as, int label) {
    if (as->pass <= ASM_THUMB_PASS_1) {
        return;
    }

    int dest = as->label_offsets[label];
    int rel = dest - as->code_offset;
    rel -= 4; // account for instruction prefetch, PC is 4 bytes ahead of this instruction

    if (dest >= 0 && rel <= -4 && (SIGNED_FIT12(rel))) {
        // short backwards jump
        asm_thumb_write_op16(as, OP_B(rel));
    } else {
        // forwards jump, or out of range for the short form
        asm_thumb_write_op32(as, OP_BW_HI(rel), OP_BW_LO(rel));
    }
}
// cmp rlo, #i8
#define OP_CMP_REG_IMM(rlo, i8) (0x2800 | ((rlo) << 8) | (i8))
// beq, 16-bit form: offset bits 8..1 go into the low byte of the opcode.
#define OP_BEQ(byte_offset) (0xd000 | (((byte_offset) >> 1) & 0x00ff))
// beq.w, 32-bit form.  The branch offset is encoded as S:J2:J1:imm6:imm11:0:
//   first halfword:  S     = offset bit 20      -> bit 10
//                    imm6  = offset bits 17..12 -> bits 5..0
//   second halfword: J1    = offset bit 18      -> bit 13
//                    J2    = offset bit 19      -> bit 11
//                    imm11 = offset bits 11..1  -> bits 10..0
// The previous definitions placed offset bits 12..19 in the wrong fields and
// so were only correct for offsets closer than 4KiB.
#define OP_BEQW_HI(byte_offset) (0xf000 | (((byte_offset) >> 10) & 0x0400) | (((byte_offset) >> 12) & 0x003f))
#define OP_BEQW_LO(byte_offset) (0x8000 | (((byte_offset) >> 5) & 0x2000) | (((byte_offset) >> 8) & 0x0800) | (((byte_offset) >> 1) & 0x07ff))
// Compare lo-register rlo with zero and branch to `label` if it is equal.
// The compare is always emitted.  The branch is not emitted in pass 1.  A
// backwards branch whose target is already known and fits the short form
// uses the 16-bit beq; everything else uses the 32-bit form so that the size
// stays the same in later passes.
void asm_thumb_cmp_reg_bz_label(asm_thumb_t *as, uint rlo, int label) {
    assert(rlo < REG_R8);

    // compare reg with 0
    asm_thumb_write_op16(as, OP_CMP_REG_IMM(rlo, 0));

    if (as->pass <= ASM_THUMB_PASS_1) {
        return;
    }

    // branch if equal
    int dest = as->label_offsets[label];
    int rel = dest - as->code_offset;
    rel -= 4; // account for instruction prefetch, PC is 4 bytes ahead of this instruction

    if (dest >= 0 && rel <= -4 && (SIGNED_FIT9(rel))) {
        // short backwards jump
        asm_thumb_write_op16(as, OP_BEQ(rel));
    } else {
        // forwards jump, or out of range for the short form
        asm_thumb_write_op32(as, OP_BEQW_HI(rel), OP_BEQW_LO(rel));
    }
}
// blx reg: register number placed from bit 3 up.
#define OP_BLX(reg) (((reg) << 3) | 0x4780)
// svc: arg OR-ed into the low bits of the opcode.
#define OP_SVC(arg) ((arg) | 0xdf00)
// ldr rlo_dest, [rlo_base, #word_offset]: only the low 5 bits of word_offset
// survive the 0x07c0 mask.
#define OP_LDR_FROM_BASE_OFFSET(rlo_dest, rlo_base, word_offset) ((rlo_dest) | ((rlo_base) << 3) | (((word_offset) << 6) & 0x07c0) | 0x6800)
// Emit an indirect call to a runtime function.
// Three strategies exist in the code; the one in use is fixed at compile time:
//   CALL_VIA_IMMEDIATE - load fun_ptr as an immediate into reg_temp and blx
//                        to it (not relocatable)
//   CALL_VIA_TABLE     - load entry fun_id of the table addressed by r7 into
//                        reg_temp and blx to it (the strategy in use)
//   CALL_VIA_SVC       - issue svc with fun_id as argument
// NOTE(review): the table load keeps only the low 5 bits of fun_id (see
// OP_LDR_FROM_BASE_OFFSET) - confirm fun_id never exceeds 31.
// TODO make this use less bytes
void asm_thumb_bl_ind(asm_thumb_t *as, void *fun_ptr, uint fun_id, uint reg_temp) {
    enum { CALL_VIA_IMMEDIATE, CALL_VIA_TABLE, CALL_VIA_SVC };
    const int strategy = CALL_VIA_TABLE;

    switch (strategy) {
        case CALL_VIA_IMMEDIATE:
            asm_thumb_mov_reg_i32(as, reg_temp, (machine_uint_t)fun_ptr);
            asm_thumb_write_op16(as, OP_BLX(reg_temp));
            break;
        case CALL_VIA_TABLE:
            asm_thumb_write_op16(as, OP_LDR_FROM_BASE_OFFSET(reg_temp, REG_R7, fun_id));
            asm_thumb_write_op16(as, OP_BLX(reg_temp));
            break;
        default:
            asm_thumb_write_op16(as, OP_SVC(fun_id));
            break;
    }
}

60
py/asmthumb.h Normal file
View File

@ -0,0 +1,60 @@
// Public interface of the Thumb assembler.
// The includer must already have made `bool`, `uint` and `machine_uint_t`
// available.
#ifndef PY_ASMTHUMB_H
#define PY_ASMTHUMB_H

// Pass numbers accepted by asm_thumb_start_pass.  Code is only written to
// the real buffer in pass 3; earlier passes collect labels and sizes.
#define ASM_THUMB_PASS_1 (1)
#define ASM_THUMB_PASS_2 (2)
#define ASM_THUMB_PASS_3 (3)

// Register numbers.
#define REG_R0 (0)
#define REG_R1 (1)
#define REG_R2 (2)
#define REG_R3 (3)
#define REG_R4 (4)
#define REG_R5 (5)
#define REG_R6 (6)
#define REG_R7 (7)
#define REG_R8 (8)
#define REG_R9 (9)
#define REG_R10 (10)
#define REG_R11 (11)
#define REG_R12 (12)
#define REG_R13 (13)
#define REG_R14 (14)
#define REG_R15 (15)
#define REG_LR (REG_R14)

// Registers used for the return value and the first four arguments.
#define REG_RET REG_R0
#define REG_ARG_1 REG_R0
#define REG_ARG_2 REG_R1
#define REG_ARG_3 REG_R2
#define REG_ARG_4 REG_R3

// Opaque assembler state; create with asm_thumb_new, destroy with
// asm_thumb_free.
typedef struct _asm_thumb_t asm_thumb_t;

asm_thumb_t *asm_thumb_new(void);
// free_code selects whether the generated code buffer is freed as well
void asm_thumb_free(asm_thumb_t *as, bool free_code);
void asm_thumb_start_pass(asm_thumb_t *as, int pass);
void asm_thumb_end_pass(asm_thumb_t *as);
uint asm_thumb_get_code_size(asm_thumb_t *as);
void *asm_thumb_get_code(asm_thumb_t *as);

// function prologue / epilogue
void asm_thumb_entry(asm_thumb_t *as, int num_locals);
void asm_thumb_exit(asm_thumb_t *as);

// labels
int asm_thumb_label_new(asm_thumb_t *as);
void asm_thumb_label_assign(asm_thumb_t *as, int label);

// argument order follows ARM, in general dest is first
void asm_thumb_mov_reg_i8(asm_thumb_t *as, uint rlo_dest, int i8_src);
void asm_thumb_mov_reg_i32(asm_thumb_t *as, uint reg_dest, machine_uint_t i32_src);
void asm_thumb_mov_reg_i32_optimised(asm_thumb_t *as, uint reg_dest, int i32_src);
void asm_thumb_mov_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_src);
void asm_thumb_mov_local_reg(asm_thumb_t *as, int local_num_dest, uint rlo_src);
void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num);
void asm_thumb_mov_reg_local_addr(asm_thumb_t *as, uint reg_dest, int local_num);
void asm_thumb_add_reg_reg_reg(asm_thumb_t *as, uint rlo_dest, uint rlo_src_a, uint rlo_src_b);
void asm_thumb_cmp_reg_reg(asm_thumb_t *as, uint rlo_a, uint rlo_b);
void asm_thumb_ite_ge(asm_thumb_t *as);
void asm_thumb_b_label(asm_thumb_t *as, int label);
void asm_thumb_cmp_reg_bz_label(asm_thumb_t *as, uint rlo, int label);
void asm_thumb_bl_ind(asm_thumb_t *as, void *fun_ptr, uint fun_id, uint reg_temp);

#endif // PY_ASMTHUMB_H

621
py/asmx64.c Normal file
View File

@ -0,0 +1,621 @@
#include <stdio.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <string.h>
#include "misc.h"
#include "asmx64.h"
/* all offsets are measured in multiples of 8 bytes */
#define WORD_SIZE (8)
/* Opcode bytes.  A trailing "/n" comment gives the value placed in the
   ModRM reg field for that instruction; "+rb" and "| jcc type" mark opcodes
   that have a register number or condition code OR-ed into them. */
#define OPCODE_NOP (0x90)
#define OPCODE_PUSH_R64 (0x50)
#define OPCODE_PUSH_I64 (0x68)
#define OPCODE_PUSH_M64 (0xff) /* /6 */
#define OPCODE_POP_R64 (0x58)
#define OPCODE_RET (0xc3)
#define OPCODE_MOV_I8_TO_R8 (0xb0) /* +rb */
#define OPCODE_MOV_I64_TO_R64 (0xb8)
#define OPCODE_MOV_I32_TO_RM32 (0xc7)
#define OPCODE_MOV_R64_TO_RM64 (0x89)
#define OPCODE_MOV_RM64_TO_R64 (0x8b)
#define OPCODE_LEA_MEM_TO_R64 (0x8d) /* /r */
#define OPCODE_XOR_R64_TO_RM64 (0x31) /* /r */
#define OPCODE_ADD_R64_TO_RM64 (0x01)
#define OPCODE_ADD_I32_TO_RM32 (0x81) /* /0 */
#define OPCODE_ADD_I8_TO_RM32 (0x83) /* /0 */
#define OPCODE_SUB_R64_FROM_RM64 (0x29)
#define OPCODE_SUB_I32_FROM_RM64 (0x81) /* /5 */
#define OPCODE_SUB_I8_FROM_RM64 (0x83) /* /5 */
#define OPCODE_SHL_RM32_BY_I8 (0xc1) /* /4 */
#define OPCODE_SHR_RM32_BY_I8 (0xc1) /* /5 */
#define OPCODE_SAR_RM32_BY_I8 (0xc1) /* /7 */
#define OPCODE_CMP_I32_WITH_RM32 (0x81) /* /7 */
#define OPCODE_CMP_I8_WITH_RM32 (0x83) /* /7 */
#define OPCODE_CMP_R64_WITH_RM64 (0x39)
#define OPCODE_CMP_RM32_WITH_R32 (0x3b)
#define OPCODE_TEST_R8_WITH_RM8 (0x84) /* /r */
#define OPCODE_JMP_REL8 (0xeb)
#define OPCODE_JMP_REL32 (0xe9)
#define OPCODE_JCC_REL8 (0x70) /* | jcc type */
#define OPCODE_JCC_REL32_A (0x0f)
#define OPCODE_JCC_REL32_B (0x80) /* | jcc type */
#define OPCODE_SETCC_RM8_A (0x0f)
#define OPCODE_SETCC_RM8_B (0x90) /* | jcc type, /0 */
#define OPCODE_CALL_REL32 (0xe8)
#define OPCODE_CALL_RM32 (0xff) /* /2 */
#define OPCODE_LEAVE (0xc9)
/* ModRM byte pieces: the reg field (shifted into bits 5..3), the addressing
   mode (top two bits) and the r/m field (low bits).  They are OR-ed
   together to form the complete byte. */
#define MODRM_R64(x) ((x) << 3)
#define MODRM_RM_DISP0 (0x00)
#define MODRM_RM_DISP8 (0x40)
#define MODRM_RM_DISP32 (0x80)
#define MODRM_RM_REG (0xc0)
#define MODRM_RM_R64(x) (x)
/* REX prefix byte and its flag bits */
#define REX_PREFIX (0x40)
#define REX_W (0x08) // width
#define REX_R (0x04) // register
#define REX_X (0x02) // index
#define REX_B (0x01) // base
/* Extract byte n (0 = least significant) of an immediate.  The IMM64_*
   forms shift by 32 or more, so their argument must be a 64-bit value. */
#define IMM32_L0(x) ((x) & 0xff)
#define IMM32_L1(x) (((x) >> 8) & 0xff)
#define IMM32_L2(x) (((x) >> 16) & 0xff)
#define IMM32_L3(x) (((x) >> 24) & 0xff)
#define IMM64_L4(x) (((x) >> 32) & 0xff)
#define IMM64_L5(x) (((x) >> 40) & 0xff)
#define IMM64_L6(x) (((x) >> 48) & 0xff)
#define IMM64_L7(x) (((x) >> 56) & 0xff)
// Range checks for immediates.
// UNSIGNED_FIT8 / UNSIGNED_FIT32 test a 64-bit value against an unsigned
// 8-bit / 32-bit range.  SIGNED_FIT8 tests the low 32 bits of x against the
// two's-complement 8-bit range; it evaluates x twice, so x must not have
// side effects.  All expansions carry an outer pair of parentheses so they
// compose correctly with other operators (`!SIGNED_FIT8(x)`,
// `cond && SIGNED_FIT8(x)`).
#define UNSIGNED_FIT8(x) (((x) & 0xffffffffffffff00) == 0)
#define UNSIGNED_FIT32(x) (((x) & 0xffffffff00000000) == 0)
#define SIGNED_FIT8(x) ((((x) & 0xffffff80) == 0) || (((x) & 0xffffff80) == 0xffffff80))
// State of one x64 assembler instance.  The code is assembled in several
// passes (see ASM_X64_PASS_*); the fields below carry information from one
// pass to the next.
struct _asm_x64_t {
// pass currently being run, as given to asm_x64_start_pass
int pass;
// number of bytes emitted so far in the current pass
uint code_offset;
// total code size in bytes, recorded at the end of pass 2
uint code_size;
// buffer receiving the machine code, allocated at the end of pass 2
byte *code_base;
// scratch area that absorbs writes in passes before pass 3
byte dummy_data[8];
// next label id to hand out; reset to 1 at the start of every pass
int next_label;
// number of entries in label_offsets, computed at the end of pass 1
int max_num_labels;
// code offset of each label, filled in during pass 2 (-1 = not yet assigned)
int *label_offsets;
};
// Map a fresh anonymous, private memory region (for the approach see
// src/v8/src/platform-linux.cc).
//   req_size   - wanted size in bytes; rounded up to a multiple of 0x1000
//   alloc_size - receives the rounded size that was actually mapped
//   is_exec    - when true the region is mapped executable as well as
//                readable and writable
// A failed mmap triggers an assertion.
void *alloc_mem(uint req_size, uint *alloc_size, bool is_exec) {
    req_size = (req_size + 0xfff) & (~0xfff);

    int prot = PROT_READ | PROT_WRITE;
    if (is_exec) {
        prot |= PROT_EXEC;
    }

    void *region = mmap(NULL, req_size, prot, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (region == MAP_FAILED) {
        assert(0);
    }

    *alloc_size = req_size;
    return region;
}
asm_x64_t* asm_x64_new() {
asm_x64_t* as;
as = m_new(asm_x64_t, 1);
as->pass = 0;
as->code_offset = 0;
as->code_size = 0;
as->code_base = NULL;
as->label_offsets = NULL;
return as;
}
// Release an assembler created by asm_x64_new.
//   as        - assembler to destroy; NULL is accepted and ignored
//   free_code - when true the generated code buffer is freed as well; pass
//               false if the caller keeps using the pointer obtained from
//               asm_x64_get_code
// The label offset table allocated at the end of pass 1 is always released;
// it was previously leaked.
void asm_x64_free(asm_x64_t* as, bool free_code) {
    if (as == NULL) {
        return;
    }
    if (free_code) {
        m_free(as->code_base);
    }
    if (as->label_offsets != NULL) {
        m_free(as->label_offsets);
    }
    m_free(as);
}
// Begin assembler pass `pass` (one of ASM_X64_PASS_*).
// The emit position and the label counter restart on every pass.  Pass 1
// additionally resets the label count; pass 2 marks every label offset as
// unassigned (-1) before labels get their positions.
void asm_x64_start_pass(asm_x64_t *as, int pass) {
    as->pass = pass;
    as->code_offset = 0;
    as->next_label = 1;

    switch (pass) {
        case ASM_X64_PASS_1:
            as->max_num_labels = 0;
            break;
        case ASM_X64_PASS_2:
            // all bytes 0xff makes every int entry equal to -1
            memset(as->label_offsets, -1, as->max_num_labels * sizeof(int));
            break;
        default:
            break;
    }
}
// Finish the current pass.
// After pass 1 the label table is allocated, sized by the number of labels
// that were created.  After pass 2 the final code size is known, so the
// code buffer is allocated and the size is printed.
void asm_x64_end_pass(asm_x64_t *as) {
    switch (as->pass) {
        case ASM_X64_PASS_1:
            // work out how many labels are needed
            if (as->max_num_labels < as->next_label) {
                as->max_num_labels = as->next_label;
            }
            as->label_offsets = m_new(int, as->max_num_labels);
            break;

        case ASM_X64_PASS_2:
            // the emit position at the end of the pass is the code size in bytes
            as->code_size = as->code_offset;
            as->code_base = m_new(byte, as->code_size);
            printf("code_size: %u\n", as->code_size);
            break;

        default:
            break;
    }
}
// Reserve num_bytes_to_write bytes at the current emit position and return
// where they should be written.  Every emitter must go through this
// function.  Before pass 3 only the size is accounted for and the returned
// pointer refers to a small scratch area; in pass 3 it points into the real
// code buffer.
static byte* asm_x64_get_cur_to_write_bytes(asm_x64_t* as, int num_bytes_to_write) {
    byte *dest;
    if (as->pass >= ASM_X64_PASS_3) {
        assert(as->code_offset + num_bytes_to_write <= as->code_size);
        dest = as->code_base + as->code_offset;
    } else {
        dest = as->dummy_data;
    }
    as->code_offset += num_bytes_to_write;
    return dest;
}
// Size in bytes of the generated code, as recorded at the end of pass 2.
uint asm_x64_get_code_size(asm_x64_t* as) {
    uint num_bytes = as->code_size;
    return num_bytes;
}
// Start address of the generated code buffer.
void* asm_x64_get_code(asm_x64_t* as) {
    void *code = as->code_base;
    return code;
}
// Emit a single byte.
static void asm_x64_write_byte_1(asm_x64_t* as, byte b1) {
    *asm_x64_get_cur_to_write_bytes(as, 1) = b1;
}
// Emit two bytes, b1 first.
static void asm_x64_write_byte_2(asm_x64_t* as, byte b1, byte b2) {
    byte *out = asm_x64_get_cur_to_write_bytes(as, 2);
    *out++ = b1;
    *out = b2;
}
// Emit three bytes in the order b1, b2, b3.
static void asm_x64_write_byte_3(asm_x64_t* as, byte b1, byte b2, byte b3) {
    byte *out = asm_x64_get_cur_to_write_bytes(as, 3);
    *out++ = b1;
    *out++ = b2;
    *out = b3;
}
// Emit a 32-bit value, least significant byte first.
static void asm_x64_write_word32(asm_x64_t* as, int w32) {
    byte *out = asm_x64_get_cur_to_write_bytes(as, 4);
    for (int i = 0; i < 4; i++) {
        out[i] = (w32 >> (8 * i)) & 0xff;
    }
}
// Emit a 64-bit value, least significant byte first.
static void asm_x64_write_word64(asm_x64_t* as, int64_t w64) {
    byte *out = asm_x64_get_cur_to_write_bytes(as, 8);
    for (int i = 0; i < 8; i++) {
        out[i] = (w64 >> (8 * i)) & 0xff;
    }
}
/* unused
static void asm_x64_write_word32_to(asm_x64_t* as, int offset, int w32) {
byte* c;
assert(offset + 4 <= as->code_size);
c = as->code_base + offset;
c[0] = IMM32_L0(w32);
c[1] = IMM32_L1(w32);
c[2] = IMM32_L2(w32);
c[3] = IMM32_L3(w32);
}
*/
// Emit the ModRM byte (plus displacement) for a "[disp_r64 + disp_offset]"
// memory operand combined with register/opcode-extension field r64.
// The shortest form is chosen: no displacement when the offset is 0 (not
// available with RBP as base), an 8-bit displacement when it fits, otherwise
// a 32-bit displacement.  RSP is not supported as base register.
static void asm_x64_write_r64_disp(asm_x64_t* as, int r64, int disp_r64, int disp_offset) {
    assert(disp_r64 != REG_RSP);

    // reg and r/m fields are the same for all three forms
    int reg_rm = MODRM_R64(r64) | MODRM_RM_R64(disp_r64);

    if (disp_offset == 0 && disp_r64 != REG_RBP) {
        asm_x64_write_byte_1(as, reg_rm | MODRM_RM_DISP0);
        return;
    }
    if (SIGNED_FIT8(disp_offset)) {
        asm_x64_write_byte_2(as, reg_rm | MODRM_RM_DISP8, IMM32_L0(disp_offset));
        return;
    }
    asm_x64_write_byte_1(as, reg_rm | MODRM_RM_DISP32);
    asm_x64_write_word32(as, disp_offset);
}
// nop
void asm_x64_nop(asm_x64_t* as) {
    int opcode = OPCODE_NOP;
    asm_x64_write_byte_1(as, opcode);
}
// push src_r64 (the register number is OR-ed into the opcode byte)
void asm_x64_push_r64(asm_x64_t* as, int src_r64) {
    int opcode = OPCODE_PUSH_R64 | src_r64;
    asm_x64_write_byte_1(as, opcode);
}
// push a 32-bit immediate; it is sign extended to 64 bits when pushed
void asm_x64_push_i32(asm_x64_t* as, int src_i32) {
    asm_x64_write_byte_1(as, OPCODE_PUSH_I64);
    asm_x64_write_word32(as, src_i32);
}
// push the 64-bit value stored at [src_r64 + src_offset]
void asm_x64_push_disp(asm_x64_t* as, int src_r64, int src_offset) {
    enum { PUSH_M64_EXT = 6 }; // opcode extension: 0xff /6
    asm_x64_write_byte_1(as, OPCODE_PUSH_M64);
    asm_x64_write_r64_disp(as, PUSH_M64_EXT, src_r64, src_offset);
}
// pop dest_r64 (the register number is OR-ed into the opcode byte)
void asm_x64_pop_r64(asm_x64_t* as, int dest_r64) {
    int opcode = OPCODE_POP_R64 | dest_r64;
    asm_x64_write_byte_1(as, opcode);
}
// ret
static void asm_x64_ret(asm_x64_t* as) {
    int opcode = OPCODE_RET;
    asm_x64_write_byte_1(as, opcode);
}
// mov dest_r32, src_r32
// No REX prefix is emitted, so the operation defaults to 32 bits.
void asm_x64_mov_r32_to_r32(asm_x64_t* as, int src_r32, int dest_r32) {
    int modrm = MODRM_R64(src_r32) | MODRM_RM_REG | MODRM_RM_R64(dest_r32);
    asm_x64_write_byte_2(as, OPCODE_MOV_R64_TO_RM64, modrm);
}
// mov dest_r64, src_r64
// The REX.W prefix selects the 64-bit operation.
void asm_x64_mov_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64) {
    int rex = REX_PREFIX | REX_W;
    int modrm = MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64);
    asm_x64_write_byte_3(as, rex, OPCODE_MOV_R64_TO_RM64, modrm);
}
// mov [dest_r64 + dest_disp], src_r64
// The REX.W prefix selects the 64-bit operation.
void asm_x64_mov_r64_to_disp(asm_x64_t* as, int src_r64, int dest_r64, int dest_disp) {
    int rex = REX_PREFIX | REX_W;
    asm_x64_write_byte_2(as, rex, OPCODE_MOV_R64_TO_RM64);
    asm_x64_write_r64_disp(as, src_r64, dest_r64, dest_disp);
}
// mov dest_r64, [src_r64 + src_disp]
// The REX.W prefix selects the 64-bit operation.
void asm_x64_mov_disp_to_r64(asm_x64_t* as, int src_r64, int src_disp, int dest_r64) {
    int rex = REX_PREFIX | REX_W;
    asm_x64_write_byte_2(as, rex, OPCODE_MOV_RM64_TO_R64);
    asm_x64_write_r64_disp(as, dest_r64, src_r64, src_disp);
}
// lea dest_r64, [src_r64 + src_disp]
// The REX.W prefix selects the 64-bit operation.
void asm_x64_lea_disp_to_r64(asm_x64_t* as, int src_r64, int src_disp, int dest_r64) {
    int rex = REX_PREFIX | REX_W;
    asm_x64_write_byte_2(as, rex, OPCODE_LEA_MEM_TO_R64);
    asm_x64_write_r64_disp(as, dest_r64, src_r64, src_disp);
}
// mov 8-bit immediate src_i8 into the 8-bit register selected by dest_r64
// (the register number is OR-ed into the opcode byte)
void asm_x64_mov_i8_to_r8(asm_x64_t *as, int src_i8, int dest_r64) {
    int opcode = OPCODE_MOV_I8_TO_R8 | dest_r64;
    asm_x64_write_byte_2(as, opcode, src_i8);
}
// mov a 32-bit immediate into dest_r64.
// Without a REX prefix the cpu takes a 32-bit immediate and zero extends it
// into the 64-bit register.
void asm_x64_mov_i32_to_r64(asm_x64_t* as, int src_i32, int dest_r64) {
    int opcode = OPCODE_MOV_I64_TO_R64 | dest_r64;
    asm_x64_write_byte_1(as, opcode);
    asm_x64_write_word32(as, src_i32);
}
// mov a full 64-bit immediate into dest_r64.
// The REX.W prefix switches the instruction from its default 32-bit
// immediate to a 64-bit one.
void asm_x64_mov_i64_to_r64(asm_x64_t* as, int64_t src_i64, int dest_r64) {
    int rex = REX_PREFIX | REX_W;
    int opcode = OPCODE_MOV_I64_TO_R64 | dest_r64;
    asm_x64_write_byte_2(as, rex, opcode);
    asm_x64_write_word64(as, src_i64);
}
// Load an immediate into dest_r64 using the shorter encoding when possible:
// 5 bytes if the value fits in 32 unsigned bits, 10 bytes otherwise.
void asm_x64_mov_i64_to_r64_optimised(asm_x64_t *as, int64_t src_i64, int dest_r64) {
    if (!UNSIGNED_FIT32(src_i64)) {
        // 10 bytes
        asm_x64_mov_i64_to_r64(as, src_i64, dest_r64);
        return;
    }
    // 5 bytes
    asm_x64_mov_i32_to_r64(as, src_i64 & 0xffffffff, dest_r64);
}
// mov a 32-bit immediate to memory - NOT IMPLEMENTED.
// The function asserts immediately; the memory operand (dest_r32,
// dest_disp) is never encoded, only the opcode and the immediate are.
void asm_x64_mov_i32_to_disp(asm_x64_t* as, int src_i32, int dest_r32, int dest_disp) {
    assert(0);
    asm_x64_write_byte_1(as, OPCODE_MOV_I32_TO_RM32);
    // missing here: encoding of the [dest_r32 + dest_disp] operand
    asm_x64_write_word32(as, src_i32);
}
// xor dest_r64, src_r64 (64-bit operation via REX.W)
void asm_x64_xor_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64) {
    int rex = REX_PREFIX | REX_W;
    int modrm = MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64);
    asm_x64_write_byte_3(as, rex, OPCODE_XOR_R64_TO_RM64, modrm);
}
// add dest_r64, src_r64 (64-bit operation via REX.W)
void asm_x64_add_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64) {
    int rex = REX_PREFIX | REX_W;
    int modrm = MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64);
    asm_x64_write_byte_3(as, rex, OPCODE_ADD_R64_TO_RM64, modrm);
}
// add dest_r32, src_i32
// Uses the one-byte immediate form when the value fits in a signed byte and
// the four-byte form otherwise.  No REX prefix is emitted.  RSP is rejected
// as destination (the original author believed that case needs a 64-bit
// operation).
void asm_x64_add_i32_to_r32(asm_x64_t* as, int src_i32, int dest_r32) {
    assert(dest_r32 != REG_RSP); // in this case i think src_i32 must be 64 bits

    int modrm = MODRM_R64(0) | MODRM_RM_REG | MODRM_RM_R64(dest_r32);
    int fits_i8 = SIGNED_FIT8(src_i32);

    asm_x64_write_byte_2(as, fits_i8 ? OPCODE_ADD_I8_TO_RM32 : OPCODE_ADD_I32_TO_RM32, modrm);
    if (fits_i8) {
        asm_x64_write_byte_1(as, src_i32 & 0xff);
    } else {
        asm_x64_write_word32(as, src_i32);
    }
}
// sub dest_r32, src_r32
// No REX prefix is emitted, so the operation defaults to 32 bits.
void asm_x64_sub_r32_from_r32(asm_x64_t* as, int src_r32, int dest_r32) {
    int modrm = MODRM_R64(src_r32) | MODRM_RM_REG | MODRM_RM_R64(dest_r32);
    asm_x64_write_byte_2(as, OPCODE_SUB_R64_FROM_RM64, modrm);
}
// sub dest_r64, src_r64
// The REX.W prefix selects the 64-bit operation.
void asm_x64_sub_r64_from_r64(asm_x64_t* as, int src_r64, int dest_r64) {
    int rex = REX_PREFIX | REX_W;
    int modrm = MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64);
    asm_x64_write_byte_3(as, rex, OPCODE_SUB_R64_FROM_RM64, modrm);
}
// sub dest_r32, src_i32
// Uses the one-byte immediate form when the value fits in a signed byte and
// the four-byte form otherwise.  No REX prefix is emitted, so the operation
// defaults to 32 bits.
void asm_x64_sub_i32_from_r32(asm_x64_t* as, int src_i32, int dest_r32) {
    int modrm = MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r32);
    int fits_i8 = SIGNED_FIT8(src_i32);

    asm_x64_write_byte_2(as, fits_i8 ? OPCODE_SUB_I8_FROM_RM64 : OPCODE_SUB_I32_FROM_RM64, modrm);
    if (fits_i8) {
        asm_x64_write_byte_1(as, src_i32 & 0xff);
    } else {
        asm_x64_write_word32(as, src_i32);
    }
}
// sub dest_r64, src_i32
// Uses the one-byte immediate form when the value fits in a signed byte and
// the four-byte form otherwise.  The REX.W prefix selects the 64-bit
// operation.
void asm_x64_sub_i32_from_r64(asm_x64_t* as, int src_i32, int dest_r64) {
    int rex = REX_PREFIX | REX_W;
    int modrm = MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r64);
    int fits_i8 = SIGNED_FIT8(src_i32);

    asm_x64_write_byte_3(as, rex, fits_i8 ? OPCODE_SUB_I8_FROM_RM64 : OPCODE_SUB_I32_FROM_RM64, modrm);
    if (fits_i8) {
        asm_x64_write_byte_1(as, src_i32 & 0xff);
    } else {
        asm_x64_write_word32(as, src_i32);
    }
}
/* shifts not tested */
// shl r32, imm (opcode extension /4); marked as untested by the author
void asm_x64_shl_r32_by_imm(asm_x64_t* as, int r32, int imm) {
    int modrm = MODRM_R64(4) | MODRM_RM_REG | MODRM_RM_R64(r32);
    asm_x64_write_byte_2(as, OPCODE_SHL_RM32_BY_I8, modrm);
    asm_x64_write_byte_1(as, imm);
}
// shr r32, imm (opcode extension /5); marked as untested by the author
void asm_x64_shr_r32_by_imm(asm_x64_t* as, int r32, int imm) {
    int modrm = MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(r32);
    asm_x64_write_byte_2(as, OPCODE_SHR_RM32_BY_I8, modrm);
    asm_x64_write_byte_1(as, imm);
}
// sar r32, imm (opcode extension /7); marked as untested by the author
void asm_x64_sar_r32_by_imm(asm_x64_t* as, int r32, int imm) {
    int modrm = MODRM_R64(7) | MODRM_RM_REG | MODRM_RM_R64(r32);
    asm_x64_write_byte_2(as, OPCODE_SAR_RM32_BY_I8, modrm);
    asm_x64_write_byte_1(as, imm);
}
// 64-bit compare of two registers: src_r64_a goes into the ModRM reg field,
// src_r64_b into the r/m field.
void asm_x64_cmp_r64_with_r64(asm_x64_t* as, int src_r64_a, int src_r64_b) {
    int rex = REX_PREFIX | REX_W;
    int modrm = MODRM_R64(src_r64_a) | MODRM_RM_REG | MODRM_RM_R64(src_r64_b);
    asm_x64_write_byte_3(as, rex, OPCODE_CMP_R64_WITH_RM64, modrm);
}
// Compare a register with a memory operand - NOT IMPLEMENTED.
// The function asserts immediately; only the opcode byte is emitted, the
// operands are never encoded.
void asm_x64_cmp_r32_with_disp(asm_x64_t* as, int src_r32_a, int src_r32_b, int src_disp_b) {
    assert(0);
    // missing here: encoding of src_r32_a with [src_r32_b + src_disp_b]
    asm_x64_write_byte_1(as, OPCODE_CMP_R64_WITH_RM64);
}
// Compare a memory operand with a register - NOT IMPLEMENTED.
// The function asserts immediately; only the opcode byte is emitted, the
// operands are never encoded.
void asm_x64_cmp_disp_with_r32(asm_x64_t* as, int src_r32_a, int src_disp_a, int src_r32_b) {
    assert(0);
    // missing here: encoding of [src_r32_a + src_disp_a] with src_r32_b
    asm_x64_write_byte_1(as, OPCODE_CMP_RM32_WITH_R32);
}
// cmp src_r32, src_i32
// Uses the one-byte immediate form when the value fits in a signed byte and
// the four-byte form otherwise.  No REX prefix is emitted.
void asm_x64_cmp_i32_with_r32(asm_x64_t* as, int src_i32, int src_r32) {
    int modrm = MODRM_R64(7) | MODRM_RM_REG | MODRM_RM_R64(src_r32);
    int fits_i8 = SIGNED_FIT8(src_i32);

    asm_x64_write_byte_2(as, fits_i8 ? OPCODE_CMP_I8_WITH_RM32 : OPCODE_CMP_I32_WITH_RM32, modrm);
    if (fits_i8) {
        asm_x64_write_byte_1(as, src_i32 & 0xff);
    } else {
        asm_x64_write_word32(as, src_i32);
    }
}
// 8-bit test of two registers: src_r64_a goes into the ModRM reg field,
// src_r64_b into the r/m field.
void asm_x64_test_r8_with_r8(asm_x64_t* as, int src_r64_a, int src_r64_b) {
    int modrm = MODRM_R64(src_r64_a) | MODRM_RM_REG | MODRM_RM_R64(src_r64_b);
    asm_x64_write_byte_2(as, OPCODE_TEST_R8_WITH_RM8, modrm);
}
// setcc dest_r8: two-byte opcode with the condition code jcc_type OR-ed into
// the second byte, followed by a ModRM byte selecting the register.
void asm_x64_setcc_r8(asm_x64_t* as, int jcc_type, int dest_r8) {
    int opcode_b = OPCODE_SETCC_RM8_B | jcc_type;
    int modrm = MODRM_R64(0) | MODRM_RM_REG | MODRM_RM_R64(dest_r8);
    asm_x64_write_byte_3(as, OPCODE_SETCC_RM8_A, opcode_b, modrm);
}
// Hand out the next unused label id and advance the label counter.
int asm_x64_label_new(asm_x64_t* as) {
    int label = as->next_label;
    as->next_label = label + 1;
    return label;
}
// Bind `label` to the current emit position.
// Nothing happens in pass 1.  In pass 2 the offset is recorded (each label
// may be assigned only once); in pass 3 the offset is checked to be the same
// as the one recorded in pass 2.
void asm_x64_label_assign(asm_x64_t* as, int label) {
    if (as->pass <= ASM_X64_PASS_1) {
        return;
    }
    assert(label < as->max_num_labels);

    switch (as->pass) {
        case ASM_X64_PASS_2:
            // record the label offset
            assert(as->label_offsets[label] == -1);
            as->label_offsets[label] = as->code_offset;
            break;
        case ASM_X64_PASS_3:
            // the offset must not have moved between pass 2 and pass 3
            assert(as->label_offsets[label] == as->code_offset);
            break;
        default:
            break;
    }
}
// Emit an unconditional jump to `label`.
// Nothing is emitted in pass 1.  A backwards jump whose target is already
// known and within reach of an 8-bit displacement uses the 2-byte form;
// everything else (forward jumps, whose distance is not yet known, and far
// backward jumps) uses the 5-byte rel32 form so that the size stays the same
// in later passes.  Displacements are relative to the end of the
// instruction, hence the -2 / -5 corrections.
void asm_x64_jmp_label(asm_x64_t* as, int label) {
    if (as->pass <= ASM_X64_PASS_1) {
        return;
    }

    int dest = as->label_offsets[label];
    int rel = dest - as->code_offset;

    if (dest >= 0 && rel < 0 && (SIGNED_FIT8(rel - 2))) {
        // short backwards jump
        asm_x64_write_byte_2(as, OPCODE_JMP_REL8, (rel - 2) & 0xff);
    } else {
        // forwards jump, or out of range for the short form
        asm_x64_write_byte_1(as, OPCODE_JMP_REL32);
        asm_x64_write_word32(as, rel - 5);
    }
}
// Emit a conditional jump (condition code jcc_type) to `label`.
// Nothing is emitted in pass 1.  A backwards jump whose target is already
// known and within reach of an 8-bit displacement uses the 2-byte form;
// everything else (forward jumps, whose distance is not yet known, and far
// backward jumps) uses the 6-byte rel32 form so that the size stays the same
// in later passes.  Displacements are relative to the end of the
// instruction, hence the -2 / -6 corrections.
void asm_x64_jcc_label(asm_x64_t* as, int jcc_type, int label) {
    if (as->pass <= ASM_X64_PASS_1) {
        return;
    }

    int dest = as->label_offsets[label];
    int rel = dest - as->code_offset;

    if (dest >= 0 && rel < 0 && (SIGNED_FIT8(rel - 2))) {
        // short backwards jump
        asm_x64_write_byte_2(as, OPCODE_JCC_REL8 | jcc_type, (rel - 2) & 0xff);
    } else {
        // forwards jump, or out of range for the short form
        asm_x64_write_byte_2(as, OPCODE_JCC_REL32_A, OPCODE_JCC_REL32_B | jcc_type);
        asm_x64_write_word32(as, rel - 6);
    }
}
// Emit the function prologue: save RBP, set up RBP as frame pointer, reserve
// stack space for the locals and save RBX.
// The number of reserved slots is forced to be odd so that the stack is
// aligned on a 16 byte boundary.
void asm_x64_entry(asm_x64_t* as, int num_locals) {
    asm_x64_push_r64(as, REG_RBP);
    asm_x64_mov_r64_to_r64(as, REG_RSP, REG_RBP);

    int num_slots = (num_locals < 0 ? 0 : num_locals) | 1;
    asm_x64_sub_i32_from_r64(as, num_slots * WORD_SIZE, REG_RSP);

    asm_x64_push_r64(as, REG_RBX);
}
// Emit the function epilogue: restore RBX, tear down the frame with `leave`
// and return.
void asm_x64_exit(asm_x64_t* as) {
    asm_x64_pop_r64(as, REG_RBX);
    int opcode = OPCODE_LEAVE;
    asm_x64_write_byte_1(as, opcode);
    asm_x64_ret(as);
}
// Push argument number src_arg_num - disabled: the function asserts before
// doing anything.  The code after the assertion reads the argument from
// [RBP + 8 + src_arg_num * WORD_SIZE].
void asm_x64_push_arg(asm_x64_t* as, int src_arg_num) {
    assert(0);
    int arg_offset = 8 + src_arg_num * WORD_SIZE;
    asm_x64_push_disp(as, REG_RBP, arg_offset);
}
// Load argument number src_arg_num into dest_r32 - NOT IMPLEMENTED.
// The function only asserts and emits no code.
void asm_x64_mov_arg_to_r32(asm_x64_t* as, int src_arg_num, int dest_r32) {
    (void)as;
    (void)src_arg_num;
    (void)dest_r32;
    assert(0);
}
void asm_x64_mov_r32_to_arg(asm_x64_t* as, int src_r32, int dest_arg_num) {
assert(0);
//asm_x64_mov_r32_to_disp(as, src_r32, REG_RBP, 8 + dest_arg_num * WORD_SIZE);
}
// Return the displacement of local `local_num` relative to the frame pointer.
// Locals live below the frame pointer: local 0 is at -WORD_SIZE, local 1 at
// -2 * WORD_SIZE, and so on.
static int asm_x64_local_offset_from_ebp(int local_num)
{
return -(local_num + 1) * WORD_SIZE;
}
// Load local `src_local_num` (addressed relative to RBP) into register dest_r64.
void asm_x64_mov_local_to_r64(asm_x64_t* as, int src_local_num, int dest_r64) {
asm_x64_mov_disp_to_r64(as, REG_RBP, asm_x64_local_offset_from_ebp(src_local_num), dest_r64);
}
// Store register src_r64 into local `dest_local_num` (addressed relative to RBP).
void asm_x64_mov_r64_to_local(asm_x64_t* as, int src_r64, int dest_local_num) {
asm_x64_mov_r64_to_disp(as, src_r64, REG_RBP, asm_x64_local_offset_from_ebp(dest_local_num));
}
// Put the address of local `local_num` into register dest_r64.  A plain
// register move is used when the displacement from RBP is zero, otherwise LEA.
void asm_x64_mov_local_addr_to_r64(asm_x64_t* as, int local_num, int dest_r64) {
    int disp = asm_x64_local_offset_from_ebp(local_num);
    if (disp != 0) {
        asm_x64_lea_disp_to_r64(as, REG_RBP, disp, dest_r64);
        return;
    }
    asm_x64_mov_r64_to_r64(as, REG_RBP, dest_r64);
}
// Push the value of local `local_num` (addressed relative to RBP) onto the machine stack.
void asm_x64_push_local(asm_x64_t* as, int local_num) {
asm_x64_push_disp(as, REG_RBP, asm_x64_local_offset_from_ebp(local_num));
}
// Push the address of local `local_num` onto the machine stack, using
// register temp_r64 as scratch.
//
// The address is formed by asm_x64_mov_local_addr_to_r64 (a 64-bit mov/lea
// based on RBP) so that both helpers compute it the same way.  The previous
// version copied RBP and then adjusted it with asm_x64_add_i32_to_r32.
// NOTE(review): that helper is presumably a 32-bit add; on x86-64 a 32-bit
// operation clears the upper half of the destination register, which would
// corrupt a stack address - confirm against its definition.
void asm_x64_push_local_addr(asm_x64_t* as, int local_num, int temp_r64)
{
    asm_x64_mov_local_addr_to_r64(as, local_num, temp_r64);
    asm_x64_push_r64(as, temp_r64);
}
/*
can't use these because code might be relocated when resized
void asm_x64_call(asm_x64_t* as, void* func)
{
asm_x64_sub_i32_from_r32(as, 8, REG_RSP);
asm_x64_write_byte_1(as, OPCODE_CALL_REL32);
asm_x64_write_word32(as, func - (void*)(as->code_cur + 4));
asm_x64_mov_r64_to_r64(as, REG_RBP, REG_RSP);
}
void asm_x64_call_i1(asm_x64_t* as, void* func, int i1)
{
asm_x64_sub_i32_from_r32(as, 8, REG_RSP);
asm_x64_sub_i32_from_r32(as, 12, REG_RSP);
asm_x64_push_i32(as, i1);
asm_x64_write_byte_1(as, OPCODE_CALL_REL32);
asm_x64_write_word32(as, func - (void*)(as->code_cur + 4));
asm_x64_add_i32_to_r32(as, 16, REG_RSP);
asm_x64_mov_r64_to_r64(as, REG_RBP, REG_RSP);
}
*/
// Emit a call to the absolute address `ptr` as a relative CALL rel32.
// temp_r64 is unused by this encoding; it belongs to the alternative of
// loading ptr into a register and calling through it
// (asm_x64_mov_i64_to_r64_optimised followed by OPCODE_CALL_RM32).
// The relative form reduces code size by 2 bytes per call, but doesn't seem
// to speed it up at all.
void asm_x64_call_ind(asm_x64_t* as, void *ptr, int temp_r64) {
    asm_x64_write_byte_1(as, OPCODE_CALL_REL32);
    // the displacement is relative to the end of the 4-byte operand that is
    // about to be written
    void *insn_end = (void*)(as->code_base + as->code_offset + 4);
    asm_x64_write_word32(as, ptr - insn_end);
}

76
py/asmx64.h Normal file
View File

@ -0,0 +1,76 @@
// assembler pass numbers; the jump emitters write nothing while pass is ASM_X64_PASS_1
#define ASM_X64_PASS_1 (1)
#define ASM_X64_PASS_2 (2)
#define ASM_X64_PASS_3 (3)
// register numbers
#define REG_RAX (0)
#define REG_RCX (1)
#define REG_RDX (2)
#define REG_RBX (3)
#define REG_RSP (4)
#define REG_RBP (5)
#define REG_RSI (6)
#define REG_RDI (7)
// condition codes, used for jcc and setcc (despite their j-name!)
#define JCC_JB (0x2) // below, unsigned
#define JCC_JZ (0x4)
#define JCC_JE (0x4)
#define JCC_JNZ (0x5)
#define JCC_JNE (0x5)
#define JCC_JL (0xc) // less, signed
// registers used for the return value and the first three arguments
#define REG_RET REG_RAX
#define REG_ARG_1 REG_RDI
#define REG_ARG_2 REG_RSI
#define REG_ARG_3 REG_RDX
// opaque assembler state
typedef struct _asm_x64_t asm_x64_t;
// lifetime, passes and access to the generated code
asm_x64_t* asm_x64_new();
void asm_x64_free(asm_x64_t* as, bool free_code);
void asm_x64_start_pass(asm_x64_t *as, int pass);
void asm_x64_end_pass(asm_x64_t *as);
uint asm_x64_get_code_size(asm_x64_t* as);
void* asm_x64_get_code(asm_x64_t* as);
// individual instructions
void asm_x64_nop(asm_x64_t* as);
void asm_x64_push_r64(asm_x64_t* as, int src_r64);
void asm_x64_push_i32(asm_x64_t* as, int src_i32); // will be sign extended to 64 bits
void asm_x64_push_disp(asm_x64_t* as, int src_r32, int src_offset);
void asm_x64_pop_r64(asm_x64_t* as, int dest_r64);
void asm_x64_mov_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64);
void asm_x64_mov_r32_to_disp(asm_x64_t* as, int src_r32, int dest_r32, int dest_disp);
void asm_x64_mov_disp_to_r32(asm_x64_t* as, int src_r32, int src_disp, int dest_r32);
void asm_x64_mov_i32_to_r64(asm_x64_t* as, int src_i32, int dest_r64);
void asm_x64_mov_i64_to_r64(asm_x64_t* as, int64_t src_i64, int dest_r64);
void asm_x64_mov_i32_to_disp(asm_x64_t* as, int src_i32, int dest_r32, int dest_disp);
void asm_x64_mov_i64_to_r64_optimised(asm_x64_t *as, int64_t src_i64, int dest_r64);
void asm_x64_xor_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64);
void asm_x64_add_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64);
void asm_x64_add_i32_to_r32(asm_x64_t* as, int src_i32, int dest_r32);
void asm_x64_sub_r32_from_r32(asm_x64_t* as, int src_r32, int dest_r32);
void asm_x64_sub_i32_from_r32(asm_x64_t* as, int src_i32, int dest_r32);
void asm_x64_shl_r32_by_imm(asm_x64_t* as, int r32, int imm);
void asm_x64_shr_r32_by_imm(asm_x64_t* as, int r32, int imm);
void asm_x64_sar_r32_by_imm(asm_x64_t* as, int r32, int imm);
void asm_x64_cmp_r64_with_r64(asm_x64_t* as, int src_r64_a, int src_r64_b);
void asm_x64_cmp_r32_with_disp(asm_x64_t* as, int src_r32_a, int src_r32_b, int src_disp_b);
void asm_x64_cmp_disp_with_r32(asm_x64_t* as, int src_r32_a, int src_disp_a, int src_r32_b);
void asm_x64_cmp_i32_with_r32(asm_x64_t* as, int src_i32, int src_r32);
void asm_x64_test_r8_with_r8(asm_x64_t* as, int src_r64_a, int src_r64_b);
void asm_x64_setcc_r8(asm_x64_t* as, int jcc_type, int dest_r8);
// labels and jumps; jcc_type is one of the JCC_* condition codes
int asm_x64_label_new(asm_x64_t* as);
void asm_x64_label_assign(asm_x64_t* as, int label);
void asm_x64_jmp_label(asm_x64_t* as, int label);
void asm_x64_jcc_label(asm_x64_t* as, int jcc_type, int label);
// function prologue/epilogue, arguments, locals and calls
void asm_x64_entry(asm_x64_t* as, int num_locals);
void asm_x64_exit(asm_x64_t* as);
void asm_x64_push_arg(asm_x64_t* as, int src_arg_num);
void asm_x64_mov_arg_to_r32(asm_x64_t* as, int src_arg_num, int dest_r32);
void asm_x64_mov_r32_to_arg(asm_x64_t* as, int src_r32, int dest_arg_num);
void asm_x64_mov_local_to_r64(asm_x64_t* as, int src_local_num, int dest_r64);
void asm_x64_mov_r64_to_local(asm_x64_t* as, int src_r64, int dest_local_num);
void asm_x64_mov_local_addr_to_r64(asm_x64_t* as, int local_num, int dest_r64);
void asm_x64_push_local(asm_x64_t* as, int local_num);
void asm_x64_push_local_addr(asm_x64_t* as, int local_num, int temp_r32);
void asm_x64_call_ind(asm_x64_t* as, void* ptr, int temp_r32);

272
py/bc.c Normal file
View File

@ -0,0 +1,272 @@
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "misc.h"
#include "machine.h"
#include "runtime.h"
#include "bc.h"
// Decode a variable-length unsigned operand at ip into unum, advancing ip.
// A first byte <= 127 is the value itself; otherwise its low 6 bits are the
// high part of the value and the following byte is the low 8 bits.
#define DECODE_UINT do { unum = *ip++; if (unum > 127) { unum = ((unum & 0x3f) << 8) | (*ip++); } } while (0)
// Same decoding as DECODE_UINT, with the result stored in qstr.
#define DECODE_QSTR do { qstr = *ip++; if (qstr > 127) { qstr = ((qstr & 0x3f) << 8) | (*ip++); } } while (0)
// The value stack grows downwards: PUSH pre-decrements sp, POP post-increments it.
#define PUSH(val) *--sp = (val)
#define POP() (*sp++)
// Interpret the byte code starting at `code` and return the value produced by
// the first PYBC_RETURN_VALUE that is executed.
//
//  code    start of the byte code; jump operands are offsets from this pointer
//  len     length of the byte code (not used by the interpreter loop)
//  args    initial values for the first n_args local variables
//  n_args  number of entries in args; at most 7 are supported
//
// Locals 0..2 are held in fast0..fast2 and locals 3..6 in fastn[0..3].
// An unimplemented opcode prints a diagnostic and trips assert(0).
//
// NOTE(review): the value stack has a fixed capacity of 10 entries and PUSH
// does not check for overflow.
py_obj_t py_execute_byte_code(byte *code, uint len, py_obj_t *args, uint n_args) {
    byte *ip = code;
    py_obj_t stack[10];
    py_obj_t *sp = &stack[10]; // stack grows down, sp points to top of stack
    machine_uint_t unum;
    machine_int_t snum;
    qstr qstr;
    py_obj_t obj1, obj2;
    py_obj_t fast0 = NULL, fast1 = NULL, fast2 = NULL, fastn[4] = {NULL, NULL, NULL, NULL};

    // init args
    for (uint i = 0; i < n_args; i++) {
        if (i == 0) {
            fast0 = args[0];
        } else if (i == 1) {
            fast1 = args[1];
        } else if (i == 2) {
            fast2 = args[2];
        } else {
            assert(i - 3 < 4);
            fastn[i - 3] = args[i];
        }
    }

    // execute byte code
    for (;;) {
        int op = *ip++;
        switch (op) {
            case PYBC_LOAD_CONST_FALSE:
                PUSH(py_const_false);
                break;

            case PYBC_LOAD_CONST_NONE:
                PUSH(py_const_none);
                break;

            case PYBC_LOAD_CONST_TRUE:
                PUSH(py_const_true);
                break;

            case PYBC_LOAD_CONST_SMALL_INT:
                // operand is a little-endian, sign-extended 16-bit value
                snum = ip[0] | (ip[1] << 8);
                if (snum & 0x8000) {
                    snum |= ~0xffff;
                }
                ip += 2;
                // tag as a small int: value in the upper bits, low bit set;
                // the shift is done unsigned so negative values are well defined
                PUSH((py_obj_t)((machine_uint_t)snum << 1 | 1));
                break;

            case PYBC_LOAD_CONST_ID:
                DECODE_QSTR;
                PUSH(rt_load_const_str(qstr)); // TODO
                break;

            case PYBC_LOAD_CONST_STRING:
                DECODE_QSTR;
                PUSH(rt_load_const_str(qstr));
                break;

            case PYBC_LOAD_FAST_0:
                PUSH(fast0);
                break;

            case PYBC_LOAD_FAST_1:
                PUSH(fast1);
                break;

            case PYBC_LOAD_FAST_2:
                PUSH(fast2);
                break;

            case PYBC_LOAD_FAST_N:
                DECODE_UINT;
                // operand is the local number; only locals 3..6 live in fastn
                assert(unum >= 3 && unum - 3 < 4);
                PUSH(fastn[unum - 3]);
                break;

            case PYBC_LOAD_NAME:
                DECODE_QSTR;
                PUSH(rt_load_name(qstr));
                break;

            case PYBC_LOAD_GLOBAL:
                DECODE_QSTR;
                PUSH(rt_load_global(qstr));
                break;

            case PYBC_LOAD_ATTR:
                DECODE_QSTR;
                *sp = rt_load_attr(*sp, qstr);
                break;

            case PYBC_LOAD_METHOD:
                DECODE_QSTR;
                // make room for one more entry; the base object (old top of
                // stack) is passed along with the new stack pointer
                sp -= 1;
                rt_load_method(sp[1], qstr, sp);
                break;

            case PYBC_LOAD_BUILD_CLASS:
                PUSH(rt_load_build_class());
                break;

            case PYBC_STORE_FAST_0:
                fast0 = POP();
                break;

            case PYBC_STORE_FAST_1:
                fast1 = POP();
                break;

            case PYBC_STORE_FAST_2:
                fast2 = POP();
                break;

            case PYBC_STORE_FAST_N:
                DECODE_UINT;
                // operand is the local number; only locals 3..6 live in fastn
                assert(unum >= 3 && unum - 3 < 4);
                fastn[unum - 3] = POP();
                break;

            case PYBC_STORE_NAME:
                DECODE_QSTR;
                rt_store_name(qstr, POP());
                break;

            case PYBC_STORE_SUBSCR:
                rt_store_subscr(sp[1], sp[0], sp[2]);
                sp += 3;
                break;

            case PYBC_DUP_TOP:
                obj1 = *sp;
                PUSH(obj1);
                break;

            case PYBC_DUP_TOP_TWO:
                sp -= 2;
                sp[0] = sp[2];
                sp[1] = sp[3];
                break;

            case PYBC_POP_TOP:
                ++sp;
                break;

            case PYBC_ROT_THREE:
                obj1 = sp[0];
                sp[0] = sp[1];
                sp[1] = sp[2];
                sp[2] = obj1;
                break;

            case PYBC_JUMP:
                DECODE_UINT;
                ip = code + unum;
                break;

            case PYBC_POP_JUMP_IF_FALSE:
                DECODE_UINT;
                if (!rt_is_true(POP())) {
                    ip = code + unum;
                }
                break;

            case PYBC_SETUP_LOOP:
                // operand is decoded only to skip over it
                DECODE_UINT;
                break;

            case PYBC_POP_BLOCK:
                break;

            case PYBC_BINARY_OP:
                unum = *ip++;
                obj2 = POP();
                obj1 = *sp;
                *sp = rt_binary_op(unum, obj1, obj2);
                break;

            case PYBC_COMPARE_OP:
                unum = *ip++;
                obj2 = POP();
                obj1 = *sp;
                *sp = rt_compare_op(unum, obj1, obj2);
                break;

            case PYBC_BUILD_LIST:
                DECODE_UINT;
                obj1 = rt_build_list(unum, sp);
                // the unum items are replaced by the single result
                sp += unum - 1;
                *sp = obj1;
                break;

            case PYBC_BUILD_MAP:
                DECODE_UINT;
                PUSH(rt_build_map(unum));
                break;

            case PYBC_STORE_MAP:
                sp += 2;
                rt_store_map(sp[0], sp[-2], sp[-1]);
                break;

            case PYBC_BUILD_SET:
                DECODE_UINT;
                obj1 = rt_build_set(unum, sp);
                // the unum items are replaced by the single result
                sp += unum - 1;
                *sp = obj1;
                break;

            case PYBC_MAKE_FUNCTION:
                DECODE_UINT;
                PUSH(rt_make_function_from_id(unum));
                break;

            case PYBC_CALL_FUNCTION:
                DECODE_UINT;
                assert((unum & 0xff00) == 0); // n_keyword
                // switch on n_positional
                if ((unum & 0xff) == 0) {
                    *sp = rt_call_function_0(*sp);
                } else if ((unum & 0xff) == 1) {
                    obj1 = *sp++; // the single argument
                    *sp = rt_call_function_1(*sp, obj1);
                } else if ((unum & 0xff) == 2) {
                    obj2 = *sp++; // the second argument
                    obj1 = *sp++; // the first argument
                    *sp = rt_call_function_2(*sp, obj1, obj2);
                } else {
                    assert(0);
                }
                break;

            case PYBC_CALL_METHOD:
                DECODE_UINT;
                assert((unum & 0xff00) == 0); // n_keyword
                // switch on n_positional
                if ((unum & 0xff) == 0) {
                    obj1 = *sp++; // the self object (or NULL)
                    *sp = rt_call_method_1(*sp, obj1);
                } else if ((unum & 0xff) == 1) {
                    obj2 = *sp++; // the first argument
                    obj1 = *sp++; // the self object (or NULL)
                    // NOTE(review): this goes through rt_call_function_2 even
                    // though self may be NULL, unlike the rt_call_method_1
                    // path above - confirm the runtime copes with that
                    *sp = rt_call_function_2(*sp, obj1, obj2);
                } else {
                    assert(0);
                }
                break;

            case PYBC_RETURN_VALUE:
                return *sp;

            default:
                printf("code %p, offset %u, byte code 0x%02x not implemented\n", code, (uint)(ip - code), op);
                assert(0);
                return py_const_none;
        }
    }
}

97
py/bc.h Normal file
View File

@ -0,0 +1,97 @@
// Byte code opcodes.  The trailing comment on an opcode names its operand, if any.

// load constants
#define PYBC_LOAD_CONST_FALSE (0x10)
#define PYBC_LOAD_CONST_NONE (0x11)
#define PYBC_LOAD_CONST_TRUE (0x12)
#define PYBC_LOAD_CONST_SMALL_INT (0x13) // int
#define PYBC_LOAD_CONST_INT (0x14) // qstr
#define PYBC_LOAD_CONST_DEC (0x15) // qstr
#define PYBC_LOAD_CONST_ID (0x16) // qstr
#define PYBC_LOAD_CONST_BYTES (0x17) // qstr
#define PYBC_LOAD_CONST_STRING (0x18) // qstr
// load variables, attributes and methods
#define PYBC_LOAD_FAST_0 (0x20)
#define PYBC_LOAD_FAST_1 (0x21)
#define PYBC_LOAD_FAST_2 (0x22)
#define PYBC_LOAD_FAST_N (0x23) // uint
#define PYBC_LOAD_NAME (0x24) // qstr
#define PYBC_LOAD_GLOBAL (0x25) // qstr
#define PYBC_LOAD_ATTR (0x26) // qstr
#define PYBC_LOAD_METHOD (0x27) // qstr
#define PYBC_LOAD_BUILD_CLASS (0x28)
// store and delete
#define PYBC_STORE_FAST_0 (0x30)
#define PYBC_STORE_FAST_1 (0x31)
#define PYBC_STORE_FAST_2 (0x32)
#define PYBC_STORE_FAST_N (0x33) // uint
#define PYBC_STORE_NAME (0x34) // qstr
#define PYBC_STORE_GLOBAL (0x35) // qstr
#define PYBC_STORE_ATTR (0x36) // qstr
#define PYBC_STORE_LOCALS (0x37)
#define PYBC_STORE_SUBSCR (0x38)
#define PYBC_DELETE_FAST_N (0x39) // uint
#define PYBC_DELETE_NAME (0x3a) // qstr
#define PYBC_DELETE_GLOBAL (0x3b) // qstr
#define PYBC_DELETE_DEREF (0x3c) // qstr
#define PYBC_DELETE_ATTR (0x3d) // qstr
#define PYBC_DELETE_SUBSCR (0x3e)
// stack manipulation, jumps and block handling
#define PYBC_DUP_TOP (0x40)
#define PYBC_DUP_TOP_TWO (0x41)
#define PYBC_POP_TOP (0x42)
#define PYBC_ROT_TWO (0x43)
#define PYBC_ROT_THREE (0x44)
#define PYBC_JUMP (0x45) // pos
#define PYBC_POP_JUMP_IF_TRUE (0x46) // pos
#define PYBC_POP_JUMP_IF_FALSE (0x47) // pos
#define PYBC_JUMP_IF_TRUE_OR_POP (0x48) // pos
#define PYBC_JUMP_IF_FALSE_OR_POP (0x49) // pos
#define PYBC_SETUP_LOOP (0x4a) // pos
#define PYBC_BREAK_LOOP (0x4b) // pos
#define PYBC_CONTINUE_LOOP (0x4c) // pos
#define PYBC_SETUP_WITH (0x4d) // pos
#define PYBC_WITH_CLEANUP (0x4e)
#define PYBC_SETUP_EXCEPT (0x4f) // pos
#define PYBC_SETUP_FINALLY (0x50) // pos
#define PYBC_END_FINALLY (0x51)
#define PYBC_GET_ITER (0x52)
#define PYBC_FOR_ITER (0x53) // pos
#define PYBC_POP_BLOCK (0x54)
#define PYBC_POP_EXCEPT (0x55)
// operators
#define PYBC_UNARY_OP (0x60) // byte
#define PYBC_BINARY_OP (0x61) // byte
#define PYBC_COMPARE_OP (0x62) // byte
// building and unpacking containers
#define PYBC_BUILD_TUPLE (0x70) // uint
#define PYBC_BUILD_LIST (0x71) // uint
#define PYBC_LIST_APPEND (0x72) // uint
#define PYBC_BUILD_MAP (0x73) // uint
#define PYBC_STORE_MAP (0x74)
#define PYBC_MAP_ADD (0x75) // uint
#define PYBC_BUILD_SET (0x76) // uint
#define PYBC_SET_ADD (0x77) // uint
#define PYBC_BUILD_SLICE (0x78) // uint
#define PYBC_UNPACK_SEQUENCE (0x79) // uint
#define PYBC_UNPACK_EX (0x7a) // uint
// return, raise and yield
#define PYBC_RETURN_VALUE (0x80)
#define PYBC_RAISE_VARARGS (0x81) // uint
#define PYBC_YIELD_VALUE (0x82)
#define PYBC_YIELD_FROM (0x83)
// making and calling functions
#define PYBC_MAKE_FUNCTION (0x90) // uint
#define PYBC_MAKE_CLOSURE (0x91) // uint?
#define PYBC_CALL_FUNCTION (0x92) // uint
#define PYBC_CALL_FUNCTION_VAR (0x93) // uint
#define PYBC_CALL_FUNCTION_KW (0x94) // uint
#define PYBC_CALL_FUNCTION_VAR_KW (0x95) // uint
#define PYBC_CALL_METHOD (0x96) // uint
#define PYBC_CALL_METHOD_VAR (0x97) // uint
#define PYBC_CALL_METHOD_KW (0x98) // uint
#define PYBC_CALL_METHOD_VAR_KW (0x99) // uint
// imports
#define PYBC_IMPORT_NAME (0xe0)
#define PYBC_IMPORT_FROM (0xe1)
#define PYBC_IMPORT_STAR (0xe2)
// Execute the byte code at `code`, with args[0..n_args-1] as the initial
// values of the first locals; returns the value of the executed RETURN_VALUE.
py_obj_t py_execute_byte_code(byte *code, uint len, py_obj_t *args, uint n_args);

2510
py/compile.c Normal file

File diff suppressed because it is too large Load Diff

1
py/compile.h Normal file
View File

@ -0,0 +1 @@
// Compile the parse tree rooted at pn.
void py_compile(py_parse_node_t pn);

120
py/emit.h Normal file
View File

@ -0,0 +1,120 @@
// Emitter selection: the uncommented define picks the emitter implementation
// (emitbc.c, for example, is wrapped in #ifdef EMIT_DO_BC).
//#define EMIT_DO_CPY
#define EMIT_DO_BC
//#define EMIT_DO_X64
//#define EMIT_DO_THUMB
/* Notes on passes:
* We don't know exactly the opcodes in pass 1 because they depend on the
* closing over of variables (LOAD_CLOSURE, BUILD_TUPLE, MAKE_CLOSURE), which
* depends on determining the scope of variables in each function, and this
* is not known until the end of pass 1.
* As a consequence, we don't know the maximum stack size until the end of pass 2.
* This is problematic for some emitters (x64) since they need to know the maximum
* stack size to compile the entry to the function, and this affects code size.
*/
typedef enum {
PASS_1 = 1, // work out id's and their kind, and number of labels
PASS_2 = 2, // work out stack size and code size and label offsets
PASS_3 = 3, // emit code
} pass_kind_t;
// opaque emitter state; each emitter implementation defines struct _emitter_t
typedef struct _emitter_t emitter_t;
// helpers shared by all emitters (they take the pass and scope explicitly)
void emit_common_declare_global(pass_kind_t pass, scope_t *scope, qstr qstr);
void emit_common_declare_nonlocal(pass_kind_t pass, scope_t *scope, qstr qstr);
void emit_common_load_id(pass_kind_t pass, scope_t *scope, qstr qstr___class__, emitter_t *emit, qstr qstr);
void emit_common_store_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr);
void emit_common_delete_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr);
// emitter lifetime, passes, stack tracking and labels
emitter_t *emit_new();
void emit_set_native_types(emitter_t *emit, bool do_native_types);
void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope);
void emit_end_pass(emitter_t *emit);
bool emit_last_emit_was_return_value(emitter_t *emit);
int emit_get_stack_size(emitter_t *emit);
void emit_set_stack_size(emitter_t *emit, int size);
int emit_label_new(emitter_t *emit);
void emit_label_assign(emitter_t *emit, int l);
// imports
void emit_import_name(emitter_t *emit, qstr qstr);
void emit_import_from(emitter_t *emit, qstr qstr);
void emit_import_star(emitter_t *emit);
// load constants
void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok);
void emit_load_const_small_int(emitter_t *emit, int arg);
void emit_load_const_int(emitter_t *emit, qstr qstr);
void emit_load_const_dec(emitter_t *emit, qstr qstr);
void emit_load_const_id(emitter_t *emit, qstr qstr);
void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes);
void emit_load_const_verbatim_start(emitter_t *emit);
void emit_load_const_verbatim_int(emitter_t *emit, int val);
void emit_load_const_verbatim_str(emitter_t *emit, const char *str);
void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len);
void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes);
void emit_load_const_verbatim_end(emitter_t *emit);
// load, store and delete variables, attributes and subscripts
void emit_load_fast(emitter_t *emit, qstr qstr, int local_num);
void emit_load_name(emitter_t *emit, qstr qstr);
void emit_load_global(emitter_t *emit, qstr qstr);
void emit_load_deref(emitter_t *emit, qstr qstr);
void emit_load_closure(emitter_t *emit, qstr qstr);
void emit_load_attr(emitter_t *emit, qstr qstr);
void emit_load_method(emitter_t *emit, qstr qstr);
void emit_load_build_class(emitter_t *emit);
void emit_store_fast(emitter_t *emit, qstr qstr, int local_num);
void emit_store_name(emitter_t *emit, qstr qstr);
void emit_store_global(emitter_t *emit, qstr qstr);
void emit_store_deref(emitter_t *emit, qstr qstr);
void emit_store_attr(emitter_t *emit, qstr qstr);
void emit_store_locals(emitter_t *emit);
void emit_store_subscr(emitter_t *emit);
void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num);
void emit_delete_name(emitter_t *emit, qstr qstr);
void emit_delete_global(emitter_t *emit, qstr qstr);
void emit_delete_deref(emitter_t *emit, qstr qstr);
void emit_delete_attr(emitter_t *emit, qstr qstr);
void emit_delete_subscr(emitter_t *emit);
// stack manipulation
void emit_dup_top(emitter_t *emit);
void emit_dup_top_two(emitter_t *emit);
void emit_pop_top(emitter_t *emit);
void emit_rot_two(emitter_t *emit);
void emit_rot_three(emitter_t *emit);
// jumps and block handling; `label` comes from emit_label_new
void emit_jump(emitter_t *emit, int label);
void emit_pop_jump_if_true(emitter_t *emit, int label);
void emit_pop_jump_if_false(emitter_t *emit, int label);
void emit_jump_if_true_or_pop(emitter_t *emit, int label);
void emit_jump_if_false_or_pop(emitter_t *emit, int label);
void emit_setup_loop(emitter_t *emit, int label);
void emit_break_loop(emitter_t *emit, int label);
void emit_continue_loop(emitter_t *emit, int label);
void emit_setup_with(emitter_t *emit, int label);
void emit_with_cleanup(emitter_t *emit);
void emit_setup_except(emitter_t *emit, int label);
void emit_setup_finally(emitter_t *emit, int label);
void emit_end_finally(emitter_t *emit);
void emit_get_iter(emitter_t *emit); // tos = getiter(tos)
void emit_for_iter(emitter_t *emit, int label);
void emit_for_iter_end(emitter_t *emit);
void emit_pop_block(emitter_t *emit);
void emit_pop_except(emitter_t *emit);
// operators
void emit_unary_op(emitter_t *emit, rt_unary_op_t op);
void emit_binary_op(emitter_t *emit, rt_binary_op_t op);
void emit_compare_op(emitter_t *emit, rt_compare_op_t op);
// building and unpacking containers
void emit_build_tuple(emitter_t *emit, int n_args);
void emit_build_list(emitter_t *emit, int n_args);
void emit_list_append(emitter_t *emit, int list_stack_index);
void emit_build_map(emitter_t *emit, int n_args);
void emit_store_map(emitter_t *emit);
void emit_map_add(emitter_t *emit, int map_stack_index);
void emit_build_set(emitter_t *emit, int n_args);
void emit_set_add(emitter_t *emit, int set_stack_index);
void emit_build_slice(emitter_t *emit, int n_args);
void emit_unpack_sequence(emitter_t *emit, int n_args);
void emit_unpack_ex(emitter_t *emit, int n_left, int n_right);
// functions, calls, return, raise and yield
void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params);
void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params);
void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg);
void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg);
void emit_return_value(emitter_t *emit);
void emit_raise_varargs(emitter_t *emit, int n_args);
void emit_yield_value(emitter_t *emit);
void emit_yield_from(emitter_t *emit);

692
py/emitbc.c Normal file
View File

@ -0,0 +1,692 @@
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "misc.h"
#include "lexer.h"
#include "machine.h"
#include "parse.h"
#include "compile.h"
#include "scope.h"
#include "runtime.h"
#include "emit.h"
#include "bc.h"
#ifdef EMIT_DO_BC
// State of the byte code emitter.
struct _emitter_t {
int pass; // current pass, set by emit_start_pass
int next_label; // next label number handed out by emit_label_new
int stack_size; // running stack depth tracked by emit_pre
bool last_emit_was_return_value;
scope_t *scope; // scope being compiled in the current pass
int max_num_labels; // largest label count seen at the end of a pass 1
uint *label_offsets; // code offset of each label, filled in during pass 2
uint code_offset; // number of bytes emitted so far in the current pass
uint code_size; // total code size, known at the end of pass 2
byte *code_base; // buffer for the emitted code, allocated at the end of pass 2
byte dummy_data[8]; // scratch target for writes made before pass 3
};
// Allocate an emitter with no labels and no code buffer.  The per-pass fields
// (pass, next_label, stack_size, ...) are set later by emit_start_pass.
emitter_t *emit_new() {
emitter_t *emit = m_new(emitter_t, 1);
emit->max_num_labels = 0;
emit->label_offsets = NULL;
emit->code_offset = 0;
emit->code_size = 0;
emit->code_base = NULL;
return emit;
}
// Return the code size in bytes, as computed at the end of pass 2.
uint emit_get_code_size(emitter_t* emit) {
return emit->code_size;
}
// Return the code buffer (NULL until it is allocated at the end of pass 2).
void* emit_get_code(emitter_t* emit) {
return emit->code_base;
}
// Begin pass `pass` over `scope`: reset the per-pass state, then do the
// pass-specific setup.  Pass 1 gives the scope a fresh unique code id; later
// passes make sure the label offset table exists, and pass 2 additionally
// resets every entry of it to -1.
void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) {
    // per-pass state always starts from scratch
    emit->pass = pass;
    emit->scope = scope;
    emit->next_label = 1;
    emit->stack_size = 0;
    emit->code_offset = 0;
    emit->last_emit_was_return_value = false;

    if (pass == PASS_1) {
        scope->unique_code_id = rt_get_new_unique_code_id();
        return;
    }

    if (pass > PASS_1) {
        if (emit->label_offsets == NULL) {
            emit->label_offsets = m_new(uint, emit->max_num_labels);
        }
        if (pass == PASS_2) {
            memset(emit->label_offsets, -1, emit->max_num_labels * sizeof(uint));
        }
    }
}
// Finish the current pass.  Warns if the tracked stack size is not back to
// zero, then: pass 1 records how many labels are needed, pass 2 fixes the
// code size and allocates the code buffer, pass 3 hands the finished byte
// code to the runtime.
void emit_end_pass(emitter_t *emit) {
    // check stack is back to zero size
    if (emit->stack_size != 0) {
        printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size);
    }

    switch (emit->pass) {
        case PASS_1:
            // remember the largest number of labels seen so far
            if (emit->max_num_labels < emit->next_label) {
                emit->max_num_labels = emit->next_label;
            }
            break;

        case PASS_2:
            // the offset reached in this pass is the size of the code in bytes
            emit->code_size = emit->code_offset;
            emit->code_base = m_new(byte, emit->code_size);
            printf("code_size: %u\n", emit->code_size);
            break;

        case PASS_3:
            rt_assign_byte_code(emit->scope->unique_code_id, emit->code_base, emit->code_size, emit->scope->num_params);
            break;

        default:
            break;
    }
}
// all functions must go through this one to emit bytes
//
// Reserves num_bytes_to_write bytes at the current code offset and returns
// where the caller should write them.  Before pass 3 only the offset is
// advanced and the returned pointer is the emitter's scratch area; in pass 3
// the pointer is into the real code buffer.
static byte* emit_get_cur_to_write_bytes(emitter_t* emit, int num_bytes_to_write) {
    byte *dest;
    if (emit->pass >= PASS_3) {
        assert(emit->code_offset + num_bytes_to_write <= emit->code_size);
        dest = emit->code_base + emit->code_offset;
    } else {
        dest = emit->dummy_data;
    }
    emit->code_offset += num_bytes_to_write;
    return dest;
}
// Emit a single byte.
static void emit_write_byte_1(emitter_t* emit, byte b1) {
byte* c = emit_get_cur_to_write_bytes(emit, 1);
c[0] = b1;
}
// Emit b1 followed by a one-byte operand; b2 must fit in 8 bits.
static void emit_write_byte_1_byte(emitter_t* emit, byte b1, uint b2) {
assert((b2 & (~0xff)) == 0);
byte* c = emit_get_cur_to_write_bytes(emit, 2);
c[0] = b1;
c[1] = b2;
}
// Emit b1 followed by num as two bytes, low byte first.  The assert accepts
// values whose bits above the low 15 are all clear or all set.
static void emit_write_byte_1_int(emitter_t* emit, byte b1, int num) {
assert((num & (~0x7fff)) == 0 || (num & (~0x7fff)) == (~0x7fff));
byte* c = emit_get_cur_to_write_bytes(emit, 3);
c[0] = b1;
c[1] = num;
c[2] = num >> 8;
}
// Emit b1 followed by num as a variable-length unsigned operand:
//  - num <= 0x7f is stored in a single byte;
//  - num <= 0x3fff is stored in two bytes, the first holding the high bits
//    with 0x80 set, the second holding the low 8 bits;
//  - anything larger is not supported.
static void emit_write_byte_1_uint(emitter_t* emit, byte b1, uint num) {
    if (num <= 0x7f) {
        byte* out = emit_get_cur_to_write_bytes(emit, 2);
        out[0] = b1;
        out[1] = num;
        return;
    }

    if (num <= 0x3fff) {
        byte* out = emit_get_cur_to_write_bytes(emit, 3);
        out[0] = b1;
        out[1] = (num >> 8) | 0x80;
        out[2] = num;
        return;
    }

    // larger numbers not implemented/supported
    assert(0);
}
// Emit b1 followed by a qstr operand, using the same encoding as an unsigned operand.
static void emit_write_byte_1_qstr(emitter_t* emit, byte b1, qstr qstr) {
emit_write_byte_1_uint(emit, b1, qstr);
}
// Emit b1 followed by the code offset of `label`.
//
// The operand always takes exactly two bytes (high bits with 0x80 set, then
// the low 8 bits), whatever the offset is.  The offset is only known in
// pass 3; if the operand size depended on its value, the instruction could
// be longer in pass 3 than in the sizing passes, and the code size and label
// offsets computed in pass 2 would no longer match.  The two-byte form is the
// same one emit_write_byte_1_uint produces for values above 127, so the
// byte code reader needs no change.
static void emit_write_byte_1_label(emitter_t* emit, byte b1, int label) {
    uint code_offset = 0;
    if (emit->pass >= PASS_3) {
        assert(label >= 0 && label < emit->max_num_labels);
        code_offset = emit->label_offsets[label];
        // also catches a label that was never assigned (offset still -1)
        assert(code_offset <= 0x3fff);
    }
    byte* c = emit_get_cur_to_write_bytes(emit, 3);
    c[0] = b1;
    c[1] = (code_offset >> 8) | 0x80;
    c[2] = code_offset;
}
// Return the last_emit_was_return_value flag (emit_pre clears it before every emit).
bool emit_last_emit_was_return_value(emitter_t *emit) {
return emit->last_emit_was_return_value;
}
// Return the currently tracked stack size.
int emit_get_stack_size(emitter_t *emit) {
return emit->stack_size;
}
// Override the tracked stack size; ignored during pass 1.
void emit_set_stack_size(emitter_t *emit, int size) {
if (emit->pass > PASS_1) {
emit->stack_size = size;
}
}
// Bookkeeping done before every emit: clear the "last emit was a return"
// flag and, after pass 1, apply stack_size_delta to the tracked stack size,
// raising the scope's recorded maximum if the new size exceeds it.
static void emit_pre(emitter_t *emit, int stack_size_delta) {
    emit->last_emit_was_return_value = false;
    if (emit->pass <= PASS_1) {
        return;
    }
    emit->stack_size += stack_size_delta;
    if (emit->scope->stack_size < emit->stack_size) {
        emit->scope->stack_size = emit->stack_size;
    }
}
// Hand out the next label number (numbering restarts at 1 on every pass).
int emit_label_new(emitter_t *emit) {
return emit->next_label++;
}
// Bind label `l` to the current code offset.  Pass 2 records the offset (the
// label must not have been assigned already); pass 3 checks that the offset
// is the same as the one recorded in pass 2.  Pass 1 records nothing.
void emit_label_assign(emitter_t *emit, int l) {
    emit_pre(emit, 0);
    if (emit->pass <= PASS_1) {
        return;
    }

    assert(l < emit->max_num_labels);
    switch (emit->pass) {
        case PASS_2:
            // assign label offset
            assert(emit->label_offsets[l] == -1);
            emit->label_offsets[l] = emit->code_offset;
            break;

        case PASS_3:
            // ensure label offset has not changed from PASS_2 to PASS_3
            assert(emit->label_offsets[l] == emit->code_offset);
            break;

        default:
            break;
    }
}
// Emit IMPORT_NAME with a qstr operand; tracked stack size changes by -1.
void emit_import_name(emitter_t *emit, qstr qstr) {
emit_pre(emit, -1);
emit_write_byte_1_qstr(emit, PYBC_IMPORT_NAME, qstr);
}
// Emit IMPORT_FROM with a qstr operand; tracked stack size changes by +1.
void emit_import_from(emitter_t *emit, qstr qstr) {
emit_pre(emit, 1);
emit_write_byte_1_qstr(emit, PYBC_IMPORT_FROM, qstr);
}
// Emit IMPORT_STAR; tracked stack size changes by -1.
void emit_import_star(emitter_t *emit) {
emit_pre(emit, -1);
emit_write_byte_1(emit, PYBC_IMPORT_STAR);
}
// Emit the load of the constant named by keyword token `tok` (False, None or
// True); any other token trips assert(0).  Tracked stack size changes by +1.
void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) {
    emit_pre(emit, 1);
    if (tok == PY_TOKEN_KW_FALSE) {
        emit_write_byte_1(emit, PYBC_LOAD_CONST_FALSE);
    } else if (tok == PY_TOKEN_KW_NONE) {
        emit_write_byte_1(emit, PYBC_LOAD_CONST_NONE);
    } else if (tok == PY_TOKEN_KW_TRUE) {
        emit_write_byte_1(emit, PYBC_LOAD_CONST_TRUE);
    } else {
        assert(0);
    }
}
// The four emitters below each push one constant (tracked stack size +1).

// Emit LOAD_CONST_SMALL_INT with `arg` as a two-byte operand.
void emit_load_const_small_int(emitter_t *emit, int arg) {
emit_pre(emit, 1);
emit_write_byte_1_int(emit, PYBC_LOAD_CONST_SMALL_INT, arg);
}
// Emit LOAD_CONST_INT with a qstr operand.
void emit_load_const_int(emitter_t *emit, qstr qstr) {
emit_pre(emit, 1);
emit_write_byte_1_qstr(emit, PYBC_LOAD_CONST_INT, qstr);
}
// Emit LOAD_CONST_DEC with a qstr operand.
void emit_load_const_dec(emitter_t *emit, qstr qstr) {
emit_pre(emit, 1);
emit_write_byte_1_qstr(emit, PYBC_LOAD_CONST_DEC, qstr);
}
// Emit LOAD_CONST_ID with a qstr operand.
void emit_load_const_id(emitter_t *emit, qstr qstr) {
emit_pre(emit, 1);
emit_write_byte_1_qstr(emit, PYBC_LOAD_CONST_ID, qstr);
}
// Emit the load of a string constant given by qstr: LOAD_CONST_BYTES when
// `bytes` is set, LOAD_CONST_STRING otherwise.  Tracked stack size changes by +1.
void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) {
    emit_pre(emit, 1);
    emit_write_byte_1_qstr(emit, bytes ? PYBC_LOAD_CONST_BYTES : PYBC_LOAD_CONST_STRING, qstr);
}
// Verbatim constants are not implemented by this emitter: every function in
// this group trips assert(0).  Only the _start variant touches the tracked
// stack size (+1) before asserting.
void emit_load_const_verbatim_start(emitter_t *emit) {
emit_pre(emit, 1);
assert(0);
}
void emit_load_const_verbatim_int(emitter_t *emit, int val) {
assert(0);
}
void emit_load_const_verbatim_str(emitter_t *emit, const char *str) {
assert(0);
}
void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) {
assert(0);
}
void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) {
assert(0);
}
void emit_load_const_verbatim_end(emitter_t *emit) {
assert(0);
}
// Emit the load of local variable `local_num`.  Locals 0, 1 and 2 have
// dedicated one-byte opcodes; any other local uses LOAD_FAST_N with the local
// number as operand.  Tracked stack size changes by +1.
void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) {
    assert(local_num >= 0);
    emit_pre(emit, 1);
    if (local_num == 0) {
        emit_write_byte_1(emit, PYBC_LOAD_FAST_0);
    } else if (local_num == 1) {
        emit_write_byte_1(emit, PYBC_LOAD_FAST_1);
    } else if (local_num == 2) {
        emit_write_byte_1(emit, PYBC_LOAD_FAST_2);
    } else {
        emit_write_byte_1_uint(emit, PYBC_LOAD_FAST_N, local_num);
    }
}
// Emit LOAD_NAME with a qstr operand; tracked stack size +1.
void emit_load_name(emitter_t *emit, qstr qstr) {
emit_pre(emit, 1);
emit_write_byte_1_qstr(emit, PYBC_LOAD_NAME, qstr);
}
// Emit LOAD_GLOBAL with a qstr operand; tracked stack size +1.
void emit_load_global(emitter_t *emit, qstr qstr) {
emit_pre(emit, 1);
emit_write_byte_1_qstr(emit, PYBC_LOAD_GLOBAL, qstr);
}
// Not implemented: adjusts the tracked stack size by +1, then trips assert(0).
void emit_load_deref(emitter_t *emit, qstr qstr) {
emit_pre(emit, 1);
assert(0);
}
// Not implemented: adjusts the tracked stack size by +1, then trips assert(0).
void emit_load_closure(emitter_t *emit, qstr qstr) {
emit_pre(emit, 1);
assert(0);
}
// Emit LOAD_ATTR with a qstr operand; tracked stack size unchanged.
void emit_load_attr(emitter_t *emit, qstr qstr) {
emit_pre(emit, 0);
emit_write_byte_1_qstr(emit, PYBC_LOAD_ATTR, qstr);
}
// Emit LOAD_METHOD with a qstr operand; tracked stack size unchanged.
// NOTE(review): the interpreter's LOAD_METHOD lowers sp by one entry, which a
// delta of 0 here does not reflect - confirm.
void emit_load_method(emitter_t *emit, qstr qstr) {
emit_pre(emit, 0);
emit_write_byte_1_qstr(emit, PYBC_LOAD_METHOD, qstr);
}
// Emit LOAD_BUILD_CLASS; tracked stack size +1.
void emit_load_build_class(emitter_t *emit) {
emit_pre(emit, 1);
emit_write_byte_1(emit, PYBC_LOAD_BUILD_CLASS);
}
// Emit the store to local variable `local_num`.  Locals 0, 1 and 2 have
// dedicated one-byte opcodes; any other local uses STORE_FAST_N with the
// local number as operand.  Tracked stack size changes by -1.
void emit_store_fast(emitter_t *emit, qstr qstr, int local_num) {
    assert(local_num >= 0);
    emit_pre(emit, -1);
    if (local_num == 0) {
        emit_write_byte_1(emit, PYBC_STORE_FAST_0);
    } else if (local_num == 1) {
        emit_write_byte_1(emit, PYBC_STORE_FAST_1);
    } else if (local_num == 2) {
        emit_write_byte_1(emit, PYBC_STORE_FAST_2);
    } else {
        emit_write_byte_1_uint(emit, PYBC_STORE_FAST_N, local_num);
    }
}
// Emit STORE_NAME with a qstr operand; tracked stack size -1.
void emit_store_name(emitter_t *emit, qstr qstr) {
emit_pre(emit, -1);
emit_write_byte_1_qstr(emit, PYBC_STORE_NAME, qstr);
}
// Emit STORE_GLOBAL with a qstr operand; tracked stack size -1.
void emit_store_global(emitter_t *emit, qstr qstr) {
emit_pre(emit, -1);
emit_write_byte_1_qstr(emit, PYBC_STORE_GLOBAL, qstr);
}
// Not implemented: adjusts the tracked stack size by -1, then trips assert(0).
void emit_store_deref(emitter_t *emit, qstr qstr) {
emit_pre(emit, -1);
assert(0);
}
// Emit STORE_ATTR with a qstr operand; tracked stack size -2.
void emit_store_attr(emitter_t *emit, qstr qstr) {
emit_pre(emit, -2);
emit_write_byte_1_qstr(emit, PYBC_STORE_ATTR, qstr);
}
// Emit STORE_LOCALS; tracked stack size -1.
void emit_store_locals(emitter_t *emit) {
emit_pre(emit, -1);
emit_write_byte_1(emit, PYBC_STORE_LOCALS);
}
// Emit STORE_SUBSCR; tracked stack size -3.
void emit_store_subscr(emitter_t *emit) {
emit_pre(emit, -3);
emit_write_byte_1(emit, PYBC_STORE_SUBSCR);
}
// Emit DELETE_FAST_N with the local number as operand; tracked stack size unchanged.
void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) {
assert(local_num >= 0);
emit_pre(emit, 0);
emit_write_byte_1_uint(emit, PYBC_DELETE_FAST_N, local_num);
}
// Emit DELETE_NAME with a qstr operand; tracked stack size unchanged.
void emit_delete_name(emitter_t *emit, qstr qstr) {
emit_pre(emit, 0);
emit_write_byte_1_qstr(emit, PYBC_DELETE_NAME, qstr);
}
// Emit DELETE_GLOBAL with a qstr operand; tracked stack size unchanged.
void emit_delete_global(emitter_t *emit, qstr qstr) {
emit_pre(emit, 0);
emit_write_byte_1_qstr(emit, PYBC_DELETE_GLOBAL, qstr);
}
// Emit DELETE_DEREF with a qstr operand; tracked stack size unchanged.
void emit_delete_deref(emitter_t *emit, qstr qstr) {
emit_pre(emit, 0);
emit_write_byte_1_qstr(emit, PYBC_DELETE_DEREF, qstr);
}
// Emit DELETE_ATTR with a qstr operand; tracked stack size -1.
void emit_delete_attr(emitter_t *emit, qstr qstr) {
emit_pre(emit, -1);
emit_write_byte_1_qstr(emit, PYBC_DELETE_ATTR, qstr);
}
// Emit DELETE_SUBSCR; tracked stack size -2.
void emit_delete_subscr(emitter_t *emit) {
emit_pre(emit, -2);
emit_write_byte_1(emit, PYBC_DELETE_SUBSCR);
}
// Emit DUP_TOP; tracked stack size +1.
void emit_dup_top(emitter_t *emit) {
emit_pre(emit, 1);
emit_write_byte_1(emit, PYBC_DUP_TOP);
}
// Emit DUP_TOP_TWO; tracked stack size +2.
void emit_dup_top_two(emitter_t *emit) {
emit_pre(emit, 2);
emit_write_byte_1(emit, PYBC_DUP_TOP_TWO);
}
// Emit POP_TOP; tracked stack size -1.
void emit_pop_top(emitter_t *emit) {
emit_pre(emit, -1);
emit_write_byte_1(emit, PYBC_POP_TOP);
}
// Emit ROT_TWO; tracked stack size unchanged.
void emit_rot_two(emitter_t *emit) {
emit_pre(emit, 0);
emit_write_byte_1(emit, PYBC_ROT_TWO);
}
// Emit ROT_THREE; tracked stack size unchanged.
void emit_rot_three(emitter_t *emit) {
emit_pre(emit, 0);
emit_write_byte_1(emit, PYBC_ROT_THREE);
}
// Jump and loop emitters: each writes its opcode followed by the code offset
// of `label` as operand.

// Emit JUMP; tracked stack size unchanged.
void emit_jump(emitter_t *emit, int label) {
emit_pre(emit, 0);
emit_write_byte_1_label(emit, PYBC_JUMP, label);
}
// Emit POP_JUMP_IF_TRUE; tracked stack size -1.
void emit_pop_jump_if_true(emitter_t *emit, int label) {
emit_pre(emit, -1);
emit_write_byte_1_label(emit, PYBC_POP_JUMP_IF_TRUE, label);
}
// Emit POP_JUMP_IF_FALSE; tracked stack size -1.
void emit_pop_jump_if_false(emitter_t *emit, int label) {
emit_pre(emit, -1);
emit_write_byte_1_label(emit, PYBC_POP_JUMP_IF_FALSE, label);
}
// Emit JUMP_IF_TRUE_OR_POP; tracked stack size -1.
void emit_jump_if_true_or_pop(emitter_t *emit, int label) {
emit_pre(emit, -1);
emit_write_byte_1_label(emit, PYBC_JUMP_IF_TRUE_OR_POP, label);
}
// Emit JUMP_IF_FALSE_OR_POP; tracked stack size -1.
void emit_jump_if_false_or_pop(emitter_t *emit, int label) {
emit_pre(emit, -1);
emit_write_byte_1_label(emit, PYBC_JUMP_IF_FALSE_OR_POP, label);
}
// Emit SETUP_LOOP; tracked stack size unchanged.
void emit_setup_loop(emitter_t *emit, int label) {
emit_pre(emit, 0);
emit_write_byte_1_label(emit, PYBC_SETUP_LOOP, label);
}
// Emit BREAK_LOOP; tracked stack size unchanged.
void emit_break_loop(emitter_t *emit, int label) {
emit_pre(emit, 0);
emit_write_byte_1_label(emit, PYBC_BREAK_LOOP, label);
}
// Emit CONTINUE_LOOP; tracked stack size unchanged.
void emit_continue_loop(emitter_t *emit, int label) {
emit_pre(emit, 0);
emit_write_byte_1_label(emit, PYBC_CONTINUE_LOOP, label);
}
// Write PYBC_SETUP_WITH with `label` as its operand.
// emit_pre is given +7, presumably the stack slots reserved for the with-block.
void emit_setup_with(emitter_t *emit, int label) {
    emit_pre(emit, 7);
    emit_write_byte_1_label(emit, PYBC_SETUP_WITH, label);
}
// Write the single-byte opcode PYBC_WITH_CLEANUP.
// emit_pre is given -7, the negation of the value used by emit_setup_with.
void emit_with_cleanup(emitter_t *emit) {
    emit_pre(emit, -7);
    emit_write_byte_1(emit, PYBC_WITH_CLEANUP);
}
// Write PYBC_SETUP_EXCEPT with `label` as its operand.
// emit_pre is given +6, presumably the net stack-size change.
void emit_setup_except(emitter_t *emit, int label) {
    emit_pre(emit, 6);
    emit_write_byte_1_label(emit, PYBC_SETUP_EXCEPT, label);
}
// Write PYBC_SETUP_FINALLY with `label` as its operand.
// emit_pre is given +6, presumably the net stack-size change.
void emit_setup_finally(emitter_t *emit, int label) {
    emit_pre(emit, 6);
    emit_write_byte_1_label(emit, PYBC_SETUP_FINALLY, label);
}
// Write the single-byte opcode PYBC_END_FINALLY.
// emit_pre is given -1, presumably the net stack-size change.
void emit_end_finally(emitter_t *emit) {
    emit_pre(emit, -1);
    emit_write_byte_1(emit, PYBC_END_FINALLY);
}
// Write the single-byte opcode PYBC_GET_ITER.
// emit_pre is given 0, presumably the net stack-size change.
void emit_get_iter(emitter_t *emit) {
    emit_pre(emit, 0);
    emit_write_byte_1(emit, PYBC_GET_ITER);
}
// Write PYBC_FOR_ITER with `label` as its operand.
// emit_pre is given +1, presumably the net stack-size change.
void emit_for_iter(emitter_t *emit, int label) {
    emit_pre(emit, 1);
    emit_write_byte_1_label(emit, PYBC_FOR_ITER, label);
}
// Bookkeeping only: calls emit_pre with -1 and writes no opcode.
void emit_for_iter_end(emitter_t *emit) {
    emit_pre(emit, -1);
}
// Write the single-byte opcode PYBC_POP_BLOCK.
// emit_pre is given 0, presumably the net stack-size change.
void emit_pop_block(emitter_t *emit) {
    emit_pre(emit, 0);
    emit_write_byte_1(emit, PYBC_POP_BLOCK);
}
// Write the single-byte opcode PYBC_POP_EXCEPT.
// emit_pre is given 0, presumably the net stack-size change.
void emit_pop_except(emitter_t *emit) {
    emit_pre(emit, 0);
    emit_write_byte_1(emit, PYBC_POP_EXCEPT);
}
// Write PYBC_UNARY_OP followed by `op` as a one-byte operand.
// emit_pre is given 0, presumably the net stack-size change.
void emit_unary_op(emitter_t *emit, rt_unary_op_t op) {
    emit_pre(emit, 0);
    emit_write_byte_1_byte(emit, PYBC_UNARY_OP, op);
}
// Write PYBC_BINARY_OP followed by `op` as a one-byte operand.
// emit_pre is given -1, presumably the net stack-size change.
void emit_binary_op(emitter_t *emit, rt_binary_op_t op) {
    emit_pre(emit, -1);
    emit_write_byte_1_byte(emit, PYBC_BINARY_OP, op);
}
// Write PYBC_COMPARE_OP followed by `op` as a one-byte operand.
// emit_pre is given -1, presumably the net stack-size change.
void emit_compare_op(emitter_t *emit, rt_compare_op_t op) {
    emit_pre(emit, -1);
    emit_write_byte_1_byte(emit, PYBC_COMPARE_OP, op);
}
// Write PYBC_BUILD_TUPLE with n_args as its uint operand.
// n_args must be non-negative; emit_pre is given 1 - n_args.
void emit_build_tuple(emitter_t *emit, int n_args) {
    assert(n_args >= 0);
    emit_pre(emit, 1 - n_args);
    emit_write_byte_1_uint(emit, PYBC_BUILD_TUPLE, n_args);
}
// Write PYBC_BUILD_LIST with n_args as its uint operand.
// n_args must be non-negative; emit_pre is given 1 - n_args.
void emit_build_list(emitter_t *emit, int n_args) {
    assert(n_args >= 0);
    emit_pre(emit, 1 - n_args);
    emit_write_byte_1_uint(emit, PYBC_BUILD_LIST, n_args);
}
// Write PYBC_LIST_APPEND with list_stack_index as its uint operand.
// list_stack_index must be non-negative; emit_pre is given -1.
void emit_list_append(emitter_t *emit, int list_stack_index) {
    assert(list_stack_index >= 0);
    emit_pre(emit, -1);
    emit_write_byte_1_uint(emit, PYBC_LIST_APPEND, list_stack_index);
}
// Write PYBC_BUILD_MAP with n_args as its uint operand.
// n_args must be non-negative; emit_pre is given +1 regardless of n_args.
void emit_build_map(emitter_t *emit, int n_args) {
    assert(n_args >= 0);
    emit_pre(emit, 1);
    emit_write_byte_1_uint(emit, PYBC_BUILD_MAP, n_args);
}
// Write the single-byte opcode PYBC_STORE_MAP.
// emit_pre is given -2, presumably the net stack-size change.
void emit_store_map(emitter_t *emit) {
    emit_pre(emit, -2);
    emit_write_byte_1(emit, PYBC_STORE_MAP);
}
// Write PYBC_MAP_ADD with map_stack_index as its uint operand.
// map_stack_index must be non-negative; emit_pre is given -2.
void emit_map_add(emitter_t *emit, int map_stack_index) {
    assert(map_stack_index >= 0);
    emit_pre(emit, -2);
    emit_write_byte_1_uint(emit, PYBC_MAP_ADD, map_stack_index);
}
// Write PYBC_BUILD_SET with n_args as its uint operand.
// n_args must be non-negative; emit_pre is given 1 - n_args.
void emit_build_set(emitter_t *emit, int n_args) {
    assert(n_args >= 0);
    emit_pre(emit, 1 - n_args);
    emit_write_byte_1_uint(emit, PYBC_BUILD_SET, n_args);
}
// Write PYBC_SET_ADD with set_stack_index as its uint operand.
// set_stack_index must be non-negative; emit_pre is given -1.
void emit_set_add(emitter_t *emit, int set_stack_index) {
    assert(set_stack_index >= 0);
    emit_pre(emit, -1);
    emit_write_byte_1_uint(emit, PYBC_SET_ADD, set_stack_index);
}
// Write PYBC_BUILD_SLICE with n_args as its uint operand.
// n_args must be non-negative; emit_pre is given 1 - n_args.
void emit_build_slice(emitter_t *emit, int n_args) {
    assert(n_args >= 0);
    emit_pre(emit, 1 - n_args);
    emit_write_byte_1_uint(emit, PYBC_BUILD_SLICE, n_args);
}
// Write PYBC_UNPACK_SEQUENCE with n_args as its uint operand.
// n_args must be non-negative; emit_pre is given n_args - 1.
void emit_unpack_sequence(emitter_t *emit, int n_args) {
    assert(n_args >= 0);
    emit_pre(emit, -1 + n_args);
    emit_write_byte_1_uint(emit, PYBC_UNPACK_SEQUENCE, n_args);
}
// Write PYBC_UNPACK_EX. The operand packs n_left into the low 8 bits and
// n_right into the bits above them. Both counts must be non-negative.
void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) {
    assert(n_left >=0 && n_right >= 0);
    emit_pre(emit, -1 + n_left + n_right + 1);
    emit_write_byte_1_uint(emit, PYBC_UNPACK_EX, n_left | (n_right << 8));
}
// Write PYBC_MAKE_FUNCTION with the scope's unique_code_id as its operand.
// Default and dict parameters are not supported here: both counts must be 0.
void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) {
    assert(n_default_params == 0 && n_dict_params == 0);
    emit_pre(emit, 1);
    emit_write_byte_1_uint(emit, PYBC_MAKE_FUNCTION, scope->unique_code_id);
}
// Closures are not implemented in this emitter: the function asserts
// unconditionally. If asserts are compiled out it only adjusts the stack
// bookkeeping and, in PASS_3, prints a textual MAKE_CLOSURE line; no byte
// code is written.
void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) {
    assert(0);
    emit_pre(emit, -2 - n_default_params - 2 * n_dict_params);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("MAKE_CLOSURE %d\n", (n_dict_params << 8) | n_default_params);
}
// Write one of the four PYBC_CALL_FUNCTION* opcodes, chosen by whether a
// star argument and/or a double-star argument is present. The operand packs
// n_positional into the low 8 bits and n_keyword above them.
void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
    // each of the star / double-star arguments contributes one to the amount
    // subtracted in the value handed to emit_pre
    int n_extra = (have_star_arg ? 1 : 0) + (have_dbl_star_arg ? 1 : 0);
    emit_pre(emit, -n_positional - 2 * n_keyword - n_extra);

    int op = have_star_arg
        ? (have_dbl_star_arg ? PYBC_CALL_FUNCTION_VAR_KW : PYBC_CALL_FUNCTION_VAR)
        : (have_dbl_star_arg ? PYBC_CALL_FUNCTION_KW : PYBC_CALL_FUNCTION);
    emit_write_byte_1_uint(emit, op, (n_keyword << 8) | n_positional); // TODO make it 2 separate uints
}
// Write one of the four PYBC_CALL_METHOD* opcodes, chosen by whether a
// star argument and/or a double-star argument is present. The operand packs
// n_positional into the low 8 bits and n_keyword above them.
void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
    // each of the star / double-star arguments contributes one to the amount
    // subtracted in the value handed to emit_pre
    int n_extra = (have_star_arg ? 1 : 0) + (have_dbl_star_arg ? 1 : 0);
    emit_pre(emit, -n_positional - 2 * n_keyword - n_extra);

    int op = have_star_arg
        ? (have_dbl_star_arg ? PYBC_CALL_METHOD_VAR_KW : PYBC_CALL_METHOD_VAR)
        : (have_dbl_star_arg ? PYBC_CALL_METHOD_KW : PYBC_CALL_METHOD);
    emit_write_byte_1_uint(emit, op, (n_keyword << 8) | n_positional); // TODO make it 2 separate uints
}
// Write the single-byte opcode PYBC_RETURN_VALUE and record that the most
// recent emit was a return. The flag is set after emit_pre has run.
void emit_return_value(emitter_t *emit) {
    emit_pre(emit, -1);
    emit->last_emit_was_return_value = true;
    emit_write_byte_1(emit, PYBC_RETURN_VALUE);
}
// Write PYBC_RAISE_VARARGS with n_args as its uint operand.
// n_args must be non-negative; emit_pre is given -n_args.
void emit_raise_varargs(emitter_t *emit, int n_args) {
    assert(n_args >= 0);
    emit_pre(emit, -n_args);
    emit_write_byte_1_uint(emit, PYBC_RAISE_VARARGS, n_args);
}
// Write the single-byte opcode PYBC_YIELD_VALUE.
// During PASS_2 the enclosing scope is also flagged as a generator.
void emit_yield_value(emitter_t *emit) {
    emit_pre(emit, 0);
    bool in_pass_2 = (emit->pass == PASS_2);
    if (in_pass_2) {
        emit->scope->flags |= SCOPE_FLAG_GENERATOR;
    }
    emit_write_byte_1(emit, PYBC_YIELD_VALUE);
}
// Write the single-byte opcode PYBC_YIELD_FROM.
// During PASS_2 the enclosing scope is also flagged as a generator.
void emit_yield_from(emitter_t *emit) {
    emit_pre(emit, -1);
    bool in_pass_2 = (emit->pass == PASS_2);
    if (in_pass_2) {
        emit->scope->flags |= SCOPE_FLAG_GENERATOR;
    }
    emit_write_byte_1(emit, PYBC_YIELD_FROM);
}
#endif // EMIT_DO_BC

171
py/emitcommon.c Normal file
View File

@ -0,0 +1,171 @@
#include <unistd.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
#include "misc.h"
#include "lexer.h"
#include "machine.h"
#include "parse.h"
#include "scope.h"
#include "runtime.h"
#include "emit.h"
// EMIT(fun, args...) expands to emit_<fun>(emit, args...). It relies on a
// variable named `emit` being in scope at the point of use, and uses the GNU
// named-variadic-macro form (`arg...` with `##arg`).
#define EMIT(fun, arg...) (emit_##fun(emit, ##arg))
// Handle a `global` declaration of `qstr` in `scope`. Only acts in PASS_1.
// Prints a diagnostic and gives up when the scope is a module or when the
// identifier already existed in the scope. Otherwise marks the identifier as
// an explicit global, both in this scope and (if found there) in the global
// scope.
void emit_common_declare_global(pass_kind_t pass, scope_t *scope, qstr qstr) {
    if (pass != PASS_1) {
        return;
    }
    if (scope->kind == SCOPE_MODULE) {
        printf("SyntaxError?: can't declare global in outer code\n");
        return;
    }

    bool newly_added;
    id_info_t *local_info = scope_find_or_add_id(scope, qstr, &newly_added);
    if (!newly_added) {
        printf("SyntaxError?: identifier already declared something\n");
        return;
    }
    local_info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT;

    // if the id exists in the global scope, set its kind to EXPLICIT_GLOBAL
    id_info_t *global_info = scope_find_global(scope, qstr);
    if (global_info != NULL) {
        global_info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT;
    }
}
// Handle a `nonlocal` declaration of `qstr` in `scope`. Only acts in PASS_1.
// Prints a diagnostic and gives up when the scope is a module, when the
// identifier already existed in the scope, or when no local/cell/free binding
// is found via scope_find_local_in_parent. Otherwise marks the identifier as
// free and closes over it in the parent scopes.
void emit_common_declare_nonlocal(pass_kind_t pass, scope_t *scope, qstr qstr) {
    if (pass != PASS_1) {
        return;
    }
    if (scope->kind == SCOPE_MODULE) {
        printf("SyntaxError?: can't declare nonlocal in outer code\n");
        return;
    }

    bool newly_added;
    id_info_t *local_info = scope_find_or_add_id(scope, qstr, &newly_added);
    if (!newly_added) {
        printf("SyntaxError?: identifier already declared something\n");
        return;
    }

    id_info_t *outer_info = scope_find_local_in_parent(scope, qstr);
    if (outer_info == NULL
        || (outer_info->kind != ID_INFO_KIND_LOCAL
            && outer_info->kind != ID_INFO_KIND_CELL
            && outer_info->kind != ID_INFO_KIND_FREE)) {
        printf("SyntaxError: no binding for nonlocal '%s' found\n", qstr_str(qstr));
        return;
    }

    local_info->kind = ID_INFO_KIND_FREE;
    scope_close_over_in_parents(scope, qstr);
}
// Resolve identifier `qstr` for a load and emit the matching load opcode.
//
// In PASS_1 the identifier is looked up or added in `scope`; when it is newly
// added its kind is decided here:
//   - "AssertionError" becomes an explicit global;
//   - "super" inside a function scope becomes an explicit global and, if
//     `qstr___class__` is found via scope_find_local_in_parent, that name is
//     added to this scope as a free variable and closed over;
//   - anything bound as local/cell/free in a parent becomes a free variable;
//   - everything else becomes an implicit global.
// In later passes the identifier is simply looked up with scope_find.
void emit_common_load_id(pass_kind_t pass, scope_t *scope, qstr qstr___class__, emitter_t *emit, qstr qstr) {
    id_info_t *id_info = NULL;

    if (pass != PASS_1) {
        id_info = scope_find(scope, qstr);
    } else {
        // name adding/lookup
        bool added;
        id_info = scope_find_or_add_id(scope, qstr, &added);
        if (added) {
            const char *name = qstr_str(qstr);
            if (strcmp(name, "AssertionError") == 0) {
                id_info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT;
                // TODO how much of a hack is this?
            } else if (strcmp(name, "super") == 0 && scope->kind == SCOPE_FUNCTION) {
                // special case, super is a global, and also counts as use of __class__
                id_info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT;
                if (scope_find_local_in_parent(scope, qstr___class__) != NULL) {
                    id_info_t *class_info = scope_find_or_add_id(scope, qstr___class__, &added);
                    if (added) {
                        class_info->kind = ID_INFO_KIND_FREE;
                        scope_close_over_in_parents(scope, qstr___class__);
                    }
                }
            } else {
                id_info_t *outer = scope_find_local_in_parent(scope, qstr);
                bool bound_in_parent = outer != NULL
                    && (outer->kind == ID_INFO_KIND_LOCAL
                        || outer->kind == ID_INFO_KIND_CELL
                        || outer->kind == ID_INFO_KIND_FREE);
                if (bound_in_parent) {
                    id_info->kind = ID_INFO_KIND_FREE;
                    scope_close_over_in_parents(scope, qstr);
                } else {
                    id_info->kind = ID_INFO_KIND_GLOBAL_IMPLICIT;
                }
            }
        }
    }

    assert(id_info != NULL); // TODO can this ever fail?

    // call the emit backend with the correct code
    if (id_info == NULL) {
        // only reachable when asserts are compiled out
        EMIT(load_name, qstr);
        return;
    }
    switch (id_info->kind) {
        case ID_INFO_KIND_GLOBAL_IMPLICIT:
            EMIT(load_name, qstr);
            break;
        case ID_INFO_KIND_GLOBAL_EXPLICIT:
            EMIT(load_global, qstr);
            break;
        case ID_INFO_KIND_LOCAL:
            EMIT(load_fast, qstr, id_info->local_num);
            break;
        case ID_INFO_KIND_CELL:
        case ID_INFO_KIND_FREE:
            EMIT(load_deref, qstr);
            break;
        default:
            assert(0);
            break;
    }
}
// Return the id_info for `qstr` when it is about to be stored to or deleted.
//
// In PASS_1 the identifier is looked up or added. A newly added identifier is
// an implicit global in module/class scopes and a local everywhere else. An
// existing implicit global in a scope whose kind lies in the range
// SCOPE_FUNCTION..SCOPE_GEN_EXPR is rebound as a local. In later passes the
// identifier is looked up with scope_find.
static id_info_t *get_id_for_modification(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr) {
    id_info_t *id_info = NULL;

    if (pass != PASS_1) {
        id_info = scope_find(scope, qstr);
    } else {
        // name adding/lookup
        bool added;
        id_info = scope_find_or_add_id(scope, qstr, &added);
        if (added) {
            bool is_module_or_class = (scope->kind == SCOPE_MODULE || scope->kind == SCOPE_CLASS);
            id_info->kind = is_module_or_class ? ID_INFO_KIND_GLOBAL_IMPLICIT : ID_INFO_KIND_LOCAL;
        } else if (scope->kind >= SCOPE_FUNCTION && scope->kind <= SCOPE_GEN_EXPR
                   && id_info->kind == ID_INFO_KIND_GLOBAL_IMPLICIT) {
            // rebind as a local variable
            id_info->kind = ID_INFO_KIND_LOCAL;
        }
    }

    assert(id_info != NULL); // TODO can this ever fail?
    return id_info;
}
// Resolve identifier `qstr` for a store and emit the store opcode that
// matches its kind (name / global / fast / deref).
void emit_common_store_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr) {
    // create/get the id info
    id_info_t *id = get_id_for_modification(pass, scope, emit, qstr);

    // call the emit backend with the correct code
    if (id == NULL) {
        EMIT(store_name, qstr);
        return;
    }
    switch (id->kind) {
        case ID_INFO_KIND_GLOBAL_IMPLICIT:
            EMIT(store_name, qstr);
            break;
        case ID_INFO_KIND_GLOBAL_EXPLICIT:
            EMIT(store_global, qstr);
            break;
        case ID_INFO_KIND_LOCAL:
            EMIT(store_fast, qstr, id->local_num);
            break;
        case ID_INFO_KIND_CELL:
        case ID_INFO_KIND_FREE:
            EMIT(store_deref, qstr);
            break;
        default:
            assert(0);
            break;
    }
}
// Resolve identifier `qstr` for a delete and emit the delete opcode that
// matches its kind (name / global / fast / deref).
void emit_common_delete_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr) {
    // create/get the id info
    id_info_t *id = get_id_for_modification(pass, scope, emit, qstr);

    // call the emit backend with the correct code
    if (id == NULL) {
        EMIT(delete_name, qstr);
        return;
    }
    switch (id->kind) {
        case ID_INFO_KIND_GLOBAL_IMPLICIT:
            EMIT(delete_name, qstr);
            break;
        case ID_INFO_KIND_GLOBAL_EXPLICIT:
            EMIT(delete_global, qstr);
            break;
        case ID_INFO_KIND_LOCAL:
            EMIT(delete_fast, qstr, id->local_num);
            break;
        case ID_INFO_KIND_CELL:
        case ID_INFO_KIND_FREE:
            EMIT(delete_deref, qstr);
            break;
        default:
            assert(0);
            break;
    }
}

834
py/emitcpy.c Normal file
View File

@ -0,0 +1,834 @@
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "misc.h"
#include "lexer.h"
#include "machine.h"
#include "parse.h"
#include "compile.h"
#include "scope.h"
#include "runtime.h"
#include "emit.h"
#ifdef EMIT_DO_CPY
// State of the emitter that prints a textual opcode listing in PASS_3
// (the EMIT_DO_CPY variant).
struct _emitter_t {
// current pass, set by emit_start_pass
int pass;
// next label number handed out by emit_label_new (reset to 1 each pass)
int next_label;
// running byte-code offset, advanced by emit_pre
int byte_code_offset;
// tracked stack depth, adjusted by emit_pre
int stack_size;
// cleared by every emit_pre; set again by emit_return_value
bool last_emit_was_return_value;
// scope being compiled in the current pass
scope_t *scope;
// largest next_label seen at the end of any PASS_1
int max_num_labels;
// byte-code offset per label; allocated lazily, reset to -1 at PASS_2
int *label_offsets;
};
// Allocate an emitter with no label table yet. The remaining fields are
// initialised by emit_start_pass.
emitter_t *emit_new() {
    emitter_t *e = m_new(emitter_t, 1);
    e->label_offsets = NULL;
    e->max_num_labels = 0;
    return e;
}
// Reset the per-pass state for compiling `scope`. After PASS_1 the label
// table is allocated on first use (max_num_labels entries); at PASS_2 every
// entry is reset to -1, meaning "not yet assigned".
void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) {
    emit->pass = pass;
    emit->scope = scope;
    emit->next_label = 1;
    emit->byte_code_offset = 0;
    emit->stack_size = 0;
    emit->last_emit_was_return_value = false;

    if (pass <= PASS_1) {
        return;
    }
    if (emit->label_offsets == NULL) {
        emit->label_offsets = m_new(int, emit->max_num_labels);
    }
    if (pass == PASS_2) {
        // all bytes 0xff gives -1 in every int
        memset(emit->label_offsets, -1, emit->max_num_labels * sizeof(int));
    }
}
// Finish a pass: report a non-zero final stack size, and in PASS_1 grow
// max_num_labels to cover the labels this scope handed out.
void emit_end_pass(emitter_t *emit) {
    // check stack is back to zero size
    if (emit->stack_size != 0) {
        printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size);
    }

    // calculate number of labels needed
    if (emit->pass == PASS_1 && emit->next_label > emit->max_num_labels) {
        emit->max_num_labels = emit->next_label;
    }
}
// Report whether the most recent emit was emit_return_value.
bool emit_last_emit_was_return_value(emitter_t *emit) {
    bool was_return = emit->last_emit_was_return_value;
    return was_return;
}
// Return the emitter's currently tracked stack size.
int emit_get_stack_size(emitter_t *emit) {
    int current = emit->stack_size;
    return current;
}
// Overwrite the emitter's tracked stack size with `size`.
void emit_set_stack_size(emitter_t *emit, int size) {
    emit->stack_size = size;
}
// Common prologue for every emit: apply the stack delta, track the scope's
// maximum stack size (after PASS_1), clear the "last emit was return" flag,
// print the current byte-code offset in PASS_3 for opcodes that take space,
// and finally advance the offset by byte_code_size.
static void emit_pre(emitter_t *emit, int stack_size_delta, int byte_code_size) {
    emit->stack_size += stack_size_delta;
    if (emit->pass > PASS_1) {
        if (emit->stack_size > emit->scope->stack_size) {
            emit->scope->stack_size = emit->stack_size;
        }
    }
    emit->last_emit_was_return_value = false;

    if (emit->pass == PASS_3 && byte_code_size > 0) {
        int offset = emit->byte_code_offset;
        if (offset < 1000) {
            // padded to a width of 4
            printf("% 4d ", offset);
        } else {
            printf("%d ", offset);
        }
    }
    emit->byte_code_offset += byte_code_size;
}
// Hand out the next label number and advance the counter.
int emit_label_new(emitter_t *emit) {
    int label = emit->next_label;
    emit->next_label += 1;
    return label;
}
// Bind label `l` to the current byte-code offset.
//
// PASS_1 only runs the emit_pre bookkeeping. In PASS_2 the offset is recorded
// (the label must not already be assigned). In PASS_3 the recorded offset is
// checked against the current one, so a layout change between passes is caught.
//
// `l` must be a valid index into label_offsets; the assertion checks both
// bounds so a negative label cannot index before the start of the table.
void emit_label_assign(emitter_t *emit, int l) {
    emit_pre(emit, 0, 0);
    if (emit->pass > PASS_1) {
        assert(l >= 0 && l < emit->max_num_labels);
        if (emit->pass == PASS_2) {
            // assign label offset
            assert(emit->label_offsets[l] == -1);
            emit->label_offsets[l] = emit->byte_code_offset;
        } else if (emit->pass == PASS_3) {
            // ensure label offset has not changed from PASS_2 to PASS_3
            assert(emit->label_offsets[l] == emit->byte_code_offset);
            //printf("l%d: (at %d)\n", l, emit->byte_code_offset);
        }
    }
}
// IMPORT_NAME <name>: stack delta -1, 3 bytes. Text is printed in PASS_3 only.
void emit_import_name(emitter_t *emit, qstr qstr) {
    emit_pre(emit, -1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("IMPORT_NAME %s\n", qstr_str(qstr));
}
// IMPORT_FROM <name>: stack delta +1, 3 bytes. Text is printed in PASS_3 only.
void emit_import_from(emitter_t *emit, qstr qstr) {
    emit_pre(emit, 1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("IMPORT_FROM %s\n", qstr_str(qstr));
}
// IMPORT_STAR: stack delta -1, 1 byte. Text is printed in PASS_3 only.
void emit_import_star(emitter_t *emit) {
    emit_pre(emit, -1, 1);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("IMPORT_STAR\n");
}
// LOAD_CONST for a keyword constant (False / None / True): stack delta +1,
// 3 bytes. Text is printed in PASS_3 only.
//
// Any other token is printed as "?=<numeric token kind>" so the listing stays
// readable. The original default case had an assert(0) placed after its
// `return`, which could never execute; that dead statement is removed and the
// printed output is unchanged.
void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) {
    emit_pre(emit, 1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("LOAD_CONST ");
    switch (tok) {
        case PY_TOKEN_KW_FALSE:
            printf("False");
            break;
        case PY_TOKEN_KW_NONE:
            printf("None");
            break;
        case PY_TOKEN_KW_TRUE:
            printf("True");
            break;
        default:
            // unexpected token kind
            printf("?=%d", tok);
            break;
    }
    printf("\n");
}
// LOAD_CONST <int>: stack delta +1, 3 bytes. Text is printed in PASS_3 only.
void emit_load_const_small_int(emitter_t *emit, int arg) {
    emit_pre(emit, 1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("LOAD_CONST %d\n", arg);
}
// LOAD_CONST with the qstr text printed verbatim: stack delta +1, 3 bytes.
// Text is printed in PASS_3 only.
void emit_load_const_int(emitter_t *emit, qstr qstr) {
    emit_pre(emit, 1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("LOAD_CONST %s\n", qstr_str(qstr));
}
// LOAD_CONST with the qstr text printed verbatim: stack delta +1, 3 bytes.
// Text is printed in PASS_3 only.
void emit_load_const_dec(emitter_t *emit, qstr qstr) {
    emit_pre(emit, 1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("LOAD_CONST %s\n", qstr_str(qstr));
}
// LOAD_CONST with the qstr text wrapped in single quotes: stack delta +1,
// 3 bytes. Text is printed in PASS_3 only.
void emit_load_const_id(emitter_t *emit, qstr qstr) {
    emit_pre(emit, 1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("LOAD_CONST '%s'\n", qstr_str(qstr));
}
// LOAD_CONST for a string or bytes literal: stack delta +1, 3 bytes. In
// PASS_3 the quoted form is printed by emit_load_const_verbatim_quoted_str.
void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) {
    emit_pre(emit, 1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("LOAD_CONST ");
    emit_load_const_verbatim_quoted_str(emit, qstr, bytes);
    printf("\n");
}
// Begin a LOAD_CONST line whose operand is printed piecewise by the
// emit_load_const_verbatim_* functions: stack delta +1, 3 bytes.
void emit_load_const_verbatim_start(emitter_t *emit) {
    emit_pre(emit, 1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("LOAD_CONST ");
}
// Print `val` as a decimal integer (PASS_3 only); no stack or offset change.
void emit_load_const_verbatim_int(emitter_t *emit, int val) {
    if (emit->pass != PASS_3) {
        return;
    }
    printf("%d", val);
}
// Print `str` as-is (PASS_3 only); no stack or offset change.
void emit_load_const_verbatim_str(emitter_t *emit, const char *str) {
    if (emit->pass != PASS_3) {
        return;
    }
    printf("%s", str);
}
// Print at most `len` characters of `str` (PASS_3 only); no stack or offset
// change.
void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) {
    if (emit->pass != PASS_3) {
        return;
    }
    printf("%.*s", len, str);
}
// Print the text of `qstr` as a quoted literal (PASS_3 only), prefixed with
// "b" when `bytes` is set.
//
// Double quotes are used only when the text contains a single quote and no
// double quote; otherwise single quotes are used. Inside the literal:
//   - a newline is printed as \n;
//   - a backslash followed by a single quote is printed as \' when quoting
//     with single quotes, or as a bare ' otherwise;
//   - a lone single quote is escaped when quoting with single quotes;
//   - every other character is printed unchanged.
void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) {
    // TODO strings should be escaped before we get here
    if (emit->pass != PASS_3) {
        return;
    }

    const char *text = qstr_str(qstr);
    bool has_single_quote = strchr(text, '\'') != NULL;
    bool has_double_quote = strchr(text, '"') != NULL;
    bool quote_single = !(has_single_quote && !has_double_quote);
    const char *delimiter = quote_single ? "'" : "\"";

    if (bytes) {
        printf("b");
    }
    printf("%s", delimiter);

    for (const char *p = text; *p != '\0'; p++) {
        char c = *p;
        if (c == '\n') {
            printf("\\n");
        } else if (c == '\\' && p[1] == '\'') {
            // consume the backslash; the loop increment then skips the quote
            p += 1;
            printf(quote_single ? "\\'" : "'");
        } else if (c == '\'' && quote_single) {
            printf("\\'");
        } else {
            putchar(c);
        }
    }

    printf("%s", delimiter);
}
// Terminate a piecewise LOAD_CONST line with a newline (PASS_3 only).
void emit_load_const_verbatim_end(emitter_t *emit) {
    if (emit->pass != PASS_3) {
        return;
    }
    printf("\n");
}
// LOAD_NAME <name>: stack delta +1, 3 bytes. Text is printed in PASS_3 only.
void emit_load_name(emitter_t *emit, qstr qstr) {
    emit_pre(emit, 1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("LOAD_NAME %s\n", qstr_str(qstr));
}
// LOAD_GLOBAL <name>: stack delta +1, 3 bytes. Text is printed in PASS_3 only.
void emit_load_global(emitter_t *emit, qstr qstr) {
    emit_pre(emit, 1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("LOAD_GLOBAL %s\n", qstr_str(qstr));
}
// LOAD_FAST <name>: stack delta +1, 3 bytes. Text is printed in PASS_3 only.
// local_num is not used by this emitter.
void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) {
    emit_pre(emit, 1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("LOAD_FAST %s\n", qstr_str(qstr));
}
// LOAD_DEREF <name>: stack delta +1, 3 bytes. Text is printed in PASS_3 only.
void emit_load_deref(emitter_t *emit, qstr qstr) {
    emit_pre(emit, 1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("LOAD_DEREF %s\n", qstr_str(qstr));
}
// LOAD_CLOSURE <name>: stack delta +1, 3 bytes. Text is printed in PASS_3 only.
void emit_load_closure(emitter_t *emit, qstr qstr) {
    emit_pre(emit, 1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("LOAD_CLOSURE %s\n", qstr_str(qstr));
}
// LOAD_ATTR <name>: stack delta 0, 3 bytes. Text is printed in PASS_3 only.
void emit_load_attr(emitter_t *emit, qstr qstr) {
    emit_pre(emit, 0, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("LOAD_ATTR %s\n", qstr_str(qstr));
}
// This emitter has no separate method-load opcode: a method load is emitted
// exactly as an attribute load.
void emit_load_method(emitter_t *emit, qstr qstr) {
    emit_load_attr(emit, qstr);
}
// LOAD_BUILD_CLASS: stack delta +1, 1 byte. Text is printed in PASS_3 only.
void emit_load_build_class(emitter_t *emit) {
    emit_pre(emit, 1, 1);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("LOAD_BUILD_CLASS\n");
}
// STORE_NAME <name>: stack delta -1, 3 bytes. Text is printed in PASS_3 only.
void emit_store_name(emitter_t *emit, qstr qstr) {
    emit_pre(emit, -1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("STORE_NAME %s\n", qstr_str(qstr));
}
// STORE_GLOBAL <name>: stack delta -1, 3 bytes. Text is printed in PASS_3 only.
void emit_store_global(emitter_t *emit, qstr qstr) {
    emit_pre(emit, -1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("STORE_GLOBAL %s\n", qstr_str(qstr));
}
// STORE_FAST <name>: stack delta -1, 3 bytes. Text is printed in PASS_3 only.
// local_num is not used by this emitter.
void emit_store_fast(emitter_t *emit, qstr qstr, int local_num) {
    emit_pre(emit, -1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("STORE_FAST %s\n", qstr_str(qstr));
}
// STORE_DEREF <name>: stack delta -1, 3 bytes. Text is printed in PASS_3 only.
void emit_store_deref(emitter_t *emit, qstr qstr) {
    emit_pre(emit, -1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("STORE_DEREF %s\n", qstr_str(qstr));
}
// STORE_ATTR <name>: stack delta -2, 3 bytes. Text is printed in PASS_3 only.
void emit_store_attr(emitter_t *emit, qstr qstr) {
    emit_pre(emit, -2, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("STORE_ATTR %s\n", qstr_str(qstr));
}
// STORE_LOCALS: stack delta -1, 1 byte. Text is printed in PASS_3 only.
void emit_store_locals(emitter_t *emit) {
    emit_pre(emit, -1, 1);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("STORE_LOCALS\n");
}
// STORE_SUBSCR: stack delta -3, 1 byte. Text is printed in PASS_3 only.
void emit_store_subscr(emitter_t *emit) {
    emit_pre(emit, -3, 1);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("STORE_SUBSCR\n");
}
// DELETE_NAME <name>: stack delta 0, 3 bytes. Text is printed in PASS_3 only.
void emit_delete_name(emitter_t *emit, qstr qstr) {
    emit_pre(emit, 0, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("DELETE_NAME %s\n", qstr_str(qstr));
}
// DELETE_GLOBAL <name>: stack delta 0, 3 bytes. Text is printed in PASS_3 only.
void emit_delete_global(emitter_t *emit, qstr qstr) {
    emit_pre(emit, 0, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("DELETE_GLOBAL %s\n", qstr_str(qstr));
}
// DELETE_FAST <name>: stack delta 0, 3 bytes. Text is printed in PASS_3 only.
// local_num is not used by this emitter.
void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) {
    emit_pre(emit, 0, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("DELETE_FAST %s\n", qstr_str(qstr));
}
// DELETE_DEREF <name>: stack delta 0, 3 bytes. Text is printed in PASS_3 only.
void emit_delete_deref(emitter_t *emit, qstr qstr) {
    emit_pre(emit, 0, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("DELETE_DEREF %s\n", qstr_str(qstr));
}
// DELETE_ATTR <name>: stack delta -1, 3 bytes. Text is printed in PASS_3 only.
void emit_delete_attr(emitter_t *emit, qstr qstr) {
    emit_pre(emit, -1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("DELETE_ATTR %s\n", qstr_str(qstr));
}
// DELETE_SUBSCR: stack delta -2, 1 byte. Text is printed in PASS_3 only.
void emit_delete_subscr(emitter_t *emit) {
    emit_pre(emit, -2, 1);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("DELETE_SUBSCR\n");
}
// DUP_TOP: stack delta +1, 1 byte. Text is printed in PASS_3 only.
void emit_dup_top(emitter_t *emit) {
    emit_pre(emit, 1, 1);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("DUP_TOP\n");
}
// DUP_TOP_TWO: stack delta +2, 1 byte. Text is printed in PASS_3 only.
void emit_dup_top_two(emitter_t *emit) {
    emit_pre(emit, 2, 1);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("DUP_TOP_TWO\n");
}
// POP_TOP: stack delta -1, 1 byte. Text is printed in PASS_3 only.
void emit_pop_top(emitter_t *emit) {
    emit_pre(emit, -1, 1);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("POP_TOP\n");
}
// ROT_TWO: stack delta 0, 1 byte. Text is printed in PASS_3 only.
void emit_rot_two(emitter_t *emit) {
    emit_pre(emit, 0, 1);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("ROT_TWO\n");
}
// ROT_THREE: stack delta 0, 1 byte. Text is printed in PASS_3 only.
void emit_rot_three(emitter_t *emit) {
    emit_pre(emit, 0, 1);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("ROT_THREE\n");
}
// Unconditional jump: stack delta 0, 3 bytes. In PASS_3 it is printed as
// JUMP_ABSOLUTE when the label's offset lies before the current byte-code
// offset (which emit_pre has already advanced past this instruction), and as
// JUMP_FORWARD otherwise. Both forms print the label's recorded offset.
void emit_jump(emitter_t *emit, int label) {
    emit_pre(emit, 0, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    int dest = emit->label_offsets[label];
    const char *opname = (dest < emit->byte_code_offset) ? "JUMP_ABSOLUTE" : "JUMP_FORWARD";
    printf("%s %d\n", opname, dest);
}
// POP_JUMP_IF_TRUE <label offset>: stack delta -1, 3 bytes. Printed in PASS_3.
void emit_pop_jump_if_true(emitter_t *emit, int label) {
    emit_pre(emit, -1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("POP_JUMP_IF_TRUE %d\n", emit->label_offsets[label]);
}
// POP_JUMP_IF_FALSE <label offset>: stack delta -1, 3 bytes. Printed in PASS_3.
void emit_pop_jump_if_false(emitter_t *emit, int label) {
    emit_pre(emit, -1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("POP_JUMP_IF_FALSE %d\n", emit->label_offsets[label]);
}
// JUMP_IF_TRUE_OR_POP <label offset>: stack delta -1, 3 bytes. Printed in PASS_3.
void emit_jump_if_true_or_pop(emitter_t *emit, int label) {
    emit_pre(emit, -1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("JUMP_IF_TRUE_OR_POP %d\n", emit->label_offsets[label]);
}
// JUMP_IF_FALSE_OR_POP <label offset>: stack delta -1, 3 bytes. Printed in PASS_3.
void emit_jump_if_false_or_pop(emitter_t *emit, int label) {
    emit_pre(emit, -1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("JUMP_IF_FALSE_OR_POP %d\n", emit->label_offsets[label]);
}
// SETUP_LOOP <label offset>: stack delta 0, 3 bytes. Printed in PASS_3.
void emit_setup_loop(emitter_t *emit, int label) {
    emit_pre(emit, 0, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("SETUP_LOOP %d\n", emit->label_offsets[label]);
}
// BREAK_LOOP: stack delta 0, 1 byte. Printed in PASS_3 without an operand;
// `label` is ignored here.
void emit_break_loop(emitter_t *emit, int label) {
    emit_pre(emit, 0, 1);
    if (emit->pass != PASS_3) {
        return;
    }
    // CPython doesn't have label
    printf("BREAK_LOOP\n");
    //printf("BREAK_LOOP %d\n", emit->label_offsets[label]);
}
// CONTINUE_LOOP <label offset>: stack delta 0, 3 bytes. Printed in PASS_3.
void emit_continue_loop(emitter_t *emit, int label) {
    emit_pre(emit, 0, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("CONTINUE_LOOP %d\n", emit->label_offsets[label]);
}
// SETUP_WITH <label offset>: stack delta +7, 3 bytes. Printed in PASS_3.
void emit_setup_with(emitter_t *emit, int label) {
    emit_pre(emit, 7, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("SETUP_WITH %d\n", emit->label_offsets[label]);
}
// WITH_CLEANUP: stack delta -7 (undoing emit_setup_with's +7), 1 byte.
// Printed in PASS_3.
void emit_with_cleanup(emitter_t *emit) {
    emit_pre(emit, -7, 1);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("WITH_CLEANUP\n");
}
// SETUP_EXCEPT <label offset>: stack delta +6, 3 bytes. Printed in PASS_3.
void emit_setup_except(emitter_t *emit, int label) {
    emit_pre(emit, 6, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("SETUP_EXCEPT %d\n", emit->label_offsets[label]);
}
// SETUP_FINALLY <label offset>: stack delta +6, 3 bytes. Printed in PASS_3.
void emit_setup_finally(emitter_t *emit, int label) {
    emit_pre(emit, 6, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("SETUP_FINALLY %d\n", emit->label_offsets[label]);
}
// END_FINALLY: stack delta -1, 1 byte. Printed in PASS_3.
void emit_end_finally(emitter_t *emit) {
    emit_pre(emit, -1, 1);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("END_FINALLY\n");
}
// GET_ITER: stack delta 0, 1 byte. Printed in PASS_3.
void emit_get_iter(emitter_t *emit) {
    emit_pre(emit, 0, 1);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("GET_ITER\n");
}
// FOR_ITER <label offset>: stack delta +1, 3 bytes. Printed in PASS_3.
void emit_for_iter(emitter_t *emit, int label) {
    emit_pre(emit, 1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("FOR_ITER %d\n", emit->label_offsets[label]);
}
// Bookkeeping only: stack delta -1, no byte code and nothing printed.
void emit_for_iter_end(emitter_t *emit) {
    emit_pre(emit, -1, 0);
}
// POP_BLOCK: stack delta 0, 1 byte. Printed in PASS_3.
void emit_pop_block(emitter_t *emit) {
    emit_pre(emit, 0, 1);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("POP_BLOCK\n");
}
// POP_EXCEPT: stack delta 0, 1 byte. Printed in PASS_3.
void emit_pop_except(emitter_t *emit) {
    emit_pre(emit, 0, 1);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("POP_EXCEPT\n");
}
// Unary operator: stack delta 0, 1 byte. In PASS_3 the matching UNARY_* name
// is printed; an unknown operator trips the assertion and prints nothing.
void emit_unary_op(emitter_t *emit, rt_unary_op_t op) {
    emit_pre(emit, 0, 1);
    if (emit->pass != PASS_3) {
        return;
    }
    const char *name = NULL;
    switch (op) {
        case RT_UNARY_OP_NOT:      name = "UNARY_NOT"; break;
        case RT_UNARY_OP_POSITIVE: name = "UNARY_POSITIVE"; break;
        case RT_UNARY_OP_NEGATIVE: name = "UNARY_NEGATIVE"; break;
        case RT_UNARY_OP_INVERT:   name = "UNARY_INVERT"; break;
        default: assert(0);
    }
    if (name != NULL) {
        printf("%s\n", name);
    }
}
// Binary / in-place operator: stack delta -1, 1 byte. In PASS_3 the matching
// BINARY_* or INPLACE_* name is printed; an unknown operator trips the
// assertion and prints nothing.
void emit_binary_op(emitter_t *emit, rt_binary_op_t op) {
    emit_pre(emit, -1, 1);
    if (emit->pass != PASS_3) {
        return;
    }
    const char *name = NULL;
    switch (op) {
        case RT_BINARY_OP_SUBSCR:               name = "BINARY_SUBSCR"; break;
        case RT_BINARY_OP_OR:                   name = "BINARY_OR"; break;
        case RT_BINARY_OP_XOR:                  name = "BINARY_XOR"; break;
        case RT_BINARY_OP_AND:                  name = "BINARY_AND"; break;
        case RT_BINARY_OP_LSHIFT:               name = "BINARY_LSHIFT"; break;
        case RT_BINARY_OP_RSHIFT:               name = "BINARY_RSHIFT"; break;
        case RT_BINARY_OP_ADD:                  name = "BINARY_ADD"; break;
        case RT_BINARY_OP_SUBTRACT:             name = "BINARY_SUBTRACT"; break;
        case RT_BINARY_OP_MULTIPLY:             name = "BINARY_MULTIPLY"; break;
        case RT_BINARY_OP_FLOOR_DIVIDE:         name = "BINARY_FLOOR_DIVIDE"; break;
        case RT_BINARY_OP_TRUE_DIVIDE:          name = "BINARY_TRUE_DIVIDE"; break;
        case RT_BINARY_OP_MODULO:               name = "BINARY_MODULO"; break;
        case RT_BINARY_OP_POWER:                name = "BINARY_POWER"; break;
        case RT_BINARY_OP_INPLACE_OR:           name = "INPLACE_OR"; break;
        case RT_BINARY_OP_INPLACE_XOR:          name = "INPLACE_XOR"; break;
        case RT_BINARY_OP_INPLACE_AND:          name = "INPLACE_AND"; break;
        case RT_BINARY_OP_INPLACE_LSHIFT:       name = "INPLACE_LSHIFT"; break;
        case RT_BINARY_OP_INPLACE_RSHIFT:       name = "INPLACE_RSHIFT"; break;
        case RT_BINARY_OP_INPLACE_ADD:          name = "INPLACE_ADD"; break;
        case RT_BINARY_OP_INPLACE_SUBTRACT:     name = "INPLACE_SUBTRACT"; break;
        case RT_BINARY_OP_INPLACE_MULTIPLY:     name = "INPLACE_MULTIPLY"; break;
        case RT_BINARY_OP_INPLACE_FLOOR_DIVIDE: name = "INPLACE_FLOOR_DIVIDE"; break;
        case RT_BINARY_OP_INPLACE_TRUE_DIVIDE:  name = "INPLACE_TRUE_DIVIDE"; break;
        case RT_BINARY_OP_INPLACE_MODULO:       name = "INPLACE_MODULO"; break;
        case RT_BINARY_OP_INPLACE_POWER:        name = "INPLACE_POWER"; break;
        default: assert(0);
    }
    if (name != NULL) {
        printf("%s\n", name);
    }
}
// Comparison operator: stack delta -1, 3 bytes. In PASS_3 it is printed as
// "COMPARE_OP <symbol>"; an unknown operator trips the assertion and prints
// nothing.
void emit_compare_op(emitter_t *emit, rt_compare_op_t op) {
    emit_pre(emit, -1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    const char *symbol = NULL;
    switch (op) {
        case RT_COMPARE_OP_LESS:            symbol = "<"; break;
        case RT_COMPARE_OP_MORE:            symbol = ">"; break;
        case RT_COMPARE_OP_EQUAL:           symbol = "=="; break;
        case RT_COMPARE_OP_LESS_EQUAL:      symbol = "<="; break;
        case RT_COMPARE_OP_MORE_EQUAL:      symbol = ">="; break;
        case RT_COMPARE_OP_NOT_EQUAL:       symbol = "!="; break;
        case RT_COMPARE_OP_IN:              symbol = "in"; break;
        case RT_COMPARE_OP_NOT_IN:          symbol = "not in"; break;
        case RT_COMPARE_OP_IS:              symbol = "is"; break;
        case RT_COMPARE_OP_IS_NOT:          symbol = "is not"; break;
        case RT_COMPARE_OP_EXCEPTION_MATCH: symbol = "exception match"; break;
        default: assert(0);
    }
    if (symbol != NULL) {
        printf("COMPARE_OP %s\n", symbol);
    }
}
// BUILD_TUPLE <n_args>: stack delta 1 - n_args, 3 bytes. Printed in PASS_3.
void emit_build_tuple(emitter_t *emit, int n_args) {
    emit_pre(emit, 1 - n_args, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("BUILD_TUPLE %d\n", n_args);
}
// BUILD_LIST <n_args>: stack delta 1 - n_args, 3 bytes. Printed in PASS_3.
void emit_build_list(emitter_t *emit, int n_args) {
    emit_pre(emit, 1 - n_args, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("BUILD_LIST %d\n", n_args);
}
// LIST_APPEND <list_index>: stack delta -1, 3 bytes. Printed in PASS_3.
void emit_list_append(emitter_t *emit, int list_index) {
    emit_pre(emit, -1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("LIST_APPEND %d\n", list_index);
}
// BUILD_MAP <n_args>: stack delta +1 regardless of n_args, 3 bytes.
// Printed in PASS_3.
void emit_build_map(emitter_t *emit, int n_args) {
    emit_pre(emit, 1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("BUILD_MAP %d\n", n_args);
}
// STORE_MAP: stack delta -2, 1 byte. Printed in PASS_3.
void emit_store_map(emitter_t *emit) {
    emit_pre(emit, -2, 1);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("STORE_MAP\n");
}
// MAP_ADD <map_index>: stack delta -2, 3 bytes. Printed in PASS_3.
void emit_map_add(emitter_t *emit, int map_index) {
    emit_pre(emit, -2, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("MAP_ADD %d\n", map_index);
}
// BUILD_SET <n_args>: stack delta 1 - n_args, 3 bytes. Printed in PASS_3.
void emit_build_set(emitter_t *emit, int n_args) {
    emit_pre(emit, 1 - n_args, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("BUILD_SET %d\n", n_args);
}
// SET_ADD <set_index>: stack delta -1, 3 bytes. Printed in PASS_3.
void emit_set_add(emitter_t *emit, int set_index) {
    emit_pre(emit, -1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("SET_ADD %d\n", set_index);
}
// BUILD_SLICE <n_args>: stack delta 1 - n_args, 3 bytes. Printed in PASS_3.
void emit_build_slice(emitter_t *emit, int n_args) {
    emit_pre(emit, 1 - n_args, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("BUILD_SLICE %d\n", n_args);
}
// UNPACK_SEQUENCE <n_args>: stack delta n_args - 1, 3 bytes. Printed in PASS_3.
void emit_unpack_sequence(emitter_t *emit, int n_args) {
    emit_pre(emit, -1 + n_args, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("UNPACK_SEQUENCE %d\n", n_args);
}
// UNPACK_EX: 3 bytes. The printed operand packs n_left into the low 8 bits
// and n_right into the bits above them. Printed in PASS_3.
void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) {
    emit_pre(emit, -1 + n_left + n_right + 1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("UNPACK_EX %d\n", n_left | (n_right << 8));
}
// Function call: 3 bytes. The stack delta removes n_positional slots, two per
// keyword argument, and one for each of the star / double-star arguments. In
// PASS_3 the opcode name (CALL_FUNCTION with an optional _VAR, _KW or _VAR_KW
// suffix) is printed followed by " <n_positional>, <n_keyword>".
void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
    int n_extra = (have_star_arg ? 1 : 0) + (have_dbl_star_arg ? 1 : 0);
    emit_pre(emit, -n_positional - 2 * n_keyword - n_extra, 3);
    if (emit->pass != PASS_3) {
        return;
    }

    const char *opname;
    if (have_star_arg) {
        opname = have_dbl_star_arg ? "CALL_FUNCTION_VAR_KW" : "CALL_FUNCTION_VAR";
    } else {
        opname = have_dbl_star_arg ? "CALL_FUNCTION_KW" : "CALL_FUNCTION";
    }
    printf("%s %d, %d\n", opname, n_positional, n_keyword);
}
// This emitter has no separate method-call opcode: a method call is emitted
// exactly as a function call.
void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
    emit_call_function(emit, n_positional, n_keyword, have_star_arg, have_dbl_star_arg);
}
// RETURN_VALUE: stack delta -1, 1 byte. Also records that the latest emit was
// a return; this must happen after emit_pre, which clears that flag.
void emit_return_value(emitter_t *emit) {
    emit_pre(emit, -1, 1);
    emit->last_emit_was_return_value = true;
    if (emit->pass != PASS_3) {
        return;
    }
    printf("RETURN_VALUE\n");
}
// RAISE_VARARGS <n_args>: stack delta -n_args, 3 bytes. Printed in PASS_3.
void emit_raise_varargs(emitter_t *emit, int n_args) {
    emit_pre(emit, -n_args, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("RAISE_VARARGS %d\n", n_args);
}
// Emit the two LOAD_CONSTs that precede MAKE_FUNCTION / MAKE_CLOSURE: first
// the code object (printed as "code <name>"), then a quoted qualified name.
// Each is stack delta +1 and 3 bytes.
//
// The qualified name is assembled from emit->scope and its ancestors,
// outermost first: a function scope contributes "<name>.<locals>.", a class
// scope contributes "<name>.", and other scope kinds contribute nothing.
void load_const_code_and_name(emitter_t *emit, qstr qstr) {
    emit_pre(emit, 1, 3);
    if (emit->pass == PASS_3) {
        printf("LOAD_CONST code %s\n", qstr_str(qstr));
    }

    // load qualified name
    emit_pre(emit, 1, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("LOAD_CONST '");

    // count how many parents lie above the current scope
    int depth = 0;
    scope_t *walker = emit->scope;
    while (walker->parent != NULL) {
        depth += 1;
        walker = walker->parent;
    }

    // visit the scopes from the outermost down to the current one
    for (int levels_up = depth; levels_up >= 0; levels_up--) {
        scope_t *s = emit->scope;
        int remaining = levels_up;
        while (remaining > 0) {
            s = s->parent;
            remaining--;
        }
        if (s->kind == SCOPE_FUNCTION) {
            printf("%s.<locals>.", qstr_str(s->simple_name));
        } else if (s->kind == SCOPE_CLASS) {
            printf("%s.", qstr_str(s->simple_name));
        }
    }

    printf("%s'\n", qstr_str(qstr));
}
// MAKE_FUNCTION: first loads the code object and qualified name, then emits
// the opcode (3 bytes). The printed operand packs n_default_params into the
// low 8 bits and n_dict_params above them.
void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) {
    load_const_code_and_name(emit, scope->simple_name);
    emit_pre(emit, -1 - n_default_params - 2 * n_dict_params, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("MAKE_FUNCTION %d\n", (n_dict_params << 8) | n_default_params);
}
// MAKE_CLOSURE: first loads the code object and qualified name, then emits
// the opcode (3 bytes). The printed operand packs n_default_params into the
// low 8 bits and n_dict_params above them.
void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) {
    load_const_code_and_name(emit, scope->simple_name);
    emit_pre(emit, -2 - n_default_params - 2 * n_dict_params, 3);
    if (emit->pass != PASS_3) {
        return;
    }
    printf("MAKE_CLOSURE %d\n", (n_dict_params << 8) | n_default_params);
}
// YIELD_VALUE: stack delta 0, 1 byte. PASS_2 flags the scope as a generator;
// PASS_3 prints the opcode.
void emit_yield_value(emitter_t *emit) {
    emit_pre(emit, 0, 1);
    switch (emit->pass) {
        case PASS_2:
            emit->scope->flags |= SCOPE_FLAG_GENERATOR;
            break;
        case PASS_3:
            printf("YIELD_VALUE\n");
            break;
        default:
            break;
    }
}
// YIELD_FROM: stack delta -1, 1 byte. PASS_2 flags the scope as a generator;
// PASS_3 prints the opcode.
void emit_yield_from(emitter_t *emit) {
    emit_pre(emit, -1, 1);
    switch (emit->pass) {
        case PASS_2:
            emit->scope->flags |= SCOPE_FLAG_GENERATOR;
            break;
        case PASS_3:
            printf("YIELD_FROM\n");
            break;
        default:
            break;
    }
}
#endif // EMIT_DO_CPY

673
py/emitthumb.c Normal file
View File

@ -0,0 +1,673 @@
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "misc.h"
#include "lexer.h"
#include "machine.h"
#include "parse.h"
#include "scope.h"
#include "runtime.h"
#include "emit.h"
#include "asmthumb.h"
#ifdef EMIT_DO_THUMB
// Register assignments for the Thumb emitter. The first REG_LOCAL_NUM (3)
// parameters are copied into REG_LOCAL_1..REG_LOCAL_3 on function entry;
// further locals are addressed by index minus REG_LOCAL_NUM.
// NOTE(review): REG_TEMP is the same register (R7) that emit_start_pass loads
// with the address of rt_fun_table - confirm the two uses do not conflict.
#define REG_LOCAL_1 (REG_R4)
#define REG_LOCAL_2 (REG_R5)
#define REG_LOCAL_3 (REG_R6)
#define REG_TEMP (REG_R7)
#define REG_LOCAL_NUM (3)
// What, if anything, is pending to be pushed; stored in
// emitter_t.need_to_push and reset to NEED_TO_PUSH_NOTHING at the start of
// every pass.
// NOTE(review): presumably REG / I32 select emitter_t.last_reg / last_i32 as
// the pending value - the code that consumes this is outside this view.
typedef enum {
NEED_TO_PUSH_NOTHING,
NEED_TO_PUSH_REG,
NEED_TO_PUSH_I32,
} need_to_push_t;
// State of the Thumb native-code emitter (the EMIT_DO_THUMB variant).
struct _emitter_t {
// current pass, set by emit_start_pass
int pass;
// number of locals that do not fit in the REG_LOCAL_* registers
// (0 in PASS_1); computed in emit_start_pass
int stack_start;
// tracked stack depth
int stack_size;
// true when the most recent emit was a return; emit_end_pass skips
// asm_thumb_exit in that case
bool last_emit_was_return_value;
// pending-push state, reset to NEED_TO_PUSH_NOTHING each pass
need_to_push_t need_to_push;
// presumably the register / immediate that is pending when need_to_push
// is NEED_TO_PUSH_REG / NEED_TO_PUSH_I32 - TODO confirm against the users
int last_reg;
int32_t last_i32;
// scope being compiled in the current pass
scope_t *scope;
// Thumb assembler created by asm_thumb_new in emit_new
asm_thumb_t *as;
// set to true by emit_new
bool do_native_types;
};
// Allocate a Thumb emitter together with its assembler, with native types
// enabled. The remaining fields are initialised by emit_start_pass.
emitter_t *emit_new() {
    emitter_t *e = m_new(emitter_t, 1);
    e->do_native_types = true;
    e->as = asm_thumb_new();
    return e;
}
// Begin a compilation pass over `scope`: reset the per-pass state, emit the
// function entry, move incoming arguments to their local homes and load the
// runtime function table address into R7.
void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) {
    // reset per-pass state
    emit->pass = pass;
    emit->scope = scope;
    emit->stack_start = 0;
    emit->stack_size = 0;
    emit->need_to_push = NEED_TO_PUSH_NOTHING;
    emit->last_emit_was_return_value = false;

    if (pass == PASS_1) {
        scope->unique_code_id = rt_get_new_unique_code_id();
    }

    asm_thumb_start_pass(emit->as, pass);

    // entry to function: frame holds the locals that are not in registers,
    // followed by the value stack (sizes are only used after pass 1)
    int frame_slots = 0;
    if (pass > PASS_1) {
        int spilled_locals = scope->num_locals - REG_LOCAL_NUM;
        if (spilled_locals < 0) {
            spilled_locals = 0;
        }
        emit->stack_start = spilled_locals;
        frame_slots = spilled_locals + scope->stack_size;
    }
    asm_thumb_entry(emit->as, frame_slots);

    // initialise locals from parameters
    for (int i = 0; i < scope->num_params; i++) {
        switch (i) {
            case 0:
                asm_thumb_mov_reg_reg(emit->as, REG_LOCAL_1, REG_ARG_1);
                break;
            case 1:
                asm_thumb_mov_reg_reg(emit->as, REG_LOCAL_2, REG_ARG_2);
                break;
            case 2:
                asm_thumb_mov_reg_reg(emit->as, REG_LOCAL_3, REG_ARG_3);
                break;
            case 3:
                asm_thumb_mov_local_reg(emit->as, i - REG_LOCAL_NUM, REG_ARG_4);
                break;
            default:
                // TODO not implemented
                assert(0);
                break;
        }
    }

    asm_thumb_mov_reg_i32(emit->as, REG_R7, (machine_uint_t)rt_fun_table);
}
// Finish the current pass: emit an exit if the last op was not a return,
// report a non-empty value stack, and on pass 3 register the generated code.
void emit_end_pass(emitter_t *emit) {
    const bool needs_exit = !emit->last_emit_was_return_value;
    if (needs_exit) {
        asm_thumb_exit(emit->as);
    }
    asm_thumb_end_pass(emit->as);

    // check stack is back to zero size
    if (emit->stack_size != 0) {
        printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size);
    }

    if (emit->pass != PASS_3) {
        return;
    }
    py_fun_t code = asm_thumb_get_code(emit->as);
    rt_assign_native_code(emit->scope->unique_code_id, code, asm_thumb_get_code_size(emit->as), emit->scope->num_params);
}
// Report whether the most recently emitted operation was a return.
bool emit_last_emit_was_return_value(emitter_t *emit) {
    const bool was_return = emit->last_emit_was_return_value;
    return was_return;
}
// Current tracked depth of the value stack.
int emit_get_stack_size(emitter_t *emit) {
    const int depth = emit->stack_size;
    return depth;
}
// Overwrite the tracked value-stack depth (no bounds check, no code emitted).
void emit_set_stack_size(emitter_t *emit, int size) {
    emit->stack_size = size;
}
// Change the tracked value-stack depth by `stack_size_delta`; after pass 1
// also record the high-water mark in the scope.
static void adjust_stack(emitter_t *emit, int stack_size_delta) {
    emit->stack_size += stack_size_delta;
    assert(emit->stack_size >= 0);

    if (emit->pass <= PASS_1) {
        return;
    }
    if (emit->scope->stack_size < emit->stack_size) {
        emit->scope->stack_size = emit->stack_size;
    }
}
// Flush a pending (deferred) push to its value-stack slot, if there is one.
static void stack_settle(emitter_t *emit) {
    const need_to_push_t pending = emit->need_to_push;

    if (pending == NEED_TO_PUSH_REG) {
        asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size, emit->last_reg);
        adjust_stack(emit, 1);
    } else if (pending == NEED_TO_PUSH_I32) {
        // materialise the immediate in R0 first, then store it
        asm_thumb_mov_reg_i32_optimised(emit->as, REG_R0, emit->last_i32);
        asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size, REG_R0);
        adjust_stack(emit, 1);
    }

    emit->need_to_push = NEED_TO_PUSH_NOTHING;
}
// Apply `stack_size_delta` to the tracked depth and clear the
// "last emit was a return" flag. Does not touch any pending push.
static void emit_pre_raw(emitter_t *emit, int stack_size_delta) {
    adjust_stack(emit, stack_size_delta);
    emit->last_emit_was_return_value = false;
}
// Common prologue for ops that pop nothing: flush any pending push, then
// do the usual bookkeeping with a zero stack delta.
static void emit_pre(emitter_t *emit) {
    stack_settle(emit);
    emit_pre_raw(emit, 0);
}
// Pop the top value into `reg_dest`. A pending push is consumed directly
// (register move or immediate load) instead of going through the frame.
static void emit_pre_pop_reg(emitter_t *emit, int reg_dest) {
    const need_to_push_t pending = emit->need_to_push;

    if (pending == NEED_TO_PUSH_NOTHING) {
        // value lives in the frame: load the top slot and shrink the stack
        asm_thumb_mov_reg_local(emit->as, reg_dest, emit->stack_start + emit->stack_size - 1);
        emit_pre_raw(emit, -1);
    } else if (pending == NEED_TO_PUSH_REG) {
        emit_pre_raw(emit, 0);
        if (reg_dest != emit->last_reg) {
            asm_thumb_mov_reg_reg(emit->as, reg_dest, emit->last_reg);
        }
    } else if (pending == NEED_TO_PUSH_I32) {
        emit_pre_raw(emit, 0);
        asm_thumb_mov_reg_i32_optimised(emit->as, reg_dest, emit->last_i32);
    }

    emit->need_to_push = NEED_TO_PUSH_NOTHING;
}
// Pop two values: the top one into `rega`, the next one into `regb`.
static void emit_pre_pop_reg_reg(emitter_t *emit, int rega, int regb) {
    emit_pre_pop_reg(emit, rega);

    const int top_slot = emit->stack_start + emit->stack_size - 1;
    asm_thumb_mov_reg_local(emit->as, regb, top_slot);
    adjust_stack(emit, -1);
}
// Pop three values: top into `rega`, then `regb`, then `regc`.
static void emit_pre_pop_reg_reg_reg(emitter_t *emit, int rega, int regb, int regc) {
    emit_pre_pop_reg(emit, rega);

    const int top_slot = emit->stack_start + emit->stack_size - 1;
    asm_thumb_mov_reg_local(emit->as, regb, top_slot);
    asm_thumb_mov_reg_local(emit->as, regc, top_slot - 1);
    adjust_stack(emit, -2);
}
// Epilogue for ops that push nothing; intentionally does nothing.
static void emit_post(emitter_t *emit) {
}
// Record that the value in `reg` is to be pushed; the store is deferred.
static void emit_post_push_reg(emitter_t *emit, int reg) {
    emit->last_reg = reg;
    emit->need_to_push = NEED_TO_PUSH_REG;
}
// Record that the immediate `i32` is to be pushed; the store is deferred.
static void emit_post_push_i32(emitter_t *emit, int32_t i32) {
    emit->last_i32 = i32;
    emit->need_to_push = NEED_TO_PUSH_I32;
}
// Push two values: `rega` is stored to the frame now, `regb` is left as a
// deferred push.
static void emit_post_push_reg_reg(emitter_t *emit, int rega, int regb) {
    const int slot = emit->stack_start + emit->stack_size;
    asm_thumb_mov_local_reg(emit->as, slot, rega);

    emit->last_reg = regb;
    emit->need_to_push = NEED_TO_PUSH_REG;
    adjust_stack(emit, 1);
}
// Push three registers to consecutive value-stack slots (rega lowest).
static void emit_post_push_reg_reg_reg(emitter_t *emit, int rega, int regb, int regc) {
    const int base_slot = emit->stack_start + emit->stack_size;
    const int regs[3] = { rega, regb, regc };

    for (int i = 0; i < 3; i++) {
        asm_thumb_mov_local_reg(emit->as, base_slot + i, regs[i]);
    }
    adjust_stack(emit, 3);
}
// Push four registers to consecutive value-stack slots (rega lowest).
static void emit_post_push_reg_reg_reg_reg(emitter_t *emit, int rega, int regb, int regc, int regd) {
    const int base_slot = emit->stack_start + emit->stack_size;
    const int regs[4] = { rega, regb, regc, regd };

    for (int i = 0; i < 4; i++) {
        asm_thumb_mov_local_reg(emit->as, base_slot + i, regs[i]);
    }
    adjust_stack(emit, 4);
}
// Load the address of the current top value-stack slot into `reg_dest`,
// then drop `n_pop` entries from the tracked depth.
static void emit_get_stack_pointer_to_reg_for_pop(emitter_t *emit, int reg_dest, int n_pop) {
    const int top_slot = emit->stack_start + emit->stack_size - 1;
    asm_thumb_mov_reg_local_addr(emit->as, reg_dest, top_slot);
    adjust_stack(emit, -n_pop);
}
// Load into `reg_dest` the address of the slot that will be on top after
// `n_push` entries are added, then grow the tracked depth by `n_push`.
static void emit_get_stack_pointer_to_reg_for_push(emitter_t *emit, int reg_dest, int n_push) {
    const int new_top_slot = emit->stack_start + emit->stack_size + n_push - 1;
    asm_thumb_mov_reg_local_addr(emit->as, reg_dest, new_top_slot);
    adjust_stack(emit, n_push);
}
// Emit an indirect call to runtime function `fun_kind`, passing R3 as the
// register argument of asm_thumb_bl_ind.
static void emit_call(emitter_t *emit, rt_fun_kind_t fun_kind) {
    asm_thumb_bl_ind(emit->as, rt_fun_table[fun_kind], fun_kind, REG_R3);
}
// Load the immediate `arg_val` into `arg_reg`, then emit an indirect call
// to runtime function `fun_kind`.
static void emit_call_with_i32_arg(emitter_t *emit, rt_fun_kind_t fun_kind, int32_t arg_val, int arg_reg) {
    asm_thumb_mov_reg_i32_optimised(emit->as, arg_reg, arg_val);
    asm_thumb_bl_ind(emit->as, rt_fun_table[fun_kind], fun_kind, REG_R3);
}
// Allocate a new label from the underlying assembler.
int emit_label_new(emitter_t *emit) {
    const int label = asm_thumb_label_new(emit->as);
    return label;
}
// Forward label `l` to the assembler's label-assign routine.
void emit_label_assign(emitter_t *emit, int l) {
    asm_thumb_label_assign(emit->as, l);
}
// Import operations are not implemented by this emitter: each stub only
// trips assert(0).
void emit_import_name(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_import_from(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_import_star(emitter_t *emit) {
assert(0);
}
// Push the constant object for a None/False/True token as a deferred
// immediate. Any other token is a caller error.
void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) {
    emit_pre(emit);

    py_obj_t obj;
    if (tok == PY_TOKEN_KW_NONE) {
        obj = py_const_none;
    } else if (tok == PY_TOKEN_KW_FALSE) {
        obj = py_const_false;
    } else if (tok == PY_TOKEN_KW_TRUE) {
        obj = py_const_true;
    } else {
        // shouldn't happen
        assert(0);
        obj = 0;
    }

    emit_post_push_i32(emit, (machine_uint_t)obj);
}
// Push a small-integer constant as a deferred immediate.
// With native types the raw value is pushed; otherwise it is encoded as
// (value << 1) | 1.
void emit_load_const_small_int(emitter_t *emit, int arg) {
    emit_pre(emit);
    if (emit->do_native_types) {
        emit_post_push_i32(emit, arg);
    } else {
        // Shift in unsigned arithmetic: left-shifting a negative signed int
        // is undefined behaviour, the unsigned shift gives the same bits.
        emit_post_push_i32(emit, (int32_t)(((uint32_t)arg << 1) | 1u));
    }
}
// Integer, decimal and identifier constants are not implemented by this
// emitter: each stub only trips assert(0).
void emit_load_const_int(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_load_const_dec(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_load_const_id(emitter_t *emit, qstr qstr) {
assert(0);
}
// Call the runtime's load-const-str function with `qstr` in ARG_1 and
// leave the returned value as a deferred push. `bytes` is not used.
void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) {
    emit_pre(emit);
    emit_call_with_i32_arg(emit, RT_F_LOAD_CONST_STR, qstr, REG_ARG_1);
    emit_post_push_reg(emit, REG_RET);
}
// The "verbatim" constant interface is not implemented by this emitter:
// each stub only trips assert(0).
void emit_load_const_verbatim_start(emitter_t *emit) {
assert(0);
}
void emit_load_const_verbatim_int(emitter_t *emit, int val) {
assert(0);
}
void emit_load_const_verbatim_str(emitter_t *emit, const char *str) {
assert(0);
}
void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) {
assert(0);
}
void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) {
assert(0);
}
void emit_load_const_verbatim_end(emitter_t *emit) {
assert(0);
}
// Push local variable `local_num`.
// Locals 0..2 live in REG_LOCAL_1..REG_LOCAL_3; the remaining locals live
// in frame slots starting at slot 0, so local N maps to slot
// N - REG_LOCAL_NUM. This matches emit_start_pass, which reserves
// (num_locals - REG_LOCAL_NUM) slots and stores parameter 3 at slot
// (3 - REG_LOCAL_NUM).
void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) {
    emit_pre(emit);
    if (local_num == 0) {
        emit_post_push_reg(emit, REG_LOCAL_1);
    } else if (local_num == 1) {
        emit_post_push_reg(emit, REG_LOCAL_2);
    } else if (local_num == 2) {
        emit_post_push_reg(emit, REG_LOCAL_3);
    } else {
        asm_thumb_mov_reg_local(emit->as, REG_R0, local_num - REG_LOCAL_NUM);
        emit_post_push_reg(emit, REG_R0);
    }
}

// Push the result of the runtime's load-name call for `qstr`.
void emit_load_name(emitter_t *emit, qstr qstr) {
    emit_pre(emit);
    emit_call_with_i32_arg(emit, RT_F_LOAD_NAME, qstr, REG_ARG_1);
    emit_post_push_reg(emit, REG_RET);
}

// Push the result of the runtime's load-global call for `qstr`.
void emit_load_global(emitter_t *emit, qstr qstr) {
    emit_pre(emit);
    emit_call_with_i32_arg(emit, RT_F_LOAD_GLOBAL, qstr, REG_ARG_1);
    emit_post_push_reg(emit, REG_RET);
}

// Not implemented by this emitter.
void emit_load_deref(emitter_t *emit, qstr qstr) {
    assert(0);
}

// Not implemented by this emitter.
void emit_load_closure(emitter_t *emit, qstr qstr) {
    assert(0);
}

// Replace the top value (the base object) with its attribute `qstr`.
void emit_load_attr(emitter_t *emit, qstr qstr) {
    emit_pre_pop_reg(emit, REG_ARG_1); // arg1 = base
    emit_call_with_i32_arg(emit, RT_F_LOAD_ATTR, qstr, REG_ARG_2); // arg2 = attribute name
    emit_post_push_reg(emit, REG_RET);
}

// Pop the base object and reserve two value-stack slots that the runtime
// call fills in through the destination pointer passed in ARG_3.
void emit_load_method(emitter_t *emit, qstr qstr) {
    emit_pre_pop_reg(emit, REG_ARG_1); // arg1 = base
    emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_3, 2); // arg3 = dest ptr
    emit_call_with_i32_arg(emit, RT_F_LOAD_METHOD, qstr, REG_ARG_2); // arg2 = method name
}

// Not implemented by this emitter.
void emit_load_build_class(emitter_t *emit) {
    assert(0);
} // basically load __build_class__ from builtins

// Pop the top value into local variable `local_num`, using the same
// register/frame-slot mapping as emit_load_fast.
void emit_store_fast(emitter_t *emit, qstr qstr, int local_num) {
    if (local_num == 0) {
        emit_pre_pop_reg(emit, REG_LOCAL_1);
    } else if (local_num == 1) {
        emit_pre_pop_reg(emit, REG_LOCAL_2);
    } else if (local_num == 2) {
        emit_pre_pop_reg(emit, REG_LOCAL_3);
    } else {
        emit_pre_pop_reg(emit, REG_R0);
        asm_thumb_mov_local_reg(emit->as, local_num - REG_LOCAL_NUM, REG_R0);
    }
    emit_post(emit);
}
// Pop the top value into ARG_2 and call the runtime's store-name function
// with `qstr` in ARG_1.
void emit_store_name(emitter_t *emit, qstr qstr) {
    emit_pre_pop_reg(emit, REG_ARG_2); // arg2 = value
    emit_call_with_i32_arg(emit, RT_F_STORE_NAME, qstr, REG_ARG_1); // arg1 = name
    emit_post(emit);
}
// These store operations are not implemented by this emitter: each stub
// only trips assert(0).
void emit_store_global(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_store_deref(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_store_attr(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_store_locals(emitter_t *emit) {
assert(0);
}
// Pop index (top), base and value into ARG_2, ARG_1 and ARG_3 and call
// the runtime's store-subscript function.
void emit_store_subscr(emitter_t *emit) {
    // index, base, value to store
    emit_pre_pop_reg_reg_reg(emit, REG_ARG_2, REG_ARG_1, REG_ARG_3);
    emit_call(emit, RT_F_STORE_SUBSCR);
}
// Delete operations are not implemented by this emitter: each stub only
// trips assert(0).
void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) {
assert(0);
}
void emit_delete_name(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_delete_global(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_delete_deref(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_delete_attr(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_delete_subscr(emitter_t *emit) {
assert(0);
}
// Duplicate the top value: pop it into R0, then push R0 twice (the second
// push stays deferred).
void emit_dup_top(emitter_t *emit) {
    const int tmp = REG_R0;
    emit_pre_pop_reg(emit, tmp);
    emit_post_push_reg_reg(emit, tmp, tmp);
}
// Duplicate the top two values, preserving their order.
void emit_dup_top_two(emitter_t *emit) {
    const int top = REG_R0;
    const int second = REG_R1;
    emit_pre_pop_reg_reg(emit, top, second);
    emit_post_push_reg_reg_reg_reg(emit, second, top, second, top);
}
// Discard the top value (it is popped into R0 and not used).
void emit_pop_top(emitter_t *emit) {
    emit_pre_pop_reg(emit, REG_R0);
    emit_post(emit);
}
// Not implemented by this emitter: only trips assert(0).
void emit_rot_two(emitter_t *emit) {
assert(0);
}
// Rotate the top three values: the old top ends up lowest of the three,
// the other two each move up one position.
void emit_rot_three(emitter_t *emit) {
    const int top = REG_R0;
    const int second = REG_R1;
    const int third = REG_R2;
    emit_pre_pop_reg_reg_reg(emit, top, second, third);
    emit_post_push_reg_reg_reg(emit, top, third, second);
}
// Unconditional branch to `label`; any pending push is flushed first.
void emit_jump(emitter_t *emit, int label) {
    emit_pre(emit);
    asm_thumb_b_label(emit->as, label);
    emit_post(emit);
}
// Pop the top value and branch to `label` if it is false.
// With native types the popped value itself is compared; otherwise the
// runtime's is-true function is called and its result is compared.
void emit_pop_jump_if_false(emitter_t *emit, int label) {
    if (emit->do_native_types) {
        emit_pre_pop_reg(emit, REG_RET);
    } else {
        emit_pre_pop_reg(emit, REG_ARG_1);
        emit_call(emit, RT_F_IS_TRUE);
    }
    // in both cases the value to test is now in REG_RET
    asm_thumb_cmp_reg_bz_label(emit->as, REG_RET, label);
    emit_post(emit);
}
// These conditional jumps are not implemented by this emitter: each stub
// only trips assert(0).
void emit_pop_jump_if_true(emitter_t *emit, int label) {
assert(0);
}
void emit_jump_if_true_or_pop(emitter_t *emit, int label) {
assert(0);
}
void emit_jump_if_false_or_pop(emitter_t *emit, int label) {
assert(0);
}
// Loop setup emits no instructions of its own; it only flushes a pending
// push and does the usual bookkeeping. `label` is not used.
void emit_setup_loop(emitter_t *emit, int label) {
    emit_pre(emit);
    emit_post(emit);
}
// Loop control, with/except/finally blocks and iteration are not
// implemented by this emitter: each stub only trips assert(0).
void emit_break_loop(emitter_t *emit, int label) {
assert(0);
}
void emit_continue_loop(emitter_t *emit, int label) {
assert(0);
}
void emit_setup_with(emitter_t *emit, int label) {
assert(0);
}
void emit_with_cleanup(emitter_t *emit) {
assert(0);
}
void emit_setup_except(emitter_t *emit, int label) {
assert(0);
}
void emit_setup_finally(emitter_t *emit, int label) {
assert(0);
}
void emit_end_finally(emitter_t *emit) {
assert(0);
}
void emit_get_iter(emitter_t *emit) {
assert(0);
} // tos = getiter(tos)
void emit_for_iter(emitter_t *emit, int label) {
assert(0);
}
void emit_for_iter_end(emitter_t *emit) {
assert(0);
}
void emit_pop_except(emitter_t *emit) {
assert(0);
}
// Apply unary operator `op` to the top value via the runtime: operand in
// ARG_2, operator code in ARG_1, result pushed (deferred).
void emit_unary_op(emitter_t *emit, rt_unary_op_t op) {
    emit_pre_pop_reg(emit, REG_ARG_2); // arg2 = operand
    emit_call_with_i32_arg(emit, RT_F_UNARY_OP, op, REG_ARG_1); // arg1 = op
    emit_post_push_reg(emit, REG_RET);
}
// Not implemented by this emitter: only trips assert(0).
void emit_build_tuple(emitter_t *emit, int n_args) {
assert(0);
}
// Build a list from the top `n_args` values: the runtime receives the item
// count in ARG_1 and a pointer to the top stack slot in ARG_2.
void emit_build_list(emitter_t *emit, int n_args) {
    emit_pre(emit);
    // pointer to items in reverse order
    emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_2, n_args);
    emit_call_with_i32_arg(emit, RT_F_BUILD_LIST, n_args, REG_ARG_1);
    // new list
    emit_post_push_reg(emit, REG_RET);
}
// Not implemented by this emitter: only trips assert(0).
void emit_list_append(emitter_t *emit, int list_index) {
assert(0);
}
// Call the runtime's build-map function with `n_args` in ARG_1 and push
// the returned map (deferred).
void emit_build_map(emitter_t *emit, int n_args) {
    emit_pre(emit);
    emit_call_with_i32_arg(emit, RT_F_BUILD_MAP, n_args, REG_ARG_1);
    // new map
    emit_post_push_reg(emit, REG_RET);
}
// Pop key (top), value and map into ARG_2, ARG_3 and ARG_1, call the
// runtime's store-map function and push its result (deferred).
void emit_store_map(emitter_t *emit) {
    // key, value, map
    emit_pre_pop_reg_reg_reg(emit, REG_ARG_2, REG_ARG_3, REG_ARG_1);
    emit_call(emit, RT_F_STORE_MAP);
    // map
    emit_post_push_reg(emit, REG_RET);
}
// Not implemented by this emitter: only trips assert(0).
void emit_map_add(emitter_t *emit, int map_index) {
assert(0);
}
// Build a set from the top `n_args` values: the runtime receives the item
// count in ARG_1 and a pointer to the top stack slot in ARG_2.
void emit_build_set(emitter_t *emit, int n_args) {
    emit_pre(emit);
    // pointer to items in reverse order
    emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_2, n_args);
    emit_call_with_i32_arg(emit, RT_F_BUILD_SET, n_args, REG_ARG_1);
    // new set
    emit_post_push_reg(emit, REG_RET);
}
// Set-add, slice building and sequence unpacking are not implemented by
// this emitter: each stub only trips assert(0).
void emit_set_add(emitter_t *emit, int set_index) {
assert(0);
}
void emit_build_slice(emitter_t *emit, int n_args) {
assert(0);
}
void emit_unpack_sequence(emitter_t *emit, int n_args) {
assert(0);
}
void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) {
assert(0);
}
// Create a function object for `scope` by passing its unique code id to
// the runtime. Default and dict parameters are not supported.
void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) {
    assert(n_default_params == 0 && n_dict_params == 0);

    emit_pre(emit);
    emit_call_with_i32_arg(emit, RT_F_MAKE_FUNCTION_FROM_ID, scope->unique_code_id, REG_ARG_1);
    emit_post_push_reg(emit, REG_RET);
}
// Not implemented by this emitter: only trips assert(0).
void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) {
assert(0);
}
// Call a function with 0, 1 or 2 positional arguments through the matching
// runtime entry point and push the result (deferred). Keyword and star
// arguments are not supported.
void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
    assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg);

    switch (n_positional) {
        case 0:
            // the function
            emit_pre_pop_reg(emit, REG_ARG_1);
            emit_call(emit, RT_F_CALL_FUNCTION_0);
            break;
        case 1:
            // the single argument, the function
            emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1);
            emit_call(emit, RT_F_CALL_FUNCTION_1);
            break;
        case 2:
            // the second argument, the first argument, the function
            emit_pre_pop_reg_reg_reg(emit, REG_ARG_3, REG_ARG_2, REG_ARG_1);
            emit_call(emit, RT_F_CALL_FUNCTION_2);
            break;
        default:
            assert(0);
            break;
    }

    emit_post_push_reg(emit, REG_RET);
}
// Call a method with 0 or 1 positional arguments through the matching
// runtime entry point and push the result (deferred). Keyword and star
// arguments are not supported.
void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
    assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg);

    switch (n_positional) {
        case 0:
            // the self object (or NULL), the method
            emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1);
            emit_call(emit, RT_F_CALL_METHOD_1);
            break;
        case 1:
            // the first argument, the self object (or NULL), the method
            emit_pre_pop_reg_reg_reg(emit, REG_ARG_3, REG_ARG_2, REG_ARG_1);
            emit_call(emit, RT_F_CALL_METHOD_2);
            break;
        default:
            assert(0);
            break;
    }

    emit_post_push_reg(emit, REG_RET);
}
// Block pop emits no instructions of its own; it only flushes a pending
// push and does the usual bookkeeping.
void emit_pop_block(emitter_t *emit) {
    emit_pre(emit);
    emit_post(emit);
}
// Apply a binary operator to the top two values and push the result.
// NOTE: the native-types path always emits an add and does not look at
// `op`; only the runtime path passes `op` on.
void emit_binary_op(emitter_t *emit, rt_binary_op_t op) {
    if (!emit->do_native_types) {
        emit_pre_pop_reg_reg(emit, REG_ARG_3, REG_ARG_2);
        emit_call_with_i32_arg(emit, RT_F_BINARY_OP, op, REG_ARG_1);
    } else {
        emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1);
        asm_thumb_add_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, REG_ARG_2);
    }
    emit_post_push_reg(emit, REG_RET);
}
// Compare the top two values and push the result.
// NOTE: the native-types path always emits the same cmp / ite-ge sequence
// (result 1 on the "less than" outcome, 0 otherwise) and does not look at
// `op`; only the runtime path passes `op` on.
void emit_compare_op(emitter_t *emit, rt_compare_op_t op) {
    if (!emit->do_native_types) {
        emit_pre_pop_reg_reg(emit, REG_ARG_3, REG_ARG_2);
        emit_call_with_i32_arg(emit, RT_F_COMPARE_OP, op, REG_ARG_1);
    } else {
        emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1);
        asm_thumb_cmp_reg_reg(emit->as, REG_ARG_1, REG_ARG_2);
        asm_thumb_ite_ge(emit->as);
        asm_thumb_mov_reg_i8(emit->as, REG_RET, 0); // if r0 >= r1
        asm_thumb_mov_reg_i8(emit->as, REG_RET, 1); // if r0 < r1
    }
    emit_post_push_reg(emit, REG_RET);
}
// Pop the top value into the return register, emit the function exit and
// remember that the last operation was a return.
void emit_return_value(emitter_t *emit) {
    emit_pre_pop_reg(emit, REG_RET);
    // must be set after emit_pre_pop_reg, which clears the flag
    emit->last_emit_was_return_value = true;
    //asm_thumb_call_ind(emit->as, 0, REG_R0); to seg fault for debugging with gdb
    asm_thumb_exit(emit->as);
}
// Raise and yield are not implemented by this emitter: each stub only
// trips assert(0).
void emit_raise_varargs(emitter_t *emit, int n_args) {
assert(0);
}
void emit_yield_value(emitter_t *emit) {
assert(0);
}
void emit_yield_from(emitter_t *emit) {
assert(0);
}
#endif // EMIT_DO_THUMB

680
py/emitx64.c Normal file
View File

@ -0,0 +1,680 @@
/* This code is equivalent to emitx64.c but pre-allocates stack
* space and uses mov instead of push/pop instructions to access
* the temporary stack. It runs in similar time, but uses 3*n
* more bytes, where n is number of push/pop instructions.
*
* This code is preferred because it keeps the stack aligned on a
* 16 byte boundary.
*
* Improvements:
* Doesn't call stub functions, does all the work inline.
* Has optimisations for loading i64s to stack.
*/
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "misc.h"
#include "lexer.h"
#include "machine.h"
#include "parse.h"
#include "scope.h"
#include "runtime.h"
#include "emit.h"
#include "asmx64.h"
#ifdef EMIT_DO_X64
// Local variable 0 is kept in REG_LOCAL_1 (see emit_load_fast/emit_store_fast).
#define REG_LOCAL_1 (REG_RBX)
// Number of locals held in registers rather than in frame slots.
#define REG_LOCAL_NUM (1)
// Deferred-push state: the last emitted value may still be pending (in a
// register or as an immediate) instead of already stored to the value stack.
typedef enum {
NEED_TO_PUSH_NOTHING, // nothing pending
NEED_TO_PUSH_R64, // pending value is in register last_r64
NEED_TO_PUSH_I64, // pending value is the immediate last_i64
} need_to_push_t;
// State of the x64 native emitter.
struct _emitter_t {
int pass; // current pass, set by emit_start_pass
int stack_start; // frame slot index of the first value-stack entry
int stack_size; // current depth of the value stack
bool last_emit_was_return_value; // cleared by emit_start_pass and emit_pre_raw; read by emit_end_pass
need_to_push_t need_to_push; // deferred push state
int last_r64; // register holding the pending value (NEED_TO_PUSH_R64)
int64_t last_i64; // pending immediate (NEED_TO_PUSH_I64)
scope_t *scope; // scope being compiled in this pass
asm_x64_t *as; // assembler created in emit_new
bool do_native_types; // selects the inline (non-runtime-call) code paths
};
// Allocate an emitter with its own x64 assembler; native types start disabled.
emitter_t *emit_new() {
    emitter_t *self = m_new(emitter_t, 1);
    self->do_native_types = false;
    self->as = asm_x64_new();
    return self;
}
// Enable or disable the native-types code paths.
void emit_set_native_types(emitter_t *emit, bool do_native_types) {
    emit->do_native_types = do_native_types;
}
// Begin a compilation pass over `scope`: reset the per-pass state, emit the
// function entry and move incoming arguments to their local homes.
void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) {
    // reset per-pass state
    emit->pass = pass;
    emit->scope = scope;
    emit->stack_start = 0;
    emit->stack_size = 0;
    emit->need_to_push = NEED_TO_PUSH_NOTHING;
    emit->last_emit_was_return_value = false;

    if (pass == PASS_1) {
        scope->unique_code_id = rt_get_new_unique_code_id();
    }

    asm_x64_start_pass(emit->as, pass);

    // entry to function: frame holds the locals that are not in registers,
    // followed by the value stack (sizes are only used after pass 1)
    int frame_slots = 0;
    if (pass > PASS_1) {
        int spilled_locals = scope->num_locals - REG_LOCAL_NUM;
        if (spilled_locals < 0) {
            spilled_locals = 0;
        }
        emit->stack_start = spilled_locals;
        frame_slots = spilled_locals + scope->stack_size;
    }
    asm_x64_entry(emit->as, frame_slots);

    // initialise locals from parameters
    for (int i = 0; i < scope->num_params; i++) {
        switch (i) {
            case 0:
                asm_x64_mov_r64_to_r64(emit->as, REG_ARG_1, REG_LOCAL_1);
                break;
            case 1:
                asm_x64_mov_r64_to_local(emit->as, REG_ARG_2, i - 1);
                break;
            case 2:
                asm_x64_mov_r64_to_local(emit->as, REG_ARG_3, i - 1);
                break;
            default:
                // TODO not implemented
                assert(0);
                break;
        }
    }
}
// Finish the current pass: emit an exit if the last op was not a return,
// report a non-empty value stack, and on pass 3 register the generated code.
void emit_end_pass(emitter_t *emit) {
    const bool needs_exit = !emit->last_emit_was_return_value;
    if (needs_exit) {
        asm_x64_exit(emit->as);
    }
    asm_x64_end_pass(emit->as);

    // check stack is back to zero size
    if (emit->stack_size != 0) {
        printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size);
    }

    if (emit->pass != PASS_3) {
        return;
    }
    py_fun_t code = asm_x64_get_code(emit->as);
    rt_assign_native_code(emit->scope->unique_code_id, code, asm_x64_get_code_size(emit->as), emit->scope->num_params);
}
// Report whether the most recently emitted operation was a return.
bool emit_last_emit_was_return_value(emitter_t *emit) {
    const bool was_return = emit->last_emit_was_return_value;
    return was_return;
}
// Current tracked depth of the value stack.
int emit_get_stack_size(emitter_t *emit) {
    const int depth = emit->stack_size;
    return depth;
}
// Overwrite the tracked value-stack depth (no bounds check, no code emitted).
void emit_set_stack_size(emitter_t *emit, int size) {
    emit->stack_size = size;
}
// Change the tracked value-stack depth by `stack_size_delta`; after pass 1
// also record the high-water mark in the scope.
static void adjust_stack(emitter_t *emit, int stack_size_delta) {
    emit->stack_size += stack_size_delta;
    assert(emit->stack_size >= 0);

    if (emit->pass <= PASS_1) {
        return;
    }
    if (emit->scope->stack_size < emit->stack_size) {
        emit->scope->stack_size = emit->stack_size;
    }
}
// Flush a pending (deferred) push to its value-stack slot, if there is one.
static void stack_settle(emitter_t *emit) {
    const need_to_push_t pending = emit->need_to_push;

    if (pending == NEED_TO_PUSH_R64) {
        asm_x64_mov_r64_to_local(emit->as, emit->last_r64, emit->stack_start + emit->stack_size);
        adjust_stack(emit, 1);
    } else if (pending == NEED_TO_PUSH_I64) {
        // materialise the immediate in RAX first, then store it
        asm_x64_mov_i64_to_r64_optimised(emit->as, emit->last_i64, REG_RAX);
        asm_x64_mov_r64_to_local(emit->as, REG_RAX, emit->stack_start + emit->stack_size);
        adjust_stack(emit, 1);
    }

    emit->need_to_push = NEED_TO_PUSH_NOTHING;
}
// Apply `stack_size_delta` to the tracked depth and clear the
// "last emit was a return" flag. Does not touch any pending push.
static void emit_pre_raw(emitter_t *emit, int stack_size_delta) {
    adjust_stack(emit, stack_size_delta);
    emit->last_emit_was_return_value = false;
}
// Common prologue for ops that pop nothing: flush any pending push, then
// do the usual bookkeeping with a zero stack delta.
static void emit_pre(emitter_t *emit) {
    stack_settle(emit);
    emit_pre_raw(emit, 0);
}
// Pop the top value into `r64`. A pending push is consumed directly
// (register move or immediate load) instead of going through the frame.
static void emit_pre_pop_r64(emitter_t *emit, int r64) {
    const need_to_push_t pending = emit->need_to_push;

    if (pending == NEED_TO_PUSH_NOTHING) {
        // value lives in the frame: load the top slot and shrink the stack
        asm_x64_mov_local_to_r64(emit->as, emit->stack_start + emit->stack_size - 1, r64);
        emit_pre_raw(emit, -1);
    } else if (pending == NEED_TO_PUSH_R64) {
        emit_pre_raw(emit, 0);
        if (r64 != emit->last_r64) {
            asm_x64_mov_r64_to_r64(emit->as, emit->last_r64, r64);
        }
    } else if (pending == NEED_TO_PUSH_I64) {
        emit_pre_raw(emit, 0);
        asm_x64_mov_i64_to_r64_optimised(emit->as, emit->last_i64, r64);
    }

    emit->need_to_push = NEED_TO_PUSH_NOTHING;
}
// Pop two values: the top one into `r64a`, the next one into `r64b`.
static void emit_pre_pop_r64_r64(emitter_t *emit, int r64a, int r64b) {
    emit_pre_pop_r64(emit, r64a);

    const int top_slot = emit->stack_start + emit->stack_size - 1;
    asm_x64_mov_local_to_r64(emit->as, top_slot, r64b);
    adjust_stack(emit, -1);
}
// Pop three values: top into `r64a`, then `r64b`, then `r64c`.
static void emit_pre_pop_r64_r64_r64(emitter_t *emit, int r64a, int r64b, int r64c) {
    emit_pre_pop_r64(emit, r64a);

    const int top_slot = emit->stack_start + emit->stack_size - 1;
    asm_x64_mov_local_to_r64(emit->as, top_slot, r64b);
    asm_x64_mov_local_to_r64(emit->as, top_slot - 1, r64c);
    adjust_stack(emit, -2);
}
// Epilogue for ops that push nothing; intentionally does nothing.
static void emit_post(emitter_t *emit) {
}
// Record that the value in `r64` is to be pushed; the store is deferred.
static void emit_post_push_r64(emitter_t *emit, int r64) {
    emit->last_r64 = r64;
    emit->need_to_push = NEED_TO_PUSH_R64;
}
// Record that the immediate `i64` is to be pushed; the store is deferred.
static void emit_post_push_i64(emitter_t *emit, int64_t i64) {
    emit->last_i64 = i64;
    emit->need_to_push = NEED_TO_PUSH_I64;
}
// Push two values: `r64a` is stored to the frame now, `r64b` is left as a
// deferred push.
static void emit_post_push_r64_r64(emitter_t *emit, int r64a, int r64b) {
    const int slot = emit->stack_start + emit->stack_size;
    asm_x64_mov_r64_to_local(emit->as, r64a, slot);

    emit->last_r64 = r64b;
    emit->need_to_push = NEED_TO_PUSH_R64;
    adjust_stack(emit, 1);
}
// Push three registers to consecutive value-stack slots (r64a lowest).
static void emit_post_push_r64_r64_r64(emitter_t *emit, int r64a, int r64b, int r64c) {
    const int base_slot = emit->stack_start + emit->stack_size;
    const int regs[3] = { r64a, r64b, r64c };

    for (int i = 0; i < 3; i++) {
        asm_x64_mov_r64_to_local(emit->as, regs[i], base_slot + i);
    }
    adjust_stack(emit, 3);
}
// Push four registers to consecutive value-stack slots (r64a lowest).
static void emit_post_push_r64_r64_r64_r64(emitter_t *emit, int r64a, int r64b, int r64c, int r64d) {
    const int base_slot = emit->stack_start + emit->stack_size;
    const int regs[4] = { r64a, r64b, r64c, r64d };

    for (int i = 0; i < 4; i++) {
        asm_x64_mov_r64_to_local(emit->as, regs[i], base_slot + i);
    }
    adjust_stack(emit, 4);
}
// Load the address of the current top value-stack slot into `r64`, then
// drop `n_pop` entries from the tracked depth.
static void emit_get_stack_pointer_to_r64_for_pop(emitter_t *emit, int r64, int n_pop) {
    const int top_slot = emit->stack_start + emit->stack_size - 1;
    asm_x64_mov_local_addr_to_r64(emit->as, top_slot, r64);
    adjust_stack(emit, -n_pop);
}
// Load into `r64` the address of the slot that will be on top after
// `n_push` entries are added, then grow the tracked depth by `n_push`.
static void emit_get_stack_pointer_to_r64_for_push(emitter_t *emit, int r64, int n_push) {
    const int new_top_slot = emit->stack_start + emit->stack_size + n_push - 1;
    asm_x64_mov_local_addr_to_r64(emit->as, new_top_slot, r64);
    adjust_stack(emit, n_push);
}
// Emit an indirect call to `fun`, passing RAX as the register argument of
// asm_x64_call_ind.
static void emit_call(emitter_t *emit, void *fun) {
    asm_x64_call_ind(emit->as, fun, REG_RAX);
}
// Load the immediate `arg_val` into `arg_r64`, then emit an indirect call
// to `fun`.
static void emit_call_with_i64_arg(emitter_t *emit, void *fun, int64_t arg_val, int arg_r64) {
    asm_x64_mov_i64_to_r64_optimised(emit->as, arg_val, arg_r64);
    asm_x64_call_ind(emit->as, fun, REG_RAX);
}
// Allocate a new label from the underlying assembler.
int emit_label_new(emitter_t *emit) {
    const int label = asm_x64_label_new(emit->as);
    return label;
}
// Forward label `l` to the assembler's label-assign routine.
void emit_label_assign(emitter_t *emit, int l) {
    asm_x64_label_assign(emit->as, l);
}
// Import operations are not implemented by this emitter: each stub only
// trips assert(0).
void emit_import_name(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_import_from(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_import_star(emitter_t *emit) {
assert(0);
}
// Push the constant object for a None/False/True token as a deferred
// immediate. Any other token is a caller error.
void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) {
    emit_pre(emit);
    py_obj_t o;
    switch (tok) {
        case PY_TOKEN_KW_NONE: o = py_const_none; break;
        case PY_TOKEN_KW_FALSE: o = py_const_false; break;
        case PY_TOKEN_KW_TRUE: o = py_const_true; break;
        default:
            // shouldn't happen; still give `o` a value so it is never read
            // uninitialised when asserts are compiled out (NDEBUG)
            assert(0);
            o = 0;
            break;
    }
    emit_post_push_i64(emit, (uint64_t)o);
}
// Push a small-integer constant as a deferred immediate.
// With native types the raw value is pushed; otherwise it is encoded as
// 2 * value + 1 (i.e. shifted left by one with the low bit set).
void emit_load_const_small_int(emitter_t *emit, int arg) {
    emit_pre(emit);
    if (emit->do_native_types) {
        emit_post_push_i64(emit, arg);
    } else {
        // Widen before tagging: doing the shift in `int` is undefined for
        // negative values and overflows for large ones. In 64 bits,
        // 2 * arg + 1 is always representable and equals (arg << 1) | 1.
        emit_post_push_i64(emit, (int64_t)arg * 2 + 1);
    }
}
// Integer, decimal and identifier constants are not implemented by this
// emitter: each stub only trips assert(0).
void emit_load_const_int(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_load_const_dec(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_load_const_id(emitter_t *emit, qstr qstr) {
assert(0);
}
// Call rt_load_const_str with `qstr` in ARG_1 and leave the returned value
// as a deferred push. `bytes` is not used.
void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) {
    emit_pre(emit);
    emit_call_with_i64_arg(emit, rt_load_const_str, qstr, REG_ARG_1);
    emit_post_push_r64(emit, REG_RET);
}
// The "verbatim" constant interface is not implemented by this emitter:
// each stub only trips assert(0).
void emit_load_const_verbatim_start(emitter_t *emit) {
assert(0);
}
void emit_load_const_verbatim_int(emitter_t *emit, int val) {
assert(0);
}
void emit_load_const_verbatim_str(emitter_t *emit, const char *str) {
assert(0);
}
void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) {
assert(0);
}
void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) {
assert(0);
}
void emit_load_const_verbatim_end(emitter_t *emit) {
assert(0);
}
// Push local variable `local_num`: local 0 lives in REG_LOCAL_1, every
// other local is loaded from frame slot (local_num - 1) via RAX.
void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) {
    emit_pre(emit);

    if (local_num != 0) {
        asm_x64_mov_local_to_r64(emit->as, local_num - 1, REG_RAX);
        emit_post_push_r64(emit, REG_RAX);
        return;
    }
    emit_post_push_r64(emit, REG_LOCAL_1);
}
// Push the result of rt_load_name for `qstr` (deferred).
void emit_load_name(emitter_t *emit, qstr qstr) {
    emit_pre(emit);
    emit_call_with_i64_arg(emit, rt_load_name, qstr, REG_ARG_1); // arg1 = name
    emit_post_push_r64(emit, REG_RET);
}
// Push the result of rt_load_global for `qstr` (deferred).
void emit_load_global(emitter_t *emit, qstr qstr) {
    emit_pre(emit);
    emit_call_with_i64_arg(emit, rt_load_global, qstr, REG_ARG_1); // arg1 = name
    emit_post_push_r64(emit, REG_RET);
}
// Closure variable loads are not implemented by this emitter: each stub
// only trips assert(0).
void emit_load_deref(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_load_closure(emitter_t *emit, qstr qstr) {
assert(0);
}
// Replace the top value (the base object) with its attribute `qstr`,
// obtained through rt_load_attr.
void emit_load_attr(emitter_t *emit, qstr qstr) {
    // arg1 = base
    emit_pre_pop_r64(emit, REG_ARG_1);
    // arg2 = attribute name
    emit_call_with_i64_arg(emit, rt_load_attr, qstr, REG_ARG_2);
    emit_post_push_r64(emit, REG_RET);
}
// Pop the base object and reserve two value-stack slots that
// rt_load_method fills in through the destination pointer passed in ARG_3.
void emit_load_method(emitter_t *emit, qstr qstr) {
    // arg1 = base
    emit_pre_pop_r64(emit, REG_ARG_1);
    // arg3 = dest ptr
    emit_get_stack_pointer_to_r64_for_push(emit, REG_ARG_3, 2);
    // arg2 = method name
    emit_call_with_i64_arg(emit, rt_load_method, qstr, REG_ARG_2);
}
// Not implemented by this emitter: only trips assert(0).
void emit_load_build_class(emitter_t *emit) {
assert(0);
} // basically load __build_class__ from builtins
// Pop the top value into local variable `local_num`: local 0 lives in
// REG_LOCAL_1, every other local is stored to frame slot (local_num - 1).
void emit_store_fast(emitter_t *emit, qstr qstr, int local_num) {
    if (local_num != 0) {
        emit_pre_pop_r64(emit, REG_RAX);
        asm_x64_mov_r64_to_local(emit->as, REG_RAX, local_num - 1);
    } else {
        emit_pre_pop_r64(emit, REG_LOCAL_1);
    }
    emit_post(emit);
}
// Pop the top value into ARG_2 and call rt_store_name with `qstr` in ARG_1.
void emit_store_name(emitter_t *emit, qstr qstr) {
    emit_pre_pop_r64(emit, REG_ARG_2); // arg2 = value
    emit_call_with_i64_arg(emit, rt_store_name, qstr, REG_ARG_1); // arg1 = name
    emit_post(emit);
}
// These store operations are not implemented by this emitter: each stub
// only trips assert(0).
void emit_store_global(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_store_deref(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_store_attr(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_store_locals(emitter_t *emit) {
assert(0);
}
// Pop index (top), base and value into ARG_2, ARG_1 and ARG_3 and call
// rt_store_subscr.
void emit_store_subscr(emitter_t *emit) {
    // index, base, value to store
    emit_pre_pop_r64_r64_r64(emit, REG_ARG_2, REG_ARG_1, REG_ARG_3);
    emit_call(emit, rt_store_subscr);
}
// Delete operations are not implemented by this emitter: each stub only
// trips assert(0).
void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) {
assert(0);
}
void emit_delete_name(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_delete_global(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_delete_deref(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_delete_attr(emitter_t *emit, qstr qstr) {
assert(0);
}
void emit_delete_subscr(emitter_t *emit) {
assert(0);
}
// Duplicate the top value: pop it into RAX, then push RAX twice (the
// second push stays deferred).
void emit_dup_top(emitter_t *emit) {
    const int tmp = REG_RAX;
    emit_pre_pop_r64(emit, tmp);
    emit_post_push_r64_r64(emit, tmp, tmp);
}
// Duplicate the top two values, preserving their order.
void emit_dup_top_two(emitter_t *emit) {
    const int top = REG_RAX;
    const int second = REG_RDI;
    emit_pre_pop_r64_r64(emit, top, second);
    emit_post_push_r64_r64_r64_r64(emit, second, top, second, top);
}
// Discard the top value (it is popped into RAX and not used).
void emit_pop_top(emitter_t *emit) {
    emit_pre_pop_r64(emit, REG_RAX);
    emit_post(emit);
}
// Not implemented by this emitter: only trips assert(0).
void emit_rot_two(emitter_t *emit) {
assert(0);
}
// Rotate the top three values: the old top ends up lowest of the three,
// the other two each move up one position.
void emit_rot_three(emitter_t *emit) {
    const int top = REG_RAX;
    const int second = REG_RDI;
    const int third = REG_RSI;
    emit_pre_pop_r64_r64_r64(emit, top, second, third);
    emit_post_push_r64_r64_r64(emit, top, third, second);
}
// Unconditional jump to `label`; any pending push is flushed first.
void emit_jump(emitter_t *emit, int label) {
    emit_pre(emit);
    asm_x64_jmp_label(emit->as, label);
    emit_post(emit);
}
// Pop the top value and jump to `label` if it is false.
// With native types the popped value itself is tested; otherwise
// rt_is_true is called and its result is tested.
// NOTE(review): the test is emitted with asm_x64_test_r8_with_r8, which by
// its name looks at the low 8 bits only - confirm that is intended for
// native integer values.
void emit_pop_jump_if_false(emitter_t *emit, int label) {
    if (emit->do_native_types) {
        emit_pre_pop_r64(emit, REG_RET);
    } else {
        emit_pre_pop_r64(emit, REG_ARG_1);
        emit_call(emit, rt_is_true);
    }
    // in both cases the value to test is now in REG_RET
    asm_x64_test_r8_with_r8(emit->as, REG_RET, REG_RET);
    asm_x64_jcc_label(emit->as, JCC_JZ, label);
    emit_post(emit);
}
// These conditional jumps are not implemented by this emitter: each stub
// only trips assert(0).
void emit_pop_jump_if_true(emitter_t *emit, int label) {
assert(0);
}
void emit_jump_if_true_or_pop(emitter_t *emit, int label) {
assert(0);
}
void emit_jump_if_false_or_pop(emitter_t *emit, int label) {
assert(0);
}
// Loop setup emits no instructions of its own; it only flushes a pending
// push and does the usual bookkeeping. `label` is not used.
void emit_setup_loop(emitter_t *emit, int label) {
    emit_pre(emit);
    emit_post(emit);
}
// Loop control, with/except/finally blocks and iteration are not
// implemented by this emitter: each stub only trips assert(0).
void emit_break_loop(emitter_t *emit, int label) {
assert(0);
}
void emit_continue_loop(emitter_t *emit, int label) {
assert(0);
}
void emit_setup_with(emitter_t *emit, int label) {
assert(0);
}
void emit_with_cleanup(emitter_t *emit) {
assert(0);
}
void emit_setup_except(emitter_t *emit, int label) {
assert(0);
}
void emit_setup_finally(emitter_t *emit, int label) {
assert(0);
}
void emit_end_finally(emitter_t *emit) {
assert(0);
}
void emit_get_iter(emitter_t *emit) {
assert(0);
} // tos = getiter(tos)
void emit_for_iter(emitter_t *emit, int label) {
assert(0);
}
void emit_for_iter_end(emitter_t *emit) {
assert(0);
}
void emit_pop_except(emitter_t *emit) {
assert(0);
}
// Apply unary operator `op` to the top value via rt_unary_op: operand in
// ARG_2, operator code in ARG_1, result pushed (deferred).
void emit_unary_op(emitter_t *emit, rt_unary_op_t op) {
    emit_pre_pop_r64(emit, REG_ARG_2); // arg2 = operand
    emit_call_with_i64_arg(emit, rt_unary_op, op, REG_ARG_1); // arg1 = op
    emit_post_push_r64(emit, REG_RET);
}
// Not implemented by this emitter: only trips assert(0).
void emit_build_tuple(emitter_t *emit, int n_args) {
assert(0);
}
// Build a list from the top `n_args` values: rt_build_list receives the
// item count in ARG_1 and a pointer to the top stack slot in ARG_2.
void emit_build_list(emitter_t *emit, int n_args) {
    emit_pre(emit);
    // pointer to items in reverse order
    emit_get_stack_pointer_to_r64_for_pop(emit, REG_ARG_2, n_args);
    emit_call_with_i64_arg(emit, rt_build_list, n_args, REG_ARG_1);
    // new list
    emit_post_push_r64(emit, REG_RET);
}
// Not implemented by this emitter: only trips assert(0).
void emit_list_append(emitter_t *emit, int list_index) {
assert(0);
}
// Call rt_build_map with `n_args` in ARG_1 and push the returned map
// (deferred).
void emit_build_map(emitter_t *emit, int n_args) {
    emit_pre(emit);
    emit_call_with_i64_arg(emit, rt_build_map, n_args, REG_ARG_1);
    // new map
    emit_post_push_r64(emit, REG_RET);
}
// Pop key (top), value and map into ARG_2, ARG_3 and ARG_1, call
// rt_store_map and push its result (deferred).
void emit_store_map(emitter_t *emit) {
    // key, value, map
    emit_pre_pop_r64_r64_r64(emit, REG_ARG_2, REG_ARG_3, REG_ARG_1);
    emit_call(emit, rt_store_map);
    // map
    emit_post_push_r64(emit, REG_RET);
}
// Unimplemented in the x64 emitter: asserts unconditionally (a no-op when
// NDEBUG is defined). `map_index` is unused.
void emit_map_add(emitter_t *emit, int map_index) {
assert(0);
}
// Emit a call to rt_build_set; same calling sequence as emit_build_list:
// REG_ARG_1 = n_args (immediate), REG_ARG_2 = pointer to the items, and
// REG_RET is pushed afterwards.
void emit_build_set(emitter_t *emit, int n_args) {
emit_pre(emit);
emit_get_stack_pointer_to_r64_for_pop(emit, REG_ARG_2, n_args); // pointer to items in reverse order
emit_call_with_i64_arg(emit, rt_build_set, n_args, REG_ARG_1);
emit_post_push_r64(emit, REG_RET); // new set
}
// Unimplemented in the x64 emitter: asserts unconditionally (a no-op when
// NDEBUG is defined). `set_index` is unused.
void emit_set_add(emitter_t *emit, int set_index) {
assert(0);
}
// Unimplemented in the x64 emitter: asserts unconditionally (a no-op when
// NDEBUG is defined). `n_args` is unused.
void emit_build_slice(emitter_t *emit, int n_args) {
assert(0);
}
// Unimplemented in the x64 emitter: asserts unconditionally (a no-op when
// NDEBUG is defined). `n_args` is unused.
void emit_unpack_sequence(emitter_t *emit, int n_args) {
assert(0);
}
// Unimplemented in the x64 emitter: asserts unconditionally (a no-op when
// NDEBUG is defined). `n_left` and `n_right` are unused.
void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) {
assert(0);
}
// Emit a call to rt_make_function_from_id with the scope's unique_code_id as an
// immediate in REG_ARG_1, then push REG_RET.
// Default and keyword-only ("dict") parameters are not supported: both counts
// are asserted to be zero (the check vanishes when NDEBUG is defined).
void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) {
assert(n_default_params == 0 && n_dict_params == 0);
emit_pre(emit);
emit_call_with_i64_arg(emit, rt_make_function_from_id, scope->unique_code_id, REG_ARG_1);
emit_post_push_r64(emit, REG_RET);
}
// Unimplemented in the x64 emitter: asserts unconditionally (a no-op when
// NDEBUG is defined). All parameters are unused.
void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) {
assert(0);
}
// Emit a call to a function object with 0, 1 or 2 positional arguments.
//
// Keyword arguments, *args and **kwargs are not supported (asserted). The
// operands are popped so that the callee ends up in REG_ARG_1 and the
// arguments, in call order, in REG_ARG_2 and REG_ARG_3; the matching
// rt_call_function_N helper is then called and REG_RET pushed.
// Any other argument count hits assert(0); with NDEBUG defined that case
// emits only the trailing push.
void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
    assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg);

    switch (n_positional) {
        case 0:
            emit_pre_pop_r64(emit, REG_ARG_1); // the function
            emit_call(emit, rt_call_function_0);
            break;

        case 1:
            emit_pre_pop_r64_r64(emit, REG_ARG_2, REG_ARG_1); // the single argument, the function
            emit_call(emit, rt_call_function_1);
            break;

        case 2:
            emit_pre_pop_r64_r64_r64(emit, REG_ARG_3, REG_ARG_2, REG_ARG_1); // the second argument, the first argument, the function
            emit_call(emit, rt_call_function_2);
            break;

        default:
            // more than two positional arguments is not supported
            assert(0);
            break;
    }

    emit_post_push_r64(emit, REG_RET);
}
// Emit a method call with 0 or 1 positional arguments.
//
// The self object (or NULL) counts as an extra argument for the runtime, so
// n_positional == 0 maps to rt_call_method_1 and n_positional == 1 maps to
// rt_call_method_2. Keyword arguments, *args and **kwargs are not supported
// (asserted); other argument counts hit assert(0).
void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
    assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg);

    switch (n_positional) {
        case 0:
            emit_pre_pop_r64_r64(emit, REG_ARG_2, REG_ARG_1); // the self object (or NULL), the method
            emit_call(emit, rt_call_method_1);
            break;

        case 1:
            emit_pre_pop_r64_r64_r64(emit, REG_ARG_3, REG_ARG_2, REG_ARG_1); // the first argument, the self object (or NULL), the method
            emit_call(emit, rt_call_method_2);
            break;

        default:
            // more than one positional argument is not supported
            assert(0);
            break;
    }

    emit_post_push_r64(emit, REG_RET);
}
// Block pop: calls only the emit_pre/emit_post pair; nothing else is emitted
// by this function itself.
void emit_pop_block(emitter_t *emit) {
emit_pre(emit);
emit_post(emit);
}
// Emit a binary operation.
//
// Generic mode: the two operands are popped into REG_ARG_3 and REG_ARG_2 and
// rt_binary_op is called with `op` as an immediate in REG_ARG_1.
// Native-types mode (emit->do_native_types): only RT_BINARY_OP_ADD is handled
// (asserted) and it becomes a direct register add into REG_RET.
// In both modes REG_RET is pushed as the result.
void emit_binary_op(emitter_t *emit, rt_binary_op_t op) {
    if (!emit->do_native_types) {
        // generic objects: defer to the runtime
        emit_pre_pop_r64_r64(emit, REG_ARG_3, REG_ARG_2);
        emit_call_with_i64_arg(emit, rt_binary_op, op, REG_ARG_1);
    } else {
        // native machine integers: inline add
        assert(op == RT_BINARY_OP_ADD);
        emit_pre_pop_r64_r64(emit, REG_ARG_2, REG_RET);
        asm_x64_add_r64_to_r64(emit->as, REG_ARG_2, REG_RET);
    }
    emit_post_push_r64(emit, REG_RET);
}
// Emit a comparison.
//
// Generic mode: the two operands are popped into REG_ARG_3 and REG_ARG_2 and
// rt_compare_op is called with `op` as an immediate in REG_ARG_1.
// Native-types mode (emit->do_native_types): only RT_COMPARE_OP_LESS is handled
// (asserted); REG_RET is zeroed, the operands compared, and the low byte of
// REG_RET set from the JCC_JL condition. The xor is emitted before the cmp,
// ahead of the instruction that produces the flags setcc reads.
// In both modes REG_RET is pushed as the result.
void emit_compare_op(emitter_t *emit, rt_compare_op_t op) {
    if (!emit->do_native_types) {
        // generic objects: defer to the runtime
        emit_pre_pop_r64_r64(emit, REG_ARG_3, REG_ARG_2);
        emit_call_with_i64_arg(emit, rt_compare_op, op, REG_ARG_1);
    } else {
        // native machine integers: inline compare
        assert(op == RT_COMPARE_OP_LESS);
        emit_pre_pop_r64_r64(emit, REG_ARG_3, REG_ARG_2);
        asm_x64_xor_r64_to_r64(emit->as, REG_RET, REG_RET);
        asm_x64_cmp_r64_with_r64(emit->as, REG_ARG_3, REG_ARG_2);
        asm_x64_setcc_r8(emit->as, JCC_JL, REG_RET);
    }
    emit_post_push_r64(emit, REG_RET);
}
// Emit a function return: the value obtained by emit_pre_pop_r64 goes in
// REG_RAX, the emitter records that its last emission was a return
// (last_emit_was_return_value), and asm_x64_exit emits the exit sequence.
// No emit_post* call follows, unlike the other emit functions in this file.
void emit_return_value(emitter_t *emit) {
emit_pre_pop_r64(emit, REG_RAX);
emit->last_emit_was_return_value = true;
//asm_x64_call_ind(emit->as, 0, REG_RAX); to seg fault for debugging with gdb
asm_x64_exit(emit->as);
}
// Unimplemented in the x64 emitter: asserts unconditionally (a no-op when
// NDEBUG is defined). `n_args` is unused.
void emit_raise_varargs(emitter_t *emit, int n_args) {
assert(0);
}
// Unimplemented in the x64 emitter: asserts unconditionally (a no-op when
// NDEBUG is defined).
void emit_yield_value(emitter_t *emit) {
assert(0);
}
// Unimplemented in the x64 emitter: asserts unconditionally (a no-op when
// NDEBUG is defined).
void emit_yield_from(emitter_t *emit) {
assert(0);
}
#endif // EMIT_DO_X64

300
py/grammar.h Normal file
View File

@ -0,0 +1,300 @@
// rules for writing rules:
// - zero_or_more is implemented using opt_rule around a one_or_more rule
// - don't put opt_rule in arguments of or rule; instead, wrap the call to this or rule in opt_rule
// # Start symbols for the grammar:
// # single_input is a single interactive statement;
// # file_input is a module or sequence of commands read from an input file;
// # eval_input is the input for the eval() functions.
// # NB: compound_stmt in single_input is followed by extra NEWLINE!
// single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
// file_input: (NEWLINE | stmt)* ENDMARKER
// eval_input: testlist NEWLINE* ENDMARKER
// file_input is an optional run of one or more (NEWLINE | stmt) items, i.e. the
// "opt_rule around one_or_more" encoding of zero_or_more. Note that these rules
// do not match an ENDMARKER token themselves.
DEF_RULE(file_input, nc, and(1), opt_rule(file_input_2))
DEF_RULE(file_input_2, c(generic_all_nodes), one_or_more, rule(file_input_3))
DEF_RULE(file_input_3, nc, or(2), tok(NEWLINE), rule(stmt))
// decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
// decorators: decorator+
// decorated: decorators (classdef | funcdef)
// funcdef: 'def' NAME parameters ['->' test] ':' suite
// parameters: '(' [typedargslist] ')'
// typedargslist: tfpdef ['=' test] (',' tfpdef ['=' test])* [',' ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef
// tfpdef: NAME [':' test]
// varargslist: vfpdef ['=' test] (',' vfpdef ['=' test])* [',' ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef
// vfpdef: NAME
// decorator reuses trailer_paren for its optional '(' [arglist] ')' part; the
// dedicated decorator_2 rule below is left commented out.
// funcdef inlines the `parameters` production ('(' [typedargslist] ')');
// funcdef_2 is the optional '->' test return annotation.
DEF_RULE(decorator, nc, and(4), tok(DEL_AT), rule(dotted_name), opt_rule(trailer_paren), tok(NEWLINE))
//DEF_RULE(decorator_2, nc, and(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE))
DEF_RULE(decorators, nc, one_or_more, rule(decorator))
DEF_RULE(decorated, c(decorated), and(2), rule(decorators), rule(decorated_body))
DEF_RULE(decorated_body, nc, or(2), rule(classdef), rule(funcdef))
DEF_RULE(funcdef, c(funcdef), and(8), tok(KW_DEF), tok(NAME), tok(DEL_PAREN_OPEN), opt_rule(typedargslist), tok(DEL_PAREN_CLOSE), opt_rule(funcdef_2), tok(DEL_COLON), rule(suite))
DEF_RULE(funcdef_2, nc, and(2), tok(DEL_MINUS_MORE), rule(test))
// TODO typedargslist lets through more than is allowed
// It is written as a plain comma-separated list of items, where each item may be
// a (possibly annotated/defaulted) name, a '*' item or a '**' item, so the
// ordering constraints of the reference grammar are not enforced here.
// NOTE(review): list_with_end appears to be the list form that tolerates a
// trailing separator -- confirm in the parser.
DEF_RULE(typedargslist, nc, list_with_end, rule(typedargslist_item), tok(DEL_COMMA))
DEF_RULE(typedargslist_item, nc, or(3), rule(typedargslist_name), rule(typedargslist_star), rule(typedargslist_dbl_star))
DEF_RULE(typedargslist_name, nc, and(3), tok(NAME), opt_rule(typedargslist_colon), opt_rule(typedargslist_equal))
DEF_RULE(typedargslist_star, nc, and(2), tok(OP_STAR), opt_rule(tfpdef))
DEF_RULE(typedargslist_dbl_star, nc, and(3), tok(OP_DBL_STAR), tok(NAME), opt_rule(typedargslist_colon))
DEF_RULE(typedargslist_colon, nc, and(2), tok(DEL_COLON), rule(test))
DEF_RULE(typedargslist_equal, nc, and(2), tok(DEL_EQUAL), rule(test))
DEF_RULE(tfpdef, nc, and(2), tok(NAME), opt_rule(typedargslist_colon))
// TODO varargslist lets through more than is allowed
// Same structure as typedargslist but without ':' annotations: a
// comma-separated list whose items are a name with optional default, a '*'
// item, or a '**' item, in any order.
DEF_RULE(varargslist, nc, list_with_end, rule(varargslist_item), tok(DEL_COMMA))
DEF_RULE(varargslist_item, nc, or(3), rule(varargslist_name), rule(varargslist_star), rule(varargslist_dbl_star))
DEF_RULE(varargslist_name, nc, and(2), tok(NAME), opt_rule(varargslist_equal))
DEF_RULE(varargslist_star, nc, and(2), tok(OP_STAR), opt_rule(vfpdef))
DEF_RULE(varargslist_dbl_star, nc, and(2), tok(OP_DBL_STAR), tok(NAME))
DEF_RULE(varargslist_equal, nc, and(2), tok(DEL_EQUAL), rule(test))
DEF_RULE(vfpdef, nc, and(1), tok(NAME))
// stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | simple_stmt
// The compound statement forms are listed directly in stmt (there is no
// separate compound_stmt rule), with simple_stmt as the last alternative.
DEF_RULE(stmt, nc, or(9), rule(if_stmt), rule(while_stmt), rule(for_stmt), rule(try_stmt), rule(with_stmt), rule(funcdef), rule(classdef), rule(decorated), rule(simple_stmt))
// simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
// simple_stmt_2 is the ';'-separated list of small statements; simple_stmt adds
// the terminating NEWLINE token.
DEF_RULE(simple_stmt, nc, and(2), rule(simple_stmt_2), tok(NEWLINE))
DEF_RULE(simple_stmt_2, c(generic_all_nodes), list_with_end, rule(small_stmt), tok(DEL_SEMICOLON))
// small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt
// expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | ('=' (yield_expr|testlist_star_expr))*)
// testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
// augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=' | '**=' | '//='
// # For normal assignments, additional restrictions enforced by the interpreter
// small_stmt lists the keyword-introduced statements first and expr_stmt last.
// expr_stmt_2 is either one augmented assignment or a chain of one or more '='
// assignments; expr_stmt_6 is the shared right-hand side
// (yield_expr | testlist_star_expr).
// NOTE(review): the reference grammar above only allows (yield_expr|testlist)
// after augassign, whereas expr_stmt_6 also admits star_expr there -- confirm
// this is rejected later.
DEF_RULE(small_stmt, nc, or(8), rule(del_stmt), rule(pass_stmt), rule(flow_stmt), rule(import_stmt), rule(global_stmt), rule(nonlocal_stmt), rule(assert_stmt), rule(expr_stmt))
DEF_RULE(expr_stmt, c(expr_stmt), and(2), rule(testlist_star_expr), opt_rule(expr_stmt_2))
DEF_RULE(expr_stmt_2, nc, or(2), rule(expr_stmt_augassign), rule(expr_stmt_assign_list))
DEF_RULE(expr_stmt_augassign, nc, and(2), rule(augassign), rule(expr_stmt_6))
DEF_RULE(expr_stmt_assign_list, nc, one_or_more, rule(expr_stmt_assign))
DEF_RULE(expr_stmt_assign, nc, and(2), tok(DEL_EQUAL), rule(expr_stmt_6))
DEF_RULE(expr_stmt_6, nc, or(2), rule(yield_expr), rule(testlist_star_expr))
DEF_RULE(testlist_star_expr, c(generic_tuple), list_with_end, rule(testlist_star_expr_2), tok(DEL_COMMA))
DEF_RULE(testlist_star_expr_2, nc, or(2), rule(star_expr), rule(test))
DEF_RULE(augassign, nc, or(12), tok(DEL_PLUS_EQUAL), tok(DEL_MINUS_EQUAL), tok(DEL_STAR_EQUAL), tok(DEL_SLASH_EQUAL), tok(DEL_PERCENT_EQUAL), tok(DEL_AMPERSAND_EQUAL), tok(DEL_PIPE_EQUAL), tok(DEL_CARET_EQUAL), tok(DEL_DBL_LESS_EQUAL), tok(DEL_DBL_MORE_EQUAL), tok(DEL_DBL_STAR_EQUAL), tok(DEL_DBL_SLASH_EQUAL))
// del_stmt: 'del' exprlist
// pass_stmt: 'pass'
// flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
// break_stmt: 'break'
// continue_stmt: 'continue'
// return_stmt: 'return' [testlist]
// yield_stmt: yield_expr
// raise_stmt: 'raise' [test ['from' test]]
// del, pass and the flow-control statements. raise_stmt_arg and raise_stmt_from
// together encode the optional [test ['from' test]] part of raise.
DEF_RULE(del_stmt, c(del_stmt), and(2), tok(KW_DEL), rule(exprlist))
DEF_RULE(pass_stmt, c(generic_all_nodes), and(1), tok(KW_PASS))
DEF_RULE(flow_stmt, nc, or(5), rule(break_stmt), rule(continue_stmt), rule(return_stmt), rule(raise_stmt), rule(yield_stmt))
DEF_RULE(break_stmt, c(break_stmt), and(1), tok(KW_BREAK))
DEF_RULE(continue_stmt, c(continue_stmt), and(1), tok(KW_CONTINUE))
DEF_RULE(return_stmt, c(return_stmt), and(2), tok(KW_RETURN), opt_rule(testlist))
DEF_RULE(yield_stmt, c(yield_stmt), and(1), rule(yield_expr))
DEF_RULE(raise_stmt, c(raise_stmt), and(2), tok(KW_RAISE), opt_rule(raise_stmt_arg))
DEF_RULE(raise_stmt_arg, nc, and(2), rule(test), opt_rule(raise_stmt_from))
DEF_RULE(raise_stmt_from, nc, and(2), tok(KW_FROM), rule(test))
// import_stmt: import_name | import_from
// import_name: 'import' dotted_as_names
// import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+) 'import' ('*' | '(' import_as_names ')' | import_as_names)
// import_as_name: NAME ['as' NAME]
// dotted_as_name: dotted_name ['as' NAME]
// import_as_names: import_as_name (',' import_as_name)* [',']
// dotted_as_names: dotted_as_name (',' dotted_as_name)*
// dotted_name: NAME ('.' NAME)*
// global_stmt: 'global' NAME (',' NAME)*
// nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
// assert_stmt: 'assert' test [',' test]
// import, global, nonlocal and assert statements.
// import_from_2 is either a bare dotted_name, or one or more '.'/'...' tokens
// optionally followed by a dotted_name (import_from_2b).
// as_name ('as' NAME) is shared by import_as_name and dotted_as_name, and
// name_list is shared by global_stmt and nonlocal_stmt.
// import_as_names uses list_with_end while dotted_as_names uses list, matching
// the optional trailing ',' in the reference grammar for the former only.
DEF_RULE(import_stmt, nc, or(2), rule(import_name), rule(import_from))
DEF_RULE(import_name, c(import_name), and(2), tok(KW_IMPORT), rule(dotted_as_names))
DEF_RULE(import_from, c(import_from), and(4), tok(KW_FROM), rule(import_from_2), tok(KW_IMPORT), rule(import_from_3))
DEF_RULE(import_from_2, nc, or(2), rule(dotted_name), rule(import_from_2b))
DEF_RULE(import_from_2b, nc, and(2), rule(one_or_more_period_or_ellipses), opt_rule(dotted_name))
DEF_RULE(import_from_3, nc, or(3), tok(OP_STAR), rule(import_as_names_paren), rule(import_as_names))
DEF_RULE(import_as_names_paren, nc, and(3), tok(DEL_PAREN_OPEN), rule(import_as_names), tok(DEL_PAREN_CLOSE))
DEF_RULE(one_or_more_period_or_ellipses, nc, one_or_more, rule(period_or_ellipses))
DEF_RULE(period_or_ellipses, nc, or(2), tok(DEL_PERIOD), tok(ELLIPSES))
DEF_RULE(import_as_name, nc, and(2), tok(NAME), opt_rule(as_name))
DEF_RULE(dotted_as_name, nc, and(2), rule(dotted_name), opt_rule(as_name))
DEF_RULE(as_name, nc, and(2), tok(KW_AS), tok(NAME))
DEF_RULE(import_as_names, nc, list_with_end, rule(import_as_name), tok(DEL_COMMA))
DEF_RULE(dotted_as_names, nc, list, rule(dotted_as_name), tok(DEL_COMMA))
DEF_RULE(dotted_name, nc, list, tok(NAME), tok(DEL_PERIOD))
DEF_RULE(global_stmt, c(global_stmt), and(2), tok(KW_GLOBAL), rule(name_list))
DEF_RULE(nonlocal_stmt, c(nonlocal_stmt), and(2), tok(KW_NONLOCAL), rule(name_list))
DEF_RULE(name_list, nc, list, tok(NAME), tok(DEL_COMMA))
DEF_RULE(assert_stmt, c(assert_stmt), and(3), tok(KW_ASSERT), rule(test), opt_rule(assert_stmt_extra))
DEF_RULE(assert_stmt_extra, nc, and(2), tok(DEL_COMMA), rule(test))
// if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
// while_stmt: 'while' test ':' suite ['else' ':' suite]
// for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
// try_stmt: 'try' ':' suite ((except_clause ':' suite)+ ['else' ':' suite] ['finally' ':' suite] | 'finally' ':' suite)
// # NB compile.c makes sure that the default except clause is last
// except_clause: 'except' [test ['as' NAME]]
// with_stmt: 'with' with_item (',' with_item)* ':' suite
// with_item: test ['as' expr]
// suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
// Compound statements: if / while / for / try / with, and suite.
// else_stmt ('else' ':' suite) is shared by if, while, for and try.
// Despite its name, try_stmt_as_name is the whole optional except-clause body:
// test ['as' NAME].
// suite lists the indented block form (suite_block) before simple_stmt.
DEF_RULE(if_stmt, c(if_stmt), and(6), tok(KW_IF), rule(test), tok(DEL_COLON), rule(suite), opt_rule(if_stmt_elif_list), opt_rule(else_stmt))
DEF_RULE(if_stmt_elif_list, nc, one_or_more, rule(if_stmt_elif))
DEF_RULE(if_stmt_elif, nc, and(4), tok(KW_ELIF), rule(test), tok(DEL_COLON), rule(suite))
DEF_RULE(while_stmt, c(while_stmt), and(5), tok(KW_WHILE), rule(test), tok(DEL_COLON), rule(suite), opt_rule(else_stmt))
DEF_RULE(for_stmt, c(for_stmt), and(7), tok(KW_FOR), rule(exprlist), tok(KW_IN), rule(testlist), tok(DEL_COLON), rule(suite), opt_rule(else_stmt))
DEF_RULE(try_stmt, c(try_stmt), and(4), tok(KW_TRY), tok(DEL_COLON), rule(suite), rule(try_stmt_2))
DEF_RULE(try_stmt_2, nc, or(2), rule(try_stmt_except_and_more), rule(try_stmt_finally))
DEF_RULE(try_stmt_except_and_more, nc, and(3), rule(try_stmt_except_list), opt_rule(else_stmt), opt_rule(try_stmt_finally))
DEF_RULE(try_stmt_except, nc, and(4), tok(KW_EXCEPT), opt_rule(try_stmt_as_name), tok(DEL_COLON), rule(suite))
DEF_RULE(try_stmt_as_name, nc, and(2), rule(test), opt_rule(as_name))
DEF_RULE(try_stmt_except_list, nc, one_or_more, rule(try_stmt_except))
DEF_RULE(try_stmt_finally, nc, and(3), tok(KW_FINALLY), tok(DEL_COLON), rule(suite))
DEF_RULE(else_stmt, nc, and(3), tok(KW_ELSE), tok(DEL_COLON), rule(suite))
DEF_RULE(with_stmt, c(with_stmt), and(4), tok(KW_WITH), rule(with_stmt_list), tok(DEL_COLON), rule(suite))
DEF_RULE(with_stmt_list, nc, list, rule(with_item), tok(DEL_COMMA))
DEF_RULE(with_item, nc, and(2), rule(test), opt_rule(with_item_as))
DEF_RULE(with_item_as, nc, and(2), tok(KW_AS), rule(expr))
DEF_RULE(suite, nc, or(2), rule(suite_block), rule(simple_stmt))
DEF_RULE(suite_block, nc, and(4), tok(NEWLINE), tok(INDENT), rule(suite_block_stmts), tok(DEDENT))
DEF_RULE(suite_block_stmts, c(generic_all_nodes), one_or_more, rule(stmt))
// test: or_test ['if' or_test 'else' test] | lambdef
// test_nocond: or_test | lambdef_nocond
// lambdef: 'lambda' [varargslist] ':' test
// lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
// Conditional expressions and lambdas. test lists lambdef before the
// or_test-based conditional form; test_if_else is the optional
// 'if' or_test 'else' test tail. lambdef_nocond carries the same c(lambdef)
// tag as lambdef and differs only in using test_nocond for its body.
DEF_RULE(test, nc, or(2), rule(lambdef), rule(test_if_expr))
DEF_RULE(test_if_expr, c(test_if_expr), and(2), rule(or_test), opt_rule(test_if_else))
DEF_RULE(test_if_else, nc, and(4), tok(KW_IF), rule(or_test), tok(KW_ELSE), rule(test))
DEF_RULE(test_nocond, nc, or(2), rule(lambdef_nocond), rule(or_test))
DEF_RULE(lambdef, c(lambdef), and(4), tok(KW_LAMBDA), opt_rule(varargslist), tok(DEL_COLON), rule(test))
DEF_RULE(lambdef_nocond, c(lambdef), and(4), tok(KW_LAMBDA), opt_rule(varargslist), tok(DEL_COLON), rule(test_nocond))
// or_test: and_test ('or' and_test)*
// and_test: not_test ('and' not_test)*
// not_test: 'not' not_test | comparison
// comparison: expr (comp_op expr)*
// comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'|'is' 'not'
// star_expr: '*' expr
// expr: xor_expr ('|' xor_expr)*
// xor_expr: and_expr ('^' and_expr)*
// and_expr: shift_expr ('&' shift_expr)*
// shift_expr: arith_expr (('<<'|'>>') arith_expr)*
// arith_expr: term (('+'|'-') term)*
// term: factor (('*'|'/'|'%'|'//') factor)*
// factor: ('+'|'-'|'~') factor | power
// power: atom trailer* ['**' factor]
// Operator precedence chain from or_test down to power.
// Each binary-operator level is a `list` rule whose separator is either the
// operator token itself or a small operator sub-rule (comp_op, shift_op,
// arith_op, term_op).
// comp_op_not_in matches 'not' 'in'; comp_op_is matches 'is' with an optional
// following 'not'.
// power is an atom followed by optional trailers and an optional '**' factor.
DEF_RULE(or_test, c(or_test), list, rule(and_test), tok(KW_OR))
DEF_RULE(and_test, c(and_test), list, rule(not_test), tok(KW_AND))
DEF_RULE(not_test, nc, or(2), rule(not_test_2), rule(comparison))
DEF_RULE(not_test_2, c(not_test_2), and(2), tok(KW_NOT), rule(not_test))
DEF_RULE(comparison, c(comparison), list, rule(expr), rule(comp_op))
DEF_RULE(comp_op, nc, or(9), tok(OP_LESS), tok(OP_MORE), tok(OP_DBL_EQUAL), tok(OP_LESS_EQUAL), tok(OP_MORE_EQUAL), tok(OP_NOT_EQUAL), tok(KW_IN), rule(comp_op_not_in), rule(comp_op_is))
DEF_RULE(comp_op_not_in, nc, and(2), tok(KW_NOT), tok(KW_IN))
DEF_RULE(comp_op_is, nc, and(2), tok(KW_IS), opt_rule(comp_op_is_not))
DEF_RULE(comp_op_is_not, nc, and(1), tok(KW_NOT))
DEF_RULE(star_expr, c(star_expr), and(2), tok(OP_STAR), rule(expr))
DEF_RULE(expr, c(expr), list, rule(xor_expr), tok(OP_PIPE))
DEF_RULE(xor_expr, c(xor_expr), list, rule(and_expr), tok(OP_CARET))
DEF_RULE(and_expr, c(and_expr), list, rule(shift_expr), tok(OP_AMPERSAND))
DEF_RULE(shift_expr, c(shift_expr), list, rule(arith_expr), rule(shift_op))
DEF_RULE(shift_op, nc, or(2), tok(OP_DBL_LESS), tok(OP_DBL_MORE))
DEF_RULE(arith_expr, c(arith_expr), list, rule(term), rule(arith_op))
DEF_RULE(arith_op, nc, or(2), tok(OP_PLUS), tok(OP_MINUS))
DEF_RULE(term, c(term), list, rule(factor), rule(term_op))
DEF_RULE(term_op, nc, or(4), tok(OP_STAR), tok(OP_SLASH), tok(OP_PERCENT), tok(OP_DBL_SLASH))
DEF_RULE(factor, nc, or(2), rule(factor_2), rule(power))
DEF_RULE(factor_2, c(factor_2), and(2), rule(factor_op), rule(factor))
DEF_RULE(factor_op, nc, or(3), tok(OP_PLUS), tok(OP_MINUS), tok(OP_TILDE))
DEF_RULE(power, c(generic_all_nodes), and(3), rule(atom), opt_rule(power_trailers), opt_rule(power_dbl_star))
DEF_RULE(power_trailers, c(power_trailers), one_or_more, rule(trailer))
DEF_RULE(power_dbl_star, c(power_dbl_star), and(2), tok(OP_DBL_STAR), rule(factor))
// atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']' | '{' [dictorsetmaker] '}' | NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False'
// testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
// trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
// Atoms and trailers.
// atom_string accepts one or more STRING or BYTES tokens via string_or_bytes,
// so the grammar itself does not prevent the two kinds from being mixed in one
// run of adjacent literals.
// testlist_comp is a first item followed optionally by either a comp_for or a
// ',' with an optional further comma-separated list (testlist_comp_3b/3c).
DEF_RULE(atom, nc, or(10), tok(NAME), tok(NUMBER), rule(atom_string), tok(ELLIPSES), tok(KW_NONE), tok(KW_TRUE), tok(KW_FALSE), rule(atom_paren), rule(atom_bracket), rule(atom_brace))
DEF_RULE(atom_string, c(atom_string), one_or_more, rule(string_or_bytes))
DEF_RULE(string_or_bytes, nc, or(2), tok(STRING), tok(BYTES))
DEF_RULE(atom_paren, c(atom_paren), and(3), tok(DEL_PAREN_OPEN), opt_rule(atom_2b), tok(DEL_PAREN_CLOSE))
DEF_RULE(atom_2b, nc, or(2), rule(yield_expr), rule(testlist_comp))
DEF_RULE(atom_bracket, c(atom_bracket), and(3), tok(DEL_BRACKET_OPEN), opt_rule(testlist_comp), tok(DEL_BRACKET_CLOSE))
DEF_RULE(atom_brace, c(atom_brace), and(3), tok(DEL_BRACE_OPEN), opt_rule(dictorsetmaker), tok(DEL_BRACE_CLOSE))
DEF_RULE(testlist_comp, nc, and(2), rule(testlist_comp_2), opt_rule(testlist_comp_3))
DEF_RULE(testlist_comp_2, nc, or(2), rule(star_expr), rule(test))
DEF_RULE(testlist_comp_3, nc, or(2), rule(comp_for), rule(testlist_comp_3b))
DEF_RULE(testlist_comp_3b, nc, and(2), tok(DEL_COMMA), opt_rule(testlist_comp_3c))
DEF_RULE(testlist_comp_3c, nc, list_with_end, rule(testlist_comp_2), tok(DEL_COMMA))
DEF_RULE(trailer, nc, or(3), rule(trailer_paren), rule(trailer_bracket), rule(trailer_period))
DEF_RULE(trailer_paren, c(trailer_paren), and(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE))
DEF_RULE(trailer_bracket, c(trailer_bracket), and(3), tok(DEL_BRACKET_OPEN), rule(subscriptlist), tok(DEL_BRACKET_CLOSE))
DEF_RULE(trailer_period, c(trailer_period), and(2), tok(DEL_PERIOD), tok(NAME))
// subscriptlist: subscript (',' subscript)* [',']
// subscript: test | [test] ':' [test] [sliceop]
// sliceop: ':' [test]
// Subscripts and slices.
// subscript_3 is the form that starts with ':' (no lower bound); subscript_2 is
// a test optionally followed by that same ':'-led tail.
// After the first ':', subscript_3b is either another ':' with an optional
// step (subscript_3c) or an upper-bound test with an optional sliceop
// (subscript_3d).
DEF_RULE(subscriptlist, c(generic_tuple), list_with_end, rule(subscript), tok(DEL_COMMA))
DEF_RULE(subscript, nc, or(2), rule(subscript_3), rule(subscript_2))
DEF_RULE(subscript_2, c(subscript_2), and(2), rule(test), opt_rule(subscript_3))
DEF_RULE(subscript_3, c(subscript_3), and(2), tok(DEL_COLON), opt_rule(subscript_3b))
DEF_RULE(subscript_3b, nc, or(2), rule(subscript_3c), rule(subscript_3d))
DEF_RULE(subscript_3c, nc, and(2), tok(DEL_COLON), opt_rule(test))
DEF_RULE(subscript_3d, nc, and(2), rule(test), opt_rule(sliceop))
DEF_RULE(sliceop, nc, and(2), tok(DEL_COLON), opt_rule(test))
// exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
// testlist: test (',' test)* [',']
// dictorsetmaker: (test ':' test (comp_for | (',' test ':' test)* [','])) | (test (comp_for | (',' test)* [',']))
// exprlist, testlist and the dict/set display body.
// dictorsetmaker_item is a test with an optional ':' test, so dict-style and
// set-style items share one rule and may be mixed as far as the grammar is
// concerned (hence the TODO below).
DEF_RULE(exprlist, nc, list_with_end, rule(exprlist_2), tok(DEL_COMMA))
DEF_RULE(exprlist_2, nc, or(2), rule(star_expr), rule(expr))
DEF_RULE(testlist, c(generic_tuple), list_with_end, rule(test), tok(DEL_COMMA))
// TODO dictorsetmaker lets through more than is allowed
DEF_RULE(dictorsetmaker, nc, and(2), rule(dictorsetmaker_item), opt_rule(dictorsetmaker_tail))
DEF_RULE(dictorsetmaker_item, c(dictorsetmaker_item), and(2), rule(test), opt_rule(dictorsetmaker_colon))
DEF_RULE(dictorsetmaker_colon, nc, and(2), tok(DEL_COLON), rule(test))
DEF_RULE(dictorsetmaker_tail, nc, or(2), rule(comp_for), rule(dictorsetmaker_list))
DEF_RULE(dictorsetmaker_list, nc, and(2), tok(DEL_COMMA), opt_rule(dictorsetmaker_list2))
DEF_RULE(dictorsetmaker_list2, nc, list_with_end, rule(dictorsetmaker_item), tok(DEL_COMMA))
// classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
// classdef_2 is the optional parenthesised base list: '(' [arglist] ')'.
DEF_RULE(classdef, c(classdef), and(5), tok(KW_CLASS), tok(NAME), opt_rule(classdef_2), tok(DEL_COLON), rule(suite))
DEF_RULE(classdef_2, nc, and(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE))
// arglist: (argument ',')* (argument [','] | '*' test (',' argument)* [',' '**' test] | '**' test)
// TODO arglist lets through more than is allowed, compiler needs to do further verification
// arglist is a comma-separated list in which every item may independently be a
// '*' test, a '**' test or an ordinary argument; the ordering rules of the
// reference grammar are not expressed here.
DEF_RULE(arglist, c(generic_all_nodes), list_with_end, rule(arglist_2), tok(DEL_COMMA))
DEF_RULE(arglist_2, nc, or(3), rule(arglist_star), rule(arglist_dbl_star), rule(argument))
DEF_RULE(arglist_star, c(arglist_star), and(2), tok(OP_STAR), rule(test))
DEF_RULE(arglist_dbl_star, c(arglist_dbl_star), and(2), tok(OP_DBL_STAR), rule(test))
// # The reason that keywords are test nodes instead of NAME is that using NAME
// # results in an ambiguity. ast.c makes sure it's a NAME.
// argument: test [comp_for] | test '=' test # Really [keyword '='] test
// comp_iter: comp_for | comp_if
// comp_for: 'for' exprlist 'in' or_test [comp_iter]
// comp_if: 'if' test_nocond [comp_iter]
// argument is a test optionally followed by either a comp_for (generator
// argument) or '=' test (keyword argument, argument_3).
// comp_for and comp_if each end with an optional comp_iter, which is how
// comprehension clauses chain.
DEF_RULE(argument, c(argument), and(2), rule(test), opt_rule(argument_2))
DEF_RULE(argument_2, nc, or(2), rule(comp_for), rule(argument_3))
DEF_RULE(argument_3, nc, and(2), tok(DEL_EQUAL), rule(test))
DEF_RULE(comp_iter, nc, or(2), rule(comp_for), rule(comp_if))
DEF_RULE(comp_for, nc, and(5), tok(KW_FOR), rule(exprlist), tok(KW_IN), rule(or_test), opt_rule(comp_iter))
DEF_RULE(comp_if, nc, and(3), tok(KW_IF), rule(test_nocond), opt_rule(comp_iter))
// # not used in grammar, but may appear in "node" passed from Parser to Compiler
// encoding_decl: NAME
// yield_expr: 'yield' [yield_arg]
// yield_arg: 'from' test | testlist
// yield_arg lists the 'from' test form before the plain testlist form.
DEF_RULE(yield_expr, c(yield_expr), and(2), tok(KW_YIELD), opt_rule(yield_arg))
DEF_RULE(yield_arg, nc, or(2), rule(yield_arg_from), rule(testlist))
DEF_RULE(yield_arg_from, nc, and(2), tok(KW_FROM), rule(test))

677
py/lexer.c Normal file
View File

@ -0,0 +1,677 @@
/* lexer.c -- simple tokeniser for Python implementation
*/
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
#include "misc.h"
#include "lexer.h"
#define TAB_SIZE (8)
#define CHR_EOF (-1)
// Complete state of one lexer instance: the source buffer, a three-character
// lookahead window (chr0..chr2), position tracking, the indentation stack and
// two token slots.
struct _py_lexer_t {
const char *name; // (file) name of source
bool free; // free source when done with it
const char *src_beg; // beginning of source
const char *src_cur; // current location in source; points to chr0
const char *src_end; // end (exclusive) of source
unichar chr0, chr1, chr2; // current characters from source
uint line; // source line
uint column; // source column
uint cont_line; // continued line; next_char resets it to `line` at every physical newline
int emit_dent; // pending dent tokens: > 0 means INDENT tokens still to emit, < 0 means DEDENT tokens
int nested_bracket_level; // NEWLINE tokens are only generated while this is 0
uint alloc_indent_level; // allocated capacity (in entries) of indent_level
uint num_indent_level; // number of entries currently in use in indent_level
uint16_t *indent_level; // stack of indentation widths; top is entry num_indent_level - 1
py_token_t tok_cur; // NOTE(review): presumably the current token -- confirm against the rest of the file
py_token_t tok_next; // NOTE(review): presumably the lookahead token following tok_cur -- confirm
};
// Return true when the token text (tok->str, tok->len bytes, not
// NUL-terminated) is exactly equal to the NUL-terminated string `str`.
static bool py_token_is_str(const py_token_t *tok, const char *str) {
    const char *text = tok->str;
    uint n;

    // walk both strings in step while they agree and token bytes remain
    for (n = 0; n < tok->len && text[n] == str[n]; ++n) {
    }

    // equal only if the whole token was consumed and `str` ends right here
    return n == tok->len && str[n] == 0;
}
// Debug helper: print one line describing `tok` to stdout -- its source
// position, kind, continuation line, string pointer and length -- followed by
// the token text, with non-printable characters shown as '?'.
void py_token_show(const py_token_t *tok) {
    printf("(%s:%d:%d) kind:%d cont_line:%d str:%p len:%d", tok->src_name, tok->src_line, tok->src_column, tok->kind, tok->cont_line, tok->str, tok->len);

    if (tok->str != NULL && tok->len > 0) {
        const char *stop = tok->str + tok->len;
        printf(" ");
        // step through the text one decoded character at a time
        for (const char *p = tok->str; p < stop; p = g_utf8_next_char(p)) {
            unichar c = g_utf8_get_char(p);
            if (!g_unichar_isprint(c)) {
                printf("?");
            } else {
                printf("%c", c);
            }
        }
    }

    printf("\n");
}
// Print the "(name:line:column) " location prefix of `tok` to stdout, with a
// trailing space and no newline, so the caller can append its own message.
void py_token_show_error_prefix(const py_token_t *tok) {
printf("(%s:%d:%d) ", tok->src_name, tok->src_line, tok->src_column);
}
// Print `msg` to stdout prefixed with the source location of `tok`, followed by
// a newline. Always returns false, so callers can write
// `return py_token_show_error(...)` from a bool-returning function.
bool py_token_show_error(const py_token_t *tok, const char *msg) {
printf("(%s:%d:%d) %s\n", tok->src_name, tok->src_line, tok->src_column, msg);
return false;
}
// True once the current character is the CHR_EOF sentinel, i.e. all input
// (including any newline synthesised by next_char) has been consumed.
static bool is_end(py_lexer_t *lex) {
return lex->chr0 == CHR_EOF;
}
// True when the current character starts a physical line break (LF or CR).
static bool is_physical_newline(py_lexer_t *lex) {
    switch (lex->chr0) {
        case '\n':
        case '\r':
            return true;
        default:
            return false;
    }
}
// True when the current character (chr0) equals `c`.
static bool is_char(py_lexer_t *lex, char c) {
return lex->chr0 == c;
}
// True when the current character (chr0) equals either `c1` or `c2`.
static bool is_char_or(py_lexer_t *lex, char c1, char c2) {
    if (lex->chr0 == c1) {
        return true;
    }
    return lex->chr0 == c2;
}
// True when the current character (chr0) equals any of `c1`, `c2` or `c3`.
static bool is_char_or3(py_lexer_t *lex, char c1, char c2, char c3) {
    if (lex->chr0 == c1) {
        return true;
    }
    if (lex->chr0 == c2) {
        return true;
    }
    return lex->chr0 == c3;
}
/*
static bool is_char_following(py_lexer_t *lex, char c) {
return lex->chr1 == c;
}
*/
// True when the first lookahead character (chr1) equals either `c1` or `c2`.
static bool is_char_following_or(py_lexer_t *lex, char c1, char c2) {
    if (lex->chr1 == c1) {
        return true;
    }
    return lex->chr1 == c2;
}
// True when the second lookahead character (chr2) equals either `c1` or `c2`.
static bool is_char_following_following_or(py_lexer_t *lex, char c1, char c2) {
    if (lex->chr2 == c1) {
        return true;
    }
    return lex->chr2 == c2;
}
// True when the current character is `c1` and the next one (chr1) is `c2`.
static bool is_char_and(py_lexer_t *lex, char c1, char c2) {
    if (lex->chr0 != c1) {
        return false;
    }
    return lex->chr1 == c2;
}
// True when g_unichar_isspace (declared outside this chunk) classifies the
// current character as whitespace.
static bool is_whitespace(py_lexer_t *lex) {
return g_unichar_isspace(lex->chr0);
}
// True when g_unichar_isalpha (declared outside this chunk) classifies the
// current character as alphabetic.
static bool is_letter(py_lexer_t *lex) {
return g_unichar_isalpha(lex->chr0);
}
// True when g_unichar_isdigit (declared outside this chunk) classifies the
// current character as a digit.
static bool is_digit(py_lexer_t *lex) {
return g_unichar_isdigit(lex->chr0);
}
// Same test as is_digit, but applied to the first lookahead character (chr1).
static bool is_following_digit(py_lexer_t *lex) {
return g_unichar_isdigit(lex->chr1);
}
// TODO UNICODE include unicode characters in definition of identifiers
// True when the current character can start an identifier: a letter (per
// is_letter) or an underscore.
static bool is_head_of_identifier(py_lexer_t *lex) {
    if (is_letter(lex)) {
        return true;
    }
    return lex->chr0 == '_';
}
// TODO UNICODE include unicode characters in definition of identifiers
// True when the current character can continue an identifier: anything that
// can start one, or a digit.
static bool is_tail_of_identifier(py_lexer_t *lex) {
    if (is_head_of_identifier(lex)) {
        return true;
    }
    return is_digit(lex);
}
// Consume the current character and slide the chr0/chr1/chr2 lookahead window
// forward, keeping line/column bookkeeping up to date.
//
// - Does nothing once chr0 is CHR_EOF.
// - LF, CR and CR LF each count as one new line: line is incremented, column
//   reset to 1 and cont_line set to the new line. CR LF consumes two bytes.
// - A tab moves column to the next tab stop (multiples of TAB_SIZE, 1-based).
// - Any other character advances column by one.
// - When the window runs off the end of the source buffer, chr2 is filled with
//   a synthetic '\n' unless chr1 is already a newline character, in which case
//   it becomes CHR_EOF; this guarantees the input ends with a newline.
static void next_char(py_lexer_t *lex) {
    if (lex->chr0 == CHR_EOF) {
        return;
    }

    // number of source bytes to step over
    int remaining = 1;

    const bool at_cr = lex->chr0 == '\r';
    if (at_cr || lex->chr0 == '\n') {
        // LF, CR or CR LF: one new line
        ++lex->line;
        lex->column = 1;
        lex->cont_line = lex->line;
        if (at_cr && lex->chr1 == '\n') {
            // CR LF is a single new line
            remaining = 2;
        }
    } else if (lex->chr0 == '\t') {
        // a tab
        lex->column = (((lex->column - 1 + TAB_SIZE) / TAB_SIZE) * TAB_SIZE) + 1;
    } else {
        // a character worth one column
        lex->column += 1;
    }

    while (remaining-- > 0) {
        lex->chr0 = lex->chr1;
        lex->chr1 = lex->chr2;
        lex->src_cur += 1;
        if (lex->src_cur + 2 < lex->src_end) {
            lex->chr2 = lex->src_cur[2];
        } else if (lex->chr1 == '\n' || lex->chr1 == '\r') {
            // EOF, and the input already ends in a newline
            lex->chr2 = CHR_EOF;
        } else {
            // EOF: insert newline at end of file
            lex->chr2 = '\n';
        }
    }
}
// Push `indent` onto the lexer's indentation stack, doubling the stack's
// allocated capacity first if it is full.
// Entries are stored as uint16_t, so `indent` is narrowed on store.
void indent_push(py_lexer_t *lex, uint indent) {
    if (lex->num_indent_level >= lex->alloc_indent_level) {
        // stack is full: grow it to twice its current capacity
        lex->alloc_indent_level = lex->alloc_indent_level * 2;
        lex->indent_level = m_renew(uint16_t, lex->indent_level, lex->alloc_indent_level);
    }
    lex->indent_level[lex->num_indent_level] = indent;
    lex->num_indent_level += 1;
}
// Return the indentation width on top of the stack without removing it.
// There is no emptiness check: the stack must hold at least one entry.
uint indent_top(py_lexer_t *lex) {
return lex->indent_level[lex->num_indent_level - 1];
}
// Drop the top entry of the indentation stack. There is no underflow check and
// the allocation is not shrunk.
void indent_pop(py_lexer_t *lex) {
lex->num_indent_level -= 1;
}
// Compact encoding of all operator/delimiter spellings:
// <op> = begin with <op>: if this opchar matches then a token begins here
// e<op> = end with <op>: if this opchar matches then the token ends
// E<op> = mandatory end with <op>: this opchar must match, then the token ends
// c<op> = continue with <op>: if this opchar matches then continue matching
// This means that operators starting with the same characters share every
// character up to their last one.
// The token kinds in tok_enc_kind are listed in the same order as the
// operators appear in this string.
static const char *tok_enc =
"()[]{},:;@~" // singles
"<e=c<e=" // < <= << <<=
">e=c>e=" // > >= >> >>=
"*e=c*e=" // * *= ** **=
"+e=" // + +=
"-e=e>" // - -= ->
"&e=" // & &=
"|e=" // | |=
"/e=c/e=" // / /= // //=
"%e=" // % %=
"^e=" // ^ ^=
"=e=" // = ==
"!E=" // !=
".c.E."; // . ...
// TODO static assert that number of tokens is less than 256 so we can safely make this table with byte sized entries
// Token kind for each operator encoded in tok_enc, in the same order as the
// operators appear there (one row per tok_enc line after the singles).
static const uint8_t tok_enc_kind[] = {
PY_TOKEN_DEL_PAREN_OPEN, PY_TOKEN_DEL_PAREN_CLOSE,
PY_TOKEN_DEL_BRACKET_OPEN, PY_TOKEN_DEL_BRACKET_CLOSE,
PY_TOKEN_DEL_BRACE_OPEN, PY_TOKEN_DEL_BRACE_CLOSE,
PY_TOKEN_DEL_COMMA, PY_TOKEN_DEL_COLON, PY_TOKEN_DEL_SEMICOLON, PY_TOKEN_DEL_AT, PY_TOKEN_OP_TILDE,
PY_TOKEN_OP_LESS, PY_TOKEN_OP_LESS_EQUAL, PY_TOKEN_OP_DBL_LESS, PY_TOKEN_DEL_DBL_LESS_EQUAL,
PY_TOKEN_OP_MORE, PY_TOKEN_OP_MORE_EQUAL, PY_TOKEN_OP_DBL_MORE, PY_TOKEN_DEL_DBL_MORE_EQUAL,
PY_TOKEN_OP_STAR, PY_TOKEN_DEL_STAR_EQUAL, PY_TOKEN_OP_DBL_STAR, PY_TOKEN_DEL_DBL_STAR_EQUAL,
PY_TOKEN_OP_PLUS, PY_TOKEN_DEL_PLUS_EQUAL,
PY_TOKEN_OP_MINUS, PY_TOKEN_DEL_MINUS_EQUAL, PY_TOKEN_DEL_MINUS_MORE,
PY_TOKEN_OP_AMPERSAND, PY_TOKEN_DEL_AMPERSAND_EQUAL,
PY_TOKEN_OP_PIPE, PY_TOKEN_DEL_PIPE_EQUAL,
PY_TOKEN_OP_SLASH, PY_TOKEN_DEL_SLASH_EQUAL, PY_TOKEN_OP_DBL_SLASH, PY_TOKEN_DEL_DBL_SLASH_EQUAL,
PY_TOKEN_OP_PERCENT, PY_TOKEN_DEL_PERCENT_EQUAL,
PY_TOKEN_OP_CARET, PY_TOKEN_DEL_CARET_EQUAL,
PY_TOKEN_DEL_EQUAL, PY_TOKEN_OP_DBL_EQUAL,
PY_TOKEN_OP_NOT_EQUAL,
PY_TOKEN_DEL_PERIOD, PY_TOKEN_ELLIPSES,
};
// Keyword spellings; must have the same order as the keyword enum in lexer.h.
// The entries are in ASCII order (capitalised keywords first) and the table is
// terminated by a NULL sentinel.
static const char *tok_kw[] = {
"False",
"None",
"True",
"and",
"as",
"assert",
"break",
"class",
"continue",
"def",
"del",
"elif",
"else",
"except",
"finally",
"for",
"from",
"global",
"if",
"import",
"in",
"is",
"lambda",
"nonlocal",
"not",
"or",
"pass",
"raise",
"return",
"try",
"while",
"with",
"yield",
NULL,
};
// Scan the next token from the lexer's input and store it in *tok.
//
// Steps, in order:
//  1. skip whitespace, comments and backslash line continuations, noting
//     whether a physical newline was crossed;
//  2. record the source position in tok;
//  3. pick the token kind: pending INDENT/DEDENT (lex->emit_dent), NEWLINE,
//     end of input, string/bytes literal, identifier, number, or
//     operator/delimiter;
//  4. set tok->len and convert NAME tokens that spell a keyword.
//
// String/bytes literals return early: their str/len exclude the quotes.
static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok) {
// skip white space, comments and line continuations
bool had_physical_newline = false;
while (!is_end(lex)) {
if (is_physical_newline(lex)) {
had_physical_newline = true;
next_char(lex);
} else if (is_whitespace(lex)) {
next_char(lex);
} else if (is_char(lex, '#')) {
// comment: skip up to, but not including, the end of the line
next_char(lex);
while (!is_end(lex) && !is_physical_newline(lex)) {
next_char(lex);
}
// had_physical_newline will be set on next loop
} else if (is_char(lex, '\\')) {
// backslash (outside string literals) must appear just before a physical newline
next_char(lex);
if (!is_physical_newline(lex)) {
// TODO SyntaxError
assert(0);
} else {
// the newline is consumed without setting had_physical_newline
next_char(lex);
}
} else {
break;
}
}
// record where the token starts; kind and len are filled in below
tok->src_name = lex->name;
tok->src_line = lex->line;
tok->src_column = lex->column;
tok->kind = PY_TOKEN_INVALID;
tok->cont_line = lex->cont_line;
tok->str = lex->src_cur;
tok->len = 0;
if (lex->emit_dent < 0) {
// a negative count means DEDENT tokens are still owed; emit one per call
tok->kind = PY_TOKEN_DEDENT;
lex->emit_dent += 1;
} else if (lex->emit_dent > 0) {
// a positive count means INDENT tokens are still owed; emit one per call
tok->kind = PY_TOKEN_INDENT;
lex->emit_dent -= 1;
} else if (had_physical_newline && lex->nested_bracket_level == 0
&& tok != &lex->tok_cur // so that we don't emit a newline if file starts with a comment
) {
// logical end of line: emit NEWLINE and compare the indentation of the
// upcoming line (its column, zero based) with the top of the indent stack
tok->kind = PY_TOKEN_NEWLINE;
uint num_spaces = lex->column - 1;
lex->emit_dent = 0;
if (num_spaces == indent_top(lex)) {
// same indentation: nothing to emit
} else if (num_spaces > indent_top(lex)) {
// deeper indentation: one INDENT on the next call
indent_push(lex, num_spaces);
lex->emit_dent += 1;
} else {
// shallower indentation: one DEDENT per indent level popped
while (num_spaces < indent_top(lex)) {
indent_pop(lex);
lex->emit_dent -= 1;
}
if (num_spaces != indent_top(lex)) {
// indentation does not match any enclosing level; not yet reported
//SyntaxError
}
}
} else if (is_end(lex)) {
// TODO emit a newline if file does not end in one
if (indent_top(lex) > 0) {
// still inside indented blocks: emit NEWLINE now and schedule a DEDENT
// for every open level; END is produced by a later call
tok->kind = PY_TOKEN_NEWLINE;
lex->emit_dent = 0;
while (indent_top(lex) > 0) {
indent_pop(lex);
lex->emit_dent -= 1;
}
} else {
tok->kind = PY_TOKEN_END;
}
} else if (is_char_or(lex, '\'', '\"')
|| (is_char_or3(lex, 'r', 'u', 'b') && is_char_following_or(lex, '\'', '\"'))
|| ((is_char_and(lex, 'r', 'b') || is_char_and(lex, 'b', 'r')) && is_char_following_following_or(lex, '\'', '\"'))) {
// a string or bytes literal
// parse type codes
bool is_raw = false;
bool is_bytes = false;
if (is_char(lex, 'u')) {
next_char(lex);
} else if (is_char(lex, 'b')) {
is_bytes = true;
next_char(lex);
if (is_char(lex, 'r')) {
is_raw = true;
next_char(lex);
}
} else if (is_char(lex, 'r')) {
is_raw = true;
next_char(lex);
if (is_char(lex, 'b')) {
is_bytes = true;
next_char(lex);
}
}
// set token kind
if (is_bytes) {
tok->kind = PY_TOKEN_BYTES;
} else {
tok->kind = PY_TOKEN_STRING;
}
// get first quoting character
char quote_char = '\'';
if (is_char(lex, '\"')) {
quote_char = '\"';
}
next_char(lex);
// work out if it's a single or triple quoted literal
int num_quotes;
if (is_char_and(lex, quote_char, quote_char)) {
// triple quotes
next_char(lex);
next_char(lex);
num_quotes = 3;
} else {
// single quotes
num_quotes = 1;
}
// set start of token
tok->str = lex->src_cur;
// parse the literal
// TODO proper escaping
// n_closing counts consecutive quote characters seen; the loop stops once
// it reaches num_quotes, at end of input, or (single-quoted only) at '\n'
int n_closing = 0;
while (!is_end(lex) && (num_quotes > 1 || !is_char(lex, '\n')) && n_closing < num_quotes) {
if (is_char(lex, quote_char)) {
n_closing += 1;
} else {
n_closing = 0;
if (!is_raw && is_char(lex, '\\')) {
// non-raw literal: skip the backslash so the escaped character
// (consumed just below) cannot be taken for a closing quote
next_char(lex);
}
}
next_char(lex);
}
// check we got the required end quotes
if (n_closing < num_quotes) {
tok->kind = PY_TOKEN_LONELY_STRING_OPEN;
}
// set token string (byte) length
tok->len = lex->src_cur - tok->str - n_closing;
// we set the length, return now so it's not set incorrectly below
return;
} else if (is_head_of_identifier(lex)) {
// identifier (possibly a keyword, see the end of this function)
tok->kind = PY_TOKEN_NAME;
next_char(lex);
while (!is_end(lex) && is_tail_of_identifier(lex)) {
next_char(lex);
}
} else if (is_digit(lex) || (is_char(lex, '.') && is_following_digit(lex))) {
// number: accepts any run of letters, digits, '_' and '.', plus a sign
// directly after 'e'/'E'; the text is not validated here
tok->kind = PY_TOKEN_NUMBER;
next_char(lex);
while (!is_end(lex)) {
if (is_char_or(lex, 'e', 'E')) {
next_char(lex);
if (is_char(lex, '+') || is_char(lex, '-')) {
next_char(lex);
}
} else if (is_letter(lex) || is_digit(lex) || is_char_or(lex, '_', '.')) {
next_char(lex);
} else {
break;
}
}
} else {
// search for encoded delimiter or operator
// NOTE(review): tok_enc is defined outside this chunk; from the loops below
// it appears to list first characters, each optionally followed by
// 'e'/'c'/'E' codes that introduce a continuation character - confirm
// against the table's definition.  tok_enc_index tracks the matching
// position in tok_enc_kind.
const char *t = tok_enc;
uint tok_enc_index = 0;
for (; *t != 0 && !is_char(lex, *t); t += 1) {
if (*t == 'e' || *t == 'c') {
t += 1;
} else if (*t == 'E') {
tok_enc_index -= 1;
t += 1;
}
tok_enc_index += 1;
}
// the current character is consumed whether or not it matched
next_char(lex);
if (*t == 0) {
// didn't match any delimiter or operator characters
tok->kind = PY_TOKEN_INVALID;
} else {
// matched a delimiter or operator character
// get the maximum characters for a valid token
t += 1;
uint t_index = tok_enc_index;
for (;;) {
// 'e' entries: alternative second characters; take the first that matches
for (; *t == 'e'; t += 1) {
t += 1;
t_index += 1;
if (is_char(lex, *t)) {
next_char(lex);
tok_enc_index = t_index;
break;
}
}
// 'E' entry: the following character is tried without advancing t_index
if (*t == 'E') {
t += 1;
if (is_char(lex, *t)) {
next_char(lex);
tok_enc_index = t_index;
} else {
tok->kind = PY_TOKEN_INVALID;
}
break;
}
// 'c' entry: on a match, go round again to look for further continuations
if (*t == 'c') {
t += 1;
t_index += 1;
if (is_char(lex, *t)) {
next_char(lex);
tok_enc_index = t_index;
t += 1;
} else {
break;
}
} else {
break;
}
}
// set token kind
// (this overwrites the PY_TOKEN_INVALID stored in the 'E' branch above)
tok->kind = tok_enc_kind[tok_enc_index];
// compute bracket level for implicit line joining
if (tok->kind == PY_TOKEN_DEL_PAREN_OPEN || tok->kind == PY_TOKEN_DEL_BRACKET_OPEN || tok->kind == PY_TOKEN_DEL_BRACE_OPEN) {
lex->nested_bracket_level += 1;
} else if (tok->kind == PY_TOKEN_DEL_PAREN_CLOSE || tok->kind == PY_TOKEN_DEL_BRACKET_CLOSE || tok->kind == PY_TOKEN_DEL_BRACE_CLOSE) {
lex->nested_bracket_level -= 1;
}
}
}
// set token string (byte) length
tok->len = lex->src_cur - tok->str;
// check for keywords (must be done after setting token string length)
if (tok->kind == PY_TOKEN_NAME) {
for (int i = 0; tok_kw[i] != NULL; i++) {
if (py_token_is_str(tok, tok_kw[i])) {
tok->kind = PY_TOKEN_KW_FALSE + i;
break;
}
}
}
}
// Create a lexer that reads the `len` bytes starting at `str`.
//
// src_name is stored as given (not copied) and is used to label tokens.
// If free_str is true, py_lexer_free releases `str` with m_free.
// Two tokens are scanned before returning, so the current and the lookahead
// token are both valid as soon as the lexer exists.
py_lexer_t *py_lexer_from_str_len(const char *src_name, const char *str, uint len, bool free_str) {
    py_lexer_t *lex = m_new(py_lexer_t, 1);

    // source text and its ownership
    //lex->name = g_strdup(src_name); // TODO
    lex->name = src_name;
    lex->free = free_str;
    lex->src_beg = str;
    lex->src_cur = str;
    lex->src_end = str + len;

    // position tracking starts at line 1, column 1
    lex->line = 1;
    lex->column = 1;
    lex->cont_line = lex->line;

    // no pending INDENT/DEDENT tokens, not inside any bracket
    lex->emit_dent = 0;
    lex->nested_bracket_level = 0;

    // the indentation stack starts with a single level at column 0
    lex->alloc_indent_level = 16;
    lex->num_indent_level = 1;
    lex->indent_level = m_new(uint16_t, lex->alloc_indent_level);
    lex->indent_level[0] = 0;

    // preload the three-character lookahead window
    // TODO unicode
    // Start from "nothing available" and fill in what the source provides.  For
    // sources shorter than the window, a newline is inserted after the last
    // character unless that character already is a line ending.
    lex->chr0 = CHR_EOF;
    lex->chr1 = CHR_EOF;
    lex->chr2 = CHR_EOF;
    switch (len) {
        case 0:
            lex->chr0 = '\n'; // insert newline at end of file
            break;

        case 1:
            lex->chr0 = str[0];
            if (!(lex->chr0 == '\n' || lex->chr0 == '\r')) {
                lex->chr1 = '\n'; // insert newline at end of file
            }
            break;

        case 2:
            lex->chr0 = str[0];
            lex->chr1 = str[1];
            if (!(lex->chr1 == '\n' || lex->chr1 == '\r')) {
                lex->chr2 = '\n'; // insert newline at end of file
            }
            break;

        default:
            lex->chr0 = str[0];
            lex->chr1 = str[1];
            lex->chr2 = str[2];
            break;
    }

    // prime the current token and the lookahead token
    py_lexer_next_token_into(lex, &lex->tok_cur);
    py_lexer_next_token_into(lex, &lex->tok_next);

    return lex;
}
// Release a lexer created by py_lexer_from_str_len / py_lexer_from_file.
//
// Frees the indentation stack and the lexer itself, plus the source buffer
// when the lexer was created with free_str set.  The source name is not
// freed: the lexer stores the caller's pointer without copying it.
// Passing NULL is allowed and does nothing.
void py_lexer_free(py_lexer_t *lex) {
    if (lex == NULL) {
        return;
    }
    //m_free(lex->name);
    if (lex->free) {
        // the cast drops const: the buffer is owned by the lexer in this case
        m_free((char*)lex->src_beg);
    }
    // the indentation stack is allocated when the lexer is created and was
    // previously leaked here
    m_free(lex->indent_level);
    m_free(lex);
}
// Advance by one token: the lookahead token becomes the current token and a
// fresh lookahead token is scanned from the source.
void py_lexer_to_next(py_lexer_t *lex) {
    py_token_t *lookahead = &lex->tok_next;
    lex->tok_cur = *lookahead;
    py_lexer_next_token_into(lex, lookahead);
}
// Return a pointer to the lexer's current token.  The token is stored inside
// the lexer and is overwritten by py_lexer_to_next.
const py_token_t *py_lexer_cur(const py_lexer_t *lex) {
    const py_token_t *current = &lex->tok_cur;
    return current;
}
// Return true if the lexer's current token has the given kind.
bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind) {
    if (lex->tok_cur.kind != kind) {
        return false;
    }
    return true;
}
/*
bool py_lexer_is_str(py_lexer_t *lex, const char *str) {
return py_token_is_str(&lex->tok_cur, str);
}
bool py_lexer_is_next_kind(py_lexer_t *lex, py_token_kind_t kind) {
return lex->tok_next.kind == kind;
}
bool py_lexer_is_next_str(py_lexer_t *lex, const char *str) {
return py_token_is_str(&lex->tok_next, str);
}
bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind) {
if (py_lexer_is_kind(lex, kind)) {
py_lexer_to_next(lex);
return true;
}
return false;
}
bool py_lexer_opt_str(py_lexer_t *lex, const char *str) {
if (py_lexer_is_str(lex, str)) {
py_lexer_to_next(lex);
return true;
}
return false;
}
*/
bool py_lexer_show_error(py_lexer_t *lex, const char *msg) {
return py_token_show_error(&lex->tok_cur, msg);
}

141
py/lexer.h Normal file
View File

@ -0,0 +1,141 @@
/* lexer.h -- simple tokeniser for Python implementation
*/
#ifndef INCLUDED_LEXER_H
#define INCLUDED_LEXER_H
/* uses (byte) length instead of null termination
* tokens are the same - UTF-8 with (byte) length
*/
// Kinds of token produced by the lexer.
//
// The numeric order matters:
//  - the keyword kinds must follow the order of the lexer's keyword table,
//    which maps keyword i to PY_TOKEN_KW_FALSE + i;
//  - the parser tests `kind >= PY_TOKEN_NAME`, so the structural tokens
//    (END .. DEDENT) must stay below PY_TOKEN_NAME.
// The trailing numbers are the enumerator values, given for reference.
typedef enum _py_token_kind_t {
// structural tokens
PY_TOKEN_END, // 0
PY_TOKEN_INVALID,
PY_TOKEN_LONELY_STRING_OPEN,
PY_TOKEN_NEWLINE, // 3
PY_TOKEN_INDENT, // 4
PY_TOKEN_DEDENT, // 5
// names and literals
PY_TOKEN_NAME, // 6
PY_TOKEN_NUMBER,
PY_TOKEN_STRING,
PY_TOKEN_BYTES,
PY_TOKEN_ELLIPSES,
// keywords, in the same order as the lexer's keyword table
PY_TOKEN_KW_FALSE, // 11
PY_TOKEN_KW_NONE,
PY_TOKEN_KW_TRUE,
PY_TOKEN_KW_AND,
PY_TOKEN_KW_AS,
PY_TOKEN_KW_ASSERT,
PY_TOKEN_KW_BREAK,
PY_TOKEN_KW_CLASS,
PY_TOKEN_KW_CONTINUE,
PY_TOKEN_KW_DEF, // 20
PY_TOKEN_KW_DEL,
PY_TOKEN_KW_ELIF,
PY_TOKEN_KW_ELSE,
PY_TOKEN_KW_EXCEPT,
PY_TOKEN_KW_FINALLY,
PY_TOKEN_KW_FOR,
PY_TOKEN_KW_FROM,
PY_TOKEN_KW_GLOBAL,
PY_TOKEN_KW_IF,
PY_TOKEN_KW_IMPORT, // 30
PY_TOKEN_KW_IN,
PY_TOKEN_KW_IS,
PY_TOKEN_KW_LAMBDA,
PY_TOKEN_KW_NONLOCAL,
PY_TOKEN_KW_NOT,
PY_TOKEN_KW_OR,
PY_TOKEN_KW_PASS,
PY_TOKEN_KW_RAISE,
PY_TOKEN_KW_RETURN,
PY_TOKEN_KW_TRY, // 40
PY_TOKEN_KW_WHILE,
PY_TOKEN_KW_WITH,
PY_TOKEN_KW_YIELD,
// operators
PY_TOKEN_OP_PLUS, // 44
PY_TOKEN_OP_MINUS,
PY_TOKEN_OP_STAR,
PY_TOKEN_OP_DBL_STAR,
PY_TOKEN_OP_SLASH,
PY_TOKEN_OP_DBL_SLASH,
PY_TOKEN_OP_PERCENT,
PY_TOKEN_OP_LESS,
PY_TOKEN_OP_DBL_LESS,
PY_TOKEN_OP_MORE,
PY_TOKEN_OP_DBL_MORE, // 54
PY_TOKEN_OP_AMPERSAND,
PY_TOKEN_OP_PIPE,
PY_TOKEN_OP_CARET,
PY_TOKEN_OP_TILDE,
PY_TOKEN_OP_LESS_EQUAL,
PY_TOKEN_OP_MORE_EQUAL,
PY_TOKEN_OP_DBL_EQUAL,
PY_TOKEN_OP_NOT_EQUAL,
// delimiters; the lexer tracks bracket nesting with the OPEN/CLOSE kinds
PY_TOKEN_DEL_PAREN_OPEN, // 63
PY_TOKEN_DEL_PAREN_CLOSE,
PY_TOKEN_DEL_BRACKET_OPEN,
PY_TOKEN_DEL_BRACKET_CLOSE,
PY_TOKEN_DEL_BRACE_OPEN,
PY_TOKEN_DEL_BRACE_CLOSE,
PY_TOKEN_DEL_COMMA,
PY_TOKEN_DEL_COLON,
PY_TOKEN_DEL_PERIOD,
PY_TOKEN_DEL_SEMICOLON,
PY_TOKEN_DEL_AT, // 73
// assignment delimiters and '->'
PY_TOKEN_DEL_EQUAL,
PY_TOKEN_DEL_PLUS_EQUAL,
PY_TOKEN_DEL_MINUS_EQUAL,
PY_TOKEN_DEL_STAR_EQUAL,
PY_TOKEN_DEL_SLASH_EQUAL,
PY_TOKEN_DEL_DBL_SLASH_EQUAL,
PY_TOKEN_DEL_PERCENT_EQUAL,
PY_TOKEN_DEL_AMPERSAND_EQUAL,
PY_TOKEN_DEL_PIPE_EQUAL,
PY_TOKEN_DEL_CARET_EQUAL, // 83
PY_TOKEN_DEL_DBL_MORE_EQUAL,
PY_TOKEN_DEL_DBL_LESS_EQUAL,
PY_TOKEN_DEL_DBL_STAR_EQUAL,
PY_TOKEN_DEL_MINUS_MORE,
} py_token_kind_t;
// A single token.  The text is a (pointer, byte length) pair and is not
// null terminated; the pointer is taken from the lexer's source position
// rather than from a copy.
typedef struct _py_token_t {
const char *src_name; // (file) name of source
uint src_line; // actual source line
uint src_column; // actual source column
py_token_kind_t kind; // kind of token
uint cont_line; // token belongs to this line in a continued line
const char *str; // string of token
uint len; // (byte) length of string of token
} py_token_t;
typedef struct _py_lexer_t py_lexer_t;
void py_token_show(const py_token_t *tok);
void py_token_show_error_prefix(const py_token_t *tok);
bool py_token_show_error(const py_token_t *tok, const char *msg);
py_lexer_t *py_lexer_from_file(const char *filename);
py_lexer_t *py_lexer_from_str_len(const char *src_name, const char *str, uint len, bool free_str);
void py_lexer_free(py_lexer_t *lex);
void py_lexer_to_next(py_lexer_t *lex);
const py_token_t *py_lexer_cur(const py_lexer_t *lex);
bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind);
/* unused
bool py_lexer_is_str(py_lexer_t *lex, const char *str);
bool py_lexer_is_next_kind(py_lexer_t *lex, py_token_kind_t kind);
bool py_lexer_is_next_str(py_lexer_t *lex, const char *str);
bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind);
bool py_lexer_opt_str(py_lexer_t *lex, const char *str);
*/
bool py_lexer_show_error(py_lexer_t *lex, const char *msg);
#endif /* INCLUDED_LEXER_H */

23
py/lexerfile.c Normal file
View File

@ -0,0 +1,23 @@
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include "misc.h"
#include "lexer.h"
// Create a lexer for the contents of the file `filename`.
//
// The whole file is read into a buffer that the returned lexer owns (it is
// released by py_lexer_free).  `filename` is also used as the source name
// and is stored by pointer, not copied.
// Returns NULL, after printing a message, if the file cannot be opened,
// measured, or read.
py_lexer_t *py_lexer_from_file(const char *filename) {
    // TODO abstract away file functionality
    int fd = open(filename, O_RDONLY);
    if (fd < 0) {
        printf("cannot open file %s\n", filename);
        return NULL;
    }

    // work out the file size; lseek fails on, eg, pipes
    off_t end = lseek(fd, 0, SEEK_END);
    if (end < 0 || lseek(fd, 0, SEEK_SET) < 0) {
        printf("cannot determine size of file %s\n", filename);
        close(fd);
        return NULL;
    }
    // the allocator takes its size as an int, so refuse anything larger
    if (end > 0x7fffffff) {
        printf("file %s is too large\n", filename);
        close(fd);
        return NULL;
    }
    uint size = end;

    // an empty file yields a NULL buffer, which the lexer accepts for len == 0
    char *data = m_new(char, size);
    if (data == NULL && size > 0) {
        // the allocator has already printed a message
        close(fd);
        return NULL;
    }

    // read may return fewer bytes than requested, so loop until done
    uint num_read = 0;
    while (num_read < size) {
        ssize_t n = read(fd, data + num_read, size - num_read);
        if (n < 0) {
            printf("cannot read file %s\n", filename);
            m_free(data);
            close(fd);
            return NULL;
        }
        if (n == 0) {
            // file is shorter than it was when measured; lex what we have
            break;
        }
        num_read += n;
    }
    close(fd);

    return py_lexer_from_str_len(filename, data, num_read, true);
}

4
py/machine.h Normal file
View File

@ -0,0 +1,4 @@
// Machine-level types.  They use the fixed-width types from <stdint.h>,
// which must be included before this header.
// NOTE(review): the integer types are fixed at 64 bits, so the "must be
// pointer size" requirement below only holds on targets with 64-bit
// pointers - confirm before building for a 32-bit target.
typedef int64_t machine_int_t; // must be pointer size
typedef uint64_t machine_uint_t; // must be pointer size
typedef void *machine_ptr_t; // must be of pointer size
typedef double machine_float_t;

58
py/main.c Normal file
View File

@ -0,0 +1,58 @@
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "misc.h"
#include "lexer.h"
#include "machine.h"
#include "parse.h"
#include "compile.h"
#include "runtime.h"
// Entry point: lex, parse and compile the Python file named on the command
// line, then run the resulting module function and print its return value.
// Returns 0 on success and 1 on a usage error or if the file cannot be read.
int main(int argc, char **argv) {
    // validate the command line before initialising anything
    if (argc != 2) {
        printf("usage: py <file>\n");
        return 1;
    }

    qstr_init();
    rt_init();

    py_lexer_t *lex = py_lexer_from_file(argv[1]);
    //const char *pysrc = "def f():\n x=x+1\n print(42)\n";
    //py_lexer_t *lex = py_lexer_from_str_len("<>", pysrc, strlen(pysrc), false);
    if (lex == NULL) {
        // the lexer has already printed a message; undo rt_init before leaving
        rt_deinit();
        return 1;
    }

    if (0) {
        // debugging aid: just dump the token stream
        while (!py_lexer_is_kind(lex, PY_TOKEN_END)) {
            py_token_show(py_lexer_cur(lex));
            py_lexer_to_next(lex);
        }
    } else {
        py_parse_node_t pn = py_parse(lex, 0);
        //printf("----------------\n");
        //parse_node_show(pn, 0);
        //printf("----------------\n");
        py_compile(pn);
        //printf("----------------\n");
    }

    py_lexer_free(lex);

    if (1) {
        // execute it
        py_obj_t module_fun = rt_make_function_from_id(1);
        if (module_fun != py_const_none) {
            py_obj_t ret = rt_call_function_0(module_fun);
            printf("done! got: ");
            py_obj_print(ret);
            printf("\n");
        }
    }

    rt_deinit();

    //printf("total bytes = %d\n", m_get_total_bytes_allocated());

    return 0;
}

56
py/malloc.c Normal file
View File

@ -0,0 +1,56 @@
#include <stdio.h>
#include <stdlib.h>
#include "misc.h"
static int total_bytes_allocated = 0;
// Release memory obtained from m_malloc, m_malloc0 or m_realloc.
// A NULL pointer is accepted and ignored (free itself does nothing for NULL).
void m_free(void *ptr) {
    free(ptr);
}
// Allocate num_bytes of uninitialised memory.
// Returns NULL for a zero-byte request, and NULL (after printing a message)
// if the allocation fails.  Successful requests are added to the running
// total reported by m_get_total_bytes_allocated.
void *m_malloc(int num_bytes) {
    void *ptr = NULL;
    if (num_bytes != 0) {
        ptr = malloc(num_bytes);
        if (ptr != NULL) {
            total_bytes_allocated += num_bytes;
        } else {
            printf("could not allocate memory, allocating %d bytes\n", num_bytes);
        }
    }
    return ptr;
}
// Allocate num_bytes of zero-filled memory.
// Returns NULL for a zero-byte request, and NULL (after printing a message)
// if the allocation fails.  Successful requests are added to the running
// total reported by m_get_total_bytes_allocated.
void *m_malloc0(int num_bytes) {
    void *ptr = NULL;
    if (num_bytes != 0) {
        ptr = calloc(1, num_bytes);
        if (ptr != NULL) {
            total_bytes_allocated += num_bytes;
        } else {
            printf("could not allocate memory, allocating %d bytes\n", num_bytes);
        }
    }
    return ptr;
}
// Resize the allocation at ptr to num_bytes and return its new address.
// A size of zero frees ptr and returns NULL.  If resizing fails a message is
// printed and NULL is returned.  On success the full new size (not the
// difference) is added to the running allocation total.
void *m_realloc(void *ptr, int num_bytes) {
    if (num_bytes != 0) {
        void *resized = realloc(ptr, num_bytes);
        if (resized != NULL) {
            total_bytes_allocated += num_bytes;
            return resized;
        }
        printf("could not allocate memory, reallocating %d bytes\n", num_bytes);
        return NULL;
    }
    free(ptr);
    return NULL;
}
// Return the sum of the sizes of all successful m_malloc, m_malloc0 and
// m_realloc requests so far.  Freed memory is not subtracted.
int m_get_total_bytes_allocated() {
    int total = total_bytes_allocated;
    return total;
}

84
py/misc.c Normal file
View File

@ -0,0 +1,84 @@
#include <stdint.h>
#include <string.h>
#include "misc.h"
// attribute flags
#define FL_PRINT (0x01)
#define FL_SPACE (0x02)
#define FL_DIGIT (0x04)
#define FL_ALPHA (0x08)
#define FL_UPPER (0x10)
#define FL_LOWER (0x20)
// shorthand character attributes
#define AT_PR (FL_PRINT)
#define AT_SP (FL_SPACE | FL_PRINT)
#define AT_DI (FL_DIGIT | FL_PRINT)
#define AT_AL (FL_ALPHA | FL_PRINT)
#define AT_UP (FL_UPPER | FL_ALPHA | FL_PRINT)
#define AT_LO (FL_LOWER | FL_ALPHA | FL_PRINT)
// table of attributes for ascii characters
// One entry per character code 0..127, eight codes per row.  Codes 9, 10, 11
// and 13 (tab, newline, vertical tab, carriage return) and 32 (space) are
// marked as space; code 12 (form feed) is not.
static const uint8_t attr[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, AT_SP, AT_SP, AT_SP, 0, AT_SP, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
AT_SP, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR,
AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR,
AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, AT_DI,
AT_DI, AT_DI, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR,
AT_PR, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP,
AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP,
AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP,
AT_UP, AT_UP, AT_UP, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR,
AT_PR, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO,
AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO,
AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO,
AT_LO, AT_LO, AT_LO, AT_PR, AT_PR, AT_PR, AT_PR, 0
};
// Return the character at s.  Only the single byte at s is read; multi-byte
// UTF-8 sequences are not decoded.
// NOTE(review): the byte is converted from plain char, so where char is
// signed a byte >= 0x80 comes back as a negative value.
unichar g_utf8_get_char(const char *s) {
    unichar first = s[0];
    return first;
}
// Return a pointer to the character after the one at s.  Every character is
// treated as one byte wide; the const qualifier is dropped from the result.
char *g_utf8_next_char(const char *s) {
    const char *next = s + 1;
    return (char*)next;
}
// Return true if c is an ASCII white-space character according to the attr
// table.  Values outside 0..127 are never white space; the lower bound
// matters because unichar is signed and a negative value must not be used
// as a table index.
bool g_unichar_isspace(unichar c) {
    return 0 <= c && c < 128 && (attr[c] & FL_SPACE) != 0;
}
// Return true if c is an ASCII letter according to the attr table.  Values
// outside 0..127 are never letters; the lower bound matters because unichar
// is signed and a negative value must not be used as a table index.
bool g_unichar_isalpha(unichar c) {
    return 0 <= c && c < 128 && (attr[c] & FL_ALPHA) != 0;
}
// Return true if c is flagged printable in the attr table.  Values outside
// 0..127 are never printable; the lower bound matters because unichar is
// signed and a negative value must not be used as a table index.
bool g_unichar_isprint(unichar c) {
    return 0 <= c && c < 128 && (attr[c] & FL_PRINT) != 0;
}
// Return true if c is an ASCII decimal digit according to the attr table.
// Values outside 0..127 are never digits; the lower bound matters because
// unichar is signed and a negative value must not be used as a table index.
bool g_unichar_isdigit(unichar c) {
    return 0 <= c && c < 128 && (attr[c] & FL_DIGIT) != 0;
}
/*
bool char_is_alpha_or_digit(unichar c) {
return c < 128 && (attr[c] & (FL_ALPHA | FL_DIGIT)) != 0;
}
bool char_is_upper(unichar c) {
return c < 128 && (attr[c] & FL_UPPER) != 0;
}
bool char_is_lower(unichar c) {
return c < 128 && (attr[c] & FL_LOWER) != 0;
}
*/
/*
char *g_strdup(const char *s) {
return strdup(s);
}
*/

91
py/misc.h Normal file
View File

@ -0,0 +1,91 @@
// a mini library of useful types and functions
#ifndef _INCLUDED_MINILIB_H
#define _INCLUDED_MINILIB_H
/** types *******************************************************/
typedef int bool;
enum {
false = 0,
true = 1
};
typedef unsigned char byte;
typedef unsigned int uint;
/** memory allocation *******************************************/
#define m_new(type, num) ((type*)(m_malloc(sizeof(type) * (num))))
#define m_new0(type, num) ((type*)(m_malloc0(sizeof(type) * (num))))
#define m_renew(type, ptr, num) ((type*)(m_realloc((ptr), sizeof(type) * (num))))
void m_free(void *ptr);
void *m_malloc(int num_bytes);
void *m_malloc0(int num_bytes);
void *m_realloc(void *ptr, int num_bytes);
int m_get_total_bytes_allocated();
/** unichar / UTF-8 *********************************************/
typedef int unichar; // TODO
unichar g_utf8_get_char(const char *s);
char *g_utf8_next_char(const char *s);
bool g_unichar_isspace(unichar c);
bool g_unichar_isalpha(unichar c);
bool g_unichar_isprint(unichar c);
bool g_unichar_isdigit(unichar c);
//char *g_strdup(const char *s);
/** blob ********************************************************/
/*
unsigned short decode_le16(byte *buf);
unsigned int decode_le32(byte *buf);
void encode_le16(byte *buf, unsigned short i);
void encode_le32(byte *buf, unsigned int i);
*/
/** string ******************************************************/
/*
#define streq(s1, s2) (strcmp((s1), (s2)) == 0)
*/
/** variable string *********************************************/
/*
typedef struct _vstr_t vstr_t;
vstr_t *vstr_new();
void vstr_free(vstr_t *vstr);
void vstr_reset(vstr_t *vstr);
bool vstr_had_error(vstr_t *vstr);
char *vstr_str(vstr_t *vstr);
int vstr_len(vstr_t *vstr);
void vstr_hint_size(vstr_t *vstr, int size);
char *vstr_add_len(vstr_t *vstr, int len);
void vstr_add_str(vstr_t *vstr, const char *str);
void vstr_add_strn(vstr_t *vstr, const char *str, int len);
void vstr_add_byte(vstr_t *vstr, byte v);
void vstr_add_le16(vstr_t *vstr, unsigned short v);
void vstr_add_le32(vstr_t *vstr, unsigned int v);
void vstr_cut_tail(vstr_t *vstr, int len);
void vstr_printf(vstr_t *vstr, const char *fmt, ...);
*/
/** unique string ***********************************************/
typedef unsigned int qstr;
void qstr_init();
qstr qstr_from_str_static(const char *str);
qstr qstr_from_str_take(char *str);
qstr qstr_from_strn_copy(const char *str, int len);
const char* qstr_str(qstr qstr);
#endif // _INCLUDED_MINILIB_H

565
py/parse.c Normal file
View File

@ -0,0 +1,565 @@
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <assert.h>
#include "misc.h"
#include "lexer.h"
#include "machine.h"
#include "parse.h"
#define RULE_ACT_KIND_MASK (0xf0)
#define RULE_ACT_ARG_MASK (0x0f)
#define RULE_ACT_OR (0x10)
#define RULE_ACT_AND (0x20)
#define RULE_ACT_LIST (0x30)
#define RULE_ARG_BLANK (0x0000)
#define RULE_ARG_KIND_MASK (0xf000)
#define RULE_ARG_ARG_MASK (0x0fff)
#define RULE_ARG_TOK (0x1000)
#define RULE_ARG_RULE (0x2000)
#define RULE_ARG_OPT_TOK (0x3000)
#define RULE_ARG_OPT_RULE (0x4000)
// (un)comment to use rule names; for debugging
//#define USE_RULE_NAME (1)
// One grammar rule.
typedef struct _rule_t {
// identifies the rule; also its index in the rules[] table
byte rule_id;
// action: a RULE_ACT_* kind in the high nibble, argument count or list
// variant in the low nibble (see RULE_ACT_KIND_MASK / RULE_ACT_ARG_MASK)
byte act;
#ifdef USE_RULE_NAME
// rule name as text, for debugging output
const char *rule_name;
#endif
// the rule's arguments: each is a RULE_ARG_* kind combined with a token
// kind or a rule id (see RULE_ARG_KIND_MASK / RULE_ARG_ARG_MASK)
uint16_t arg[];
} rule_t;
// Rule identifiers: one RULE_<name> enumerator for every DEF_RULE entry in
// grammar.h, numbered from 1 in the order they appear there.  RULE_none (0)
// is a placeholder and RULE_maximum_number_of is one past the last real rule.
enum {
RULE_none = 0,
#define DEF_RULE(rule, comp, kind, arg...) RULE_##rule,
#include "grammar.h"
#undef DEF_RULE
RULE_maximum_number_of,
};
// Define one static rule_t object, rule_<name>, for every DEF_RULE entry in
// grammar.h.  The short macros below exist only while grammar.h is being
// included and are removed again afterwards.

// rule actions: a RULE_ACT_* kind combined with an argument count (or/and)
// or with a list variant number
#define or(n) (RULE_ACT_OR | n)
#define and(n) (RULE_ACT_AND | n)
#define one_or_more (RULE_ACT_LIST | 2)
#define list (RULE_ACT_LIST | 1)
#define list_with_end (RULE_ACT_LIST | 3)
// rule arguments: a RULE_ARG_* kind combined with a token kind or a rule id
#define tok(t) (RULE_ARG_TOK | PY_TOKEN_##t)
#define rule(r) (RULE_ARG_RULE | RULE_##r)
#define opt_tok(t) (RULE_ARG_OPT_TOK | PY_TOKEN_##t)
#define opt_rule(r) (RULE_ARG_OPT_RULE | RULE_##r)
// the initialiser includes the rule's name only when USE_RULE_NAME is
// defined, matching the layout of rule_t; the `comp` argument is not used here
#ifdef USE_RULE_NAME
#define DEF_RULE(rule, comp, kind, arg...) static rule_t rule_##rule = { RULE_##rule, kind, #rule, { arg } };
#else
#define DEF_RULE(rule, comp, kind, arg...) static rule_t rule_##rule = { RULE_##rule, kind, { arg } };
#endif
#include "grammar.h"
#undef or
#undef and
#undef list
#undef list_with_end
#undef tok
#undef rule
#undef opt_tok
#undef opt_rule
#undef one_or_more
#undef DEF_RULE
// Table mapping a rule id to its rule_t.  Entry 0 (RULE_none) is NULL; the
// remaining entries follow the order of the DEF_RULE entries in grammar.h,
// which is also the order of the RULE_* enumerators.
static rule_t *rules[] = {
NULL,
#define DEF_RULE(rule, comp, kind, arg...) &rule_##rule,
#include "grammar.h"
#undef DEF_RULE
};
// One entry of the parser's rule stack: a rule that is part-way through
// being matched.
typedef struct _rule_stack_t {
// id of the suspended rule (index into rules[])
byte rule_id;
// index of the argument at which to resume the rule
int32_t arg_i; // what should be the size and signedness?
} rule_stack_t;
// Parser state: a stack of suspended rules and a stack of result nodes.
typedef struct _parser_t {
// number of entries allocated for rule_stack (doubled by push_rule when full)
uint rule_stack_alloc;
// number of entries currently on rule_stack
uint rule_stack_top;
rule_stack_t *rule_stack;
// number of nodes currently on result_stack
uint result_stack_top;
// NOTE(review): no allocated size is recorded for result_stack, so pushes
// onto it cannot be bounds checked
py_parse_node_t *result_stack;
} parser_t;
// Push `rule` on the rule stack so that it is resumed later at argument
// index arg_i.  The stack's capacity is doubled when it is full.
static void push_rule(parser_t *parser, rule_t *rule, int arg_i) {
    uint top = parser->rule_stack_top;

    // grow the stack if there is no room for another entry
    if (top >= parser->rule_stack_alloc) {
        parser->rule_stack_alloc *= 2;
        parser->rule_stack = m_renew(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc);
    }

    rule_stack_t *slot = &parser->rule_stack[top];
    slot->rule_id = rule->rule_id;
    slot->arg_i = arg_i;

    parser->rule_stack_top = top + 1;
}
// Push the rule named by a rule argument, to be started from its first
// argument.  `arg` must be of kind RULE_ARG_RULE or RULE_ARG_OPT_RULE; its
// low bits hold the rule id.
static void push_rule_from_arg(parser_t *parser, uint arg) {
    assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);

    uint rule_id = arg & RULE_ARG_ARG_MASK;
    assert(rule_id < RULE_maximum_number_of);

    rule_t *target = rules[rule_id];
    push_rule(parser, target, 0);
}
// Pop the top entry of the rule stack, storing the rule in *rule and the
// argument index at which to resume it in *arg_i.
static void pop_rule(parser_t *parser, rule_t **rule, uint *arg_i) {
    const rule_stack_t *entry = &parser->rule_stack[--parser->rule_stack_top];
    *rule = rules[entry->rule_id];
    *arg_i = entry->arg_i;
}
// Build a leaf parse node: `kind` occupies the low 4 bits and `arg` is
// stored in the bits above them.
py_parse_node_t py_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) {
    machine_int_t packed = kind | (arg << 4);
    return (py_parse_node_t)packed;
}
// number of struct parse nodes created so far
int num_parse_nodes_allocated = 0;

// Allocate a struct parse node for rule rule_id with room for num_args child
// nodes.  The children are left uninitialised for the caller to fill in.
// The rule id is kept in the low 8 bits of kind_num_nodes and the child
// count in the bits above.
py_parse_node_struct_t *parse_node_new_struct(int rule_id, int num_args) {
    py_parse_node_struct_t *pn = m_malloc(sizeof(py_parse_node_struct_t) + num_args * sizeof(py_parse_node_t));

    num_parse_nodes_allocated += 1;

    pn->kind_num_nodes = (rule_id & 0xff) | (num_args << 8);
    pn->source = 0; // TODO

    return pn;
}
// Print the parse tree rooted at pn to stdout, one node per line, with each
// line preceded by `indent` spaces.  Children of a struct node are printed
// recursively with two more spaces of indentation.
void parse_node_show(py_parse_node_t pn, int indent) {
    // indentation
    for (int pad = indent; pad > 0; pad--) {
        printf(" ");
    }

    if (PY_PARSE_NODE_IS_NULL(pn)) {
        printf("NULL\n");
        return;
    }

    if (PY_PARSE_NODE_IS_LEAF(pn)) {
        // a leaf: print its kind together with its argument
        int arg = PY_PARSE_NODE_LEAF_ARG(pn);
        switch (PY_PARSE_NODE_LEAF_KIND(pn)) {
            case PY_PARSE_NODE_ID:
                printf("id(%s)\n", qstr_str(arg));
                break;
            case PY_PARSE_NODE_SMALL_INT:
                printf("int(%d)\n", arg);
                break;
            case PY_PARSE_NODE_INTEGER:
                printf("int(%s)\n", qstr_str(arg));
                break;
            case PY_PARSE_NODE_DECIMAL:
                printf("dec(%s)\n", qstr_str(arg));
                break;
            case PY_PARSE_NODE_STRING:
                printf("str(%s)\n", qstr_str(arg));
                break;
            case PY_PARSE_NODE_BYTES:
                printf("bytes(%s)\n", qstr_str(arg));
                break;
            case PY_PARSE_NODE_TOKEN:
                printf("tok(%d)\n", arg);
                break;
            default:
                assert(0);
        }
        return;
    }

    // a struct node: print the rule and then each child
    py_parse_node_struct_t *pns2 = (py_parse_node_struct_t*)pn;
    int n = pns2->kind_num_nodes >> 8;
#ifdef USE_RULE_NAME
    printf("%s(%d) (n=%d)\n", rules[PY_PARSE_NODE_STRUCT_KIND(pns2)]->rule_name, PY_PARSE_NODE_STRUCT_KIND(pns2), n);
#else
    printf("rule(%u) (n=%d)\n", (uint)PY_PARSE_NODE_STRUCT_KIND(pns2), n);
#endif
    int child = 0;
    while (child < n) {
        parse_node_show(pns2->nodes[child], indent + 2);
        child += 1;
    }
}
/*
static void result_stack_show(parser_t *parser) {
printf("result stack, most recent first\n");
for (int i = parser->result_stack_top - 1; i >= 0; i--) {
parse_node_show(parser->result_stack[i], 0);
}
}
*/
// Remove the most recently pushed node from the result stack and return it.
// The stack must not be empty.
static py_parse_node_t pop_result(parser_t *parser) {
    assert(parser->result_stack_top > 0);
    parser->result_stack_top -= 1;
    return parser->result_stack[parser->result_stack_top];
}
// Return, without removing it, the node `pos` places below the top of the
// result stack (pos 0 is the most recently pushed node).
static py_parse_node_t peek_result(parser_t *parser, int pos) {
    assert(parser->result_stack_top > pos);
    uint index = parser->result_stack_top - 1 - pos;
    return parser->result_stack[index];
}
// Push pn on the result stack.  No capacity check is made here.
static void push_result_node(parser_t *parser, py_parse_node_t pn) {
    uint top = parser->result_stack_top;
    parser->result_stack[top] = pn;
    parser->result_stack_top = top + 1;
}
// Convert the lexer's current token to a leaf parse node and push it on the
// result stack.
//
//  - NAME           -> PY_PARSE_NODE_ID holding the interned text
//  - NUMBER         -> PY_PARSE_NODE_SMALL_INT holding the value, when the
//                      text is a plain integer (decimal, 0o, 0x or 0b) no
//                      larger than 0xffff;
//                      PY_PARSE_NODE_DECIMAL holding the interned text, when
//                      a '.', 'e' or 'E' is found while scanning;
//                      PY_PARSE_NODE_INTEGER holding the interned text otherwise
//  - STRING / BYTES -> node of the same name holding the interned text
//  - anything else  -> PY_PARSE_NODE_TOKEN holding the token kind
static void push_result_token(parser_t *parser, const py_lexer_t *lex) {
    const py_token_t *tok = py_lexer_cur(lex);
    py_parse_node_t pn;
    if (tok->kind == PY_TOKEN_NAME) {
        pn = py_parse_node_new_leaf(PY_PARSE_NODE_ID, qstr_from_strn_copy(tok->str, tok->len));
    } else if (tok->kind == PY_TOKEN_NUMBER) {
        bool dec = false;
        bool small_int = true;
        int int_val = 0;
        int len = tok->len;
        const char *str = tok->str;
        int base = 10;
        int i = 0;

        // a base prefix needs at least one digit after it, hence len >= 3
        if (len >= 3 && str[0] == '0') {
            if (str[1] == 'o' || str[1] == 'O') {
                // octal
                base = 8;
                i = 2;
            } else if (str[1] == 'x' || str[1] == 'X') {
                // hexadecimal
                base = 16;
                i = 2;
            } else if (str[1] == 'b' || str[1] == 'B') {
                // binary
                base = 2;
                i = 2;
            }
        }

        for (; i < len; i++) {
            int digit;
            if (g_unichar_isdigit(str[i]) && str[i] - '0' < base) {
                digit = str[i] - '0';
            } else if (base == 16 && 'a' <= str[i] && str[i] <= 'f') {
                digit = str[i] - 'a' + 10;
            } else if (base == 16 && 'A' <= str[i] && str[i] <= 'F') {
                // upper-case hex digits; checked before the 'E' test below so
                // that, eg, 0xE is an integer and not a decimal
                digit = str[i] - 'A' + 10;
            } else if (str[i] == '.' || str[i] == 'e' || str[i] == 'E') {
                dec = true;
                break;
            } else {
                // not a digit of this base
                small_int = false;
                break;
            }
            if (small_int) {
                // int_val is at most 0xffff here, so this cannot overflow
                int_val = base * int_val + digit;
                if (int_val > 0xffff) {
                    // too big for a small int; stop accumulating but keep
                    // scanning in case the number turns out to be a decimal
                    small_int = false;
                }
            }
        }

        if (dec) {
            pn = py_parse_node_new_leaf(PY_PARSE_NODE_DECIMAL, qstr_from_strn_copy(str, len));
        } else if (small_int) {
            pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, int_val);
        } else {
            pn = py_parse_node_new_leaf(PY_PARSE_NODE_INTEGER, qstr_from_strn_copy(str, len));
        }
    } else if (tok->kind == PY_TOKEN_STRING) {
        pn = py_parse_node_new_leaf(PY_PARSE_NODE_STRING, qstr_from_strn_copy(tok->str, tok->len));
    } else if (tok->kind == PY_TOKEN_BYTES) {
        pn = py_parse_node_new_leaf(PY_PARSE_NODE_BYTES, qstr_from_strn_copy(tok->str, tok->len));
    } else {
        pn = py_parse_node_new_leaf(PY_PARSE_NODE_TOKEN, tok->kind);
    }
    push_result_node(parser, pn);
}
// Replace the top num_args nodes of the result stack with a single struct
// node for `rule` that has those nodes as its children, in the order in
// which they were pushed.
static void push_result_rule(parser_t *parser, rule_t *rule, int num_args) {
    py_parse_node_struct_t *pn = parse_node_new_struct(rule->rule_id, num_args);

    // nodes come off the stack last-pushed first, so fill from the back
    for (int slot = num_args - 1; slot >= 0; slot--) {
        pn->nodes[slot] = pop_result(parser);
    }

    push_result_node(parser, (py_parse_node_t)pn);
}
// Parse the token stream of `lex` and return the root of the parse tree.
//
// The parser is driven by the grammar tables and does not recurse: it keeps
// an explicit stack of (rule, argument index) entries and a stack of result
// nodes.  Each pass of the main loop pops a rule and resumes it at the saved
// argument index.  The `backtrack` flag tells the resumed rule that the
// sub-rule it was waiting for failed to match.
//
// `wanted_rule` is currently ignored: it is overwritten with RULE_file_input.
// On a syntax error a message is printed and PY_PARSE_NODE_NULL is returned.
// NOTE(review): the parser state and both stacks allocated here are not
// freed on any path, and the result stack has a fixed size of 1000 nodes.
py_parse_node_t py_parse(py_lexer_t *lex, int wanted_rule) {
wanted_rule = RULE_file_input;
parser_t *parser = m_new(parser_t, 1);
parser->rule_stack_alloc = 64;
parser->rule_stack_top = 0;
parser->rule_stack = m_new(rule_stack_t, parser->rule_stack_alloc);
parser->result_stack = m_new(py_parse_node_t, 1000);
parser->result_stack_top = 0;
// start with the top-level rule
push_rule(parser, rules[wanted_rule], 0);
// n is the argument count / list variant of the current rule, i the argument index
uint n, i;
bool backtrack = false;
rule_t *rule;
py_token_kind_t tok_kind;
bool emit_rule;
bool had_trailing_sep;
for (;;) {
next_rule:
// finished when there are no more rules to resume
if (parser->rule_stack_top == 0) {
break;
}
pop_rule(parser, &rule, &i);
n = rule->act & RULE_ACT_ARG_MASK;
/*
// debugging
printf("depth=%d ", parser->rule_stack_top);
for (int j = 0; j < parser->rule_stack_top; ++j) {
printf(" ");
}
printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack);
*/
switch (rule->act & RULE_ACT_KIND_MASK) {
case RULE_ACT_OR:
// match exactly one of the n alternatives
if (i > 0 && !backtrack) {
// resumed after an alternative matched, so this rule is done
goto next_rule;
} else {
// first visit, or the previous alternative failed: try the next one
backtrack = false;
}
// all alternatives except the last
for (; i < n - 1; ++i) {
switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
case RULE_ARG_TOK:
if (py_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) {
push_result_token(parser, lex);
py_lexer_to_next(lex);
goto next_rule;
}
break;
case RULE_ARG_RULE:
// come back to this rule at the next alternative once the sub-rule is done
push_rule(parser, rule, i + 1);
push_rule_from_arg(parser, rule->arg[i]);
goto next_rule;
default:
assert(0);
}
}
// the last alternative: this rule is not pushed again, so the outcome of
// the alternative is the outcome of this rule
if ((rule->arg[i] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
if (py_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) {
push_result_token(parser, lex);
py_lexer_to_next(lex);
} else {
backtrack = true;
goto next_rule;
}
} else {
push_rule_from_arg(parser, rule->arg[i]);
}
break;
case RULE_ACT_AND:
// match all n arguments in sequence
// failed, backtrack if we can, else syntax error
if (backtrack) {
assert(i > 0);
if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) {
// an optional rule that failed, so continue with next arg
push_result_node(parser, PY_PARSE_NODE_NULL);
backtrack = false;
} else {
// a mandatory rule that failed, so propagate backtrack
if (i > 1) {
// already eaten tokens so can't backtrack
goto syntax_error;
} else {
goto next_rule;
}
}
}
// progress through the rule
for (; i < n; ++i) {
switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
case RULE_ARG_TOK:
// need to match a token
tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
if (py_lexer_is_kind(lex, tok_kind)) {
// matched token
// only names are kept as results; other tokens are just consumed
if (tok_kind == PY_TOKEN_NAME) {
push_result_token(parser, lex);
}
py_lexer_to_next(lex);
} else {
// failed to match token
if (i > 0) {
// already eaten tokens so can't backtrack
goto syntax_error;
} else {
// this rule failed, so backtrack
backtrack = true;
goto next_rule;
}
}
break;
case RULE_ARG_RULE:
//if (i + 1 < n) {
push_rule(parser, rule, i + 1);
//}
push_rule_from_arg(parser, rule->arg[i]);
goto next_rule;
case RULE_ARG_OPT_RULE:
push_rule(parser, rule, i + 1);
push_rule_from_arg(parser, rule->arg[i]);
goto next_rule;
default:
assert(0);
}
}
assert(i == n);
// matched the rule, so now build the corresponding parse_node
// count number of arguments for the parse_node
i = 0;
emit_rule = false;
for (int x = 0; x < n; ++x) {
if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
// a rule containing any token of kind NAME or above always gets its own node
if (tok_kind >= PY_TOKEN_NAME) {
emit_rule = true;
}
if (tok_kind == PY_TOKEN_NAME) {
// only tokens which were names are pushed to stack
i += 1;
}
} else {
// rules are always pushed
i += 1;
}
}
// always emit these rules, even if they have only 1 argument
if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) {
emit_rule = true;
}
// never emit these rules if they have only 1 argument
// NOTE: can't put atom_paren here because we need it to distinguish, for example, [a,b] from [(a,b)]
if (rule->rule_id == RULE_else_stmt || rule->rule_id == RULE_testlist_comp_3b || rule->rule_id == RULE_import_as_names_paren || rule->rule_id == RULE_typedargslist_colon || rule->rule_id == RULE_typedargslist_equal || rule->rule_id == RULE_dictorsetmaker_colon || rule->rule_id == RULE_classdef_2 || rule->rule_id == RULE_with_item_as || rule->rule_id == RULE_assert_stmt_extra || rule->rule_id == RULE_as_name || rule->rule_id == RULE_raise_stmt_from || rule->rule_id == RULE_vfpdef) {
emit_rule = false;
}
// always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
if (rule->rule_id == RULE_funcdef || rule->rule_id == RULE_classdef || rule->rule_id == RULE_comp_for || rule->rule_id == RULE_lambdef || rule->rule_id == RULE_lambdef_nocond) {
emit_rule = true;
push_result_node(parser, PY_PARSE_NODE_NULL);
i += 1;
}
// count how many of the i results on the stack are not blank
int num_not_nil = 0;
for (int x = 0; x < i; ++x) {
if (peek_result(parser, x) != PY_PARSE_NODE_NULL) {
num_not_nil += 1;
}
}
//printf("done and %s n=%d i=%d notnil=%d\n", rule->rule_name, n, i, num_not_nil);
if (emit_rule) {
push_result_rule(parser, rule, i);
} else if (num_not_nil == 0) {
push_result_rule(parser, rule, i); // needed for, eg, atom_paren, testlist_comp_3b
//result_stack_show(parser);
//assert(0);
} else if (num_not_nil == 1) {
// single result, leave it on stack
// (pop all i results, blanks included, and push back only the non-blank one)
py_parse_node_t pn = PY_PARSE_NODE_NULL;
for (int x = 0; x < i; ++x) {
py_parse_node_t pn2 = pop_result(parser);
if (pn2 != PY_PARSE_NODE_NULL) {
pn = pn2;
}
}
push_result_node(parser, pn);
} else {
push_result_rule(parser, rule, i);
}
break;
case RULE_ACT_LIST:
// n=2 is: item item*
// n=1 is: item (sep item)*
// n=3 is: item (sep item)* [sep]
// i counts the elements attempted so far; for n=1 and n=3 items and
// separators alternate, so an odd i means an item was just attempted
if (backtrack) {
list_backtrack:
had_trailing_sep = false;
if (n == 2) {
if (i == 1) {
// fail on item, first time round; propagate backtrack
goto next_rule;
} else {
// fail on item, in later rounds; finish with this rule
backtrack = false;
}
} else {
if (i == 1) {
// fail on item, first time round; propagate backtrack
goto next_rule;
} else if ((i & 1) == 1) {
// fail on item, in later rounds; have eaten tokens so can't backtrack
if (n == 3) {
// list allows trailing separator; finish parsing list
had_trailing_sep = true;
backtrack = false;
} else {
// list doesn't allow trailing separator; fail
goto syntax_error;
}
} else {
// fail on separator; finish parsing list
backtrack = false;
}
}
} else {
// this loop is left only by one of the gotos inside it
for (;;) {
// arg[0] is the item; arg[1] is the separator, selected on odd i when n is odd
uint arg = rule->arg[i & 1 & n];
switch (arg & RULE_ARG_KIND_MASK) {
case RULE_ARG_TOK:
if (py_lexer_is_kind(lex, arg & RULE_ARG_ARG_MASK)) {
if (i & 1 & n) {
// separators which are tokens are not pushed to result stack
} else {
push_result_token(parser, lex);
}
py_lexer_to_next(lex);
// got element of list, so continue parsing list
i += 1;
} else {
// couldn't get element of list
i += 1;
backtrack = true;
goto list_backtrack;
}
break;
case RULE_ARG_RULE:
push_rule(parser, rule, i + 1);
push_rule_from_arg(parser, arg);
goto next_rule;
default:
assert(0);
}
}
}
assert(i >= 1);
// compute number of elements in list, result in i
i -= 1;
if ((n & 1) && (rule->arg[1] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
// don't count separators when they are tokens
i = (i + 1) / 2;
}
if (i == 1) {
// list matched single item
if (had_trailing_sep) {
// if there was a trailing separator, make a list of a single item
push_result_rule(parser, rule, i);
} else {
// just leave single item on stack (ie don't wrap in a list)
}
} else {
//printf("done list %s %d %d\n", rule->rule_name, n, i);
push_result_rule(parser, rule, i);
}
break;
default:
assert(0);
}
}
// all rules are done; anything other than the end token here is reported,
// but the tree built so far is still returned
if (!py_lexer_is_kind(lex, PY_TOKEN_END)) {
py_lexer_show_error(lex, "unexpected token at end:");
py_token_show(py_lexer_cur(lex));
}
//printf("--------------\n");
//result_stack_show(parser);
assert(parser->result_stack_top == 1);
//printf("maximum depth: %d\n", parser->rule_stack_alloc);
//printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated);
return parser->result_stack[0];
syntax_error:
py_lexer_show_error(lex, "syntax error:");
#ifdef USE_RULE_NAME
py_lexer_show_error(lex, rule->rule_name);
#endif
py_token_show(py_lexer_cur(lex));
return PY_PARSE_NODE_NULL;
}

54
py/parse.h Normal file
View File

@ -0,0 +1,54 @@
// Forward declaration: this header only needs pointers to the lexer type.
struct _py_lexer_t;
// a py_parse_node_t is:
// - 0000...0000: no node
// - xxxx...0001: an identifier; bits 4 and above are the qstr
// - xxxx...0011: a small integer; bits 4 and above are the signed value, 2's complement
// - xxxx...0101: an integer; bits 4 and above are the qstr holding the value
// - xxxx...0111: a decimal; bits 4 and above are the qstr holding the value
// - xxxx...1001: a string; bits 4 and above are the qstr holding the value
// - xxxx...1011: a string with triple quotes; bits 4 and above are the qstr holding the value
// - xxxx...1101: a token; bits 4 and above are py_token_kind_t
// - xxxx...xxx0: pointer to py_parse_node_struct_t
// NOTE(review): the 1011 tag is described above as a triple-quoted string, but the
// macro for 0xb below is named PY_PARSE_NODE_BYTES -- confirm which one is intended.
// Tag values held in the low 4 bits of a leaf node (the low bit is always 1 for a leaf).
#define PY_PARSE_NODE_NULL (0)
#define PY_PARSE_NODE_ID (0x1)
#define PY_PARSE_NODE_SMALL_INT (0x3)
#define PY_PARSE_NODE_INTEGER (0x5)
#define PY_PARSE_NODE_DECIMAL (0x7)
#define PY_PARSE_NODE_STRING (0x9)
#define PY_PARSE_NODE_BYTES (0xb)
#define PY_PARSE_NODE_TOKEN (0xd)
// A parse node is a single machine word: either a tagged leaf value or a struct pointer.
typedef machine_uint_t py_parse_node_t; // must be pointer size
// Heap-allocated interior parse node: a header followed by a flexible array of children.
typedef struct _py_parse_node_struct_t {
uint32_t source; // file identifier, and line number
// low 8 bits: rule kind; bits 8 and above: number of entries in nodes[]
// (see PY_PARSE_NODE_STRUCT_KIND / PY_PARSE_NODE_STRUCT_NUM_NODES)
uint32_t kind_num_nodes; // parse node kind, and number of nodes
py_parse_node_t nodes[]; // nodes
} py_parse_node_struct_t;
// macros for py_parse_node_t usage
// some of these evaluate their argument more than once, so do not pass
// expressions with side effects

// classification of a node word
#define PY_PARSE_NODE_IS_NULL(pn) ((pn) == PY_PARSE_NODE_NULL)
#define PY_PARSE_NODE_IS_LEAF(pn) ((pn) & 1)
#define PY_PARSE_NODE_IS_STRUCT(pn) ((pn) != PY_PARSE_NODE_NULL && ((pn) & 1) == 0)
#define PY_PARSE_NODE_IS_STRUCT_KIND(pn, k) ((pn) != PY_PARSE_NODE_NULL && ((pn) & 1) == 0 && PY_PARSE_NODE_STRUCT_KIND((py_parse_node_struct_t*)(pn)) == (k))
#define PY_PARSE_NODE_IS_ID(pn) (((pn) & 0xf) == PY_PARSE_NODE_ID)
#define PY_PARSE_NODE_IS_SMALL_INT(pn) (((pn) & 0xf) == PY_PARSE_NODE_SMALL_INT)
#define PY_PARSE_NODE_IS_TOKEN(pn) (((pn) & 0xf) == PY_PARSE_NODE_TOKEN)
// k is parenthesised so that compound arguments (eg "a | b") are shifted as a whole
#define PY_PARSE_NODE_IS_TOKEN_KIND(pn, k) ((pn) == (PY_PARSE_NODE_TOKEN | ((k) << 4)))

// leaf accessors: low 4 bits are the tag, the remaining bits are the argument
#define PY_PARSE_NODE_LEAF_KIND(pn) ((pn) & 0xf)
// TODO should probably have int and uint versions of this macro
#define PY_PARSE_NODE_LEAF_ARG(pn) (((machine_int_t)(pn)) >> 4)

// struct accessors: low 8 bits of kind_num_nodes are the kind, the rest the child count
#define PY_PARSE_NODE_STRUCT_KIND(pns) ((pns)->kind_num_nodes & 0xff)
#define PY_PARSE_NODE_STRUCT_NUM_NODES(pns) ((pns)->kind_num_nodes >> 8)
// Build a leaf node from a tag and an argument.
// NOTE(review): presumably kind is one of the PY_PARSE_NODE_* tags and arg is stored in
// bits 4 and above -- the definition is not in this header, confirm in parse.c.
py_parse_node_t py_parse_node_new_leaf(machine_int_t kind, machine_int_t arg);
// Debug helper that displays a node; presumably indent is the starting indentation -- confirm in parse.c.
void parse_node_show(py_parse_node_t pn, int indent);
// Parse the token stream from lex; returns the root parse node, or PY_PARSE_NODE_NULL
// when a syntax error was reported.
py_parse_node_t py_parse(struct _py_lexer_t *lex, int wanted_rule);

56
py/qstr.c Normal file
View File

@ -0,0 +1,56 @@
#include <assert.h>
#include <string.h>
#include "misc.h"
// Interned string table: entries [0, qstrs_len) are in use out of qstrs_alloc slots.
static int qstrs_alloc;
static int qstrs_len;
static const char **qstrs;

// Create the table with room for 400 strings; index 0 is taken by "nil".
void qstr_init() {
    qstrs_alloc = 400;
    qstrs = m_new(const char*, qstrs_alloc);
    qstrs[0] = "nil";
    qstrs_len = 1;
}
// Append str (the pointer is stored, not copied) and return its index,
// doubling the table first when it is full.
static qstr qstr_add(const char *str) {
    if (qstrs_len >= qstrs_alloc) {
        qstrs_alloc *= 2;
        qstrs = m_renew(const char*, qstrs, qstrs_alloc);
    }
    int slot = qstrs_len;
    qstrs[slot] = str;
    qstrs_len = slot + 1;
    return slot;
}
// Intern a string whose storage outlives the table: return the index of an equal
// existing entry, otherwise store the pointer itself as a new entry.
qstr qstr_from_str_static(const char *str) {
    int idx = 0;
    while (idx < qstrs_len) {
        if (strcmp(qstrs[idx], str) == 0) {
            return idx;
        }
        idx += 1;
    }
    return qstr_add(str);
}
// Intern str and take ownership of it: if an equal string is already interned the
// buffer is released with m_free, otherwise the table keeps the pointer.
qstr qstr_from_str_take(char *str) {
    int idx = 0;
    while (idx < qstrs_len) {
        if (strcmp(qstrs[idx], str) == 0) {
            // duplicate: the caller's buffer is no longer needed
            m_free(str);
            return idx;
        }
        idx += 1;
    }
    return qstr_add(str);
}
// Intern the first len characters of str; a new entry gets its own copy made with strndup.
qstr qstr_from_strn_copy(const char *str, int len) {
    for (int idx = 0; idx < qstrs_len; idx++) {
        const char *candidate = qstrs[idx];
        if (strncmp(candidate, str, len) != 0) {
            continue;
        }
        // the prefix matches; it is the same string only if the candidate ends here
        if (candidate[len] == '\0') {
            return idx;
        }
    }
    return qstr_add(strndup(str, len));
}
// Return the text of an interned string; the index is not range checked.
const char *qstr_str(qstr qstr) {
    const char *text = qstrs[qstr];
    return text;
}

944
py/runtime.c Normal file
View File

@ -0,0 +1,944 @@
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "misc.h"
#include "machine.h"
#include "runtime.h"
#include "bc.h"
// Debug tracing: both macros expand to a no-op, so their arguments are never evaluated.
// Swap in the commented-out definitions to route the output to printf.
#define DEBUG_printf(args...) (void)0
//#define DEBUG_printf(args...) printf(args)
#define DEBUG_OP_printf(args...) (void)0
//#define DEBUG_OP_printf(args...) printf(args)
// enable/disable float support with this definition
// (the code tests it with #ifdef, so it must be removed, not set to 0, to disable floats)
#define PY_FLOAT (1)
// Small integers live directly in a py_obj_t word: the low bit is 1 and the value is in
// the remaining bits. Heap objects are pointers with the low bit 0.
typedef machine_int_t py_small_int_t;
// true if o is a heap object (low bit clear) whose kind field equals k
#define IS_O(o, k) (((((py_small_int_t)(o)) & 1) == 0) && (((py_obj_base_t*)(o))->kind == (k)))
// non-zero if o is a tagged small integer
#define IS_SMALL_INT(o) (((py_small_int_t)(o)) & 1)
// recover the integer value from a tagged small integer
#define FROM_SMALL_INT(o) (((py_small_int_t)(o)) >> 1)
// tag an integer value as a small integer object
#define TO_SMALL_INT(o) ((py_obj_t)(((o) << 1) | 1))
#ifdef PY_FLOAT
typedef machine_float_t float_t;
#endif
// Discriminator stored in py_obj_base_t.kind; selects the active union member.
typedef enum {
O_CONST, // named constant (None, False, True); uses id
O_STR, // string; uses u_str
#ifdef PY_FLOAT
O_FLOAT, // float; uses flt
#endif
O_FUN_0, // native function taking 0 arguments; uses u_fun
O_FUN_1, // native function taking 1 argument; uses u_fun
O_FUN_2, // native function taking 2 arguments; uses u_fun
O_FUN_N, // native function with n_args recorded in u_fun
O_FUN_BC, // byte code function; uses u_fun_bc
O_BOUND_METH, // method bound to a self object; uses u_bound_meth
O_LIST, // list; uses u_list
O_SET, // set; uses u_set
O_MAP, // dict; uses u_map
O_CLASS, // class; uses u_class
} py_obj_kind_t;
// How the keys of a py_map_t are interpreted by py_map_lookup_helper.
typedef enum {
MAP_QSTR, // keys are qstr values cast to py_obj_t; hashed and compared by value
MAP_PY_OBJ, // keys are objects; hashed with py_obj_hash, compared with py_obj_equal
} py_map_kind_t;
// One slot of the hash table; key == NULL marks an empty slot.
typedef struct _py_map_elem_t {
py_obj_t key;
py_obj_t value;
} py_map_elem_t;
// Open-addressing hash table (linear probing).
typedef struct _py_map_t {
py_map_kind_t kind;
machine_uint_t alloc; // number of slots in table
machine_uint_t used; // number of occupied slots
py_map_elem_t *table;
} py_map_t;
// Representation of every heap-allocated object: a kind tag followed by an anonymous
// union holding the payload for that kind.
typedef struct _py_obj_base_t {
py_obj_kind_t kind;
union {
const char *id; // for O_CONST: the name that gets printed
qstr u_str; // for O_STR: interned string index
#ifdef PY_FLOAT
float_t flt; // for O_FLOAT
#endif
struct { // for O_FUN_[012N]
void *fun; // native entry point, cast to the matching py_fun_*_t when called
int n_args; // only written by rt_make_function (O_FUN_N)
} u_fun;
struct { // for O_FUN_BC
byte *code;
uint len;
int n_args;
} u_fun_bc;
struct { // for O_BOUND_METH
py_obj_t meth;
py_obj_t self;
} u_bound_meth;
struct { // for O_LIST
int alloc; // capacity of items
int len; // number of items in use
py_obj_t *items;
} u_list;
struct { // for O_SET
int alloc; // number of slots in table
int used; // number of occupied slots
py_obj_t *table; // NULL marks an empty slot
} u_set;
py_map_t u_map; // for O_MAP
/*
struct { // for O_MAP
int alloc;
int used;
py_map_elem_t *table;
} u_map;
*/
struct { // for O_CLASS
py_map_t *map; // class attributes, keyed by qstr
} u_class;
};
} py_obj_base_t;
// Singleton constant objects, created in rt_init.
py_obj_t py_const_none;
py_obj_t py_const_false;
py_obj_t py_const_true;
// Name tables keyed by qstr: map_name is searched first by rt_load_name and written
// by rt_store_name; map_builtins holds the built-in functions and is the fallback.
py_map_t map_name;
py_map_t map_builtins;
// approximately doubling primes (growth factor 1.7); made with the Mathematica command:
// Table[Prime[Floor[(1.7)^n]], {n, 3, 24}]
static int doubling_primes[] = {7, 19, 43, 89, 179, 347, 647, 1229, 2297, 4243, 7829, 14347, 26017, 47149, 84947, 152443, 273253, 488399, 869927, 1547173, 2745121, 4861607};

// Return the smallest table entry that is >= x.
// When x is larger than every entry there is no prime to offer, so fall back
// to x with its low bit set (ie at least an odd number).
int get_doubling_prime_greater_or_equal_to(int x) {
    const int *end = doubling_primes + sizeof(doubling_primes) / sizeof(doubling_primes[0]);
    for (const int *prime = doubling_primes; prime != end; ++prime) {
        if (*prime >= x) {
            return *prime;
        }
    }
    return x | 1;
}
// Initialise map in place as an empty table of the given kind with room for
// at least n + 1 slots (rounded up to a prime from the table above).
void py_map_init(py_map_t *map, py_map_kind_t kind, int n) {
    map->kind = kind;
    map->used = 0;
    map->alloc = get_doubling_prime_greater_or_equal_to(n + 1);
    map->table = m_new(py_map_elem_t, map->alloc);
    // a NULL key marks an empty slot, so every slot must be cleared
    for (machine_uint_t slot = 0; slot < map->alloc; slot++) {
        py_map_elem_t *elem = &map->table[slot];
        elem->key = NULL;
        elem->value = NULL;
    }
}
// Allocate a map on the heap and initialise it with py_map_init.
py_map_t *py_map_new(py_map_kind_t kind, int n) {
    py_map_t *fresh = m_new(py_map_t, 1);
    py_map_init(fresh, kind, n);
    return fresh;
}
// Hash an object for use as a set element or dict key.
// Only small ints (hash is the value) and strings (hash is the qstr) are supported;
// anything else trips the assert.
int py_obj_hash(py_obj_t o_in) {
    if (IS_SMALL_INT(o_in)) {
        return FROM_SMALL_INT(o_in);
    }
    if (IS_O(o_in, O_STR)) {
        py_obj_base_t *str = (py_obj_base_t*)o_in;
        return str->u_str;
    }
    assert(0);
    return 0;
}
// Equality test used when probing sets and dicts.
// Supports the hashable kinds only (small ints and strings):
//  - identical words are equal;
//  - a small int never equals a different small int, nor any heap object;
//  - two strings are equal when they are the same interned qstr.
// Any other combination of heap objects is unsupported and trips the assert.
bool py_obj_equal(py_obj_t o1, py_obj_t o2) {
    if (o1 == o2) {
        return true;
    }
    if (IS_SMALL_INT(o1) || IS_SMALL_INT(o2)) {
        // not the same word, and at least one side is a small int: either two different
        // ints, or an int against a heap object (eg int and str keys sharing a hash slot)
        return false;
    }
    if (IS_O(o1, O_STR) && IS_O(o2, O_STR)) {
        return ((py_obj_base_t*)o1)->u_str == ((py_obj_base_t*)o2)->u_str;
    }
    assert(0);
    return false;
}
// Find the slot for index in map, using linear probing.
//  - found: returns the slot (and refreshes its key when add_if_not_found is set);
//  - not found, add_if_not_found false: returns NULL;
//  - not found, add_if_not_found true: claims an empty slot, stores the key and returns
//    the slot with its value left for the caller to fill in. The table is grown and
//    rehashed first if the insertion would fill it.
// For MAP_QSTR maps the key word itself is the hash and keys are compared by identity;
// for MAP_PY_OBJ maps py_obj_hash and py_obj_equal are used.
py_map_elem_t* py_map_lookup_helper(py_map_t *map, py_obj_t index, bool add_if_not_found) {
    bool is_map_py_obj = (map->kind == MAP_PY_OBJ);
    machine_uint_t hash;
    if (is_map_py_obj) {
        hash = py_obj_hash(index);
    } else {
        hash = (machine_uint_t)index;
    }
    uint pos = hash % map->alloc;
    for (;;) {
        py_map_elem_t *elem = &map->table[pos];
        if (elem->key == NULL) {
            // not in table
            if (!add_if_not_found) {
                return NULL;
            }
            if (map->used + 1 >= map->alloc) {
                // not enough room in table, rehash it
                machine_uint_t old_alloc = map->alloc;
                py_map_elem_t *old_table = map->table;
                map->alloc = get_doubling_prime_greater_or_equal_to(map->alloc + 1);
                map->used = 0;
                map->table = m_new(py_map_elem_t, map->alloc);
                // the new table must start out empty (as py_map_init does), otherwise
                // probing would read indeterminate keys
                for (machine_uint_t i = 0; i < map->alloc; i++) {
                    map->table[i].key = NULL;
                    map->table[i].value = NULL;
                }
                for (machine_uint_t i = 0; i < old_alloc; i++) {
                    if (old_table[i].key != NULL) {
                        py_map_lookup_helper(map, old_table[i].key, true)->value = old_table[i].value;
                    }
                }
                m_free(old_table);
                // restart the search for the new element
                pos = hash % map->alloc;
            } else {
                map->used += 1;
                elem->key = index;
                return elem;
            }
        } else if (elem->key == index || (is_map_py_obj && py_obj_equal(elem->key, index))) {
            // found it
            if (add_if_not_found) {
                elem->key = index;
            }
            return elem;
        } else {
            // not yet found, keep searching in this table
            pos = (pos + 1) % map->alloc;
        }
    }
}
// Look up a qstr key: the qstr is widened to a machine word and used directly as the key.
py_map_elem_t* py_qstr_map_lookup(py_map_t *map, qstr index, bool add_if_not_found) {
    machine_uint_t key_word = (machine_uint_t)index;
    return py_map_lookup_helper(map, (py_obj_t)key_word, add_if_not_found);
}
// Look up index in a dict object; o must be an O_MAP.
py_map_elem_t* py_map_lookup(py_obj_t o, py_obj_t index, bool add_if_not_found) {
    assert(IS_O(o, O_MAP));
    py_obj_base_t *dict = (py_obj_base_t *)o;
    return py_map_lookup_helper(&dict->u_map, index, add_if_not_found);
}
// Report whether o can be stored as a tagged small integer.
// Currently a stub: it accepts every value, so results that overflow the tagged
// range are not detected.
static bool fit_small_int(py_small_int_t o) {
return true;
}
py_obj_t py_obj_new_const(const char *id) {
py_obj_base_t *o = m_new(py_obj_base_t, 1);
o->kind = O_CONST;
o->id = id;
return (py_obj_t)o;
}
// Allocate a string object referring to an interned string.
py_obj_t py_obj_new_str(qstr qstr) {
    py_obj_base_t *str = m_new(py_obj_base_t, 1);
    str->u_str = qstr;
    str->kind = O_STR;
    return (py_obj_t)str;
}
#ifdef PY_FLOAT
// Allocate a float object holding val.
py_obj_t py_obj_new_float(float_t val) {
    py_obj_base_t *num = m_new(py_obj_base_t, 1);
    num->flt = val;
    num->kind = O_FLOAT;
    return (py_obj_t)num;
}
#endif
// Append arg to the list self_in, doubling the item storage when it is full.
// Returns arg.
py_obj_t list_append(py_obj_t self_in, py_obj_t arg) {
    assert(IS_O(self_in, O_LIST));
    py_obj_base_t *list = self_in;
    if (list->u_list.len >= list->u_list.alloc) {
        int grown = list->u_list.alloc * 2;
        list->u_list.alloc = grown;
        list->u_list.items = m_renew(py_obj_t, list->u_list.items, grown);
    }
    list->u_list.items[list->u_list.len] = arg;
    list->u_list.len += 1;
    return arg;
}
// Interned names that the runtime compares against; set up in rt_init.
static qstr q_append;
static qstr q_print;
static qstr q_len;
static qstr q___build_class__;
// Which union member of py_code_t is valid.
typedef enum {
PY_CODE_NATIVE,
PY_CODE_BYTE,
} py_code_kind_t;
// A compiled code object, registered under a unique id by rt_assign_native_code /
// rt_assign_byte_code and turned into a function by rt_make_function_from_id.
typedef struct _py_code_t {
py_code_kind_t kind;
int n_args;
union {
struct {
py_fun_t fun;
} u_native;
struct {
byte *code;
uint len;
} u_byte;
};
} py_code_t;
// Next id handed out by rt_get_new_unique_code_id (starts at 1, see rt_init).
static int next_unique_code_id;
// Table of code objects indexed by unique id; allocated on the first assignment with
// next_unique_code_id entries.
// NOTE(review): the table is never resized, so ids created after that first
// assignment appear to index past its end -- confirm all ids are allocated up front.
static py_code_t *unique_codes;
// Function object wrapping list_append, used for list.append lookups.
py_obj_t fun_list_append;
// Built-in print(o): writes o followed by a newline and returns None.
// Strings are written raw (no quotes); everything else goes through py_obj_print.
py_obj_t py_builtin_print(py_obj_t o) {
    if (!IS_O(o, O_STR)) {
        // print the object Python style
        py_obj_print(o);
        printf("\n");
        return py_const_none;
    }
    // special case, print string raw
    py_obj_base_t *str = (py_obj_base_t*)o;
    printf("%s\n", qstr_str(str->u_str));
    return py_const_none;
}
// Built-in len(o): number of items in a list, or of used slots in a dict,
// as a small int. Other kinds trip the assert.
py_obj_t py_builtin_len(py_obj_t o_in) {
    py_small_int_t count = 0;
    if (IS_O(o_in, O_LIST)) {
        count = ((py_obj_base_t*)o_in)->u_list.len;
    } else if (IS_O(o_in, O_MAP)) {
        count = ((py_obj_base_t*)o_in)->u_map.used;
    } else {
        assert(0);
    }
    return TO_SMALL_INT(count);
}
// Built-in __build_class__: creates a class object with an empty qstr-keyed
// attribute map. Both arguments are currently ignored.
py_obj_t py_builtin___build_class__(py_obj_t o1, py_obj_t o2) {
    py_obj_base_t *klass = m_new(py_obj_base_t, 1);
    klass->u_class.map = py_map_new(MAP_QSTR, 0);
    klass->kind = O_CLASS;
    return klass;
}
FILE *fp_native = NULL;
void rt_init() {
q_append = qstr_from_str_static("append");
q_print = qstr_from_str_static("print");
q_len = qstr_from_str_static("len");
q___build_class__ = qstr_from_str_static("__build_class__");
py_const_none = py_obj_new_const("None");
py_const_false = py_obj_new_const("False");
py_const_true = py_obj_new_const("True");
py_map_init(&map_name, MAP_QSTR, 0);
py_map_init(&map_builtins, MAP_QSTR, 3);
py_qstr_map_lookup(&map_builtins, q_print, true)->value = rt_make_function_1(py_builtin_print);
py_qstr_map_lookup(&map_builtins, q_len, true)->value = rt_make_function_1(py_builtin_len);
py_qstr_map_lookup(&map_builtins, q___build_class__, true)->value = rt_make_function_2(py_builtin___build_class__);
next_unique_code_id = 1;
unique_codes = NULL;
fun_list_append = rt_make_function_2(list_append);
fp_native = fopen("out-native", "wb");
}
// Shut the runtime down: close the native code dump file if it was opened.
void rt_deinit() {
    if (fp_native == NULL) {
        return;
    }
    fclose(fp_native);
}
// Hand out the next unused code id.
int rt_get_new_unique_code_id() {
    int id = next_unique_code_id;
    next_unique_code_id = id + 1;
    return id;
}
// Register native machine code under unique_code_id, and append the raw code bytes
// to the dump file when it is open. The first 128 bytes are also hex-dumped through
// DEBUG_printf when that is enabled.
void rt_assign_native_code(int unique_code_id, py_fun_t fun, uint len, int n_args) {
    if (unique_codes == NULL) {
        // first registration: size the table for all ids handed out so far
        unique_codes = m_new(py_code_t, next_unique_code_id);
    }
    assert(unique_code_id < next_unique_code_id);

    py_code_t *entry = &unique_codes[unique_code_id];
    entry->kind = PY_CODE_NATIVE;
    entry->n_args = n_args;
    entry->u_native.fun = fun;

    DEBUG_printf("assign native code: id=%d fun=%p len=%u n_args=%d\n", unique_code_id, fun, len, n_args);
    byte *fun_data = (byte*)(((machine_uint_t)fun) & (~1)); // need to clear lower bit in case it's thumb code
    uint dump_len = len < 128 ? len : 128;
    for (uint offset = 0; offset < dump_len; offset++) {
        if (offset > 0 && offset % 16 == 0) {
            DEBUG_printf("\n");
        }
        DEBUG_printf(" %02x", fun_data[offset]);
    }
    DEBUG_printf("\n");

    if (fp_native != NULL) {
        fwrite(fun_data, len, 1, fp_native);
    }
}
// Register a byte code buffer (not copied) under unique_code_id.
void rt_assign_byte_code(int unique_code_id, byte *code, uint len, int n_args) {
    if (unique_codes == NULL) {
        // first registration: size the table for all ids handed out so far
        unique_codes = m_new(py_code_t, next_unique_code_id);
    }
    assert(unique_code_id < next_unique_code_id);

    py_code_t *entry = &unique_codes[unique_code_id];
    entry->kind = PY_CODE_BYTE;
    entry->n_args = n_args;
    entry->u_byte.code = code;
    entry->u_byte.len = len;

    DEBUG_printf("assign byte code: id=%d code=%p len=%u n_args=%d\n", unique_code_id, code, len, n_args);
}
// Return the Python type name of an object, for use in messages.
// Kinds without an entry here (functions, bound methods, classes) trip the assert.
const char *py_obj_get_type_str(py_obj_t o_in) {
    if (IS_SMALL_INT(o_in)) {
        return "int";
    }
    py_obj_base_t *o = o_in;
    switch (o->kind) {
        case O_CONST:
            // the only constants are None, False and True
            return (o == py_const_none) ? "NoneType" : "bool";
        case O_STR:
            return "str";
#ifdef PY_FLOAT
        case O_FLOAT:
            return "float";
#endif
        case O_LIST:
            return "list";
        case O_SET:
            return "set";
        case O_MAP:
            return "dict";
        default:
            assert(0);
            return "UnknownType";
    }
}
// Print o_in to stdout in Python style, without a trailing newline.
// Handles small ints, constants, strings, floats, lists, sets and dicts (recursing
// into container items); any other kind trips the assert.
void py_obj_print(py_obj_t o_in) {
    if (IS_SMALL_INT(o_in)) {
        // small ints are machine-word sized: print via long long so that values
        // wider than int are not truncated on 64-bit targets
        printf("%lld", (long long)FROM_SMALL_INT(o_in));
    } else {
        py_obj_base_t *o = o_in;
        switch (o->kind) {
            case O_CONST:
                printf("%s", o->id);
                break;
            case O_STR:
                // TODO need to escape chars etc
                printf("'%s'", qstr_str(o->u_str));
                break;
#ifdef PY_FLOAT
            case O_FLOAT:
                printf("%f", o->flt);
                break;
#endif
            case O_LIST:
                printf("[");
                for (int i = 0; i < o->u_list.len; i++) {
                    if (i > 0) {
                        printf(", ");
                    }
                    py_obj_print(o->u_list.items[i]);
                }
                printf("]");
                break;
            case O_SET:
            {
                // walk every slot of the hash table, skipping empty ones
                bool first = true;
                printf("{");
                for (int i = 0; i < o->u_set.alloc; i++) {
                    if (o->u_set.table[i] != NULL) {
                        if (!first) {
                            printf(", ");
                        }
                        first = false;
                        py_obj_print(o->u_set.table[i]);
                    }
                }
                printf("}");
                break;
            }
            case O_MAP:
            {
                // walk every slot of the hash table, skipping empty ones
                bool first = true;
                printf("{");
                for (machine_uint_t i = 0; i < o->u_map.alloc; i++) {
                    if (o->u_map.table[i].key != NULL) {
                        if (!first) {
                            printf(", ");
                        }
                        first = false;
                        py_obj_print(o->u_map.table[i].key);
                        printf(": ");
                        py_obj_print(o->u_map.table[i].value);
                    }
                }
                printf("}");
                break;
            }
            default:
                assert(0);
        }
    }
}
// Truth test: returns 1 or 0. Small ints are true when non-zero; None and False are
// false; True is true. Any other object trips the assert.
int rt_is_true(py_obj_t arg) {
    DEBUG_OP_printf("is true %p\n", arg);
    if (IS_SMALL_INT(arg)) {
        return FROM_SMALL_INT(arg) != 0;
    }
    if (arg == py_const_none || arg == py_const_false) {
        return 0;
    }
    if (arg == py_const_true) {
        return 1;
    }
    assert(0);
    return 0;
}
// Extract the C int value of a small int object; other objects trip the assert.
int rt_get_int(py_obj_t arg) {
    if (!IS_SMALL_INT(arg)) {
        assert(0);
        return 0;
    }
    return FROM_SMALL_INT(arg);
}
// Create a new string object for an interned string constant.
py_obj_t rt_load_const_str(qstr qstr) {
    DEBUG_OP_printf("load '%s'\n", qstr_str(qstr));
    py_obj_t str = py_obj_new_str(qstr);
    return str;
}
// Look a name up in map_name, then in the built-ins, and return its value.
// An unknown name is reported and asserted on; in builds without asserts None is
// returned instead of dereferencing a missing entry.
py_obj_t rt_load_name(qstr qstr) {
    // logic: search locals, globals, builtins
    DEBUG_OP_printf("load %s\n", qstr_str(qstr));
    py_map_elem_t *elem = py_qstr_map_lookup(&map_name, qstr, false);
    if (elem == NULL) {
        elem = py_qstr_map_lookup(&map_builtins, qstr, false);
    }
    if (elem == NULL) {
        printf("name doesn't exist: %s\n", qstr_str(qstr));
        assert(0);
        return py_const_none;
    }
    return elem->value;
}
// Load a global name. There is no separate global lookup yet, so this simply
// defers to rt_load_name.
py_obj_t rt_load_global(qstr qstr) {
    py_obj_t value = rt_load_name(qstr); // TODO
    return value;
}
py_obj_t rt_load_build_class() {
DEBUG_OP_printf("load_build_class\n");
py_map_elem_t *elem = py_qstr_map_lookup(&map_builtins, q___build_class__, false);
if (elem == NULL) {
printf("name doesn't exist: __build_class__\n");
assert(0);
}
return elem->value;
}
// Bind a name to obj in map_name, creating the entry if needed.
void rt_store_name(qstr qstr, py_obj_t obj) {
    DEBUG_OP_printf("store %s <- %p\n", qstr_str(qstr), obj);
    py_map_elem_t *slot = py_qstr_map_lookup(&map_name, qstr, true);
    slot->value = obj;
}
// Apply a unary operator to arg.
// Not implemented yet: always trips the assert (and returns None when asserts
// are compiled out).
py_obj_t rt_unary_op(int op, py_obj_t arg) {
assert(0);
return py_const_none;
}
// Apply a binary operator.
// Supported combinations:
//  - RT_BINARY_OP_SUBSCR on a list with a small int index (negative indices count from
//    the end, as in rt_store_subscr; an out of range index trips the assert);
//  - small int (op) small int for add, subtract, multiply, floor divide and, with
//    PY_FLOAT, true divide (which returns a float);
//  - str + str, producing a new interned string.
// Anything else trips an assert and returns None when asserts are compiled out.
py_obj_t rt_binary_op(int op, py_obj_t lhs, py_obj_t rhs) {
    DEBUG_OP_printf("binary %d %p %p\n", op, lhs, rhs);
    if (op == RT_BINARY_OP_SUBSCR) {
        if (IS_O(lhs, O_LIST) && IS_SMALL_INT(rhs)) {
            py_obj_base_t *list = lhs;
            py_small_int_t idx = FROM_SMALL_INT(rhs);
            if (idx < 0) {
                idx += list->u_list.len;
            }
            if (0 <= idx && idx < list->u_list.len) {
                return list->u_list.items[idx];
            }
            // index out of range
            assert(0);
        } else {
            assert(0);
        }
    } else if (IS_SMALL_INT(lhs) && IS_SMALL_INT(rhs)) {
        py_small_int_t l = FROM_SMALL_INT(lhs);
        py_small_int_t r = FROM_SMALL_INT(rhs);
        py_small_int_t val;
        switch (op) {
            case RT_BINARY_OP_ADD:
            case RT_BINARY_OP_INPLACE_ADD: val = l + r; break;
            case RT_BINARY_OP_SUBTRACT: val = l - r; break;
            case RT_BINARY_OP_MULTIPLY: val = l * r; break;
            case RT_BINARY_OP_FLOOR_DIVIDE:
                if (r == 0) {
                    printf("ZeroDivisionError: integer division or modulo by zero\n");
                    assert(0);
                    return py_const_none;
                }
                // C division truncates towards zero; Python's // rounds towards
                // negative infinity, so adjust when the signs differ and the
                // division is inexact
                val = l / r;
                if ((l % r != 0) && ((l < 0) != (r < 0))) {
                    val -= 1;
                }
                break;
#ifdef PY_FLOAT
            case RT_BINARY_OP_TRUE_DIVIDE: return py_obj_new_float((float_t)l / (float_t)r);
#endif
            default: printf("%d\n", op); assert(0); val = 0;
        }
        if (fit_small_int(val)) {
            return TO_SMALL_INT(val);
        }
    } else if (IS_O(lhs, O_STR) && IS_O(rhs, O_STR)) {
        const char *lhs_str = qstr_str(((py_obj_base_t*)lhs)->u_str);
        const char *rhs_str = qstr_str(((py_obj_base_t*)rhs)->u_str);
        char *val;
        switch (op) {
            case RT_BINARY_OP_ADD:
            case RT_BINARY_OP_INPLACE_ADD:
                // the buffer is sized exactly for both strings plus the terminator
                val = m_new(char, strlen(lhs_str) + strlen(rhs_str) + 1);
                strcpy(val, lhs_str);
                strcat(val, rhs_str);
                break;
            default:
                // unsupported string operator: do not hand a NULL buffer to the qstr table
                printf("%d\n", op);
                assert(0);
                return py_const_none;
        }
        // ownership of val passes to the qstr table
        return py_obj_new_str(qstr_from_str_take(val));
    }
    assert(0);
    return py_const_none;
}
// Compare two small ints with RT_COMPARE_OP_LESS or RT_COMPARE_OP_MORE and return
// True or False. Other operand kinds or operators trip an assert.
py_obj_t rt_compare_op(int op, py_obj_t lhs, py_obj_t rhs) {
    DEBUG_OP_printf("compare %d %p %p\n", op, lhs, rhs);
    if (!(IS_SMALL_INT(lhs) && IS_SMALL_INT(rhs))) {
        assert(0);
        return py_const_none;
    }
    py_small_int_t left = FROM_SMALL_INT(lhs);
    py_small_int_t right = FROM_SMALL_INT(rhs);
    int holds;
    switch (op) {
        case RT_COMPARE_OP_LESS: holds = left < right; break;
        case RT_COMPARE_OP_MORE: holds = left > right; break;
        default: assert(0); holds = 0;
    }
    return holds ? py_const_true : py_const_false;
}
// Create a function object from the code registered under unique_code_id.
// Native code becomes an O_FUN_0/1/2 object according to its argument count
// (more than 2 arguments trips the assert); byte code becomes an O_FUN_BC object.
// Returns None for an id that cannot refer to registered code: negative, not yet
// handed out, or requested before any code has been registered at all.
py_obj_t rt_make_function_from_id(int unique_code_id) {
    if (unique_code_id < 0 || unique_code_id >= next_unique_code_id || unique_codes == NULL) {
        // illegal code id
        return py_const_none;
    }
    py_code_t *c = &unique_codes[unique_code_id];
    py_obj_base_t *o = m_new(py_obj_base_t, 1);
    switch (c->kind) {
        case PY_CODE_NATIVE:
            switch (c->n_args) {
                case 0: o->kind = O_FUN_0; break;
                case 1: o->kind = O_FUN_1; break;
                case 2: o->kind = O_FUN_2; break;
                default: assert(0);
            }
            o->u_fun.fun = c->u_native.fun;
            break;
        case PY_CODE_BYTE:
            o->kind = O_FUN_BC;
            o->u_fun_bc.code = c->u_byte.code;
            o->u_fun_bc.len = c->u_byte.len;
            o->u_fun_bc.n_args = c->n_args;
            break;
        default:
            assert(0);
    }
    return o;
}
// Wrap a native function taking no arguments in a function object.
py_obj_t rt_make_function_0(py_fun_0_t fun) {
    py_obj_base_t *fn_obj = m_new(py_obj_base_t, 1);
    fn_obj->u_fun.fun = fun;
    fn_obj->kind = O_FUN_0;
    return fn_obj;
}
// Wrap a native function taking one argument in a function object.
py_obj_t rt_make_function_1(py_fun_1_t fun) {
    py_obj_base_t *fn_obj = m_new(py_obj_base_t, 1);
    fn_obj->u_fun.fun = fun;
    fn_obj->kind = O_FUN_1;
    return fn_obj;
}
// Wrap a native function taking two arguments in a function object.
py_obj_t rt_make_function_2(py_fun_2_t fun) {
    py_obj_base_t *fn_obj = m_new(py_obj_base_t, 1);
    fn_obj->u_fun.fun = fun;
    fn_obj->kind = O_FUN_2;
    return fn_obj;
}
// Wrap a native function with an explicit argument count in an O_FUN_N object.
py_obj_t rt_make_function(int n_args, py_fun_t code) {
    // assumes code is a pointer to a py_fun_t (i think this is safe...)
    py_obj_base_t *fn_obj = m_new(py_obj_base_t, 1);
    fn_obj->u_fun.n_args = n_args;
    fn_obj->u_fun.fun = code;
    fn_obj->kind = O_FUN_N;
    return fn_obj;
}
// Call fun with no arguments. fun must be a 0-argument native function or a
// byte code function declared with 0 arguments.
py_obj_t rt_call_function_0(py_obj_t fun) {
    if (IS_O(fun, O_FUN_0)) {
        py_obj_base_t *callee = fun;
        DEBUG_OP_printf("calling native %p...\n", callee->u_fun.fun);
        py_fun_0_t native = (py_fun_0_t)callee->u_fun.fun;
        return native();
    }
    if (IS_O(fun, O_FUN_BC)) {
        py_obj_base_t *callee = fun;
        assert(callee->u_fun_bc.n_args == 0);
        DEBUG_OP_printf("calling byte code %p...\n", callee->u_fun_bc.code);
        return py_execute_byte_code(callee->u_fun_bc.code, callee->u_fun_bc.len, NULL, 0);
    }
    printf("fun0:%p\n", fun);
    assert(0);
    return py_const_none;
}
// Call fun with one argument. fun may be a 1-argument native function, a byte code
// function declared with 1 argument, or a bound method (called as meth(self, arg)).
py_obj_t rt_call_function_1(py_obj_t fun, py_obj_t arg) {
    if (IS_O(fun, O_FUN_1)) {
        py_obj_base_t *callee = fun;
        DEBUG_OP_printf("calling native %p...\n", callee->u_fun.fun);
        py_fun_1_t native = (py_fun_1_t)callee->u_fun.fun;
        return native(arg);
    }
    if (IS_O(fun, O_FUN_BC)) {
        py_obj_base_t *callee = fun;
        assert(callee->u_fun_bc.n_args == 1);
        DEBUG_OP_printf("calling byte code %p...\n", callee->u_fun_bc.code);
        return py_execute_byte_code(callee->u_fun_bc.code, callee->u_fun_bc.len, &arg, 1);
    }
    if (IS_O(fun, O_BOUND_METH)) {
        py_obj_base_t *bound = fun;
        return rt_call_function_2(bound->u_bound_meth.meth, bound->u_bound_meth.self, arg);
    }
    printf("fun1:%p\n", fun);
    assert(0);
    return py_const_none;
}
// Call fun with two arguments. fun must be a 2-argument native function or a
// byte code function declared with 2 arguments.
py_obj_t rt_call_function_2(py_obj_t fun, py_obj_t arg1, py_obj_t arg2) {
    if (IS_O(fun, O_FUN_2)) {
        py_obj_base_t *callee = fun;
        DEBUG_OP_printf("calling native %p...\n", callee->u_fun.fun);
        py_fun_2_t native = (py_fun_2_t)callee->u_fun.fun;
        return native(arg1, arg2);
    }
    if (IS_O(fun, O_FUN_BC)) {
        py_obj_base_t *callee = fun;
        assert(callee->u_fun_bc.n_args == 2);
        DEBUG_OP_printf("calling byte code %p...\n", callee->u_fun_bc.code);
        // pack the arguments in call order for the byte code interpreter
        py_obj_t args[2] = {arg1, arg2};
        return py_execute_byte_code(callee->u_fun_bc.code, callee->u_fun_bc.len, &args[0], 2);
    }
    assert(0);
    return py_const_none;
}
// Call the (fun, self) pair produced by rt_load_method with no extra arguments.
// A NULL self means fun is a plain callable, not a method.
py_obj_t rt_call_method_1(py_obj_t fun, py_obj_t self) {
    DEBUG_OP_printf("call method %p %p\n", fun, self);
    return (self == NULL) ? rt_call_function_0(fun) : rt_call_function_1(fun, self);
}
// Call the (fun, self) pair produced by rt_load_method with one extra argument.
// A NULL self means fun is a plain callable, not a method.
py_obj_t rt_call_method_2(py_obj_t fun, py_obj_t self, py_obj_t arg) {
    DEBUG_OP_printf("call method %p %p %p\n", fun, self, arg);
    return (self == NULL) ? rt_call_function_1(fun, arg) : rt_call_function_2(fun, self, arg);
}
// Build a list of n_args items. The items array holds them in reverse order, so
// it is copied back to front. Storage is allocated for at least 4 items.
py_obj_t rt_build_list(int n_args, py_obj_t *items) {
    int capacity = (n_args < 4) ? 4 : n_args;
    py_obj_base_t *list = m_new(py_obj_base_t, 1);
    list->kind = O_LIST;
    list->u_list.alloc = capacity;
    list->u_list.len = n_args;
    list->u_list.items = m_new(py_obj_t, capacity);
    for (int dst = 0, src = n_args - 1; dst < n_args; dst++, src--) {
        list->u_list.items[dst] = items[src];
    }
    return list;
}
// Find index in the set o_in, using linear probing.
//  - found: returns the stored element;
//  - not found, add_if_not_found false: returns NULL;
//  - not found, add_if_not_found true: stores index and returns it, growing and
//    rehashing the table first if the insertion would fill it.
py_obj_t py_set_lookup(py_obj_t o_in, py_obj_t index, bool add_if_not_found) {
    assert(IS_O(o_in, O_SET));
    py_obj_base_t *o = o_in;
    // the hash may be negative (eg a negative small int), so reduce it as an
    // unsigned value to keep the slot index inside the table
    machine_uint_t hash = py_obj_hash(index);
    uint pos = hash % o->u_set.alloc;
    for (;;) {
        py_obj_t elem = o->u_set.table[pos];
        if (elem == NULL) {
            // not in table
            if (!add_if_not_found) {
                return NULL;
            }
            if (o->u_set.used + 1 >= o->u_set.alloc) {
                // not enough room in table, rehash it
                int old_alloc = o->u_set.alloc;
                py_obj_t *old_table = o->u_set.table;
                o->u_set.alloc = get_doubling_prime_greater_or_equal_to(o->u_set.alloc + 1);
                o->u_set.used = 0;
                o->u_set.table = m_new(py_obj_t, o->u_set.alloc);
                // the new table must start out empty (as rt_build_set does), otherwise
                // probing would read indeterminate slots
                for (int i = 0; i < o->u_set.alloc; i++) {
                    o->u_set.table[i] = NULL;
                }
                for (int i = 0; i < old_alloc; i++) {
                    if (old_table[i] != NULL) {
                        py_set_lookup(o, old_table[i], true);
                    }
                }
                m_free(old_table);
                // restart the search for the new element
                pos = hash % o->u_set.alloc;
            } else {
                o->u_set.used += 1;
                o->u_set.table[pos] = index;
                return index;
            }
        } else if (py_obj_equal(elem, index)) {
            // found it
            return elem;
        } else {
            // not yet found, keep searching in this table
            pos = (pos + 1) % o->u_set.alloc;
        }
    }
}
// Build a set from n_args items; duplicates collapse through py_set_lookup.
py_obj_t rt_build_set(int n_args, py_obj_t *items) {
    py_obj_base_t *set = m_new(py_obj_base_t, 1);
    set->kind = O_SET;
    set->u_set.used = 0;
    set->u_set.alloc = get_doubling_prime_greater_or_equal_to(n_args + 1);
    set->u_set.table = m_new(py_obj_t, set->u_set.alloc);
    // NULL marks an empty slot, so clear the whole table before inserting
    for (int slot = 0; slot < set->u_set.alloc; slot++) {
        set->u_set.table[slot] = NULL;
    }
    for (int k = 0; k < n_args; k++) {
        py_set_lookup(set, items[k], true);
    }
    return set;
}
// Build an empty dict sized for n_args entries; keys are Python objects.
py_obj_t rt_build_map(int n_args) {
    py_obj_base_t *dict = m_new(py_obj_base_t, 1);
    py_map_init(&dict->u_map, MAP_PY_OBJ, n_args);
    dict->kind = O_MAP;
    return dict;
}
// Store key -> value in a dict (adding or overwriting) and return the dict.
py_obj_t rt_store_map(py_obj_t map, py_obj_t key, py_obj_t value) {
    assert(IS_O(map, O_MAP)); // should always be
    py_map_elem_t *slot = py_map_lookup(map, key, true);
    slot->value = value;
    return map;
}
// Implement base[index] = value for lists (small int index, negative counts from the
// end, out of range trips the assert) and dicts. Other bases trip the assert.
void rt_store_subscr(py_obj_t base, py_obj_t index, py_obj_t value) {
    if (IS_O(base, O_LIST) && IS_SMALL_INT(index)) {
        // list store
        py_obj_base_t *list = base;
        int pos = FROM_SMALL_INT(index);
        if (pos < 0) {
            pos += list->u_list.len;
        }
        if (pos < 0 || pos >= list->u_list.len) {
            assert(0);
            return;
        }
        list->u_list.items[pos] = value;
        return;
    }
    if (IS_O(base, O_MAP)) {
        // map store
        py_map_elem_t *slot = py_map_lookup(base, index, true);
        slot->value = value;
        return;
    }
    assert(0);
}
// Create a bound method object pairing meth with the object it is bound to.
py_obj_t build_bound_method(py_obj_t self, py_obj_t meth) {
    py_obj_base_t *bound = m_new(py_obj_base_t, 1);
    bound->u_bound_meth.self = self;
    bound->u_bound_meth.meth = meth;
    bound->kind = O_BOUND_METH;
    return bound;
}
// Load base.attr.
//  - list.append yields a bound method wrapping list_append;
//  - on a class, the attribute is looked up in the class map;
//  - anything else is reported as an AttributeError.
// Failures trip an assert and return None when asserts are compiled out.
py_obj_t rt_load_attr(py_obj_t base, qstr attr) {
    DEBUG_OP_printf("load %s\n", qstr_str(attr));
    if (IS_O(base, O_LIST) && attr == q_append) {
        return build_bound_method(base, fun_list_append);
    } else if (IS_O(base, O_CLASS)) {
        py_obj_base_t *o = base;
        py_map_elem_t *elem = py_qstr_map_lookup(o->u_class.map, attr, false);
        if (elem == NULL) {
            printf("Nope! %s\n", qstr_str(attr));
            assert(0);
            // do not fall through to dereference the missing entry
            return py_const_none;
        }
        return elem->value;
    } else {
        printf("AttributeError: '%s' object has no attribute '%s'\n", py_obj_get_type_str(base), qstr_str(attr));
        assert(0);
        return py_const_none;
    }
}
// Load base.attr for a method call, writing a (self, callable) pair into dest:
// dest[0] is the self object, or NULL when dest[1] is a plain callable, and dest[1] is
// the callable. list.append avoids creating a bound method object.
void rt_load_method(py_obj_t base, qstr attr, py_obj_t *dest) {
    DEBUG_OP_printf("load method %s\n", qstr_str(attr));
    if (!(IS_O(base, O_LIST) && attr == q_append)) {
        dest[1] = rt_load_attr(base, attr);
        dest[0] = NULL;
        return;
    }
    dest[1] = fun_list_append;
    dest[0] = base;
}
// Table of runtime entry points, indexed by rt_fun_kind_t.
// The entries must stay in exactly the same order as the RT_F_* enumerators
// in runtime.h.
// NOTE(review): presumably consumed by the native code emitters to call into the
// runtime -- confirm against the emit*.c files.
void *rt_fun_table[RT_F_NUMBER_OF] = {
rt_load_const_str,
rt_load_name,
rt_load_global,
rt_load_attr,
rt_load_method,
rt_store_name,
rt_store_subscr,
rt_is_true,
rt_unary_op,
rt_build_list,
rt_build_map,
rt_store_map,
rt_build_set,
rt_make_function_from_id,
rt_call_function_0,
rt_call_function_1,
rt_call_function_2,
rt_call_method_1,
rt_call_method_2,
rt_binary_op,
rt_compare_op,
};
/*
void rt_f_vector(rt_fun_kind_t fun_kind) {
(rt_f_table[fun_kind])();
}
*/

121
py/runtime.h Normal file
View File

@ -0,0 +1,121 @@
// Operator codes for rt_unary_op.
typedef enum {
RT_UNARY_OP_NOT,
RT_UNARY_OP_POSITIVE,
RT_UNARY_OP_NEGATIVE,
RT_UNARY_OP_INVERT,
} rt_unary_op_t;
// Operator codes for rt_binary_op: subscription, the plain binary operators, then
// the in-place variants in the same order as the plain ones.
typedef enum {
RT_BINARY_OP_SUBSCR,
RT_BINARY_OP_OR,
RT_BINARY_OP_XOR,
RT_BINARY_OP_AND,
RT_BINARY_OP_LSHIFT,
RT_BINARY_OP_RSHIFT,
RT_BINARY_OP_ADD,
RT_BINARY_OP_SUBTRACT,
RT_BINARY_OP_MULTIPLY,
RT_BINARY_OP_FLOOR_DIVIDE,
RT_BINARY_OP_TRUE_DIVIDE,
RT_BINARY_OP_MODULO,
RT_BINARY_OP_POWER,
RT_BINARY_OP_INPLACE_OR,
RT_BINARY_OP_INPLACE_XOR,
RT_BINARY_OP_INPLACE_AND,
RT_BINARY_OP_INPLACE_LSHIFT,
RT_BINARY_OP_INPLACE_RSHIFT,
RT_BINARY_OP_INPLACE_ADD,
RT_BINARY_OP_INPLACE_SUBTRACT,
RT_BINARY_OP_INPLACE_MULTIPLY,
RT_BINARY_OP_INPLACE_FLOOR_DIVIDE,
RT_BINARY_OP_INPLACE_TRUE_DIVIDE,
RT_BINARY_OP_INPLACE_MODULO,
RT_BINARY_OP_INPLACE_POWER,
} rt_binary_op_t;
// Operator codes for rt_compare_op.
typedef enum {
RT_COMPARE_OP_LESS,
RT_COMPARE_OP_MORE,
RT_COMPARE_OP_EQUAL,
RT_COMPARE_OP_LESS_EQUAL,
RT_COMPARE_OP_MORE_EQUAL,
RT_COMPARE_OP_NOT_EQUAL,
RT_COMPARE_OP_IN,
RT_COMPARE_OP_NOT_IN,
RT_COMPARE_OP_IS,
RT_COMPARE_OP_IS_NOT,
RT_COMPARE_OP_EXCEPTION_MATCH,
} rt_compare_op_t;
// Indices into rt_fun_table. The enumerators must stay in the same order as the
// table initialiser in runtime.c; RT_F_NUMBER_OF is the table size.
typedef enum {
RT_F_LOAD_CONST_STR = 0,
RT_F_LOAD_NAME,
RT_F_LOAD_GLOBAL,
RT_F_LOAD_ATTR,
RT_F_LOAD_METHOD,
RT_F_STORE_NAME,
RT_F_STORE_SUBSCR,
RT_F_IS_TRUE,
RT_F_UNARY_OP,
RT_F_BUILD_LIST,
RT_F_BUILD_MAP,
RT_F_STORE_MAP,
RT_F_BUILD_SET,
RT_F_MAKE_FUNCTION_FROM_ID,
RT_F_CALL_FUNCTION_0,
RT_F_CALL_FUNCTION_1,
RT_F_CALL_FUNCTION_2,
RT_F_CALL_METHOD_1,
RT_F_CALL_METHOD_2,
RT_F_BINARY_OP,
RT_F_COMPARE_OP,
RT_F_NUMBER_OF,
} rt_fun_kind_t;
// Runtime entry points, indexed by rt_fun_kind_t.
extern void *rt_fun_table[RT_F_NUMBER_OF];
// Opaque handle to any runtime object.
typedef machine_ptr_t py_obj_t; // must be of pointer size
// Native function signatures, by number of object arguments.
typedef py_obj_t (*py_fun_0_t)();
typedef py_obj_t (*py_fun_1_t)(py_obj_t);
typedef py_obj_t (*py_fun_2_t)(py_obj_t, py_obj_t);
// Native function pointer with an unspecified argument list.
typedef py_obj_t (*py_fun_t)();
// Singleton constants; valid after rt_init.
extern py_obj_t py_const_none;
extern py_obj_t py_const_false;
extern py_obj_t py_const_true;
// start-up and shut-down of the runtime
void rt_init();
void rt_deinit();
// registry of compiled code: obtain an id, then attach native or byte code to it
int rt_get_new_unique_code_id();
void rt_assign_native_code(int unique_code_id, py_fun_t f, uint len, int n_args);
void rt_assign_byte_code(int unique_code_id, byte *code, uint len, int n_args);
// NOTE(review): no definition of rt_get_code appears in runtime.c -- confirm whether
// this declaration is still needed.
py_fun_t rt_get_code(qstr id);
// print an object to stdout, Python style, without a trailing newline
void py_obj_print(py_obj_t o);
// conversions from objects to C values
int rt_is_true(py_obj_t arg);
int rt_get_int(py_obj_t arg);
// loading constants and names
py_obj_t rt_load_const_str(qstr qstr);
//py_obj_t rt_load_const_code(qstr qstr);
py_obj_t rt_load_name(qstr qstr);
py_obj_t rt_load_global(qstr qstr);
py_obj_t rt_load_build_class();
void rt_store_name(qstr qstr, py_obj_t obj);
// operators; op is an rt_unary_op_t / rt_binary_op_t / rt_compare_op_t value
py_obj_t rt_unary_op(int op, py_obj_t arg);
py_obj_t rt_binary_op(int op, py_obj_t lhs, py_obj_t rhs);
py_obj_t rt_compare_op(int op, py_obj_t lhs, py_obj_t rhs);
// creating function objects
py_obj_t rt_make_function_from_id(int unique_code_id);
py_obj_t rt_make_function_0(py_fun_0_t f);
py_obj_t rt_make_function_1(py_fun_1_t f);
py_obj_t rt_make_function_2(py_fun_2_t f);
py_obj_t rt_make_function(int n_args, py_fun_t code);
// calling functions and (fun, self) method pairs; a NULL self means a plain call
py_obj_t rt_call_function_0(py_obj_t fun);
py_obj_t rt_call_function_1(py_obj_t fun, py_obj_t arg);
py_obj_t rt_call_function_2(py_obj_t fun, py_obj_t arg1, py_obj_t arg2);
py_obj_t rt_call_method_1(py_obj_t fun, py_obj_t self);
py_obj_t rt_call_method_2(py_obj_t fun, py_obj_t self, py_obj_t arg);
// building containers; rt_build_list expects its items in reverse order
py_obj_t rt_build_list(int n_args, py_obj_t *items);
py_obj_t rt_build_map(int n_args);
py_obj_t rt_store_map(py_obj_t map, py_obj_t key, py_obj_t value);
py_obj_t rt_build_set(int n_args, py_obj_t *items);
// subscript store and attribute access
void rt_store_subscr(py_obj_t base, py_obj_t index, py_obj_t val);
py_obj_t rt_load_attr(py_obj_t base, qstr attr);
// writes dest[0] = self (or NULL) and dest[1] = callable
void rt_load_method(py_obj_t base, qstr attr, py_obj_t *dest);

218
py/scope.c Normal file
View File

@ -0,0 +1,218 @@
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "misc.h"
#include "machine.h"
#include "parse.h"
#include "scope.h"
// Allocate a scope of the given kind for parse node pn.
// The scope starts detached (no parent, no next), with an empty identifier table of
// capacity 8 and all counters zero. Its simple name is taken from the first child of
// pn for functions and classes, is 0 for a module, and is a fixed "<...>" string for
// lambdas, comprehensions and generator expressions.
scope_t *scope_new(scope_kind_t kind, py_parse_node_t pn) {
    scope_t *scope = m_new(scope_t, 1);

    // identity and linkage
    scope->kind = kind;
    scope->pn = pn;
    scope->parent = NULL;
    scope->next = NULL;

    // empty identifier table
    scope->id_info_alloc = 8;
    scope->id_info_len = 0;
    scope->id_info = m_new(id_info_t, scope->id_info_alloc);

    // counters and flags
    scope->flags = 0;
    scope->num_params = 0;
    scope->num_locals = 0;
    scope->unique_code_id = 0;

    // name, depending on the kind of scope
    switch (kind) {
        case SCOPE_MODULE:
            scope->simple_name = 0;
            break;
        case SCOPE_FUNCTION:
        case SCOPE_CLASS:
        {
            assert(PY_PARSE_NODE_IS_STRUCT(pn));
            py_parse_node_struct_t *def = (py_parse_node_struct_t*)pn;
            scope->simple_name = PY_PARSE_NODE_LEAF_ARG(def->nodes[0]);
            break;
        }
        case SCOPE_LAMBDA:
            scope->simple_name = qstr_from_str_static("<lambda>");
            break;
        case SCOPE_LIST_COMP:
            scope->simple_name = qstr_from_str_static("<listcomp>");
            break;
        case SCOPE_DICT_COMP:
            scope->simple_name = qstr_from_str_static("<dictcomp>");
            break;
        case SCOPE_SET_COMP:
            scope->simple_name = qstr_from_str_static("<setcomp>");
            break;
        case SCOPE_GEN_EXPR:
            scope->simple_name = qstr_from_str_static("<genexpr>");
            break;
        default:
            assert(0);
    }
    return scope;
}
// Return the entry for qstr in this scope, creating it if it does not exist.
// *added tells the caller which of the two happened. New entries are inserted so
// that the table stays sorted by identifier text (to be equivalent to CPython; no
// other reason), and start with param false and kind 0.
id_info_t *scope_find_or_add_id(scope_t *scope, qstr qstr, bool *added) {
    // existing entry?
    for (int k = 0; k < scope->id_info_len; k++) {
        id_info_t *known = &scope->id_info[k];
        if (known->qstr == qstr) {
            *added = false;
            return known;
        }
    }

    // make sure we have enough memory
    if (scope->id_info_len >= scope->id_info_alloc) {
        scope->id_info_alloc *= 2;
        scope->id_info = m_renew(id_info_t, scope->id_info, scope->id_info_alloc);
    }

    // open a gap at the sorted position by shifting later entries up one slot
    int slot = scope->id_info_len;
    scope->id_info_len += 1;
    while (slot != 0 && !(strcmp(qstr_str(scope->id_info[slot - 1].qstr), qstr_str(qstr)) < 0)) {
        scope->id_info[slot] = scope->id_info[slot - 1];
        slot -= 1;
    }

    id_info_t *id_info = &scope->id_info[slot];
    id_info->param = false;
    id_info->kind = 0;
    id_info->qstr = qstr;
    *added = true;
    return id_info;
}
// Search the identifier table of this scope only (parents are not consulted).
// Returns the entry whose name equals qstr, or NULL when there is none.
id_info_t *scope_find(scope_t *scope, qstr qstr) {
    id_info_t *entry = scope->id_info;
    for (int remaining = scope->id_info_len; remaining > 0; remaining--, entry++) {
        if (entry->qstr == qstr) {
            return entry;
        }
    }
    return NULL;
}
// Look up qstr in the outermost scope reachable from `scope`, i.e. the one
// that has no parent. Returns the matching entry, or NULL if that scope does
// not know the name.
id_info_t *scope_find_global(scope_t *scope, qstr qstr) {
    scope_t *outermost = scope;
    while (outermost->parent != NULL) {
        outermost = outermost->parent;
    }
    return scope_find(outermost, qstr);
}
// Search the enclosing scopes of `scope`, nearest first, for an entry named
// qstr. The outermost scope (the one without a parent) is never searched.
// Returns the first entry found, or NULL if there is none or if `scope` has
// no parent at all.
id_info_t *scope_find_local_in_parent(scope_t *scope, qstr qstr) {
    scope_t *ancestor = scope->parent;
    while (ancestor != NULL && ancestor->parent != NULL) {
        id_info_t *hit = scope_find(ancestor, qstr);
        if (hit != NULL) {
            return hit;
        }
        ancestor = ancestor->parent;
    }
    return NULL;
}
// Record that `scope` uses qstr from an enclosing scope.
//
// Walks the enclosing scopes, nearest first and excluding the outermost one.
// Each scope that does not know the name gets a new entry of kind
// ID_INFO_KIND_FREE. The walk ends at the first scope that already has an
// entry: a LOCAL entry there is turned into a CELL, FREE and CELL entries are
// left alone. Asserts if `scope` has no parent or if no enclosing scope
// declares the name.
void scope_close_over_in_parents(scope_t *scope, qstr qstr) {
    assert(scope->parent != NULL); // we should have at least 1 parent
    scope_t *ancestor = scope->parent;
    while (ancestor->parent != NULL) {
        id_info_t *entry = scope_find(ancestor, qstr);
        if (entry != NULL) {
            // the name is declared here, so this is where the walk stops
            if (entry->kind == ID_INFO_KIND_LOCAL) {
                // local to this scope: close it over
                entry->kind = ID_INFO_KIND_CELL;
            } else if (entry->kind != ID_INFO_KIND_FREE && entry->kind != ID_INFO_KIND_CELL) {
                // FREE/CELL mean it is already closed over; other kinds are not handled yet
                assert(0); // TODO
            }
            return;
        }

        // not declared in this scope: pass it through as a free variable and keep going
        bool added;
        entry = scope_find_or_add_id(ancestor, qstr, &added);
        assert(added);
        entry->kind = ID_INFO_KIND_FREE;

        ancestor = ancestor->parent;
    }
    assert(0); // we should have found the variable in one of the parents
}
// Print a short summary of a scope to stdout: a "code <name>" line followed by
// its flags (hex), parameter count, number of locals and stack size.
void scope_print_info(scope_t *s) {
    // the kinds below have fixed names; everything else prints its simple_name
    switch (s->kind) {
        case SCOPE_MODULE:
            printf("code <module>\n");
            break;
        case SCOPE_LAMBDA:
            printf("code <lambda>\n");
            break;
        case SCOPE_LIST_COMP:
            printf("code <listcomp>\n");
            break;
        case SCOPE_DICT_COMP:
            printf("code <dictcomp>\n");
            break;
        case SCOPE_SET_COMP:
            printf("code <setcomp>\n");
            break;
        case SCOPE_GEN_EXPR:
            printf("code <genexpr>\n");
            break;
        default:
            printf("code %s\n", qstr_str(s->simple_name));
            break;
    }

    printf(" flags %04x\n", s->flags);
    printf(" argcount %d\n", s->num_params);
    printf(" nlocals %d\n", s->num_locals);
    printf(" stacksize %d\n", s->stack_size);
}

58
py/scope.h Normal file
View File

@ -0,0 +1,58 @@
// How an identifier is bound in a scope (stored in id_info_t.kind).
enum {
    ID_INFO_KIND_GLOBAL_IMPLICIT = 0,
    ID_INFO_KIND_GLOBAL_EXPLICIT = 1,
    // in a function f, written and only referenced by f
    ID_INFO_KIND_LOCAL = 2,
    // in a function f, read/written by children of f
    ID_INFO_KIND_CELL = 3,
    // in a function f, belongs to the parent of f
    ID_INFO_KIND_FREE = 4,
};
// One entry of a scope's identifier table: what is known about a single name.
typedef struct _id_info_t {
bool param; // false for a freshly added entry; presumably set when the name is a function parameter (set outside scope.c - confirm)
int kind; // one of the ID_INFO_KIND_* values; a freshly added entry has 0
qstr qstr; // name of the identifier; table lookups compare this with ==
int local_num; // when it's an ID_INFO_KIND_LOCAL this is the unique number of the local
} id_info_t;
// Scope flag bits, taken from python source, Include/code.h.
// Each value is a distinct single bit; presumably they are OR-ed together in
// scope_t.flags (the code that sets them is not in this file - confirm).
#define SCOPE_FLAG_OPTIMISED 0x0001
#define SCOPE_FLAG_NEWLOCALS 0x0002
#define SCOPE_FLAG_VARARGS 0x0004
#define SCOPE_FLAG_VARKEYWORDS 0x0008
#define SCOPE_FLAG_NESTED 0x0010
#define SCOPE_FLAG_GENERATOR 0x0020
/* The SCOPE_FLAG_NOFREE flag is set if there are no free or cell variables.
This information is redundant, but it allows a single flag test
to determine whether there is any extra work to be done when the
call frame is set up.
*/
#define SCOPE_FLAG_NOFREE 0x0040
// A scope is a "block" in Python parlance; this enumerates the kinds of block.
typedef enum {
    SCOPE_MODULE,
    SCOPE_FUNCTION,
    SCOPE_LAMBDA,
    SCOPE_LIST_COMP,
    SCOPE_DICT_COMP,
    SCOPE_SET_COMP,
    SCOPE_GEN_EXPR,
    SCOPE_CLASS
} scope_kind_t;
// A scope (Python "block"): its place in the scope tree, its name, and the
// table of identifiers it knows about.
typedef struct _scope_t {
scope_kind_t kind; // what sort of block this is
struct _scope_t *parent; // enclosing scope; NULL for the outermost scope
struct _scope_t *next; // NULL after scope_new; presumably links scopes into a list (maintained outside scope.c - confirm)
py_parse_node_t pn; // parse node this scope was created for
qstr simple_name; // name of the scope: 0 for a module, "<lambda>", "<listcomp>" etc., or the function/class name
int id_info_alloc; // number of entries allocated in id_info (starts at 8, doubled when full)
int id_info_len; // number of entries of id_info in use
id_info_t *id_info; // identifier table, kept sorted by name on insertion
int flags; // printed in hex by scope_print_info; presumably SCOPE_FLAG_* bits - confirm
int num_params; // printed as "argcount" by scope_print_info
/* not needed
int num_default_params;
int num_dict_params;
*/
int num_locals; // printed as "nlocals" by scope_print_info
int stack_size; // printed as "stacksize" by scope_print_info
int unique_code_id; // 0 after scope_new; assigned outside scope.c
} scope_t;
// Allocate a scope of the given kind for parse node pn, with no parent and an empty identifier table.
scope_t *scope_new(scope_kind_t kind, py_parse_node_t pn);
// Return the entry for qstr in scope, inserting a new one (in name-sorted position) if absent;
// *added is set to true when a new entry was created, false otherwise.
id_info_t *scope_find_or_add_id(scope_t *scope, qstr qstr, bool *added);
// Return the entry for qstr in this scope only, or NULL.
id_info_t *scope_find(scope_t *scope, qstr qstr);
// Return the entry for qstr in the outermost scope (the ancestor with no parent), or NULL.
id_info_t *scope_find_global(scope_t *scope, qstr qstr);
// Search the enclosing scopes, nearest first and excluding the outermost one; return the first entry for qstr, or NULL.
id_info_t *scope_find_local_in_parent(scope_t *scope, qstr qstr);
// Mark qstr as closed over: enclosing scopes that lack it gain a FREE entry, and the first
// enclosing scope that has it as LOCAL gets it turned into a CELL. Asserts if scope has no parent.
void scope_close_over_in_parents(scope_t *scope, qstr qstr);
// Print the scope's name, flags, argument count, local count and stack size to stdout.
void scope_print_info(scope_t *s);