From 71a3d6ec3bd02c5bd13334537e1bd146bb643bad Mon Sep 17 00:00:00 2001 From: Damien George Date: Fri, 17 Mar 2017 14:54:53 +1100 Subject: [PATCH] py: Reduce size of mp_code_state_t structure. Instead of caching data that is constant (code_info, const_table and n_state), store just a pointer to the underlying function object from which this data can be derived. This helps reduce stack usage for the case when the mp_code_state_t structure is stored on the stack, as well as heap usage when it's stored on the heap. The downside is that the VM becomes a little more complex because it now needs to derive the data from the underlying function object. But this doesn't impact the performance by much (if at all) because most of the decoding of data is done outside the main opcode loop. Measurements using pystone show that little to no performance is lost. This patch also fixes a nasty bug whereby the bytecode can be reclaimed by the GC during execution. With this patch there is always a pointer to the function object held by the VM during execution, since it's stored in the mp_code_state_t structure. --- py/bc.c | 22 +++++++++++----------- py/bc.h | 12 +++++++----- py/emitnative.c | 35 ++++++++++++----------------------- py/objfun.c | 12 ++++++------ py/objgenerator.c | 15 +++++++++------ py/vm.c | 25 +++++++++++++++++-------- 6 files changed, 62 insertions(+), 59 deletions(-) diff --git a/py/bc.c b/py/bc.c index 07de08fc3..db5d0e686 100644 --- a/py/bc.c +++ b/py/bc.c @@ -86,25 +86,26 @@ STATIC void dump_args(const mp_obj_t *a, size_t sz) { // On entry code_state should be allocated somewhere (stack/heap) and // contain the following valid entries: -// - code_state->ip should contain the offset in bytes from the start of -// the bytecode chunk to just after n_state and n_exc_stack -// - code_state->n_state should be set to the state size (locals plus stack) -void mp_setup_code_state(mp_code_state_t *code_state, mp_obj_fun_bc_t *self, size_t n_args, size_t n_kw, const mp_obj_t *args) { +// - code_state->fun_bc should contain a pointer to the function object +// - code_state->ip should contain the offset in bytes from the pointer +// code_state->fun_bc->bytecode to the entry n_state (0 for bytecode, non-zero for native) +void mp_setup_code_state(mp_code_state_t *code_state, size_t n_args, size_t n_kw, const mp_obj_t *args) { // This function is pretty complicated. It's main aim is to be efficient in speed and RAM // usage for the common case of positional only args. - size_t n_state = code_state->n_state; + + // get the function object that we want to set up (could be bytecode or native code) + mp_obj_fun_bc_t *self = code_state->fun_bc; // ip comes in as an offset into bytecode, so turn it into a true pointer code_state->ip = self->bytecode + (size_t)code_state->ip; - // store pointer to constant table - code_state->const_table = self->const_table; - #if MICROPY_STACKLESS code_state->prev = NULL; #endif // get params + size_t n_state = mp_decode_uint(&code_state->ip); + mp_decode_uint(&code_state->ip); // skip n_exc_stack size_t scope_flags = *code_state->ip++; size_t n_pos_args = *code_state->ip++; size_t n_kwonly_args = *code_state->ip++; @@ -168,7 +169,7 @@ void mp_setup_code_state(mp_code_state_t *code_state, mp_obj_fun_bc_t *self, siz } // get pointer to arg_names array - const mp_obj_t *arg_names = (const mp_obj_t*)code_state->const_table; + const mp_obj_t *arg_names = (const mp_obj_t*)self->const_table; for (size_t i = 0; i < n_kw; i++) { // the keys in kwargs are expected to be qstr objects @@ -244,9 +245,8 @@ continue2:; // get the ip and skip argument names const byte *ip = code_state->ip; - // store pointer to code_info and jump over it + // jump over code info (source file and line-number mapping) { - code_state->code_info = ip; const byte *ip2 = ip; size_t code_info_size = mp_decode_uint(&ip2); ip += code_info_size; diff --git a/py/bc.h b/py/bc.h index c7dffbac5..996b1a2f3 100644 --- a/py/bc.h +++ b/py/bc.h @@ -28,6 +28,7 @@ #include "py/runtime.h" #include "py/obj.h" +#include "py/objfun.h" // bytecode layout: // @@ -70,9 +71,12 @@ typedef struct _mp_exc_stack_t { } mp_exc_stack_t; typedef struct _mp_code_state_t { - const byte *code_info; + // The fun_bc entry points to the underlying function object that is being executed. + // It is needed to access the start of bytecode and the const_table. + // It is also needed to prevent the GC from reclaiming the bytecode during execution, + // because the ip pointer below will always point to the interior of the bytecode. + mp_obj_fun_bc_t *fun_bc; const byte *ip; - const mp_uint_t *const_table; mp_obj_t *sp; // bit 0 is saved currently_in_except_block value mp_exc_stack_t *exc_sp; @@ -80,7 +84,6 @@ typedef struct _mp_code_state_t { #if MICROPY_STACKLESS struct _mp_code_state_t *prev; #endif - size_t n_state; // Variable-length mp_obj_t state[0]; // Variable-length, never accessed by name, only as (void*)(state + n_state) @@ -91,8 +94,7 @@ mp_uint_t mp_decode_uint(const byte **ptr); mp_vm_return_kind_t mp_execute_bytecode(mp_code_state_t *code_state, volatile mp_obj_t inject_exc); mp_code_state_t *mp_obj_fun_bc_prepare_codestate(mp_obj_t func, size_t n_args, size_t n_kw, const mp_obj_t *args); -struct _mp_obj_fun_bc_t; -void mp_setup_code_state(mp_code_state_t *code_state, struct _mp_obj_fun_bc_t *self, size_t n_args, size_t n_kw, const mp_obj_t *args); +void mp_setup_code_state(mp_code_state_t *code_state, size_t n_args, size_t n_kw, const mp_obj_t *args); void mp_bytecode_print(const void *descr, const byte *code, mp_uint_t len, const mp_uint_t *const_table); void mp_bytecode_print2(const byte *code, size_t len, const mp_uint_t *const_table); const byte *mp_bytecode_print_str(const byte *ip); diff --git a/py/emitnative.c b/py/emitnative.c index 55eb6cadf..4af68102b 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -407,43 +407,29 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop #endif // prepare incoming arguments for call to mp_setup_code_state + #if N_X86 - asm_x86_mov_arg_to_r32(emit->as, 0, REG_ARG_2); - asm_x86_mov_arg_to_r32(emit->as, 1, REG_ARG_3); - asm_x86_mov_arg_to_r32(emit->as, 2, REG_ARG_4); - asm_x86_mov_arg_to_r32(emit->as, 3, REG_ARG_5); - #else - #if N_THUMB - ASM_MOV_REG_REG(emit->as, ASM_THUMB_REG_R4, REG_ARG_4); - #elif N_ARM - ASM_MOV_REG_REG(emit->as, ASM_ARM_REG_R4, REG_ARG_4); - #else - ASM_MOV_REG_REG(emit->as, REG_ARG_5, REG_ARG_4); - #endif - ASM_MOV_REG_REG(emit->as, REG_ARG_4, REG_ARG_3); - ASM_MOV_REG_REG(emit->as, REG_ARG_3, REG_ARG_2); - ASM_MOV_REG_REG(emit->as, REG_ARG_2, REG_ARG_1); + asm_x86_mov_arg_to_r32(emit->as, 0, REG_ARG_1); + asm_x86_mov_arg_to_r32(emit->as, 1, REG_ARG_2); + asm_x86_mov_arg_to_r32(emit->as, 2, REG_ARG_3); + asm_x86_mov_arg_to_r32(emit->as, 3, REG_ARG_4); #endif + // set code_state.fun_bc + ASM_MOV_REG_TO_LOCAL(emit->as, REG_ARG_1, offsetof(mp_code_state_t, fun_bc) / sizeof(uintptr_t)); + // set code_state.ip (offset from start of this function to prelude info) // XXX this encoding may change size - ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->prelude_offset, offsetof(mp_code_state_t, ip) / sizeof(mp_uint_t), REG_ARG_1); - - // set code_state.n_state - ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->n_state, offsetof(mp_code_state_t, n_state) / sizeof(mp_uint_t), REG_ARG_1); + ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->prelude_offset, offsetof(mp_code_state_t, ip) / sizeof(uintptr_t), REG_ARG_1); // put address of code_state into first arg ASM_MOV_LOCAL_ADDR_TO_REG(emit->as, 0, REG_ARG_1); // call mp_setup_code_state to prepare code_state structure #if N_THUMB - asm_thumb_op16(emit->as, 0xb400 | (1 << ASM_THUMB_REG_R4)); // push 5th arg asm_thumb_bl_ind(emit->as, mp_fun_table[MP_F_SETUP_CODE_STATE], MP_F_SETUP_CODE_STATE, ASM_THUMB_REG_R4); - asm_thumb_op16(emit->as, 0xbc00 | (1 << REG_RET)); // pop dummy (was 5th arg) #elif N_ARM - asm_arm_push(emit->as, 1 << ASM_ARM_REG_R4); // push 5th arg asm_arm_bl_ind(emit->as, mp_fun_table[MP_F_SETUP_CODE_STATE], MP_F_SETUP_CODE_STATE, ASM_ARM_REG_R4); - asm_arm_pop(emit->as, 1 << REG_RET); // pop dummy (was 5th arg) #else ASM_CALL_IND(emit->as, mp_fun_table[MP_F_SETUP_CODE_STATE], MP_F_SETUP_CODE_STATE); #endif @@ -477,6 +463,9 @@ STATIC void emit_native_end_pass(emit_t *emit) { if (!emit->do_viper_types) { emit->prelude_offset = mp_asm_base_get_code_pos(&emit->as->base); + mp_asm_base_data(&emit->as->base, 1, 0x80 | ((emit->n_state >> 7) & 0x7f)); + mp_asm_base_data(&emit->as->base, 1, emit->n_state & 0x7f); + mp_asm_base_data(&emit->as->base, 1, 0); // n_exc_stack mp_asm_base_data(&emit->as->base, 1, emit->scope->scope_flags); mp_asm_base_data(&emit->as->base, 1, emit->scope->num_pos_args); mp_asm_base_data(&emit->as->base, 1, emit->scope->num_kwonly_args); diff --git a/py/objfun.c b/py/objfun.c index a823f49e5..4670521b4 100644 --- a/py/objfun.c +++ b/py/objfun.c @@ -220,9 +220,9 @@ mp_code_state_t *mp_obj_fun_bc_prepare_codestate(mp_obj_t self_in, size_t n_args return NULL; } - code_state->ip = (byte*)(ip - self->bytecode); // offset to after n_state/n_exc_stack - code_state->n_state = n_state; - mp_setup_code_state(code_state, self, n_args, n_kw, args); + code_state->fun_bc = self; + code_state->ip = 0; + mp_setup_code_state(code_state, n_args, n_kw, args); // execute the byte code with the correct globals context code_state->old_globals = mp_globals_get(); @@ -265,9 +265,9 @@ STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const state_size = 0; // indicate that we allocated using alloca } - code_state->ip = (byte*)(ip - self->bytecode); // offset to after n_state/n_exc_stack - code_state->n_state = n_state; - mp_setup_code_state(code_state, self, n_args, n_kw, args); + code_state->fun_bc = self; + code_state->ip = 0; + mp_setup_code_state(code_state, n_args, n_kw, args); // execute the byte code with the correct globals context code_state->old_globals = mp_globals_get(); diff --git a/py/objgenerator.c b/py/objgenerator.c index 654b18670..2baead231 100644 --- a/py/objgenerator.c +++ b/py/objgenerator.c @@ -67,9 +67,9 @@ STATIC mp_obj_t gen_wrap_call(mp_obj_t self_in, size_t n_args, size_t n_kw, cons o->base.type = &mp_type_gen_instance; o->globals = self_fun->globals; - o->code_state.n_state = n_state; - o->code_state.ip = (byte*)(ip - self_fun->bytecode); // offset to prelude - mp_setup_code_state(&o->code_state, self_fun, n_args, n_kw, args); + o->code_state.fun_bc = self_fun; + o->code_state.ip = 0; + mp_setup_code_state(&o->code_state, n_args, n_kw, args); return MP_OBJ_FROM_PTR(o); } @@ -92,7 +92,7 @@ mp_obj_t mp_obj_new_gen_wrap(mp_obj_t fun) { STATIC void gen_instance_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) { (void)kind; mp_obj_gen_instance_t *self = MP_OBJ_TO_PTR(self_in); - mp_printf(print, "", mp_obj_code_get_name(self->code_state.code_info), self); + mp_printf(print, "", mp_obj_fun_get_name(MP_OBJ_FROM_PTR(self->code_state.fun_bc)), self); } mp_vm_return_kind_t mp_obj_gen_resume(mp_obj_t self_in, mp_obj_t send_value, mp_obj_t throw_value, mp_obj_t *ret_val) { @@ -134,10 +134,13 @@ mp_vm_return_kind_t mp_obj_gen_resume(mp_obj_t self_in, mp_obj_t send_value, mp_ } break; - case MP_VM_RETURN_EXCEPTION: + case MP_VM_RETURN_EXCEPTION: { + const byte *bc = self->code_state.fun_bc->bytecode; + size_t n_state = mp_decode_uint(&bc); self->code_state.ip = 0; - *ret_val = self->code_state.state[self->code_state.n_state - 1]; + *ret_val = self->code_state.state[n_state - 1]; break; + } } return ret_kind; diff --git a/py/vm.c b/py/vm.c index 7a906cd80..63a88a4fb 100644 --- a/py/vm.c +++ b/py/vm.c @@ -73,10 +73,10 @@ typedef enum { ip += 2; #define DECODE_PTR \ DECODE_UINT; \ - void *ptr = (void*)(uintptr_t)code_state->const_table[unum] + void *ptr = (void*)(uintptr_t)code_state->fun_bc->const_table[unum] #define DECODE_OBJ \ DECODE_UINT; \ - mp_obj_t obj = (mp_obj_t)code_state->const_table[unum] + mp_obj_t obj = (mp_obj_t)code_state->fun_bc->const_table[unum] #else @@ -162,8 +162,10 @@ mp_vm_return_kind_t mp_execute_bytecode(mp_code_state_t *code_state, volatile mp run_code_state: ; #endif // Pointers which are constant for particular invocation of mp_execute_bytecode() - mp_obj_t * /*const*/ fastn = &code_state->state[code_state->n_state - 1]; - mp_exc_stack_t * /*const*/ exc_stack = (mp_exc_stack_t*)(code_state->state + code_state->n_state); + const byte *temp_bc = code_state->fun_bc->bytecode; + size_t n_state = mp_decode_uint(&temp_bc); + mp_obj_t * /*const*/ fastn = &code_state->state[n_state - 1]; + mp_exc_stack_t * /*const*/ exc_stack = (mp_exc_stack_t*)(code_state->state + n_state); // variables that are visible to the exception handler (declared volatile) volatile bool currently_in_except_block = MP_TAGPTR_TAG0(code_state->exc_sp); // 0 or 1, to detect nested exceptions @@ -1327,8 +1329,16 @@ unwind_loop: // But consider how to handle nested exceptions. // TODO need a better way of not adding traceback to constant objects (right now, just GeneratorExit_obj and MemoryError_obj) if (nlr.ret_val != &mp_const_GeneratorExit_obj && nlr.ret_val != &mp_const_MemoryError_obj) { - const byte *ip = code_state->code_info; + const byte *ip = code_state->fun_bc->bytecode; + mp_decode_uint(&ip); // skip n_state + mp_decode_uint(&ip); // skip n_exc_stack + ip++; // skip scope_params + ip++; // skip n_pos_args + ip++; // skip n_kwonly_args + ip++; // skip n_def_pos_args + size_t bc = code_state->ip - ip; size_t code_info_size = mp_decode_uint(&ip); + bc -= code_info_size; #if MICROPY_PERSISTENT_CODE qstr block_name = ip[0] | (ip[1] << 8); qstr source_file = ip[2] | (ip[3] << 8); @@ -1337,7 +1347,6 @@ unwind_loop: qstr block_name = mp_decode_uint(&ip); qstr source_file = mp_decode_uint(&ip); #endif - size_t bc = code_state->ip - code_state->code_info - code_info_size; size_t source_line = 1; size_t c; while ((c = *ip)) { @@ -1393,8 +1402,8 @@ unwind_loop: } else if (code_state->prev != NULL) { mp_globals_set(code_state->old_globals); code_state = code_state->prev; - fastn = &code_state->state[code_state->n_state - 1]; - exc_stack = (mp_exc_stack_t*)(code_state->state + code_state->n_state); + fastn = &code_state->state[n_state - 1]; + exc_stack = (mp_exc_stack_t*)(code_state->state + n_state); // variables that are visible to the exception handler (declared volatile) currently_in_except_block = MP_TAGPTR_TAG0(code_state->exc_sp); // 0 or 1, to detect nested exceptions exc_sp = MP_TAGPTR_PTR(code_state->exc_sp); // stack grows up, exc_sp points to top of stack