From b16523aa95ca0a95f3d5a82e6f691e088135d7ba Mon Sep 17 00:00:00 2001
From: Paul Sokolovsky
Date: Sat, 31 May 2014 03:41:08 +0300
Subject: [PATCH 1/2] vm: Don't unconditionally allocate state on stack, do
 that only if needed.

This makes sure that only as much stack is allocated as is actually used,
reducing stack usage for each Python function call.
---
 py/vm.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/py/vm.c b/py/vm.c
index aa7e0e2cfc..b7a7569b52 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -28,6 +28,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <assert.h>
+#include <alloca.h>
 
 #include "mpconfig.h"
 #include "nlr.h"
@@ -117,21 +118,23 @@ mp_vm_return_kind_t mp_execute_bytecode(const byte *code, const mp_obj_t *args,
     ip += 4;
 
     // allocate state for locals and stack
-    mp_obj_t temp_state[VM_MAX_STATE_ON_STACK];
-    mp_obj_t *state = &temp_state[0];
 #if DETECT_VM_STACK_OVERFLOW
     n_state += 1;
 #endif
+    mp_obj_t *state;
     if (n_state > VM_MAX_STATE_ON_STACK) {
         state = m_new(mp_obj_t, n_state);
+    } else {
+        state = alloca(sizeof(mp_obj_t) * n_state);
     }
     mp_obj_t *sp = &state[0] - 1;
 
     // allocate state for exceptions
-    mp_exc_stack_t exc_state[VM_MAX_EXC_STATE_ON_STACK];
-    mp_exc_stack_t *exc_stack = &exc_state[0];
+    mp_exc_stack_t *exc_stack;
     if (n_exc_stack > VM_MAX_EXC_STATE_ON_STACK) {
         exc_stack = m_new(mp_exc_stack_t, n_exc_stack);
+    } else {
+        exc_stack = alloca(sizeof(mp_exc_stack_t) * n_exc_stack);
     }
     mp_exc_stack_t *exc_sp = &exc_stack[0] - 1;
 

From b4ebad3310b238bb85ea1f0d7b78b4fcb3d146df Mon Sep 17 00:00:00 2001
From: Paul Sokolovsky
Date: Sat, 31 May 2014 16:50:46 +0300
Subject: [PATCH 2/2] vm: Factor out structure with code execution state and
 pass it around.

This improves stack usage in callers of mp_execute_bytecode2, and is a step
forward towards unifying the execution interface for functions and generators
(which is important because generators don't yet support all forms of
argument passing, e.g. keywords).
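To make the new layout concrete: the execution state becomes a single
variable-size object, a fixed header followed by the value/locals slots, with
the exception stack stored immediately after those slots. The sketch below is
only an illustration of that shape (the demo_* names are invented for this
example and are not MicroPython identifiers); it shows why one pointer is
enough for mp_execute_bytecode2 to reach everything it needs.

    #include <stddef.h>
    #include <stdlib.h>

    // Illustrative stand-ins, not MicroPython types.
    typedef void *demo_obj_t;
    typedef struct { const unsigned char *handler; } demo_exc_entry;

    // Mirrors the shape of mp_code_state: fixed header, then a variable-length
    // region holding n_state object slots followed by the exception stack.
    typedef struct {
        const unsigned char *ip;
        demo_obj_t *sp;
        size_t n_state;
        demo_obj_t state[];   // value stack and locals; exc entries live after it
    } demo_code_state;

    demo_code_state *demo_state_new(size_t n_state, size_t n_exc) {
        size_t body = n_state * sizeof(demo_obj_t) + n_exc * sizeof(demo_exc_entry);
        demo_code_state *cs = malloc(sizeof(demo_code_state) + body); // error handling omitted
        cs->n_state = n_state;
        cs->sp = &cs->state[0] - 1;   // empty stack: sp starts one slot below state[0]
        // The exception stack is reached by offset, just as in the patch:
        // demo_exc_entry *exc = (demo_exc_entry *)(cs->state + cs->n_state);
        return cs;
    }
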
---
 py/bc.h           |  15 ++++++-
 py/objgenerator.c |  51 +++++++++--------------
 py/vm.c           | 101 +++++++++++++++++++++-------------------------
 3 files changed, 81 insertions(+), 86 deletions(-)

diff --git a/py/bc.h b/py/bc.h
index 6c1e45b2b3..6c604fe1c0 100644
--- a/py/bc.h
+++ b/py/bc.h
@@ -36,8 +36,21 @@ typedef struct _mp_exc_stack {
     byte opcode;
 } mp_exc_stack_t;
 
+typedef struct _mp_code_state {
+    const byte *code_info;
+    const byte *ip;
+    mp_obj_t *sp;
+    // bit 0 is saved currently_in_except_block value
+    mp_exc_stack_t *exc_sp;
+    uint n_state;
+    // Variable-length
+    mp_obj_t state[0];
+    // Variable-length, never accessed by name, only as (void*)(state + n_state)
+    //mp_exc_stack_t exc_state[0];
+} mp_code_state;
+
 mp_vm_return_kind_t mp_execute_bytecode(const byte *code, const mp_obj_t *args, uint n_args, const mp_obj_t *args2, uint n_args2, mp_obj_t *ret);
-mp_vm_return_kind_t mp_execute_bytecode2(const byte *code_info, const byte **ip_in_out, mp_obj_t *fastn, mp_obj_t **sp_in_out, mp_exc_stack_t *exc_stack, mp_exc_stack_t **exc_sp_in_out, volatile mp_obj_t inject_exc);
+mp_vm_return_kind_t mp_execute_bytecode2(mp_code_state *code_state, volatile mp_obj_t inject_exc);
 
 void mp_bytecode_print(const byte *code, int len);
 void mp_bytecode_print2(const byte *code, int len);
diff --git a/py/objgenerator.c b/py/objgenerator.c
index d9825f814f..7326bced30 100644
--- a/py/objgenerator.c
+++ b/py/objgenerator.c
@@ -82,42 +82,31 @@ mp_obj_t mp_obj_new_gen_wrap(mp_obj_t fun) {
 typedef struct _mp_obj_gen_instance_t {
     mp_obj_base_t base;
     mp_obj_dict_t *globals;
-    const byte *code_info;
-    const byte *ip;
-    mp_obj_t *sp;
-    // bit 0 is saved currently_in_except_block value
-    mp_exc_stack_t *exc_sp;
-    uint n_state;
-    // Variable-length
-    mp_obj_t state[0];
-    // Variable-length, never accessed by name, only as (void*)(state + n_state)
-    //mp_exc_stack_t exc_state[0];
+    mp_code_state code_state;
 } mp_obj_gen_instance_t;
 
 void gen_instance_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
     mp_obj_gen_instance_t *self = self_in;
-    print(env, "<generator object '%s' at %p>", mp_obj_code_get_name(self->code_info), self_in);
+    print(env, "<generator object '%s' at %p>", mp_obj_code_get_name(self->code_state.code_info), self_in);
 }
 
 mp_vm_return_kind_t mp_obj_gen_resume(mp_obj_t self_in, mp_obj_t send_value, mp_obj_t throw_value, mp_obj_t *ret_val) {
     assert(MP_OBJ_IS_TYPE(self_in, &mp_type_gen_instance));
     mp_obj_gen_instance_t *self = self_in;
-    if (self->ip == 0) {
+    if (self->code_state.ip == 0) {
         *ret_val = MP_OBJ_STOP_ITERATION;
         return MP_VM_RETURN_NORMAL;
     }
-    if (self->sp == self->state - 1) {
+    if (self->code_state.sp == self->code_state.state - 1) {
         if (send_value != mp_const_none) {
             nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "can't send non-None value to a just-started generator"));
         }
     } else {
-        *self->sp = send_value;
+        *self->code_state.sp = send_value;
     }
     mp_obj_dict_t *old_globals = mp_globals_get();
     mp_globals_set(self->globals);
-    mp_vm_return_kind_t ret_kind = mp_execute_bytecode2(self->code_info, &self->ip,
-        &self->state[self->n_state - 1], &self->sp, (mp_exc_stack_t*)(self->state + self->n_state),
-        &self->exc_sp, throw_value);
+    mp_vm_return_kind_t ret_kind = mp_execute_bytecode2(&self->code_state, throw_value);
     mp_globals_set(old_globals);
 
     switch (ret_kind) {
@@ -127,17 +116,17 @@ mp_vm_return_kind_t mp_obj_gen_resume(mp_obj_t self_in, mp_obj_t send_value, mp_
             // again and again, leading to side effects.
             // TODO: check how return with value behaves under such conditions
             // in CPython.
-            self->ip = 0;
-            *ret_val = *self->sp;
+            self->code_state.ip = 0;
+            *ret_val = *self->code_state.sp;
             break;
 
         case MP_VM_RETURN_YIELD:
-            *ret_val = *self->sp;
+            *ret_val = *self->code_state.sp;
             break;
 
         case MP_VM_RETURN_EXCEPTION:
-            self->ip = 0;
-            *ret_val = self->state[self->n_state - 1];
+            self->code_state.ip = 0;
+            *ret_val = self->code_state.state[self->code_state.n_state - 1];
             break;
 
         default:
@@ -269,32 +258,32 @@ mp_obj_t mp_obj_new_gen_instance(mp_obj_dict_t *globals, const byte *bytecode,
     mp_obj_gen_instance_t *o = m_new_obj_var(mp_obj_gen_instance_t, byte, n_state * sizeof(mp_obj_t) + n_exc_stack * sizeof(mp_exc_stack_t));
     o->base.type = &mp_type_gen_instance;
     o->globals = globals;
-    o->code_info = code_info;
-    o->sp = &o->state[0] - 1; // sp points to top of stack, which starts off 1 below the state
-    o->exc_sp = (mp_exc_stack_t*)(o->state + n_state) - 1;
-    o->n_state = n_state;
+    o->code_state.code_info = code_info;
+    o->code_state.sp = &o->code_state.state[0] - 1; // sp points to top of stack, which starts off 1 below the state
+    o->code_state.exc_sp = (mp_exc_stack_t*)(o->code_state.state + n_state) - 1;
+    o->code_state.n_state = n_state;
 
     // copy args to end of state array, in reverse (that's how mp_execute_bytecode2 needs it)
     for (uint i = 0; i < n_args; i++) {
-        o->state[n_state - 1 - i] = args[i];
+        o->code_state.state[n_state - 1 - i] = args[i];
     }
     for (uint i = 0; i < n_args2; i++) {
-        o->state[n_state - 1 - n_args - i] = args2[i];
+        o->code_state.state[n_state - 1 - n_args - i] = args2[i];
     }
 
     // set rest of state to MP_OBJ_NULL
     for (uint i = 0; i < n_state - n_args - n_args2; i++) {
-        o->state[i] = MP_OBJ_NULL;
+        o->code_state.state[i] = MP_OBJ_NULL;
     }
 
     // bytecode prelude: initialise closed over variables
     for (uint n_local = *bytecode++; n_local > 0; n_local--) {
         uint local_num = *bytecode++;
-        o->state[n_state - 1 - local_num] = mp_obj_new_cell(o->state[n_state - 1 - local_num]);
+        o->code_state.state[n_state - 1 - local_num] = mp_obj_new_cell(o->code_state.state[n_state - 1 - local_num]);
     }
 
     // set ip to start of actual byte code
-    o->ip = bytecode;
+    o->code_state.ip = bytecode;
 
     return o;
 }
diff --git a/py/vm.c b/py/vm.c
index b7a7569b52..d68d397222 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -41,11 +41,10 @@
 #include "bc.h"
 #include "objgenerator.h"
 
-// With these macros you can tune the maximum number of state slots
+// With these macros you can tune the maximum number of function state bytes
 // that will be allocated on the stack. Any function that needs more
 // than this will use the heap.
-#define VM_MAX_STATE_ON_STACK (10)
-#define VM_MAX_EXC_STATE_ON_STACK (4)
+#define VM_MAX_STATE_ON_STACK (40)
 #define DETECT_VM_STACK_OVERFLOW (0)
 
 #if 0
@@ -121,53 +120,51 @@ mp_vm_return_kind_t mp_execute_bytecode(const byte *code, const mp_obj_t *args,
 #if DETECT_VM_STACK_OVERFLOW
     n_state += 1;
 #endif
-    mp_obj_t *state;
-    if (n_state > VM_MAX_STATE_ON_STACK) {
-        state = m_new(mp_obj_t, n_state);
-    } else {
-        state = alloca(sizeof(mp_obj_t) * n_state);
-    }
-    mp_obj_t *sp = &state[0] - 1;
 
-    // allocate state for exceptions
-    mp_exc_stack_t *exc_stack;
-    if (n_exc_stack > VM_MAX_EXC_STATE_ON_STACK) {
-        exc_stack = m_new(mp_exc_stack_t, n_exc_stack);
+    int state_size = n_state * sizeof(mp_obj_t) + n_exc_stack * sizeof(mp_exc_stack_t);
+    mp_code_state *code_state;
+    if (state_size > VM_MAX_STATE_ON_STACK) {
+        code_state = m_new_obj_var(mp_code_state, byte, state_size);
     } else {
-        exc_stack = alloca(sizeof(mp_exc_stack_t) * n_exc_stack);
+        code_state = alloca(sizeof(mp_code_state) + state_size);
     }
-    mp_exc_stack_t *exc_sp = &exc_stack[0] - 1;
+
+    code_state->code_info = code;
+    code_state->sp = &code_state->state[0] - 1;
+    code_state->exc_sp = (mp_exc_stack_t*)(code_state->state + n_state) - 1;
+    code_state->n_state = n_state;
 
     // init args
     for (uint i = 0; i < n_args; i++) {
-        state[n_state - 1 - i] = args[i];
+        code_state->state[n_state - 1 - i] = args[i];
     }
     for (uint i = 0; i < n_args2; i++) {
-        state[n_state - 1 - n_args - i] = args2[i];
+        code_state->state[n_state - 1 - n_args - i] = args2[i];
     }
 
     // set rest of state to MP_OBJ_NULL
     for (uint i = 0; i < n_state - n_args - n_args2; i++) {
-        state[i] = MP_OBJ_NULL;
+        code_state->state[i] = MP_OBJ_NULL;
    }
 
     // bytecode prelude: initialise closed over variables
     for (uint n_local = *ip++; n_local > 0; n_local--) {
         uint local_num = *ip++;
-        state[n_state - 1 - local_num] = mp_obj_new_cell(state[n_state - 1 - local_num]);
+        code_state->state[n_state - 1 - local_num] = mp_obj_new_cell(code_state->state[n_state - 1 - local_num]);
     }
 
+    code_state->ip = ip;
+
     // execute the byte code
-    mp_vm_return_kind_t vm_return_kind = mp_execute_bytecode2(code, &ip, &state[n_state - 1], &sp, exc_stack, &exc_sp, MP_OBJ_NULL);
+    mp_vm_return_kind_t vm_return_kind = mp_execute_bytecode2(code_state, MP_OBJ_NULL);
 
 #if DETECT_VM_STACK_OVERFLOW
     if (vm_return_kind == MP_VM_RETURN_NORMAL) {
-        if (sp != state) {
-            printf("Stack misalign: %d\n", sp - state);
+        if (code_state->sp != code_state->state) {
+            printf("Stack misalign: %d\n", code_state->sp - code_state->state);
             assert(0);
         }
     }
-
     // We can't check the case when an exception is returned in state[n_state - 1]
     // and there are no arguments, because in this case our detection slot may have
     // been overwritten by the returned exception (which is allowed).
@@ -175,13 +172,13 @@ mp_vm_return_kind_t mp_execute_bytecode(const byte *code, const mp_obj_t *args,
         // Just check to see that we have at least 1 null object left in the state.
         bool overflow = true;
         for (uint i = 0; i < n_state - n_args - n_args2; i++) {
-            if (state[i] == MP_OBJ_NULL) {
+            if (code_state->state[i] == MP_OBJ_NULL) {
                 overflow = false;
                 break;
             }
         }
         if (overflow) {
-            printf("VM stack overflow state=%p n_state+1=%u\n", state, n_state);
+            printf("VM stack overflow state=%p n_state+1=%u\n", code_state->state, n_state);
             assert(0);
         }
     }
@@ -191,13 +188,13 @@ mp_vm_return_kind_t mp_execute_bytecode(const byte *code, const mp_obj_t *args,
     switch (vm_return_kind) {
         case MP_VM_RETURN_NORMAL:
             // return value is in *sp
-            *ret = *sp;
+            *ret = *code_state->sp;
             ret_kind = MP_VM_RETURN_NORMAL;
             break;
 
         case MP_VM_RETURN_EXCEPTION:
             // return value is in state[n_state - 1]
-            *ret = state[n_state - 1];
+            *ret = code_state->state[n_state - 1];
             ret_kind = MP_VM_RETURN_EXCEPTION;
             break;
 
@@ -209,15 +206,9 @@ mp_vm_return_kind_t mp_execute_bytecode(const byte *code, const mp_obj_t *args,
     }
 
     // free the state if it was allocated on the heap
-    if (n_state > VM_MAX_STATE_ON_STACK) {
-        m_free(state, n_state);
+    if (state_size > VM_MAX_STATE_ON_STACK) {
+        m_del_var(mp_code_state, byte, state_size, code_state);
     }
-
-    // free the exception state if it was allocated on the heap
-    if (n_exc_stack > VM_MAX_EXC_STATE_ON_STACK) {
-        m_free(exc_stack, n_exc_stack);
-    }
-
     return ret_kind;
 }
 
@@ -227,10 +218,7 @@
 // MP_VM_RETURN_NORMAL, sp valid, return value in *sp
 // MP_VM_RETURN_YIELD, ip, sp valid, yielded value in *sp
 // MP_VM_RETURN_EXCEPTION, exception in fastn[0]
-mp_vm_return_kind_t mp_execute_bytecode2(const byte *code_info, const byte **ip_in_out,
-                                         mp_obj_t *fastn, mp_obj_t **sp_in_out,
-                                         mp_exc_stack_t *exc_stack, mp_exc_stack_t **exc_sp_in_out,
-                                         volatile mp_obj_t inject_exc) {
+mp_vm_return_kind_t mp_execute_bytecode2(mp_code_state *code_state, volatile mp_obj_t inject_exc) {
 #if MICROPY_OPT_COMPUTED_GOTO
     #include "vmentrytable.h"
     #define DISPATCH() do { \
@@ -252,11 +240,15 @@ mp_vm_return_kind_t mp_execute_bytecode2(const byte *code_info, const byte **ip_
     // loop and the exception handler, leading to very obscure bugs.
 #define RAISE(o) do { nlr_pop(); nlr.ret_val = o; goto exception_handler; } while(0)
 
+    // Pointers which are constant for particular invocation of mp_execute_bytecode2()
+    mp_obj_t *const fastn = &code_state->state[code_state->n_state - 1];
+    mp_exc_stack_t *const exc_stack = (mp_exc_stack_t*)(code_state->state + code_state->n_state);
+
     // variables that are visible to the exception handler (declared volatile)
-    volatile bool currently_in_except_block = MP_TAGPTR_TAG(*exc_sp_in_out); // 0 or 1, to detect nested exceptions
-    mp_exc_stack_t *volatile exc_sp = MP_TAGPTR_PTR(*exc_sp_in_out); // stack grows up, exc_sp points to top of stack
-    const byte *volatile save_ip = *ip_in_out; // this is so we can access ip in the exception handler without making ip volatile (which means the compiler can't keep it in a register in the main loop)
-    mp_obj_t *volatile save_sp = *sp_in_out; // this is so we can access sp in the exception handler when needed
+    volatile bool currently_in_except_block = MP_TAGPTR_TAG(code_state->exc_sp); // 0 or 1, to detect nested exceptions
+    mp_exc_stack_t *volatile exc_sp = MP_TAGPTR_PTR(code_state->exc_sp); // stack grows up, exc_sp points to top of stack
+    const byte *volatile save_ip = code_state->ip; // this is so we can access ip in the exception handler without making ip volatile (which means the compiler can't keep it in a register in the main loop)
+    mp_obj_t *volatile save_sp = code_state->sp; // this is so we can access sp in the exception handler when needed
 
     // outer exception handling loop
     for (;;) {
@@ -264,8 +256,8 @@ mp_vm_return_kind_t mp_execute_bytecode2(const byte *code_info, const byte **ip_
 outer_dispatch_loop:
         if (nlr_push(&nlr) == 0) {
             // local variables that are not visible to the exception handler
-            const byte *ip = *ip_in_out;
-            mp_obj_t *sp = *sp_in_out;
+            const byte *ip = code_state->ip;
+            mp_obj_t *sp = code_state->sp;
             machine_uint_t unum;
             mp_obj_t obj_shared;
 
@@ -908,7 +900,7 @@ unwind_return:
                     exc_sp--;
                 }
                 nlr_pop();
-                *sp_in_out = sp;
+                code_state->sp = sp;
                 assert(exc_sp == exc_stack - 1);
                 return MP_VM_RETURN_NORMAL;
 
@@ -939,9 +931,9 @@ unwind_return:
             ENTRY(MP_BC_YIELD_VALUE):
 yield:
                 nlr_pop();
-                *ip_in_out = ip;
-                *sp_in_out = sp;
-                *exc_sp_in_out = MP_TAGPTR_MAKE(exc_sp, currently_in_except_block);
+                code_state->ip = ip;
+                code_state->sp = sp;
+                code_state->exc_sp = MP_TAGPTR_MAKE(exc_sp, currently_in_except_block);
                 return MP_VM_RETURN_YIELD;
 
             ENTRY(MP_BC_YIELD_FROM): {
@@ -1035,8 +1027,8 @@ exception_handler:
                 const byte *ip = save_ip + 1;
                 machine_uint_t unum;
                 DECODE_ULABEL; // the jump offset if iteration finishes; for labels are always forward
-                *ip_in_out = ip + unum; // jump to after for-block
-                *sp_in_out = save_sp - 1; // pop the exhausted iterator
+                code_state->ip = ip + unum; // jump to after for-block
+                code_state->sp = save_sp - 1; // pop the exhausted iterator
                 goto outer_dispatch_loop; // continue with dispatch loop
             }
 
@@ -1045,6 +1037,7 @@ exception_handler:
             // But consider how to handle nested exceptions.
             // TODO need a better way of not adding traceback to constant objects (right now, just GeneratorExit_obj and MemoryError_obj)
             if (mp_obj_is_exception_instance(nlr.ret_val) && nlr.ret_val != &mp_const_GeneratorExit_obj && nlr.ret_val != &mp_const_MemoryError_obj) {
+                const byte *code_info = code_state->code_info;
                 machine_uint_t code_info_size = code_info[0] | (code_info[1] << 8) | (code_info[2] << 16) | (code_info[3] << 24);
                 qstr source_file = code_info[4] | (code_info[5] << 8) | (code_info[6] << 16) | (code_info[7] << 24);
                 qstr block_name = code_info[8] | (code_info[9] << 8) | (code_info[10] << 16) | (code_info[11] << 24);
@@ -1075,7 +1068,7 @@ exception_handler:
                 currently_in_except_block = 1;
 
                 // catch exception and pass to byte code
-                *ip_in_out = exc_sp->handler;
+                code_state->ip = exc_sp->handler;
                 mp_obj_t *sp = MP_TAGPTR_PTR(exc_sp->val_sp);
                 // save this exception in the stack so it can be used in a reraise, if needed
                 exc_sp->prev_exc = nlr.ret_val;
@@ -1083,7 +1076,7 @@ exception_handler:
                 PUSH(mp_const_none);
                 PUSH(nlr.ret_val);
                 PUSH(mp_obj_get_type(nlr.ret_val));
-                *sp_in_out = sp;
+                code_state->sp = sp;
 
             } else {
                 // propagate exception to higher level
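
For reference, the allocation policy both patches rely on, keeping small state
blocks on the C stack via alloca() and falling back to the heap for larger
ones, can be sketched in isolation as below. The names are hypothetical (not
the MicroPython API), and alloca() is assumed to come from <alloca.h> as on
glibc; the point is simply that only the heap case needs an explicit free,
while the alloca() block disappears with the stack frame.

    #include <alloca.h>   // assumption: alloca() declared here, as on glibc
    #include <stdlib.h>
    #include <string.h>

    #define DEMO_MAX_STATE_ON_STACK 40   // bytes of state kept on the C stack

    // Sketch of the decision mp_execute_bytecode makes after these patches:
    // carve the whole variable-size state block out of the current C stack
    // frame when it is small, heap-allocate it otherwise.
    void demo_run(size_t state_bytes) {
        int on_heap = state_bytes > DEMO_MAX_STATE_ON_STACK;
        unsigned char *state;
        if (on_heap) {
            state = malloc(state_bytes);   // error handling omitted for brevity
        } else {
            state = alloca(state_bytes);   // released automatically on return
        }
        memset(state, 0, state_bytes);

        // ... interpret bytecode using the state block ...

        if (on_heap) {
            free(state);
        }
    }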