From 503d6110338ab2d79e6c0f8f591a0ca6397717de Mon Sep 17 00:00:00 2001 From: Damien George Date: Wed, 28 May 2014 14:07:21 +0100 Subject: [PATCH] py: Implement long int parsing in int(...). Addresses issue #627. --- py/emitnative.c | 2 +- py/obj.h | 4 ++-- py/objint.c | 6 +++--- py/objint_longlong.c | 20 ++++++++------------ py/objint_mpz.c | 18 +++++------------- py/parsenum.c | 32 +++++++++++++++++++++----------- py/runtime.c | 9 ++++++++- py/runtime.h | 1 + py/runtime0.h | 4 ++-- py/vm.c | 2 +- 10 files changed, 52 insertions(+), 46 deletions(-) diff --git a/py/emitnative.c b/py/emitnative.c index 261b1a2a5..057e42c75 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -704,7 +704,7 @@ STATIC void emit_native_load_const_int(emit_t *emit, qstr qst) { DEBUG_printf("load_const_int %s\n", qstr_str(st)); // for viper: load integer, check fits in 32 bits emit_native_pre(emit); - emit_call_with_imm_arg(emit, MP_F_LOAD_CONST_INT, mp_obj_new_int_from_qstr, qst, REG_ARG_1); + emit_call_with_imm_arg(emit, MP_F_LOAD_CONST_INT, mp_load_const_int, qst, REG_ARG_1); emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET); } diff --git a/py/obj.h b/py/obj.h index e4350b424..74bdc7977 100644 --- a/py/obj.h +++ b/py/obj.h @@ -371,7 +371,7 @@ mp_obj_t mp_obj_new_bool(bool value); mp_obj_t mp_obj_new_cell(mp_obj_t obj); mp_obj_t mp_obj_new_int(machine_int_t value); mp_obj_t mp_obj_new_int_from_uint(machine_uint_t value); -mp_obj_t mp_obj_new_int_from_qstr(qstr qst); +mp_obj_t mp_obj_new_int_from_str_len(const char **str, uint len, bool neg, uint base); mp_obj_t mp_obj_new_int_from_ll(long long val); // this must return a multi-precision integer object (or raise an overflow exception) mp_obj_t mp_obj_new_str(const char* data, uint len, bool make_qstr_if_not_already); mp_obj_t mp_obj_new_bytes(const byte* data, uint len); @@ -445,7 +445,7 @@ void mp_obj_cell_set(mp_obj_t self_in, mp_obj_t obj); // int // For long int, returns value truncated to machine_int_t -machine_int_t 
mp_obj_int_get(mp_obj_t self_in); +machine_int_t mp_obj_int_get(mp_const_obj_t self_in); #if MICROPY_ENABLE_FLOAT mp_float_t mp_obj_int_as_float(mp_obj_t self_in); #endif diff --git a/py/objint.c b/py/objint.c index a3b355400..328fb11e8 100644 --- a/py/objint.c +++ b/py/objint.c @@ -139,7 +139,7 @@ char *mp_obj_int_formatted(char **buf, int *buf_size, int *fmt_size, mp_const_ob } else if (MP_OBJ_IS_TYPE(self_in, &mp_type_int)) { // Not a small int. #if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG - mp_obj_int_t *self = self_in; + const mp_obj_int_t *self = self_in; // Get the value to format; mp_obj_get_int truncates to machine_int_t. num = self->val; #else @@ -225,7 +225,7 @@ mp_obj_t mp_obj_int_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { } // This is called only with strings whose value doesn't fit in SMALL_INT -mp_obj_t mp_obj_new_int_from_qstr(qstr qst) { +mp_obj_t mp_obj_new_int_from_str_len(const char **str, uint len, bool neg, uint base) { nlr_raise(mp_obj_new_exception_msg(&mp_type_OverflowError, "long int not supported in this build")); return mp_const_none; } @@ -254,7 +254,7 @@ mp_obj_t mp_obj_new_int(machine_int_t value) { return mp_const_none; } -machine_int_t mp_obj_int_get(mp_obj_t self_in) { +machine_int_t mp_obj_int_get(mp_const_obj_t self_in) { return MP_OBJ_SMALL_INT_VALUE(self_in); } diff --git a/py/objint_longlong.c b/py/objint_longlong.c index 1e0167b46..381246dfe 100644 --- a/py/objint_longlong.c +++ b/py/objint_longlong.c @@ -162,26 +162,22 @@ mp_obj_t mp_obj_new_int_from_ll(long long val) { return o; } -mp_obj_t mp_obj_new_int_from_qstr(qstr qst) { - const char *s = qstr_str(qst); - long long v; - char *end; - // TODO: this doesn't handle Python hacked 0o octal syntax - v = strtoll(s, &end, 0); - if (*end != 0) { - nlr_raise(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number")); - } +mp_obj_t mp_obj_new_int_from_str_len(const char **str, uint len, bool neg, uint base) { + // TODO this does not honor 
the given length of the string, but in all cases it should anyway be null terminated + // TODO check overflow mp_obj_int_t *o = m_new_obj(mp_obj_int_t); o->base.type = &mp_type_int; - o->val = v; + char *endptr; + o->val = strtoll(*str, &endptr, base); + *str = endptr; return o; } -machine_int_t mp_obj_int_get(mp_obj_t self_in) { +machine_int_t mp_obj_int_get(mp_const_obj_t self_in) { if (MP_OBJ_IS_SMALL_INT(self_in)) { return MP_OBJ_SMALL_INT_VALUE(self_in); } else { - mp_obj_int_t *self = self_in; + const mp_obj_int_t *self = self_in; return self->val; } } diff --git a/py/objint_mpz.c b/py/objint_mpz.c index 2df3232e9..733dc096a 100644 --- a/py/objint_mpz.c +++ b/py/objint_mpz.c @@ -260,26 +260,18 @@ mp_obj_t mp_obj_new_int_from_uint(machine_uint_t value) { return mp_obj_new_int_from_ll(value); } -mp_obj_t mp_obj_new_int_from_qstr(qstr qst) { +mp_obj_t mp_obj_new_int_from_str_len(const char **str, uint len, bool neg, uint base) { mp_obj_int_t *o = mp_obj_int_new_mpz(); - uint len; - const char* str = (const char*)qstr_data(qst, &len); - int base = 0; - int skip = mp_parse_num_base(str, len, &base); - str += skip; - len -= skip; - uint n = mpz_set_from_str(&o->mpz, str, len, false, base); - if (n != len) { - nlr_raise(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number")); - } + uint n = mpz_set_from_str(&o->mpz, *str, len, neg, base); + *str += n; return o; } -machine_int_t mp_obj_int_get(mp_obj_t self_in) { +machine_int_t mp_obj_int_get(mp_const_obj_t self_in) { if (MP_OBJ_IS_SMALL_INT(self_in)) { return MP_OBJ_SMALL_INT_VALUE(self_in); } else { - mp_obj_int_t *self = self_in; + const mp_obj_int_t *self = self_in; return mpz_as_int(&self->mpz); } } diff --git a/py/parsenum.c b/py/parsenum.c index d3cb821a1..842a9e959 100644 --- a/py/parsenum.c +++ b/py/parsenum.c @@ -42,6 +42,7 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) { const char *restrict top = str + len; bool neg = false; + mp_obj_t ret_val; // check 
radix base if ((base != 0 && base < 2) || base > 36) { @@ -96,16 +97,20 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) { } } - // check we parsed something - if (str == str_val_start) { - goto value_error; - } - // negate value if needed if (neg) { int_val = -int_val; } + // create the small int + ret_val = MP_OBJ_NEW_SMALL_INT(int_val); + +have_ret_val: + // check we parsed something + if (str == str_val_start) { + goto value_error; + } + // skip trailing space for (; str < top && unichar_isspace(*str); str++) { } @@ -116,14 +121,19 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) { } // return the object - return MP_OBJ_NEW_SMALL_INT(int_val); - -value_error: - nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "invalid literal for int() with base %d: '%s'", base, str)); + return ret_val; overflow: - // TODO reparse using bignum - nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "overflow parsing integer")); + // reparse using long int + { + const char *s2 = str_val_start; + ret_val = mp_obj_new_int_from_str_len(&s2, top - str_val_start, neg, base); + str = s2; + goto have_ret_val; + } + +value_error: + nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "invalid syntax for integer with base %d: '%s'", base, str)); } #define PARSE_DEC_IN_INTG (1) diff --git a/py/runtime.c b/py/runtime.c index ea75280ce..f48780725 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -98,6 +98,13 @@ void mp_deinit(void) { #endif } +mp_obj_t mp_load_const_int(qstr qstr) { + DEBUG_OP_printf("load '%s'\n", qstr_str(qstr)); + uint len; + const byte* data = qstr_data(qstr, &len); + return mp_parse_num_integer((const char*)data, len, 0); +} + mp_obj_t mp_load_const_dec(qstr qstr) { DEBUG_OP_printf("load '%s'\n", qstr_str(qstr)); uint len; @@ -1147,8 +1154,8 @@ void *m_malloc_fail(int num_bytes) { // these must correspond to the respective enum void *const mp_fun_table[MP_F_NUMBER_OF] = { + 
mp_load_const_int, mp_load_const_dec, - mp_obj_new_int_from_qstr, mp_load_const_str, mp_load_name, mp_load_global, diff --git a/py/runtime.h b/py/runtime.h index a5d674303..3c79b48ed 100644 --- a/py/runtime.h +++ b/py/runtime.h @@ -77,6 +77,7 @@ void mp_delete_global(qstr qstr); mp_obj_t mp_unary_op(int op, mp_obj_t arg); mp_obj_t mp_binary_op(int op, mp_obj_t lhs, mp_obj_t rhs); +mp_obj_t mp_load_const_int(qstr qstr); mp_obj_t mp_load_const_dec(qstr qstr); mp_obj_t mp_load_const_str(qstr qstr); mp_obj_t mp_load_const_bytes(qstr qstr); diff --git a/py/runtime0.h b/py/runtime0.h index 425122dbc..542edf4a6 100644 --- a/py/runtime0.h +++ b/py/runtime0.h @@ -96,8 +96,8 @@ typedef enum { } mp_binary_op_t; typedef enum { - MP_F_LOAD_CONST_DEC = 0, - MP_F_LOAD_CONST_INT, + MP_F_LOAD_CONST_INT = 0, + MP_F_LOAD_CONST_DEC, MP_F_LOAD_CONST_STR, MP_F_LOAD_NAME, MP_F_LOAD_GLOBAL, diff --git a/py/vm.c b/py/vm.c index f6aa74348..bd94ade54 100644 --- a/py/vm.c +++ b/py/vm.c @@ -312,7 +312,7 @@ dispatch_loop: ENTRY(MP_BC_LOAD_CONST_INT): { DECODE_QSTR; - PUSH(mp_obj_new_int_from_qstr(qst)); + PUSH(mp_load_const_int(qst)); DISPATCH(); }