py: Convert hash API to use MP_UNARY_OP_HASH instead of ad-hoc function.

Hashing is now done using mp_unary_op function with MP_UNARY_OP_HASH as
the operator argument.  Hashing for int, str and bytes still go via
fast-path in mp_unary_op since they are the most common objects which
need to be hashed.

This lead to quite a bit of code cleanup, and should be more efficient
if anything.  It saves 176 bytes code space on Thumb2, and 360 bytes on
x86.

The only loss is that the error message "unhashable type" is now the
more generic "unsupported type for __hash__".
This commit is contained in:
Damien George 2015-05-11 12:25:19 +00:00
parent 6738c1dded
commit c2a4e4effc
20 changed files with 85 additions and 112 deletions

View File

@ -116,7 +116,7 @@
#define MP_BC_LOAD_CONST_SMALL_INT_MULTI (0x70) // + N(64)
#define MP_BC_LOAD_FAST_MULTI (0xb0) // + N(16)
#define MP_BC_STORE_FAST_MULTI (0xc0) // + N(16)
#define MP_BC_UNARY_OP_MULTI (0xd0) // + op(5)
#define MP_BC_BINARY_OP_MULTI (0xd5) // + op(35)
#define MP_BC_UNARY_OP_MULTI (0xd0) // + op(6)
#define MP_BC_BINARY_OP_MULTI (0xd6) // + op(35)
#endif // __MICROPY_INCLUDED_PY_BC0_H__

View File

@ -31,7 +31,8 @@
#include "py/mpconfig.h"
#include "py/misc.h"
#include "py/obj.h"
#include "py/runtime0.h"
#include "py/runtime.h"
// Fixed empty map. Useful when need to call kw-receiving functions
// without any keywords from C, etc.
@ -200,7 +201,7 @@ mp_map_elem_t *mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t
}
}
mp_uint_t hash = mp_obj_hash(index);
mp_uint_t hash = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, index));
mp_uint_t pos = hash % map->alloc;
mp_uint_t start_pos = pos;
mp_map_elem_t *avail_slot = NULL;
@ -308,7 +309,7 @@ mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t looku
return NULL;
}
}
mp_uint_t hash = mp_obj_hash(index);
mp_uint_t hash = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, index));
mp_uint_t pos = hash % set->alloc;
mp_uint_t start_pos = pos;
mp_obj_t *avail_slot = NULL;

View File

@ -272,8 +272,8 @@ STATIC mp_obj_t mp_builtin_divmod(mp_obj_t o1_in, mp_obj_t o2_in) {
MP_DEFINE_CONST_FUN_OBJ_2(mp_builtin_divmod_obj, mp_builtin_divmod);
STATIC mp_obj_t mp_builtin_hash(mp_obj_t o_in) {
// TODO hash will generally overflow small integer; can we safely truncate it?
return mp_obj_new_int(mp_obj_hash(o_in));
// result is guaranteed to be a (small) int
return mp_unary_op(MP_UNARY_OP_HASH, o_in);
}
MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_hash_obj, mp_builtin_hash);

View File

@ -147,61 +147,6 @@ bool mp_obj_is_callable(mp_obj_t o_in) {
return mp_obj_instance_is_callable(o_in);
}
mp_int_t mp_obj_hash(mp_obj_t o_in) {
if (o_in == mp_const_false) {
return 0; // needs to hash to same as the integer 0, since False==0
} else if (o_in == mp_const_true) {
return 1; // needs to hash to same as the integer 1, since True==1
} else if (MP_OBJ_IS_SMALL_INT(o_in)) {
return MP_OBJ_SMALL_INT_VALUE(o_in);
} else if (MP_OBJ_IS_TYPE(o_in, &mp_type_int)) {
return mp_obj_int_hash(o_in);
} else if (MP_OBJ_IS_STR(o_in) || MP_OBJ_IS_TYPE(o_in, &mp_type_bytes)) {
return mp_obj_str_get_hash(o_in);
} else if (MP_OBJ_IS_TYPE(o_in, &mp_type_NoneType)) {
return (mp_int_t)o_in;
} else if (MP_OBJ_IS_FUN(o_in)) {
return (mp_int_t)o_in;
} else if (MP_OBJ_IS_TYPE(o_in, &mp_type_tuple)) {
return mp_obj_tuple_hash(o_in);
} else if (MP_OBJ_IS_TYPE(o_in, &mp_type_type)) {
return (mp_int_t)o_in;
} else if (mp_obj_is_instance_type(mp_obj_get_type(o_in))) {
// if a valid __hash__ method exists, use it
mp_obj_t method[2];
mp_load_method_maybe(o_in, MP_QSTR___hash__, method);
if (method[0] != MP_OBJ_NULL) {
mp_obj_t hash_val = mp_call_method_n_kw(0, 0, method);
if (MP_OBJ_IS_INT(hash_val)) {
return mp_obj_int_get_truncated(hash_val);
}
goto error;
}
mp_load_method_maybe(o_in, MP_QSTR___eq__, method);
if (method[0] == MP_OBJ_NULL) {
// https://docs.python.org/3/reference/datamodel.html#object.__hash__
// "User-defined classes have __eq__() and __hash__() methods by default;
// with them, all objects compare unequal (except with themselves) and
// x.__hash__() returns an appropriate value such that x == y implies
// both that x is y and hash(x) == hash(y)."
return (mp_int_t)o_in;
}
// "A class that overrides __eq__() and does not define __hash__() will have its __hash__() implicitly set to None.
// When the __hash__() method of a class is None, instances of the class will raise an appropriate TypeError"
}
// TODO hash classes
error:
if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "unhashable type"));
} else {
nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
"unhashable type: '%s'", mp_obj_get_type_str(o_in)));
}
}
// This function implements the '==' operator (and so the inverse of '!=').
//
// From the Python language reference:
@ -540,3 +485,10 @@ void mp_get_buffer_raise(mp_obj_t obj, mp_buffer_info_t *bufinfo, mp_uint_t flag
nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "object with buffer protocol required"));
}
}
mp_obj_t mp_generic_unary_op(mp_uint_t op, mp_obj_t o_in) {
switch (op) {
case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT((mp_uint_t)o_in);
default: return MP_OBJ_NULL; // op not supported
}
}

View File

@ -508,7 +508,6 @@ void mp_obj_print_exception(const mp_print_t *print, mp_obj_t exc);
bool mp_obj_is_true(mp_obj_t arg);
bool mp_obj_is_callable(mp_obj_t o_in);
mp_int_t mp_obj_hash(mp_obj_t o_in);
bool mp_obj_equal(mp_obj_t o1, mp_obj_t o2);
mp_int_t mp_obj_get_int(mp_const_obj_t arg);
@ -525,6 +524,7 @@ mp_obj_t mp_obj_id(mp_obj_t o_in);
mp_obj_t mp_obj_len(mp_obj_t o_in);
mp_obj_t mp_obj_len_maybe(mp_obj_t o_in); // may return MP_OBJ_NULL
mp_obj_t mp_obj_subscr(mp_obj_t base, mp_obj_t index, mp_obj_t val);
mp_obj_t mp_generic_unary_op(mp_uint_t op, mp_obj_t o_in);
// bool
// TODO make lower case when it has proven itself

View File

@ -69,18 +69,21 @@ STATIC mp_obj_t bool_unary_op(mp_uint_t op, mp_obj_t o_in) {
mp_int_t value = ((mp_obj_bool_t*)o_in)->value;
switch (op) {
case MP_UNARY_OP_BOOL: return o_in;
// needs to hash to the same value as if converting to an integer
case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT(value);
case MP_UNARY_OP_POSITIVE: return MP_OBJ_NEW_SMALL_INT(value);
case MP_UNARY_OP_NEGATIVE: return MP_OBJ_NEW_SMALL_INT(-value);
case MP_UNARY_OP_INVERT: return MP_OBJ_NEW_SMALL_INT(~value);
// only bool needs to implement MP_UNARY_OP_NOT
case MP_UNARY_OP_NOT:
default: // no other cases
if (value) {
return mp_const_false;
} else {
return mp_const_true;
}
default: return MP_OBJ_NULL; // op not supported
}
}

View File

@ -98,6 +98,7 @@ const mp_obj_type_t mp_type_fun_builtin = {
{ &mp_type_type },
.name = MP_QSTR_function,
.call = fun_builtin_call,
.unary_op = mp_generic_unary_op,
};
/******************************************************************************/
@ -314,6 +315,7 @@ const mp_obj_type_t mp_type_fun_bc = {
.print = fun_bc_print,
#endif
.call = fun_bc_call,
.unary_op = mp_generic_unary_op,
#if MICROPY_PY_FUNCTION_ATTRS
.attr = fun_bc_attr,
#endif
@ -366,6 +368,7 @@ STATIC const mp_obj_type_t mp_type_fun_native = {
{ &mp_type_type },
.name = MP_QSTR_function,
.call = fun_native_call,
.unary_op = mp_generic_unary_op,
};
mp_obj_t mp_obj_new_fun_native(mp_uint_t scope_flags, mp_uint_t n_pos_args, mp_uint_t n_kwonly_args, mp_obj_t def_args_in, mp_obj_t def_kw_args, const void *fun_data) {
@ -421,6 +424,7 @@ STATIC const mp_obj_type_t mp_type_fun_viper = {
{ &mp_type_type },
.name = MP_QSTR_function,
.call = fun_viper_call,
.unary_op = mp_generic_unary_op,
};
mp_obj_t mp_obj_new_fun_viper(mp_uint_t n_args, void *fun_data, mp_uint_t type_sig) {
@ -533,6 +537,7 @@ STATIC const mp_obj_type_t mp_type_fun_asm = {
{ &mp_type_type },
.name = MP_QSTR_function,
.call = fun_asm_call,
.unary_op = mp_generic_unary_op,
};
mp_obj_t mp_obj_new_fun_asm(mp_uint_t n_args, void *fun_data) {

View File

@ -259,10 +259,6 @@ char *mp_obj_int_formatted(char **buf, mp_uint_t *buf_size, mp_uint_t *fmt_size,
#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE
mp_int_t mp_obj_int_hash(mp_obj_t self_in) {
return MP_OBJ_SMALL_INT_VALUE(self_in);
}
bool mp_obj_int_is_positive(mp_obj_t self_in) {
return mp_obj_get_int(self_in) >= 0;
}

View File

@ -53,16 +53,6 @@
const mp_obj_int_t mp_maxsize_obj = {{&mp_type_int}, MP_SSIZE_MAX};
#endif
mp_int_t mp_obj_int_hash(mp_obj_t self_in) {
if (MP_OBJ_IS_SMALL_INT(self_in)) {
return MP_OBJ_SMALL_INT_VALUE(self_in);
}
mp_obj_int_t *self = self_in;
// truncate value to fit in mp_int_t, which gives the same hash as
// small int if the value fits without truncation
return self->val;
}
void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, mp_uint_t len, byte *buf) {
assert(MP_OBJ_IS_TYPE(self_in, &mp_type_int));
mp_obj_int_t *self = self_in;
@ -117,6 +107,11 @@ mp_obj_t mp_obj_int_unary_op(mp_uint_t op, mp_obj_t o_in) {
mp_obj_int_t *o = o_in;
switch (op) {
case MP_UNARY_OP_BOOL: return MP_BOOL(o->val != 0);
// truncate value to fit in mp_int_t, which gives the same hash as
// small int if the value fits without truncation
case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT((mp_int_t)o->val);
case MP_UNARY_OP_POSITIVE: return o_in;
case MP_UNARY_OP_NEGATIVE: return mp_obj_new_int_from_ll(-o->val);
case MP_UNARY_OP_INVERT: return mp_obj_new_int_from_ll(~o->val);

View File

@ -96,14 +96,6 @@ char *mp_obj_int_formatted_impl(char **buf, mp_uint_t *buf_size, mp_uint_t *fmt_
return str;
}
mp_int_t mp_obj_int_hash(mp_obj_t self_in) {
if (MP_OBJ_IS_SMALL_INT(self_in)) {
return MP_OBJ_SMALL_INT_VALUE(self_in);
}
mp_obj_int_t *self = self_in;
return mpz_hash(&self->mpz);
}
void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, mp_uint_t len, byte *buf) {
assert(MP_OBJ_IS_TYPE(self_in, &mp_type_int));
mp_obj_int_t *self = self_in;
@ -143,6 +135,7 @@ mp_obj_t mp_obj_int_unary_op(mp_uint_t op, mp_obj_t o_in) {
mp_obj_int_t *o = o_in;
switch (op) {
case MP_UNARY_OP_BOOL: return MP_BOOL(!mpz_is_zero(&o->mpz));
case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT(mpz_hash(&o->mpz));
case MP_UNARY_OP_POSITIVE: return o_in;
case MP_UNARY_OP_NEGATIVE: { mp_obj_int_t *o2 = mp_obj_int_new_mpz(); mpz_neg_inpl(&o2->mpz, &o->mpz); return o2; }
case MP_UNARY_OP_INVERT: { mp_obj_int_t *o2 = mp_obj_int_new_mpz(); mpz_not_inpl(&o2->mpz, &o->mpz); return o2; }

View File

@ -47,6 +47,7 @@ STATIC mp_obj_t none_unary_op(mp_uint_t op, mp_obj_t o_in) {
(void)o_in;
switch (op) {
case MP_UNARY_OP_BOOL: return mp_const_false;
case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT((mp_uint_t)o_in);
default: return MP_OBJ_NULL; // op not supported
}
}

View File

@ -1982,16 +1982,6 @@ STATIC void bad_implicit_conversion(mp_obj_t self_in) {
}
}
mp_uint_t mp_obj_str_get_hash(mp_obj_t self_in) {
// TODO: This has too big overhead for hash accessor
if (MP_OBJ_IS_STR_OR_BYTES(self_in)) {
GET_STR_HASH(self_in, h);
return h;
} else {
bad_implicit_conversion(self_in);
}
}
mp_uint_t mp_obj_str_get_len(mp_obj_t self_in) {
// TODO This has a double check for the type, one in obj.c and one here
if (MP_OBJ_IS_STR_OR_BYTES(self_in)) {

View File

@ -126,6 +126,14 @@ mp_obj_t mp_obj_tuple_unary_op(mp_uint_t op, mp_obj_t self_in) {
mp_obj_tuple_t *self = self_in;
switch (op) {
case MP_UNARY_OP_BOOL: return MP_BOOL(self->len != 0);
case MP_UNARY_OP_HASH: {
// start hash with pointer to empty tuple, to make it fairly unique
mp_int_t hash = (mp_int_t)mp_const_empty_tuple;
for (mp_uint_t i = 0; i < self->len; i++) {
hash += MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, self->items[i]));
}
return MP_OBJ_NEW_SMALL_INT(hash);
}
case MP_UNARY_OP_LEN: return MP_OBJ_NEW_SMALL_INT(self->len);
default: return MP_OBJ_NULL; // op not supported
}
@ -258,17 +266,6 @@ void mp_obj_tuple_del(mp_obj_t self_in) {
m_del_var(mp_obj_tuple_t, mp_obj_t, self->len, self);
}
mp_int_t mp_obj_tuple_hash(mp_obj_t self_in) {
assert(MP_OBJ_IS_TYPE(self_in, &mp_type_tuple));
mp_obj_tuple_t *self = self_in;
// start hash with pointer to empty tuple, to make it fairly unique
mp_int_t hash = (mp_int_t)mp_const_empty_tuple;
for (mp_uint_t i = 0; i < self->len; i++) {
hash += mp_obj_hash(self->items[i]);
}
return hash;
}
/******************************************************************************/
/* tuple iterator */

View File

@ -327,6 +327,7 @@ mp_obj_t mp_obj_instance_make_new(mp_obj_t self_in, mp_uint_t n_args, mp_uint_t
const qstr mp_unary_op_method_name[] = {
[MP_UNARY_OP_BOOL] = MP_QSTR___bool__,
[MP_UNARY_OP_LEN] = MP_QSTR___len__,
[MP_UNARY_OP_HASH] = MP_QSTR___hash__,
#if MICROPY_PY_ALL_SPECIAL_METHODS
[MP_UNARY_OP_POSITIVE] = MP_QSTR___pos__,
[MP_UNARY_OP_NEGATIVE] = MP_QSTR___neg__,
@ -355,8 +356,28 @@ STATIC mp_obj_t instance_unary_op(mp_uint_t op, mp_obj_t self_in) {
if (member[0] == MP_OBJ_SENTINEL) {
return mp_unary_op(op, self->subobj[0]);
} else if (member[0] != MP_OBJ_NULL) {
return mp_call_function_1(member[0], self_in);
mp_obj_t val = mp_call_function_1(member[0], self_in);
// __hash__ must return a small int
if (op == MP_UNARY_OP_HASH) {
val = MP_OBJ_NEW_SMALL_INT(mp_obj_int_get_truncated(val));
}
return val;
} else {
if (op == MP_UNARY_OP_HASH) {
lookup.attr = MP_QSTR___eq__;
mp_obj_class_lookup(&lookup, self->base.type);
if (member[0] == MP_OBJ_NULL) {
// https://docs.python.org/3/reference/datamodel.html#object.__hash__
// "User-defined classes have __eq__() and __hash__() methods by default;
// with them, all objects compare unequal (except with themselves) and
// x.__hash__() returns an appropriate value such that x == y implies
// both that x is y and hash(x) == hash(y)."
return MP_OBJ_NEW_SMALL_INT((mp_uint_t)self_in);
}
// "A class that overrides __eq__() and does not define __hash__() will have its __hash__() implicitly set to None.
// When the __hash__() method of a class is None, instances of the class will raise an appropriate TypeError"
}
return MP_OBJ_NULL; // op not supported
}
}
@ -835,6 +856,7 @@ const mp_obj_type_t mp_type_type = {
.print = type_print,
.make_new = type_make_new,
.call = type_call,
.unary_op = mp_generic_unary_op,
.attr = type_attr,
};

View File

@ -32,6 +32,7 @@
#include "py/nlr.h"
#include "py/parsenum.h"
#include "py/compile.h"
#include "py/objstr.h"
#include "py/objtuple.h"
#include "py/objlist.h"
#include "py/objmodule.h"
@ -200,6 +201,8 @@ mp_obj_t mp_unary_op(mp_uint_t op, mp_obj_t arg) {
switch (op) {
case MP_UNARY_OP_BOOL:
return MP_BOOL(val != 0);
case MP_UNARY_OP_HASH:
return arg;
case MP_UNARY_OP_POSITIVE:
return arg;
case MP_UNARY_OP_NEGATIVE:
@ -215,6 +218,10 @@ mp_obj_t mp_unary_op(mp_uint_t op, mp_obj_t arg) {
assert(0);
return arg;
}
} else if (op == MP_UNARY_OP_HASH && MP_OBJ_IS_STR_OR_BYTES(arg)) {
// fast path for hashing str/bytes
GET_STR_HASH(arg, h);
return MP_OBJ_NEW_SMALL_INT(h);
} else {
mp_obj_type_t *type = mp_obj_get_type(arg);
if (type->unary_op != NULL) {

View File

@ -50,6 +50,7 @@
typedef enum {
MP_UNARY_OP_BOOL, // __bool__
MP_UNARY_OP_LEN, // __len__
MP_UNARY_OP_HASH, // __hash__; must return a small int
MP_UNARY_OP_POSITIVE,
MP_UNARY_OP_NEGATIVE,
MP_UNARY_OP_INVERT,

View File

@ -1196,7 +1196,7 @@ yield:
} else if (ip[-1] < MP_BC_STORE_FAST_MULTI + 16) {
fastn[MP_BC_STORE_FAST_MULTI - (mp_int_t)ip[-1]] = POP();
DISPATCH();
} else if (ip[-1] < MP_BC_UNARY_OP_MULTI + 5) {
} else if (ip[-1] < MP_BC_UNARY_OP_MULTI + 6) {
SET_TOP(mp_unary_op(ip[-1] - MP_BC_UNARY_OP_MULTI, TOP()));
DISPATCH();
} else if (ip[-1] < MP_BC_BINARY_OP_MULTI + 35) {

View File

@ -111,7 +111,7 @@ static void* entry_table[256] = {
[MP_BC_LOAD_CONST_SMALL_INT_MULTI ... MP_BC_LOAD_CONST_SMALL_INT_MULTI + 63] = &&entry_MP_BC_LOAD_CONST_SMALL_INT_MULTI,
[MP_BC_LOAD_FAST_MULTI ... MP_BC_LOAD_FAST_MULTI + 15] = &&entry_MP_BC_LOAD_FAST_MULTI,
[MP_BC_STORE_FAST_MULTI ... MP_BC_STORE_FAST_MULTI + 15] = &&entry_MP_BC_STORE_FAST_MULTI,
[MP_BC_UNARY_OP_MULTI ... MP_BC_UNARY_OP_MULTI + 4] = &&entry_MP_BC_UNARY_OP_MULTI,
[MP_BC_UNARY_OP_MULTI ... MP_BC_UNARY_OP_MULTI + 5] = &&entry_MP_BC_UNARY_OP_MULTI,
[MP_BC_BINARY_OP_MULTI ... MP_BC_BINARY_OP_MULTI + 34] = &&entry_MP_BC_BINARY_OP_MULTI,
};

View File

@ -20,11 +20,12 @@ class A:
print(hash(A()))
print({A():1})
# all user-classes have default __hash__
class B:
pass
hash(B())
# if __eq__ is defined then default __hash__ is not used
class C:
def __eq__(self, another):
return True
@ -32,3 +33,12 @@ try:
hash(C())
except TypeError:
print("TypeError")
# __hash__ must return an int
class D:
def __hash__(self):
return None
try:
hash(D())
except TypeError:
print("TypeError")

View File

@ -83,7 +83,7 @@ arg names:
59 BINARY_OP 5 __add__
60 STORE_FAST 8
61 LOAD_FAST 0
62 UNARY_OP 3
62 UNARY_OP 4
63 STORE_FAST 9
64 LOAD_FAST 0
65 UNARY_OP 0