From 2a2736585437e32309d31b88814ef7e14db7a87c Mon Sep 17 00:00:00 2001
From: Paul Sokolovsky
Date: Tue, 13 May 2014 08:07:08 +0300
Subject: [PATCH] objstr.c: Partial implementation of .rsplit(). sep=None is TODO.

---
 py/objstr.c                   | 82 +++++++++++++++++++++++++++++++++++
 py/qstrdefs.h                 |  1 +
 tests/basics/string_rsplit.py | 42 ++++++++++++++++++
 3 files changed, 125 insertions(+)
 create mode 100644 tests/basics/string_rsplit.py

diff --git a/py/objstr.c b/py/objstr.c
index 3a4b0b97f..b5f40551f 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -38,6 +38,7 @@
 #include "runtime.h"
 #include "pfenv.h"
 #include "objstr.h"
+#include "objlist.h"
 
 STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, uint n_args, const mp_obj_t *args);
 const mp_obj_t mp_const_empty_bytes;
@@ -483,6 +484,85 @@ STATIC mp_obj_t str_split(uint n_args, const mp_obj_t *args) {
     return res;
 }
 
+// str.rsplit(sep[, maxsplit]) / bytes.rsplit(sep[, maxsplit])
+//
+// args[0] is the string, args[1] the separator and args[2] the optional
+// split limit.  Without a limit, or with a negative one, the work is
+// delegated to str_split().  Raises ValueError for an empty separator.
+// TODO: sep=None combined with an explicit limit is not implemented.
+STATIC mp_obj_t str_rsplit(uint n_args, const mp_obj_t *args) {
+    if (n_args < 3) {
+        // If we don't have split limit, it doesn't matter from which side
+        // we split.
+        return str_split(n_args, args);
+    }
+    const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
+    mp_obj_t sep = args[1];
+    GET_STR_DATA_LEN(args[0], s, len);
+
+    machine_int_t splits = mp_obj_get_int(args[2]);
+    if (splits < 0) {
+        // A negative limit means "no limit", so the direction is again
+        // irrelevant.  Delegating also keeps a negative value away from
+        // the list preallocation and indexing below.
+        // NOTE(review): relies on str_split() accepting a negative limit
+        // as unlimited - confirm.
+        return str_split(n_args, args);
+    }
+    machine_int_t org_splits = splits;
+    // Preallocate list to the max expected # of elements, as we
+    // will fill it from the end.
+    mp_obj_list_t *res = mp_obj_new_list(splits + 1, NULL);
+    machine_int_t idx = splits;
+
+    if (sep == mp_const_none) {
+        // TODO
+        assert(0);
+    } else {
+        uint sep_len;
+        const char *sep_str = mp_obj_str_get_data(sep, &sep_len);
+
+        if (sep_len == 0) {
+            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator"));
+        }
+
+        const byte *beg = s;
+        const byte *last = s + len;
+        for (;;) {
+            // Scan backwards for the next separator, unless the split
+            // limit is already used up.
+            s = last - sep_len;
+            for (;;) {
+                if (splits == 0 || s < beg) {
+                    break;
+                } else if (memcmp(s, sep_str, sep_len) == 0) {
+                    break;
+                }
+                s--;
+            }
+            if (s < beg || splits == 0) {
+                // No more separators (or splits): the rest is one item.
+                res->items[idx] = str_new(self_type, beg, last - beg);
+                break;
+            }
+            res->items[idx--] = str_new(self_type, s + sep_len, last - s - sep_len);
+            last = s;
+            splits--;
+        }
+        if (idx != 0) {
+            // We split less parts than split limit, now go cleanup surplus
+            machine_int_t used = org_splits + 1 - idx;
+            // Source and destination overlap, so memmove is required.
+            memmove(res->items, &res->items[idx], used * sizeof(mp_obj_t));
+            mp_seq_clear(res->items, used, res->alloc, sizeof(*res->items));
+            res->len = used;
+        }
+    }
+
+    return res;
+}
+
+
 STATIC mp_obj_t str_finder(uint n_args, const mp_obj_t *args, machine_int_t direction, bool is_index) {
     assert(2 <= n_args && n_args <= 4);
     assert(MP_OBJ_IS_STR(args[0]));
@@ -1460,6 +1540,7 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_index_obj, 2, 4, str_index);
 STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rindex_obj, 2, 4, str_rindex);
 STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join);
 STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_split_obj, 1, 3, str_split);
+STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rsplit_obj, 1, 3, str_rsplit);
 STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_startswith_obj, str_startswith);
 STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_strip_obj, 1, 2, str_strip);
 STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_lstrip_obj, 1, 2, str_lstrip);
@@ -1483,6 +1564,7 @@ STATIC const mp_map_elem_t str_locals_dict_table[] = {
     { MP_OBJ_NEW_QSTR(MP_QSTR_rindex), (mp_obj_t)&str_rindex_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_join), (mp_obj_t)&str_join_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_split), (mp_obj_t)&str_split_obj },
+    { MP_OBJ_NEW_QSTR(MP_QSTR_rsplit), (mp_obj_t)&str_rsplit_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_startswith), (mp_obj_t)&str_startswith_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_strip), (mp_obj_t)&str_strip_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_lstrip), (mp_obj_t)&str_lstrip_obj },
diff --git a/py/qstrdefs.h b/py/qstrdefs.h
index 1679d8b39..ca3001012 100644
--- a/py/qstrdefs.h
+++ b/py/qstrdefs.h
@@ -236,6 +236,7 @@ Q(find)
 Q(rfind)
 Q(rindex)
 Q(split)
+Q(rsplit)
 Q(startswith)
 Q(replace)
 Q(partition)
diff --git a/tests/basics/string_rsplit.py b/tests/basics/string_rsplit.py
new file mode 100644
index 000000000..cc6c0fd06
--- /dev/null
+++ b/tests/basics/string_rsplit.py
@@ -0,0 +1,42 @@
+# default separator (whitespace)
+print("a b".rsplit())
+#print(" a b ".rsplit(None))
+#print(" a b ".rsplit(None, 1))
+#print(" a b ".rsplit(None, 2))
+#print(" a b c ".rsplit(None, 1))
+#print(" a b c ".rsplit(None, 0))
+#print(" a b c ".rsplit(None, -1))
+
+# empty separator should fail
+try:
+    "abc".rsplit('')
+except ValueError:
+    print("ValueError")
+
+# non-empty separator
+print("abc".rsplit("a"))
+print("abc".rsplit("b"))
+print("abc".rsplit("c"))
+print("abc".rsplit("z"))
+print("abc".rsplit("ab"))
+print("abc".rsplit("bc"))
+print("abc".rsplit("abc"))
+print("abc".rsplit("abcd"))
+print("abcabc".rsplit("bc"))
+print("abcabc".rsplit("bc", 0))
+print("abcabc".rsplit("bc", 1))
+print("abcabc".rsplit("bc", 2))
+
+print("10/11/12".rsplit("/", 1))
+print("10/11/12".rsplit("/", 2))
+print("10/11/12".rsplit("/", 3))
+print("10/11/12".rsplit("/", 4))
+print("10/11/12".rsplit("/", 5))
+
+print("/*10/*11/*12/*".rsplit("/*", 1))
+print("/*10/*11/*12/*".rsplit("/*", 2))
+print("/*10/*11/*12/*".rsplit("/*", 3))
+print("/*10/*11/*12/*".rsplit("/*", 4))
+print("/*10/*11/*12/*".rsplit("/*", 5))
+
+print(b"abcabc".rsplit(b"bc", 2))