From 46d31e9ca90635707031afedf09da7aeed25a321 Mon Sep 17 00:00:00 2001
From: Paul Sokolovsky
Date: Sat, 14 Jun 2014 03:16:17 +0300
Subject: [PATCH] unicode: Add utf8_ptr_to_index().

Useful when we have pointer to char inside string, but need to return char
index. (E.g. str.find()).
---
 py/unicode.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/py/unicode.c b/py/unicode.c
index 0da247889..c38120072 100644
--- a/py/unicode.c
+++ b/py/unicode.c
@@ -86,6 +86,19 @@ char *utf8_next_char(const char *s) {
     return (char *)s;
 }
 
+// Convert a pointer into the string starting at s to a character index:
+// counts the bytes in [s, ptr) for which UTF8_IS_CONT is false; 0 if ptr <= s.
+machine_uint_t utf8_ptr_to_index(const char *s, const char *ptr) {
+    machine_uint_t num_chars = 0;
+    for (const char *p = s; p < ptr; ++p) {
+        if (UTF8_IS_CONT(*p)) {
+            continue;
+        }
+        ++num_chars;
+    }
+    return num_chars;
+}
+
 uint unichar_charlen(const char *str, uint len)
 {
     uint charlen = 0;