Add a unichar_charlen() function to calculate length-in-characters from length-in-bytes

2014-06-08 06:58:26 +10:00 · 2014-06-08 06:58:26 +10:00 · a019ba968b
parent 44b0d5cff8
commit a019ba968b
2 changed files with 12 additions and 0 deletions
--- a/py/misc.h
+++ b/py/misc.h
@ -100,6 +100,7 @@ bool unichar_isupper(unichar c);
 bool unichar_islower(unichar c);
 unichar unichar_tolower(unichar c);
 unichar unichar_toupper(unichar c);
+uint unichar_charlen(const char *str, uint len);
 #define UTF8_IS_NONASCII(ch) ((ch) & 0x80)
 #define UTF8_IS_CONT(ch) (((ch) & 0xC0) == 0x80)

--- a/py/unicode.c
+++ b/py/unicode.c
@ -86,6 +86,17 @@ char *utf8_next_char(const char *s) {
    return (char *)s;
 }

+// Return the number of characters held in the first len bytes of str.
+// A byte counts as a character start unless it is a continuation (10xxxxxx).
+uint unichar_charlen(const char *str, uint len) {
+    const char *end = str + len;
+    uint count = 0;
+    for (; str < end; ++str) {
+        count += !UTF8_IS_CONT(*str);
+    }
+    return count;
+}
+
 // Be aware: These unichar_is* functions are actually ASCII-only!
 bool unichar_isspace(unichar c) {
    return c < 128 && (attr[c] & FL_SPACE) != 0;