From c3f21572ed18b9319aeba0fc5caf4a6394fd3905 Mon Sep 17 00:00:00 2001 From: Michael Drake Date: Sun, 17 Mar 2013 22:01:42 +0000 Subject: [PATCH] Add function to get the byte length of n characters in a utf8 string. --- utils/utf8.c | 18 ++++++++++++++++++ utils/utf8.h | 1 + 2 files changed, 19 insertions(+) diff --git a/utils/utf8.c b/utils/utf8.c index b4e308044..885ca94ee 100644 --- a/utils/utf8.c +++ b/utils/utf8.c @@ -117,6 +117,24 @@ size_t utf8_bounded_length(const char *s, size_t l) return len; } +/** + * Calculate the length (in bytes) of the leading characters of a bounded UTF-8 string + * + * \param s The string + * \param l Maximum length of input (in bytes) + * \param c Maximum number of characters to measure + * \return Length, in bytes, of at most c characters of the string + */ +size_t utf8_bounded_byte_length(const char *s, size_t l, size_t c) +{ + size_t len = 0; + + while (len < l && c-- > 0) + len = utf8_next(s, l, len); + + return len; +} + /** * Calculate the length (in bytes) of a UTF-8 character * diff --git a/utils/utf8.h b/utils/utf8.h index 26234f6c0..eb043c227 100644 --- a/utils/utf8.h +++ b/utils/utf8.h @@ -37,6 +37,7 @@ size_t utf8_from_ucs4(uint32_t c, char *s); size_t utf8_length(const char *s); size_t utf8_bounded_length(const char *s, size_t l); +size_t utf8_bounded_byte_length(const char *s, size_t l, size_t c); size_t utf8_char_byte_length(const char *s);