From 279de0c8eb3cb186914799ccc5ee94ea97f56de4 Mon Sep 17 00:00:00 2001
From: Chris Angelico <rosuav@gmail.com>
Date: Sat, 7 Jun 2014 15:28:35 +1000
Subject: [PATCH] Formatting/layout improvements - introduce macros for UTF-8
 byte detection, add braces. No functional changes.

---
 py/builtin.c |  4 ++--
 py/misc.h    |  2 ++
 py/objstr.c  | 29 +++++++++++++++++++++--------
 py/qstr.c    | 14 ++++++++++----
 4 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/py/builtin.c b/py/builtin.c
index ded8e522f1..f5102feb59 100644
--- a/py/builtin.c
+++ b/py/builtin.c
@@ -360,12 +360,12 @@ STATIC mp_obj_t mp_builtin_ord(mp_obj_t o_in) {
     uint len, charlen;
     const char *str = mp_obj_str_get_data_len(o_in, &len, &charlen);
     if (charlen == 1) {
-        if (MP_OBJ_IS_STR(o_in) && (*str & 0x80)) {
+        if (MP_OBJ_IS_STR(o_in) && UTF8_IS_NONASCII(*str)) {
 	    machine_int_t ord = *str++ & 0x7F;
             for (machine_int_t mask = 0x40; ord & mask; mask >>= 1) {
 		ord &= ~mask;
 	    }
-	    while ((*str & 0xC0) == 0x80) {
+	    while (UTF8_IS_CONT(*str)) {
 		ord = (ord << 6) | (*str++ & 0x3F);
 	    }
 	    return mp_obj_new_int(ord);
diff --git a/py/misc.h b/py/misc.h
index fd54147efd..f2d375d251 100644
--- a/py/misc.h
+++ b/py/misc.h
@@ -100,6 +100,8 @@ bool unichar_isupper(unichar c);
 bool unichar_islower(unichar c);
 unichar unichar_tolower(unichar c);
 unichar unichar_toupper(unichar c);
+#define UTF8_IS_NONASCII(ch) ((ch) & 0x80) // nonzero when the high bit of ch is set, i.e. the byte is outside 7-bit ASCII
+#define UTF8_IS_CONT(ch) (((ch) & 0xC0) == 0x80) // true when the top two bits of ch are 10, the UTF-8 continuation-byte pattern
 
 /** variable string *********************************************/
 
diff --git a/py/objstr.c b/py/objstr.c
index 9f6a9d5771..21b0b3a19b 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -109,7 +109,7 @@ void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *e
             for (machine_int_t mask = 0x40; ord & mask; mask >>= 1) {
 		ord &= ~mask;
 	    }
-	    while ((*s & 0xC0) == 0x80) {
+	    while (UTF8_IS_CONT(*s)) {
 		ord = (ord << 6) | (*s++ & 0x3F);
 	    }
 	    --s; // s will be incremented by the main loop
@@ -398,12 +398,22 @@ STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
             // Assumes that the string is correctly formed - will run past the
             // end of the buffer if there aren't that many characters in it
             const char *s;
-            for (s=(const char *)self_data; index_val; ++s)
-                if ((*s & 0xC0) != 0x80) --index_val;
-            while ((*s & 0xC0) == 0x80) ++s; // Skip continuation bytes after the last lead byte
+            for (s=(const char *)self_data; index_val; ++s) {
+                if (!UTF8_IS_CONT(*s)) {
+		    --index_val;
+		}
+	    }
+	    // Skip continuation bytes after the last lead byte
+            while (UTF8_IS_CONT(*s)) {
+		++s;
+	    }
             int len = 1;
-            if (*s & 0x80)
-                for (char mask = 0x40; *s & mask; mask >>= 1) ++len; // Count the number of 1 bits (after the first)
+            if (UTF8_IS_NONASCII(*s)) {
+		// Count the number of 1 bits (after the first)
+                for (char mask = 0x40; *s & mask; mask >>= 1) {
+		    ++len;
+		}
+	    }
             return mp_obj_new_str(s, len, true); // This will create a one-character string
         }
     } else {
@@ -1769,8 +1779,11 @@ mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, uin
             // Count non-continuation bytes so we know how long the string is in characters.
             const byte *endptr, *top = data + len;
             uint charlen = 0;
-            for (endptr = data; endptr < top; ++endptr)
-                if ((*endptr & 0xC0) != 0x80) ++charlen;
+            for (endptr = data; endptr < top; ++endptr) {
+                if (!UTF8_IS_CONT(*endptr)) {
+		    ++charlen;
+		}
+	    }
             o->charlen = charlen;
 	} else {
             // For byte strings, the 'character' length (really the "exposed length" or "Python length") equals the byte length.
diff --git a/py/qstr.c b/py/qstr.c
index 4013a67f79..5637aea77d 100644
--- a/py/qstr.c
+++ b/py/qstr.c
@@ -162,8 +162,11 @@ qstr qstr_from_strn(const char *str, uint len) {
         machine_uint_t hash = qstr_compute_hash((const byte*)str, len);
         byte *q_ptr = m_new(byte, 7 + len + 1);
         uint charlen = 0;
-        for (const char *s = str; s < str + len; ++s)
-            if ((*s & 0xC0) != 0x80) ++charlen;
+        for (const char *s = str; s < str + len; ++s) {
+            if (!UTF8_IS_CONT(*s)) {
+		++charlen;
+	    }
+	}
         q_ptr[0] = hash;
         q_ptr[1] = hash >> 8;
         q_ptr[2] = len;
@@ -195,8 +198,11 @@ qstr qstr_build_end(byte *q_ptr) {
         q_ptr[0] = hash;
         q_ptr[1] = hash >> 8;
         uint charlen = 0;
-        for (const byte *s = str; s < str + len; ++s)
-            if ((*s & 0xC0) != 0x80) ++charlen;
+        for (const byte *s = str; s < str + len; ++s) {
+            if (!UTF8_IS_CONT(*s)) {
+		++charlen;
+	    }
+	}
         q_ptr[4] = charlen;
         q_ptr[5] = charlen >> 8;
         q_ptr[6] = 1;