From 9749b2fb0d6c4ff65c7395e09028102c9508b34a Mon Sep 17 00:00:00 2001
From: Paul Sokolovsky
Date: Mon, 11 Aug 2014 22:36:38 +0300
Subject: [PATCH] objstr: Make sure that bytes are indexed as bytes, not as unicode.

Fixes #795.
---
 py/objstr.c           | 13 +++++++------
 tests/basics/bytes.py |  5 +++++
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/py/objstr.c b/py/objstr.c
index 9d34609882..fb170f83c9 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -353,7 +353,8 @@ const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, u
 }
 #endif
 
-STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
+// This is used for both bytes and 8-bit strings. This is not used for unicode strings.
+STATIC mp_obj_t bytes_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
     mp_obj_type_t *type = mp_obj_get_type(self_in);
     GET_STR_DATA_LEN(self_in, self_data, self_len);
     if (value == MP_OBJ_SENTINEL) {
@@ -368,11 +369,11 @@ STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
             return mp_obj_new_str_of_type(type, self_data + slice.start, slice.stop - slice.start);
         }
 #endif
-        const byte *p = str_index_to_ptr(type, self_data, self_len, index, false);
+        mp_uint_t index_val = mp_get_index(type, self_len, index, false);
         if (type == &mp_type_bytes) {
-            return MP_OBJ_NEW_SMALL_INT(*p);
+            return MP_OBJ_NEW_SMALL_INT(self_data[index_val]);
         } else {
-            return mp_obj_new_str((char*)p, 1, true);
+            return mp_obj_new_str((char*)&self_data[index_val], 1, true);
         }
     } else {
         return MP_OBJ_NULL; // op not supported
@@ -1704,7 +1705,7 @@ const mp_obj_type_t mp_type_str = {
     .print = str_print,
     .make_new = str_make_new,
     .binary_op = mp_obj_str_binary_op,
-    .subscr = str_subscr,
+    .subscr = bytes_subscr,
     .getiter = mp_obj_new_str_iterator,
     .buffer_p = { .get_buffer = mp_obj_str_get_buffer },
     .locals_dict = (mp_obj_t)&str_locals_dict,
@@ -1718,7 +1719,7 @@ const mp_obj_type_t mp_type_bytes = {
     .print = str_print,
     .make_new = bytes_make_new,
     .binary_op = mp_obj_str_binary_op,
-    .subscr = str_subscr,
+    .subscr = bytes_subscr,
     .getiter = mp_obj_new_bytes_iterator,
     .buffer_p = { .get_buffer = mp_obj_str_get_buffer },
     .locals_dict = (mp_obj_t)&str_locals_dict,
diff --git a/tests/basics/bytes.py b/tests/basics/bytes.py
index a084bc3994..ce027e7d19 100644
--- a/tests/basics/bytes.py
+++ b/tests/basics/bytes.py
@@ -37,3 +37,8 @@ def gen():
     for i in range(4):
         yield i
 print(bytes(gen()))
+
+# Make sure bytes are not mistreated as unicode
+x = b"\xff\x8e\xfe}\xfd\x7f"
+print(len(x))
+print(x[0], x[1], x[2], x[3])