diff --git a/docs/library/builtins.rst b/docs/library/builtins.rst index 7a0229c2aa..e489375b1f 100644 --- a/docs/library/builtins.rst +++ b/docs/library/builtins.rst @@ -82,6 +82,10 @@ Functions and types In MicroPython, `byteorder` parameter must be positional (this is compatible with CPython). + .. note:: The optional ``signed`` kwarg from CPython is not supported. + MicroPython currently converts negative integers as signed, + and positive as unsigned. (:ref:`Details <cpydiff_types_int_to_bytes>`.) + .. function:: isinstance() .. function:: issubclass() diff --git a/py/misc.h b/py/misc.h index 9f8a8c1e13..cf1810d4e7 100644 --- a/py/misc.h +++ b/py/misc.h @@ -343,13 +343,46 @@ static uint32_t mp_clz(uint32_t x) { return _BitScanReverse(&lz, x) ? (sizeof(x) * 8 - 1) - lz : 0; } +static uint32_t mp_clzl(unsigned long x) { + unsigned long lz = 0; + return _BitScanReverse(&lz, x) ? (sizeof(x) * 8 - 1) - lz : 0; +} + +#ifdef _WIN64 +static uint32_t mp_clzll(unsigned long long x) { + unsigned long lz = 0; + return _BitScanReverse64(&lz, x) ? (sizeof(x) * 8 - 1) - lz : 0; +} +#else +// Microsoft don't ship _BitScanReverse64 on Win32, so emulate it +static uint32_t mp_clzll(unsigned long long x) { + unsigned long h = x >> 32; + return h ? mp_clzl(h) : (mp_clzl(x) + 32); +} +#endif + static uint32_t mp_ctz(uint32_t x) { unsigned long tz = 0; return _BitScanForward(&tz, x) ? tz : 0; } #else #define mp_clz(x) __builtin_clz(x) +#define mp_clzl(x) __builtin_clzl(x) +#define mp_clzll(x) __builtin_clzll(x) #define mp_ctz(x) __builtin_ctz(x) #endif +// mp_int_t can be larger than long, i.e. 
Windows 64-bit, nan-box variants +static inline uint32_t mp_clz_mpi(mp_int_t x) { + MP_STATIC_ASSERT(sizeof(mp_int_t) == sizeof(long long) + || sizeof(mp_int_t) == sizeof(long)); + + // ugly, but should compile to single intrinsic unless O0 is set + if (sizeof(mp_int_t) == sizeof(long)) { + return mp_clzl(x); + } else { + return mp_clzll(x); + } +} + #endif // MICROPY_INCLUDED_PY_MISC_H diff --git a/py/mpz.c b/py/mpz.c index 502d4e1c13..750664ad9a 100644 --- a/py/mpz.c +++ b/py/mpz.c @@ -1589,7 +1589,7 @@ bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) { return true; } -void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) { +bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf) { byte *b = buf; if (big_endian) { b += len; @@ -1598,6 +1598,8 @@ void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) { int bits = 0; mpz_dbl_dig_t d = 0; mpz_dbl_dig_t carry = 1; + size_t olen = len; // bytes in output buffer + bool ok = true; for (size_t zlen = z->len; zlen > 0; --zlen) { bits += DIG_SIZE; d = (d << DIG_SIZE) | *zdig++; @@ -1607,28 +1609,32 @@ void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) { val = (~val & 0xff) + carry; carry = val >> 8; } + + if (!olen) { + // Buffer is full, only OK if all remaining bytes are zeroes + ok = ok && ((byte)val == 0); + continue; + } + if (big_endian) { *--b = val; - if (b == buf) { - return; - } } else { *b++ = val; - if (b == buf + len) { - return; - } } + olen--; } } - // fill remainder of buf with zero/sign extension of the integer - if (big_endian) { - len = b - buf; + if (as_signed && olen == 0 && len > 0) { + // If output exhausted then ensure there was enough space for the sign bit + byte most_sig = big_endian ? buf[0] : buf[len - 1]; + ok = ok && (bool)(most_sig & 0x80) == (bool)z->neg; } else { - len = buf + len - b; - buf = b; + // fill remainder of buf with zero/sign extension of the integer + memset(big_endian ? 
buf : b, z->neg ? 0xff : 0x00, olen); } - memset(buf, z->neg ? 0xff : 0x00, len); + + return ok; } #if MICROPY_PY_BUILTINS_FLOAT diff --git a/py/mpz.h b/py/mpz.h index d27f572404..6f1ac930b0 100644 --- a/py/mpz.h +++ b/py/mpz.h @@ -93,9 +93,9 @@ typedef int8_t mpz_dbl_dig_signed_t; typedef struct _mpz_t { // Zero has neg=0, len=0. Negative zero is not allowed. size_t neg : 1; - size_t fixed_dig : 1; - size_t alloc : (8 * sizeof(size_t) - 2); - size_t len; + size_t fixed_dig : 1; // flag, 'dig' buffer cannot be reallocated + size_t alloc : (8 * sizeof(size_t) - 2); // number of entries allocated in 'dig' + size_t len; // number of entries used in 'dig' mpz_dig_t *dig; } mpz_t; @@ -145,7 +145,8 @@ static inline size_t mpz_max_num_bits(const mpz_t *z) { mp_int_t mpz_hash(const mpz_t *z); bool mpz_as_int_checked(const mpz_t *z, mp_int_t *value); bool mpz_as_uint_checked(const mpz_t *z, mp_uint_t *value); -void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf); +// Returns true if 'z' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise. 
+bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf); #if MICROPY_PY_BUILTINS_FLOAT mp_float_t mpz_as_float(const mpz_t *z); #endif diff --git a/py/objint.c b/py/objint.c index 6caa608f33..467a4714ef 100644 --- a/py/objint.c +++ b/py/objint.c @@ -421,29 +421,50 @@ static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_from_bytes_fun_obj, 3, 4, int_fro static MP_DEFINE_CONST_CLASSMETHOD_OBJ(int_from_bytes_obj, MP_ROM_PTR(&int_from_bytes_fun_obj)); static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *args) { - // TODO: Support signed param (assumes signed=False) + // TODO: Support signed (currently behaves as if signed=(val < 0)) (void)n_args; + bool overflow; - mp_int_t len = mp_obj_get_int(args[1]); - if (len < 0) { + mp_int_t dlen = mp_obj_get_int(args[1]); + if (dlen < 0) { mp_raise_ValueError(NULL); } bool big_endian = args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little); vstr_t vstr; - vstr_init_len(&vstr, len); + vstr_init_len(&vstr, dlen); byte *data = (byte *)vstr.buf; - memset(data, 0, len); #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE if (!mp_obj_is_small_int(args[0])) { - mp_obj_int_to_bytes_impl(args[0], big_endian, len, data); + overflow = !mp_obj_int_to_bytes_impl(args[0], big_endian, dlen, data); } else #endif { mp_int_t val = MP_OBJ_SMALL_INT_VALUE(args[0]); - size_t l = MIN((size_t)len, sizeof(val)); - mp_binary_set_int(l, big_endian, data + (big_endian ? (len - l) : 0), val); + int slen = 0; // Number of bytes to represent val + + // This logic has a twin in objint_longlong.c + if (val > 0) { + slen = (sizeof(mp_int_t) * 8 - mp_clz_mpi(val) + 7) / 8; + } else if (val < -1) { + slen = (sizeof(mp_int_t) * 8 - mp_clz_mpi(~val) + 8) / 8; + } else { + // clz of 0 is undefined for the __builtin_clz family, so 0 and -1 are special-cased: they map to 0 and 1 + slen = -val; + } + + if (slen <= dlen) { + memset(data, val < 0 ? 0xFF : 0x00, dlen); + mp_binary_set_int(slen, big_endian, data + (big_endian ? 
(dlen - slen) : 0), val); + overflow = false; + } else { + overflow = true; + } + } + + if (overflow) { + mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("buffer too small")); } return mp_obj_new_bytes_from_vstr(&vstr); diff --git a/py/objint.h b/py/objint.h index 5eed87705d..28930e35ad 100644 --- a/py/objint.h +++ b/py/objint.h @@ -55,7 +55,8 @@ char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size, int base, const char *prefix, char base_char, char comma); mp_int_t mp_obj_int_hash(mp_obj_t self_in); mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf); -void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf); +// Returns true if 'self_in' fits into 'len' bytes of 'buf' without overflowing. On false, do not rely on 'buf': the mpz variant leaves a truncated value, the longlong variant returns before writing. +bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf); int mp_obj_int_sign(mp_obj_t self_in); mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in); mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in); diff --git a/py/objint_longlong.c b/py/objint_longlong.c index ee499e0265..00fe5636c1 100644 --- a/py/objint_longlong.c +++ b/py/objint_longlong.c @@ -57,10 +57,27 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf return mp_obj_new_int_from_ll(value); } -void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) { +bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) { assert(mp_obj_is_exact_type(self_in, &mp_type_int)); mp_obj_int_t *self = self_in; long long val = self->val; + size_t slen; // Number of bytes to represent val + + // This logic has a twin in objint.c + if (val > 0) { + slen = (sizeof(long long) * 8 - mp_clzll(val) + 7) / 8; + } else if (val < -1) { + slen = (sizeof(long long) * 8 - mp_clzll(~val) + 8) / 8; + } else { + // clz of 0 is undefined for the __builtin_clz family, so 0 and -1 are special-cased: they map to 0 and 1 + slen = 
-val; + } + + if (slen > len) { + return false; // Would overflow + // TODO: Determine whether to copy and truncate, as some callers probably expect this...? + } + if (big_endian) { byte *b = buf + len; while (b > buf) { @@ -73,6 +90,7 @@ void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byt val >>= 8; } } + return true; } int mp_obj_int_sign(mp_obj_t self_in) { diff --git a/py/objint_mpz.c b/py/objint_mpz.c index 600316a42a..4a1a685bbd 100644 --- a/py/objint_mpz.c +++ b/py/objint_mpz.c @@ -112,10 +112,10 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf return MP_OBJ_FROM_PTR(o); } -void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) { +bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) { assert(mp_obj_is_exact_type(self_in, &mp_type_int)); mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in); - mpz_as_bytes(&self->mpz, big_endian, len, buf); + return mpz_as_bytes(&self->mpz, big_endian, self->mpz.neg, len, buf); } int mp_obj_int_sign(mp_obj_t self_in) { diff --git a/tests/basics/int_bytes.py b/tests/basics/int_bytes.py index d1837ea75c..15c12640e9 100644 --- a/tests/basics/int_bytes.py +++ b/tests/basics/int_bytes.py @@ -1,3 +1,5 @@ +import sys + print((10).to_bytes(1, "little")) print((111111).to_bytes(4, "little")) print((100).to_bytes(10, "little")) @@ -20,3 +22,74 @@ try: (1).to_bytes(-1, "little") except ValueError: print("ValueError") + +# zero byte destination should also raise an error +try: + (1).to_bytes(0, "little") +except OverflowError: + print("OverflowError") + +# except for converting 0 to a zero-length byte array +print((0).to_bytes(0, "big")) + +# byte length can fit the integer directly +print((0xFF).to_bytes(1, "little")) +print((0xFF).to_bytes(1, "big")) +print((0xEFF).to_bytes(2, "little")) +print((0xEFF).to_bytes(2, "big")) +print((0xCDEFF).to_bytes(3, "little")) +print((0xCDEFF).to_bytes(3, "big")) + +# OverFlowError 
if not big enough + +try: + (0x123).to_bytes(1, "big") +except OverflowError: + print("OverflowError") + +try: + (0x12345).to_bytes(2, "big") +except OverflowError: + print("OverflowError") + +try: + (0x1234567).to_bytes(3, "big") +except OverflowError: + print("OverflowError") + + +# negative representations + +# MicroPython int.to_bytes() behaves as if signed=True for negative numbers +if "micropython" in repr(sys.implementation): + + def to_bytes_compat(i, l, e): + return i.to_bytes(l, e) +else: + # Implement MicroPython compatible behaviour for CPython + def to_bytes_compat(i, l, e): + return i.to_bytes(l, e, signed=i < 0) + + +print(to_bytes_compat(-1, 1, "little")) +print(to_bytes_compat(-1, 3, "little")) +print(to_bytes_compat(-1, 1, "big")) +print(to_bytes_compat(-1, 3, "big")) +print(to_bytes_compat(-128, 1, "big")) +print(to_bytes_compat(-32768, 2, "big")) +print(to_bytes_compat(-(1 << 23), 3, "big")) + +try: + print(to_bytes_compat(-129, 1, "big")) +except OverflowError: + print("OverflowError") + +try: + print(to_bytes_compat(-32769, 2, "big")) +except OverflowError: + print("OverflowError") + +try: + print(to_bytes_compat(-(1 << 23) - 1, 2, "big")) +except OverflowError: + print("OverflowError") diff --git a/tests/basics/int_bytes_int64.py b/tests/basics/int_bytes_int64.py new file mode 100644 index 0000000000..032dbccc5b --- /dev/null +++ b/tests/basics/int_bytes_int64.py @@ -0,0 +1,52 @@ +import sys + +# Depending on the port, the numbers in this test may be implemented as "small" +# native 64 bit ints, arbitrary precision large ints, or large integers using 64-bit +# long longs. 
+ +try: + x = int.from_bytes(b"\x6F\xAB\xCD\x12\x34\x56\x78\xFB", "big") +except OverflowError: + print("SKIP") # Port can't represent this size of integer at all + raise SystemExit + +print(hex(x)) +b = x.to_bytes(8, "little") +print(b) +print(x.to_bytes(8, "big")) + +# padding in output +print(x.to_bytes(20, "little")) +print(x.to_bytes(20, "big")) + +# check that extra zero bytes don't change the internal int value +print(int.from_bytes(b + bytes(10), "little") == x) + +# can't write to a zero-length bytes object +try: + x.to_bytes(0, "little") +except OverflowError: + print("OverflowError") + +# or one that is too short +try: + x.to_bytes(7, "big") +except OverflowError: + print("OverflowError") + +# negative representations + +# MicroPython int.to_bytes() behaves as if signed=True for negative numbers +if "micropython" in repr(sys.implementation): + + def to_bytes_compat(i, l, e): + return i.to_bytes(l, e) +else: + # Implement MicroPython compatible behaviour for CPython + def to_bytes_compat(i, l, e): + return i.to_bytes(l, e, signed=i < 0) + + +print(to_bytes_compat(-x, 8, "little")) +print(to_bytes_compat(-x, 20, "big")) +print(to_bytes_compat(-x, 20, "little")) diff --git a/tests/basics/int_bytes_intbig.py b/tests/basics/int_bytes_intbig.py index 147362bef1..13cf5d0085 100644 --- a/tests/basics/int_bytes_intbig.py +++ b/tests/basics/int_bytes_intbig.py @@ -1,3 +1,5 @@ +import sys + print((2**64).to_bytes(9, "little")) print((2**64).to_bytes(9, "big")) @@ -10,5 +12,51 @@ print(ib) print(il.to_bytes(20, "little")) print(ib.to_bytes(20, "big")) +# check padding comes out correctly +print(il.to_bytes(40, "little")) +print(ib.to_bytes(40, "big")) + # check that extra zero bytes don't change the internal int value print(int.from_bytes(b + bytes(10), "little") == int.from_bytes(b, "little")) + +# can't write to a zero-length bytes object +try: + ib.to_bytes(0, "little") +except OverflowError: + print("OverflowError") + +# or one that is too short +try: + 
ib.to_bytes(18, "big") +except OverflowError: + print("OverflowError") + +# negative representations + +# MicroPython int.to_bytes() behaves as if signed=True for negative numbers +if "micropython" in repr(sys.implementation): + + def to_bytes_compat(i, l, e): + return i.to_bytes(l, e) +else: + # Implement MicroPython compatible behaviour for CPython + def to_bytes_compat(i, l, e): + return i.to_bytes(l, e, signed=i < 0) + + +print(to_bytes_compat(-ib, 20, "big")) +print(to_bytes_compat(ib * -ib, 40, "big")) + +# case where an additional byte is needed for sign bit +ib = (2**64) - 1 +print(ib.to_bytes(8, "little")) + +ib *= -1 + +try: + print(to_bytes_compat(ib, 8, "little")) +except OverflowError: + print("OverflowError") + +print(to_bytes_compat(ib, 9, "little")) +print(to_bytes_compat(ib, 9, "big")) diff --git a/tests/cpydiff/types_int_to_bytes.py b/tests/cpydiff/types_int_to_bytes.py new file mode 100644 index 0000000000..6530a2a32e --- /dev/null +++ b/tests/cpydiff/types_int_to_bytes.py @@ -0,0 +1,16 @@ +""" +categories: Types,int +description: ``to_bytes`` method doesn't implement signed parameter. +cause: The ``signed`` keyword-only parameter is not implemented for ``int.to_bytes()``. + +When the integer is negative, MicroPython behaves the same as CPython ``int.to_bytes(..., signed=True)`` + +When the integer is non-negative, MicroPython behaves the same as CPython ``int.to_bytes(..., signed=False)``. + +(The difference is subtle, but in CPython a positive integer converted with ``signed=True`` may require one byte more in the output length, in order to fit the 0 sign bit.) + +workaround: Take care when calling ``to_bytes()`` on an integer value which may be negative. +""" + +x = -1 +print(x.to_bytes(1, "big"))