Floats formatted with "correct" precision

Conversion float->string ensures that, for any float f, tonumber(tostring(f)) == f, while still avoiding noise like 1.1 converting to "1.1000000000000001".
2024-11-23 21:29:38 +03:00 · 2024-08-02 15:09:30 -03:00 · 2024-08-02 15:09:30 -03:00 · 1bf4b80f1a
commit 1bf4b80f1a
parent 4c6afbcb01
3 changed files with 153 additions and 21 deletions
--- a/lobject.c
+++ b/lobject.c
@ -10,6 +10,7 @@
 #include "lprefix.h"


+#include <float.h>
 #include <locale.h>
 #include <math.h>
 #include <stdarg.h>
@ -401,29 +402,54 @@ int luaO_utf8esc (char *buff, unsigned long x) {
 /*
 ** Maximum length of the conversion of a number to a string. Must be
 ** enough to accommodate both LUA_INTEGER_FMT and LUA_NUMBER_FMT.
-** (For a long long int, this is 19 digits plus a sign and a final '\0',
-** adding to 21. For a long double, it can go to a sign, 33 digits,
-** the dot, an exponent letter, an exponent sign, 5 exponent digits,
-** and a final '\0', adding to 43.)
+** For a long long int, this is 19 digits plus a sign and a final '\0',
+** adding to 21. For a long double, it can go to a sign, the dot, an
+** exponent letter, an exponent sign, 4 exponent digits, the final
+** '\0', plus the significant digits, which are approximately the *_DIG
+** attribute.
 */
-#define MAXNUMBER2STR	44
+#define MAXNUMBER2STR	(20 + l_floatatt(DIG))


 /*
-** Convert a number object to a string, adding it to a buffer
+** Convert a float to a string, adding it to a buffer. First try with
+** a not too large number of digits, to avoid noise (for instance,
+** 1.1 going to "1.1000000000000001"). If that loses precision, so
+** that reading the result back gives a different number, then do the
+** conversion again with extra precision. Moreover, if the numeral looks
+** like an integer (without a decimal point or an exponent), add ".0" to
+** its end.
+*/
+static int tostringbuffFloat (lua_Number n, char *buff) {
+  /* first conversion */
+  int len = l_sprintf(buff, MAXNUMBER2STR, LUA_NUMBER_FMT,
+                            (LUAI_UACNUMBER)n);
+  lua_Number check = lua_str2number(buff, NULL);  /* read it back */
+  if (check != n) {  /* not enough precision? */
+    /* convert again with more precision */
+    len = l_sprintf(buff, MAXNUMBER2STR, LUA_NUMBER_FMT_N,
+                          (LUAI_UACNUMBER)n);
+  }
+  /* looks like an integer? */
+  if (buff[strspn(buff, "-0123456789")] == '\0') {
+    buff[len++] = lua_getlocaledecpoint();
+    buff[len++] = '0';  /* adds '.0' to result */
+  }
+  return len;
+}
+
+
+/*
+** Convert a number object to a string, adding it to a buffer.
 */
 static unsigned tostringbuff (TValue *obj, char *buff) {
  int len;
  lua_assert(ttisnumber(obj));
  if (ttisinteger(obj))
    len = lua_integer2str(buff, MAXNUMBER2STR, ivalue(obj));
-  else {
-    len = lua_number2str(buff, MAXNUMBER2STR, fltvalue(obj));
-    if (buff[strspn(buff, "-0123456789")] == '\0') {  /* looks like an int? */
-      buff[len++] = lua_getlocaledecpoint();
-      buff[len++] = '0';  /* adds '.0' to result */
-    }
-  }
+  else
+    len = tostringbuffFloat(fltvalue(obj), buff);
+  lua_assert(len < MAXNUMBER2STR);
  return cast_uint(len);
 }

--- a/luaconf.h
+++ b/luaconf.h
@ -416,8 +416,13 @@
@@ l_floatatt(x) corrects float attribute 'x' to the proper float type
 ** by prefixing it with one of FLT/DBL/LDBL.
@@ LUA_NUMBER_FRMLEN is the length modifier for writing floats.
-@@ LUA_NUMBER_FMT is the format for writing floats.
-@@ lua_number2str converts a float to a string.
+@@ LUA_NUMBER_FMT is the format for writing floats with the maximum
+** number of digits that respects tostring(tonumber(numeral)) == numeral.
+** (That would be floor(log10(2^n)), where n is the number of bits in
+** the float mantissa.)
+@@ LUA_NUMBER_FMT_N is the format for writing floats with the minimum
+** number of digits that ensures tonumber(tostring(number)) == number.
+** (That would be LUA_NUMBER_FMT+2.)
@@ l_mathop allows the addition of an 'l' or 'f' to all math operations.
@@ l_floor takes the floor of a float.
@@ lua_str2number converts a decimal numeral to a number.
@ -428,8 +433,6 @@

 #define l_floor(x)		(l_mathop(floor)(x))

-#define lua_number2str(s,sz,n)  \
-	l_sprintf((s), sz, LUA_NUMBER_FMT, (LUAI_UACNUMBER)(n))

 /*
@@ lua_numbertointeger converts a float number with an integral value
@ -458,6 +461,7 @@

 #define LUA_NUMBER_FRMLEN	""
 #define LUA_NUMBER_FMT		"%.7g"
+#define LUA_NUMBER_FMT_N	"%.9g"

 #define l_mathop(op)		op##f

@ -474,6 +478,7 @@

 #define LUA_NUMBER_FRMLEN	"L"
 #define LUA_NUMBER_FMT		"%.19Lg"
+#define LUA_NUMBER_FMT_N	"%.21Lg"

 #define l_mathop(op)		op##l

@ -488,7 +493,8 @@
 #define LUAI_UACNUMBER	double

 #define LUA_NUMBER_FRMLEN	""
-#define LUA_NUMBER_FMT		"%.14g"
+#define LUA_NUMBER_FMT		"%.15g"
+#define LUA_NUMBER_FMT_N	"%.17g"

 #define l_mathop(op)		op

--- a/testes/math.lua
+++ b/testes/math.lua
@ -22,6 +22,18 @@ do
  end
 end

+
+-- maximum exponent for a floating-point number
+local maxexp = 0
+do
+  local p = 2.0
+  while p < math.huge do
+    maxexp = maxexp + 1
+    p = p + p
+  end
+end
+
+
 local function isNaN (x)
  return (x ~= x)
 end
@ -34,8 +46,8 @@ do
  local x = 2.0^floatbits
  assert(x > x - 1.0 and x == x + 1.0)

-  print(string.format("%d-bit integers, %d-bit (mantissa) floats",
-                       intbits, floatbits))
+  local msg = "  %d-bit integers, %d-bit*2^%d floats"
+  print(string.format(msg, intbits, floatbits, maxexp))
 end

 assert(math.type(0) == "integer" and math.type(0.0) == "float"
@ -803,7 +815,11 @@ do
 end


-print("testing 'math.random'")
+--
+-- [[==================================================================
+      print("testing 'math.random'")
+-- -===================================================================
+--

 local random, max, min = math.random, math.max, math.min

@ -1019,6 +1035,90 @@ assert(not pcall(random, minint + 1, minint))
 assert(not pcall(random, maxint, maxint - 1))
 assert(not pcall(random, maxint, minint))

+-- ]]==================================================================
+
+
+--
+-- [[==================================================================
+    print("testing precision of 'tostring'")
+-- -===================================================================
+--
+
+-- number of decimal digits supported by float precision
+local decdig = math.floor(floatbits * math.log(2, 10))
+print(string.format("  %d-digit float numbers with full precision",
+                    decdig))
+-- number of decimal digits supported by integer precision
+local Idecdig = math.floor(math.log(maxint, 10))
+print(string.format("  %d-digit integer numbers with full precision",
+                    Idecdig))
+
+do
+  -- Any number should print so that reading it back gives itself:
+  -- tonumber(tostring(x)) == x
+
+  -- Mersenne fractions
+  local p = 1.0
+  for i = 1, maxexp do
+    p = p + p
+    local x = 1 / (p - 1)
+    assert(x == tonumber(tostring(x)))
+  end
+
+  -- some random numbers in [0,1)
+  for i = 1, 100 do
+    local x = math.random()
+    assert(x == tonumber(tostring(x)))
+  end
+
+  -- different numbers should print differently.
+  -- check pairs of floats with minimum detectable difference
+  local p = floatbits - 1
+  for i = 1, maxexp - 1 do
+    for _, i in ipairs{-i, i} do
+      local x = 2^i
+      local diff = 2^(i - p)   -- least significant bit for 'x'
+      local y = x + diff
+      local fy = tostring(y)
+      assert(x ~= y and tostring(x) ~= fy)
+      assert(tonumber(fy) == y)
+    end
+  end
+
+
+  -- "reasonable" numerals should be printed like themselves
+
+  -- create random float numerals with 5 digits, with a decimal point
+  -- inserted in all places. (With more than 5, things like "0.00001"
+  -- reformat like "1e-5".)
+  for i = 1, 1000 do
+    -- random numeral with 5 digits
+    local x = string.format("%.5d", math.random(0, 99999))
+    for i = 2, #x do
+      -- insert decimal point at position 'i'
+      local y = string.sub(x, 1, i - 1) .. "." .. string.sub(x, i, -1)
+      y = string.gsub(y, "^0*(%d.-%d)0*$", "%1")   -- trim extra zeros
+      assert(y == tostring(tonumber(y)))
+    end
+  end
+
+  -- all-random floats
+  local Fsz = string.packsize("n")   -- size of floats in bytes
+
+  for i = 1, 400 do
+    local s = string.pack("j", math.random(0))   -- a random string of bits
+    while #s < Fsz do   -- make 's' long enough
+      s = s .. string.pack("j", math.random(0))
+    end
+    local n = string.unpack("n", s)   -- read 's' as a float
+    s = tostring(n)
+    if string.find(s, "^%-?%d") then   -- avoid NaN, inf, -inf
+      assert(tonumber(s) == n)
+    end
+  end
+
+end
+-- ]]==================================================================


 print('OK')