summaryrefslogtreecommitdiff
path: root/py
diff options
context:
space:
mode:
authorYoctopuce dev <dev@yoctopuce.com>2025-06-06 14:55:21 +0200
committerDamien George <damien@micropython.org>2025-08-01 00:47:33 +1000
commitdbbaa959c85c04dbbcde5908b5d0775b574e44e7 (patch)
tree050bd1670b061788d291c0d88af22a6aad722f64 /py
parente4e1c9f4132f839dac0291557d9b992f67577fd3 (diff)
py/formatfloat: Improve accuracy of float formatting code.
Following discussions in PR #16666, this commit updates the float formatting code to improve the `repr` reversibility, i.e. the percentage of valid floating point numbers that do parse back to the same number when formatted by `repr` (in CPython it's 100%). This new code offers a choice of 3 float conversion methods, depending on the desired tradeoff between code size and conversion precision: - BASIC method has the smallest code footprint - APPROX method uses an iterative method to approximate the exact representation, which is a bit slower but does not have a big impact on code size. It provides `repr` reversibility on >99.8% of the cases in double precision, and on >98.5% in single precision (except with REPR_C, where reversibility is 100% as the last two bits are not taken into account). - EXACT method uses higher-precision floats during conversion, which provides perfect results but has a higher impact on code size. It is faster than APPROX method, and faster than the CPython equivalent implementation. It is however not available on all compilers when using FLOAT_IMPL_DOUBLE. Here is the table comparing the impact of the three conversion methods on code footprint on PYBV10 (using single-precision floats) and reversibility rate for both single-precision and double-precision floats. The table includes the current situation as a baseline for the comparison: PYBV10 REPR_C FLOAT DOUBLE current = 364688 12.9% 27.6% 37.9% basic = 364812 85.6% 60.5% 85.7% approx = 365080 100.0% 98.5% 99.8% exact = 366408 100.0% 100.0% 100.0% Signed-off-by: Yoctopuce dev <dev@yoctopuce.com>
Diffstat (limited to 'py')
-rw-r--r--py/formatfloat.c757
-rw-r--r--py/formatfloat.h1
-rw-r--r--py/misc.h19
-rw-r--r--py/mpconfig.h21
-rw-r--r--py/mpprint.c12
-rw-r--r--py/mpprint.h1
-rw-r--r--py/objcomplex.c31
-rw-r--r--py/objfloat.c18
-rw-r--r--py/parsenum.c50
-rw-r--r--py/parsenum.h2
10 files changed, 557 insertions, 355 deletions
diff --git a/py/formatfloat.c b/py/formatfloat.c
index 7cd471018..1ea34f84b 100644
--- a/py/formatfloat.c
+++ b/py/formatfloat.c
@@ -33,392 +33,537 @@
#include <stdint.h>
#include <math.h>
#include "py/formatfloat.h"
+#include "py/parsenum.h"
/***********************************************************************
Routine for converting an arbitrary floating
point number into a string.
- The code in this function was inspired from Fred Bayer's pdouble.c.
- Since pdouble.c was released as Public Domain, I'm releasing this
- code as public domain as well.
+ The code in this function was inspired from Dave Hylands's previous
+ version, which was itself inspired from Fred Bayer's pdouble.c.
The original code can be found in https://github.com/dhylands/format-float
- Dave Hylands
-
***********************************************************************/
-#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
-// 1 sign bit, 8 exponent bits, and 23 mantissa bits.
-// exponent values 0 and 255 are reserved, exponent can be 1 to 254.
-// exponent is stored with a bias of 127.
-// The min and max floats are on the order of 1x10^37 and 1x10^-37
-
-#define FPTYPE float
-#define FPCONST(x) x##F
-#define FPROUND_TO_ONE 0.9999995F
-#define FPDECEXP 32
-#define FPMIN_BUF_SIZE 6 // +9e+99
+// Float formatting debug code is intended for use in ports/unix only,
+// as it uses the libc float printing function as a reference.
+#define DEBUG_FLOAT_FORMATTING 0
+
+#if DEBUG_FLOAT_FORMATTING
+#define DEBUG_PRINTF(...) fprintf(stderr, __VA_ARGS__)
+#else
+#define DEBUG_PRINTF(...)
+#endif
+
+#if MICROPY_FLOAT_FORMAT_IMPL == MICROPY_FLOAT_FORMAT_IMPL_EXACT || MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
+#define MP_FFUINT_FMT "%lu"
+#else
+#define MP_FFUINT_FMT "%u"
+#endif
+
+static inline int fp_expval(mp_float_t x) {
+ mp_float_union_t fb = { x };
+ return (int)fb.p.exp - MP_FLOAT_EXP_OFFSET;
+}
-#define FLT_SIGN_MASK 0x80000000
+#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
-static inline int fp_signbit(float x) {
- mp_float_union_t fb = {x};
- return fb.i & FLT_SIGN_MASK;
+static inline int fp_isless1(mp_float_t x) {
+ return x < 1.0;
}
-#define fp_isnan(x) isnan(x)
-#define fp_isinf(x) isinf(x)
-static inline int fp_iszero(float x) {
- mp_float_union_t fb = {x};
- return fb.i == 0;
+
+static inline int fp_iszero(mp_float_t x) {
+ return x == 0.0;
}
-static inline int fp_isless1(float x) {
- mp_float_union_t fb = {x};
+
+#if MICROPY_FLOAT_FORMAT_IMPL != MICROPY_FLOAT_FORMAT_IMPL_APPROX
+static inline int fp_equal(mp_float_t x, mp_float_t y) {
+ return x == y;
+}
+#else
+static inline mp_float_t fp_diff(mp_float_t x, mp_float_t y) {
+ return x - y;
+}
+#endif
+
+#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+
+// The functions below are roughly equivalent to the ones above,
+// but they are optimized to reduce code footprint by skipping
+// handling for special values such as nan, inf, +/-0.0
+// for ports where FP support is done in software.
+//
+// They also take into account lost bits of REPR_C as needed.
+
+static inline int fp_isless1(mp_float_t x) {
+ mp_float_union_t fb = { x };
return fb.i < 0x3f800000;
}
-#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
+static inline int fp_iszero(mp_float_t x) {
+ mp_float_union_t x_check = { x };
+ return !x_check.i; // this is valid for REPR_C as well
+}
-#define FPTYPE double
-#define FPCONST(x) x
-#define FPROUND_TO_ONE 0.999999999995
-#define FPDECEXP 256
-#define FPMIN_BUF_SIZE 7 // +9e+199
-#define fp_signbit(x) signbit(x)
-#define fp_isnan(x) isnan(x)
-#define fp_isinf(x) isinf(x)
-#define fp_iszero(x) (x == 0)
-#define fp_isless1(x) (x < 1.0)
+#if MICROPY_FLOAT_FORMAT_IMPL != MICROPY_FLOAT_FORMAT_IMPL_APPROX
+static inline int fp_equal(mp_float_t x, mp_float_t y) {
+ mp_float_union_t x_check = { x };
+ mp_float_union_t y_check = { y };
+ #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C
+ return (x_check.i & ~3) == (y_check.i & ~3);
+ #else
+ return x_check.i == y_check.i;
+ #endif
+}
+#else
+static inline mp_float_t fp_diff(mp_float_t x, mp_float_t y) {
+ #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C
+ mp_float_union_t x_check = { x };
+ mp_float_union_t y_check = { y };
+ x_check.i &= ~3;
+ y_check.i &= ~3;
+ return x_check.f - y_check.f;
+ #else
+ return x - y;
+ #endif
+}
+#endif
-#endif // MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT/DOUBLE
+#endif
-static inline int fp_expval(FPTYPE x) {
- mp_float_union_t fb = {x};
- return (int)((fb.i >> MP_FLOAT_FRAC_BITS) & (~(0xFFFFFFFF << MP_FLOAT_EXP_BITS))) - MP_FLOAT_EXP_OFFSET;
+#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+#define FPMIN_BUF_SIZE 6 // +9e+99
+#define MAX_MANTISSA_DIGITS (9)
+#define SAFE_MANTISSA_DIGITS (6)
+#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
+#define FPMIN_BUF_SIZE 7 // +9e+199
+#define MAX_MANTISSA_DIGITS (19)
+#define SAFE_MANTISSA_DIGITS (16)
+#endif
+
+// Internal formatting flags
+#define FMT_MODE_E 0x01 // render using scientific notation (%e)
+#define FMT_MODE_G 0x02 // render using general format (%g)
+#define FMT_MODE_F 0x04 // render using expanded fixed-point format (%f)
+#define FMT_E_CASE 0x20 // don't change this value (used for case conversion!)
+
+static char *mp_prepend_zeros(char *s, int cnt) {
+ *s++ = '0';
+ *s++ = '.';
+ while (cnt > 0) {
+ *s++ = '0';
+ cnt--;
+ }
+ return s;
}
-int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, char sign) {
+// Helper to convert a decimal mantissa (provided as an mp_large_float_uint_t) to string
+static int mp_format_mantissa(mp_large_float_uint_t mantissa, mp_large_float_uint_t mantissa_cap, char *buf, char *s,
+ int num_digits, int max_exp_zeros, int trailing_zeros, int dec, int e, int fmt_flags) {
- char *s = buf;
+ DEBUG_PRINTF("mantissa=" MP_FFUINT_FMT " exp=%d (cap=" MP_FFUINT_FMT "):\n", mantissa, e, mantissa_cap);
- if (buf_size <= FPMIN_BUF_SIZE) {
- // FPMIN_BUF_SIZE is the minimum size needed to store any FP number.
- // If the buffer does not have enough room for this (plus null terminator)
- // then don't try to format the float.
+ if (mantissa) {
+ // If rounding/searching created an extra digit or removed too many, fix mantissa first
+ if (mantissa >= mantissa_cap) {
+ if (fmt_flags & FMT_MODE_F) {
+ assert(e >= 0);
+ num_digits++;
+ dec++;
+ } else {
+ mantissa /= 10;
+ e++;
+ }
+ }
+ }
- if (buf_size >= 2) {
- *s++ = '?';
+ // When 'g' format is used, replace small exponents by explicit zeros
+ if ((fmt_flags & FMT_MODE_G) && e != 0) {
+ if (e >= 0) {
+ // If 0 < e < max_exp_zeros, expand positive exponent into trailing zeros
+ if (e < max_exp_zeros) {
+ dec += e;
+ if (dec >= num_digits) {
+ trailing_zeros = dec - (num_digits - 1);
+ }
+ e = 0;
+ }
+ } else {
+ // If -4 <= e < 0, expand negative exponent without losing significant digits
+ if (e >= -4) {
+ int cnt = 0;
+ while (e < 0 && !(mantissa % 10)) {
+ mantissa /= 10;
+ cnt++;
+ e++;
+ }
+ num_digits -= cnt;
+ s = mp_prepend_zeros(s, cnt - e - 1);
+ dec = 255;
+ e = 0;
+ }
}
- if (buf_size >= 1) {
- *s = '\0';
+ }
+
+ // Convert the integer mantissa to string
+ for (int digit = num_digits - 1; digit >= 0; digit--) {
+ int digit_ofs = (digit > dec ? digit + 1 : digit);
+ s[digit_ofs] = '0' + (int)(mantissa % 10);
+ mantissa /= 10;
+ }
+ int dot = (dec >= 255);
+ if (dec + 1 < num_digits) {
+ dot = 1;
+ s++;
+ s[dec] = '.';
+ }
+ s += num_digits;
+ #if DEBUG_FLOAT_FORMATTING
+ *s = 0;
+ DEBUG_PRINTF(" = %s exp=%d num_digits=%d zeros=%d dec=%d\n", buf, e, num_digits, trailing_zeros, dec);
+ #endif
+
+ // Append or remove trailing zeros, as required by format
+ if (trailing_zeros) {
+ dec -= num_digits - 1;
+ while (trailing_zeros--) {
+ if (!dec--) {
+ *s++ = '.';
+ dot = 1;
+ }
+ *s++ = '0';
}
- return buf_size >= 2;
}
- if (fp_signbit(f) && !fp_isnan(f)) {
- *s++ = '-';
- f = -f;
- } else {
- if (sign) {
- *s++ = sign;
+ if (fmt_flags & FMT_MODE_G) {
+ // 'g' format requires removing trailing zeros after the decimal point
+ if (dot) {
+ while (s[-1] == '0') {
+ s--;
+ }
+ if (s[-1] == '.') {
+ s--;
+ }
+ }
+ }
+
+ // Append the exponent if needed
+ if (((e != 0) || (fmt_flags & FMT_MODE_E)) && !(fmt_flags & FMT_MODE_F)) {
+ *s++ = 'E' | (fmt_flags & FMT_E_CASE);
+ if (e >= 0) {
+ *s++ = '+';
+ } else {
+ *s++ = '-';
+ e = -e;
}
+ if (e >= 100) {
+ *s++ = '0' + (e / 100);
+ }
+ *s++ = '0' + ((e / 10) % 10);
+ *s++ = '0' + (e % 10);
}
+ *s = '\0';
+ DEBUG_PRINTF(" ===> %s\n", buf);
- // buf_remaining contains bytes available for digits and exponent.
- // It is buf_size minus room for the sign and null byte.
- int buf_remaining = buf_size - 1 - (s - buf);
+ return s - buf;
+}
+// minimal value expected for buf_size, to avoid checking everywhere for overflow
+#define MIN_BUF_SIZE (MAX_MANTISSA_DIGITS + 10)
+
+int mp_format_float(mp_float_t f_entry, char *buf_entry, size_t buf_size, char fmt, int prec, char sign) {
+ assert(buf_size >= MIN_BUF_SIZE);
+
+ // Handle sign
+ mp_float_t f = f_entry;
+ char *buf = buf_entry;
+ if (signbit(f_entry) && !isnan(f_entry)) {
+ f = -f;
+ sign = '-';
+ }
+ if (sign) {
+ *buf++ = sign;
+ buf_size--;
+ }
+
+ // Handle inf/nan
+ char uc = fmt & 0x20;
{
- char uc = fmt & 0x20;
- if (fp_isinf(f)) {
+ char *s = buf;
+ if (isinf(f)) {
*s++ = 'I' ^ uc;
*s++ = 'N' ^ uc;
*s++ = 'F' ^ uc;
goto ret;
- } else if (fp_isnan(f)) {
+ } else if (isnan(f)) {
*s++ = 'N' ^ uc;
*s++ = 'A' ^ uc;
*s++ = 'N' ^ uc;
ret:
*s = '\0';
- return s - buf;
+ return s - buf_entry;
}
}
+ // Decode format character
+ int fmt_flags = (unsigned char)uc; // setup FMT_E_CASE, clear all other bits
+ char lofmt = (char)(fmt | 0x20); // fmt in lowercase
+ if (lofmt == 'f') {
+ fmt_flags |= FMT_MODE_F;
+ } else if (lofmt == 'g') {
+ fmt_flags |= FMT_MODE_G;
+ } else {
+ fmt_flags |= FMT_MODE_E;
+ }
+
+ // When precision is unspecified, default to 6
if (prec < 0) {
prec = 6;
}
- char e_char = 'E' | (fmt & 0x20); // e_char will match case of fmt
- fmt |= 0x20; // Force fmt to be lowercase
- char org_fmt = fmt;
- if (fmt == 'g' && prec == 0) {
- prec = 1;
+ // Use high precision for `repr`, but switch to exponent mode
+ // after 16 digits in any case to match CPython behaviour
+ int max_exp_zeros = (prec < (int)buf_size - 3 ? prec : (int)buf_size - 3);
+ if (prec == MP_FLOAT_REPR_PREC) {
+ prec = MAX_MANTISSA_DIGITS;
+ max_exp_zeros = 16;
}
- int e;
- int dec = 0;
- char e_sign = '\0';
- int num_digits = 0;
- int signed_e = 0;
- // Approximate power of 10 exponent from binary exponent.
- // abs(e_guess) is lower bound on abs(power of 10 exponent).
- int e_guess = (int)(fp_expval(f) * FPCONST(0.3010299956639812)); // 1/log2(10).
- if (fp_iszero(f)) {
- e = 0;
- if (fmt == 'f') {
- // Truncate precision to prevent buffer overflow
- if (prec + 2 > buf_remaining) {
- prec = buf_remaining - 2;
- }
- num_digits = prec + 1;
- } else {
- // Truncate precision to prevent buffer overflow
- if (prec + 6 > buf_remaining) {
- prec = buf_remaining - 6;
- }
- if (fmt == 'e') {
- e_sign = '+';
- }
+ // Precompute the exact decimal exponent of f, such that
+ // abs(e) is lower bound on abs(power of 10 exponent).
+ int e = 0;
+ if (!fp_iszero(f)) {
+ // Approximate power of 10 exponent from binary exponent.
+ e = (int)(fp_expval(f) * MICROPY_FLOAT_CONST(0.3010299956639812)); // 1/log2(10).
+ int positive_exp = !fp_isless1(f);
+ mp_float_t u_base = (mp_float_t)mp_decimal_exp((mp_large_float_t)1.0, e + positive_exp);
+ while ((f >= u_base) == positive_exp) {
+ e += (positive_exp ? 1 : -1);
+ u_base = (mp_float_t)mp_decimal_exp((mp_large_float_t)1.0, e + positive_exp);
}
- } else if (fp_isless1(f)) {
- FPTYPE f_entry = f; // Save f in case we go to 'f' format.
- // Build negative exponent
- e = -e_guess;
- FPTYPE u_base = MICROPY_FLOAT_C_FUN(pow)(10, -e);
- while (u_base > f) {
- ++e;
- u_base = MICROPY_FLOAT_C_FUN(pow)(10, -e);
- }
- // Normalize out the inferred unit. Use divide because
- // pow(10, e) * pow(10, -e) is slightly < 1 for some e in float32
- // (e.g. print("%.12f" % ((1e13) * (1e-13))))
- f /= u_base;
-
- // If the user specified 'g' format, and e is <= 4, then we'll switch
- // to the fixed format ('f')
-
- if (fmt == 'f' || (fmt == 'g' && e <= 4)) {
- fmt = 'f';
- dec = 0;
+ }
- if (org_fmt == 'g') {
- prec += (e - 1);
- }
+ // For 'e' format, prec is # digits after the decimal
+ // For 'f' format, prec is # digits after the decimal
+ // For 'g' format, prec is the max number of significant digits
+ //
+ // For 'e' & 'g' format, there will be a single digit before the decimal
+ // For 'f' format, zeros must be expanded instead of using an exponent.
+ // Make sure there is enough room in the buffer for them, or switch to format 'g'.
+ if ((fmt_flags & FMT_MODE_F) && e > 0) {
+ int req_size = e + prec + 2;
+ if (req_size > (int)buf_size) {
+ fmt_flags ^= FMT_MODE_F;
+ fmt_flags |= FMT_MODE_G;
+ prec++;
+ }
+ }
- // truncate precision to prevent buffer overflow
- if (prec + 2 > buf_remaining) {
- prec = buf_remaining - 2;
+ // To work independently of the format, we precompute:
+ // - the max number of significant digits to produce
+ // - the number of leading zeros to prepend (mode f only)
+ // - the number of trailing zeros to append
+ int max_digits = prec;
+ int lead_zeros = 0;
+ int trail_zeros = 0;
+ if (fmt_flags & FMT_MODE_F) {
+ if (max_digits > (int)buf_size - 3) {
+ // cannot satisfy requested number of decimals given buf_size, sorry
+ max_digits = (int)buf_size - 3;
+ }
+ if (e < 0) {
+ if (max_digits > 2 && e < -2) {
+ // Insert explicit leading zeros
+ lead_zeros = (-e < max_digits ? -e : max_digits) - 2;
+ max_digits -= lead_zeros;
+ } else {
+ max_digits++;
}
-
- num_digits = prec;
- signed_e = 0;
- f = f_entry;
- ++num_digits;
} else {
- // For e & g formats, we'll be printing the exponent, so set the
- // sign.
- e_sign = '-';
- dec = 0;
-
- if (prec > (buf_remaining - FPMIN_BUF_SIZE)) {
- prec = buf_remaining - FPMIN_BUF_SIZE;
- if (fmt == 'g') {
- prec++;
- }
- }
- signed_e = -e;
+ max_digits += e + 1;
}
} else {
- // Build positive exponent.
- // We don't modify f at this point to avoid inaccuracies from
- // scaling it. Instead, we find the product of powers of 10
- // that is not greater than it, and use that to start the
- // mantissa.
- e = e_guess;
- FPTYPE next_u = MICROPY_FLOAT_C_FUN(pow)(10, e + 1);
- while (f >= next_u) {
- ++e;
- next_u = MICROPY_FLOAT_C_FUN(pow)(10, e + 1);
+ if (!(fmt_flags & FMT_MODE_G) || max_digits == 0) {
+ max_digits++;
}
+ }
+ if (max_digits > MAX_MANTISSA_DIGITS) {
+ // use trailing zeros to avoid overflowing the mantissa
+ trail_zeros = max_digits - MAX_MANTISSA_DIGITS;
+ max_digits = MAX_MANTISSA_DIGITS;
+ }
+ int overhead = (fmt_flags & FMT_MODE_F ? 3 : FPMIN_BUF_SIZE + 1);
+ if (trail_zeros > (int)buf_size - max_digits - overhead) {
+ // cannot satisfy requested number of decimals given buf_size, sorry
+ trail_zeros = (int)buf_size - max_digits - overhead;
+ }
- // If the user specified fixed format (fmt == 'f') and e makes the
- // number too big to fit into the available buffer, then we'll
- // switch to the 'e' format.
-
- if (fmt == 'f') {
- if (e >= buf_remaining) {
- fmt = 'e';
- } else if ((e + prec + 2) > buf_remaining) {
- prec = buf_remaining - e - 2;
- if (prec < 0) {
- // This means no decimal point, so we can add one back
- // for the decimal.
- prec++;
- }
- }
- }
- if (fmt == 'e' && prec > (buf_remaining - FPMIN_BUF_SIZE)) {
- prec = buf_remaining - FPMIN_BUF_SIZE;
- }
- if (fmt == 'g') {
- // Truncate precision to prevent buffer overflow
- if (prec + (FPMIN_BUF_SIZE - 1) > buf_remaining) {
- prec = buf_remaining - (FPMIN_BUF_SIZE - 1);
- }
- }
- // If the user specified 'g' format, and e is < prec, then we'll switch
- // to the fixed format.
+ // When the caller asks for more precision than is guaranteed to be
+ // available, look for a shorter (rounded) representation first, and only
+ // dig into more digits if there is no short representation.
+ int num_digits = (SAFE_MANTISSA_DIGITS < max_digits ? SAFE_MANTISSA_DIGITS : max_digits);
+try_again:
+ ;
- if (fmt == 'g' && e < prec) {
- fmt = 'f';
- prec -= (e + 1);
- }
- if (fmt == 'f') {
- dec = e;
- num_digits = prec + e + 1;
+ char *s = buf;
+ int extra_zeros = trail_zeros + (max_digits - num_digits);
+ int decexp;
+ int dec = 0;
+
+ if (fp_iszero(f)) {
+ // no need for scaling 0.0
+ decexp = 0;
+ } else if (fmt_flags & FMT_MODE_F) {
+ decexp = num_digits - 1;
+ if (e < 0) {
+ // Negative exponent: we keep a single leading zero in the mantissa,
+ // as using more would waste precious digits needed for accuracy.
+ if (lead_zeros > 0) {
+ // We are using leading zeros
+ s = mp_prepend_zeros(s, lead_zeros);
+ decexp += lead_zeros + 1;
+ dec = 255; // no decimal dot
+ } else {
+ // Small negative exponent, work directly on the mantissa
+ dec = 0;
+ }
} else {
- e_sign = '+';
+ // Positive exponent: we will add trailing zeros separately
+ decexp -= e;
+ dec = e;
}
- signed_e = e;
+ } else {
+ decexp = num_digits - e - 1;
}
- if (prec < 0) {
- // This can happen when the prec is trimmed to prevent buffer overflow
- prec = 0;
+ DEBUG_PRINTF("input=%.19g e=%d fmt=%c max_d=%d num_d=%d decexp=%d dec=%d l0=%d r0=%d\n",
+ (double)f, e, lofmt, max_digits, num_digits, decexp, dec, lead_zeros, extra_zeros);
+
+ // At this point,
+ // - buf points to beginning of output buffer for the unsigned representation
+ // - num_digits == the number of mantissa digits to add
+ // - (dec + 1) == the number of digits to print before adding a decimal point
+ // - decexp == the power of 10 exponent to apply to f to get the decimal mantissa
+ // - e == the power of 10 exponent to append ('e' or 'g' format)
+ mp_large_float_uint_t mantissa_cap = 10;
+ for (int n = 1; n < num_digits; n++) {
+ mantissa_cap *= 10;
}
- // At this point e contains the absolute value of the power of 10 exponent.
- // (dec + 1) == the number of dgits before the decimal.
-
- // For e, prec is # digits after the decimal
- // For f, prec is # digits after the decimal
- // For g, prec is the max number of significant digits
- //
- // For e & g there will be a single digit before the decimal
- // for f there will be e digits before the decimal
-
- if (fmt == 'e') {
- num_digits = prec + 1;
- } else if (fmt == 'g') {
- if (prec == 0) {
- prec = 1;
+ // Build the decimal mantissa into a large uint
+ mp_large_float_uint_t mantissa = 1;
+ if (sizeof(mp_large_float_t) == sizeof(mp_float_t) && num_digits > SAFE_MANTISSA_DIGITS && decexp > 1) {
+ // if we don't have large floats, use integer multiply to produce the last digits
+ if (num_digits > SAFE_MANTISSA_DIGITS + 1 && decexp > 2) {
+ mantissa = 100;
+ decexp -= 2;
+ } else {
+ mantissa = 10;
+ decexp -= 1;
}
- num_digits = prec;
}
-
- int d = 0;
- for (int digit_index = signed_e; num_digits >= 0; --digit_index) {
- FPTYPE u_base = FPCONST(1.0);
- if (digit_index > 0) {
- // Generate 10^digit_index for positive digit_index.
- u_base = MICROPY_FLOAT_C_FUN(pow)(10, digit_index);
- }
- for (d = 0; d < 9; ++d) {
- if (f < u_base) {
- break;
- }
- f -= u_base;
- }
- // We calculate one more digit than we display, to use in rounding
- // below. So only emit the digit if it's one that we display.
- if (num_digits > 0) {
- // Emit this number (the leading digit).
- *s++ = '0' + d;
- if (dec == 0 && prec > 0) {
- *s++ = '.';
- }
- }
- --dec;
- --num_digits;
- if (digit_index <= 0) {
- // Once we get below 1.0, we scale up f instead of calculating
- // negative powers of 10 in u_base. This provides better
- // renditions of exact decimals like 1/16 etc.
- f *= FPCONST(10.0);
+ mp_large_float_t mantissa_f = mp_decimal_exp((mp_large_float_t)f, decexp);
+ mantissa *= (mp_large_float_uint_t)(mantissa_f + (mp_large_float_t)0.5);
+ DEBUG_PRINTF("input=%.19g fmt=%c num_digits=%d dec=%d mantissa=" MP_FFUINT_FMT " r0=%d\n", (double)f, lofmt, num_digits, dec, mantissa, extra_zeros);
+
+ // Finally convert the decimal mantissa to a floating-point string, according to formatting rules
+ int reprlen = mp_format_mantissa(mantissa, mantissa_cap, buf, s, num_digits, max_exp_zeros, extra_zeros, dec, e, fmt_flags);
+ assert(reprlen + 1 <= (int)buf_size);
+
+ #if MICROPY_FLOAT_FORMAT_IMPL != MICROPY_FLOAT_FORMAT_IMPL_APPROX
+
+ if (num_digits < max_digits) {
+ // The initial precision might not be sufficient for an exact representation
+ // for all numbers. If the result is not exact, restart using next precision.
+ // To check, parse the resulting number and compare against the original.
+ mp_float_t check;
+ DEBUG_PRINTF("input=%.19g, compare to float('%s')\n", (double)f, buf);
+ mp_parse_float_internal(buf, reprlen, &check);
+ if (!fp_equal(check, f)) {
+ num_digits++;
+ DEBUG_PRINTF("Not perfect, retry using more digits (%d)\n", num_digits);
+ goto try_again;
}
}
- // Rounding. If the next digit to print is >= 5, round up.
- if (d >= 5) {
- char *rs = s;
- rs--;
- while (1) {
- if (*rs == '.') {
- rs--;
- continue;
- }
- if (*rs < '0' || *rs > '9') {
- // + or -
- rs++; // So we sit on the digit to the right of the sign
- break;
+
+ #else
+
+ // The initial decimal mantissa might not have been completely accurate due
+ // to the previous floating point operations. The best way to verify this is to
+ // parse the resulting number and compare against the original
+ mp_float_t check;
+ DEBUG_PRINTF("input=%.19g, compare to float('%s')\n", (double)f, buf);
+ mp_parse_float_internal(buf, reprlen, &check);
+ mp_float_t diff = fp_diff(check, f);
+ mp_float_t best_diff = diff;
+ mp_large_float_uint_t best_mantissa = mantissa;
+
+ if (fp_iszero(diff)) {
+ // we have a perfect match
+ DEBUG_PRINTF(MP_FFUINT_FMT ": perfect match (direct)\n", mantissa);
+ } else {
+ // In order to get the best possible representation, we will perform a
+ // dichotomic search for a reversible representation.
+ // This will also provide optimal rounding on the fly.
+ unsigned err_range = 1;
+ if (num_digits > SAFE_MANTISSA_DIGITS) {
+ err_range <<= 3 * (num_digits - SAFE_MANTISSA_DIGITS);
+ }
+ int maxruns = 3 + 3 * (MAX_MANTISSA_DIGITS - SAFE_MANTISSA_DIGITS);
+ while (maxruns-- > 0) {
+ // update mantissa according to dichotomic search
+ if (signbit(diff)) {
+ mantissa += err_range;
+ } else {
+ // mantissa is expected to always have more significant digits than err_range
+ assert(mantissa >= err_range);
+ mantissa -= err_range;
}
- if (*rs < '9') {
- (*rs)++;
+ // retry conversion
+ reprlen = mp_format_mantissa(mantissa, mantissa_cap, buf, s, num_digits, max_exp_zeros, extra_zeros, dec, e, fmt_flags);
+ assert(reprlen + 1 <= (int)buf_size);
+ DEBUG_PRINTF("input=%.19g, compare to float('%s')\n", (double)f, buf);
+ mp_parse_float_internal(buf, reprlen, &check);
+ DEBUG_PRINTF("check=%.19g num_digits=%d e=%d mantissa=" MP_FFUINT_FMT "\n", (double)check, num_digits, e, mantissa);
+ diff = fp_diff(check, f);
+ if (fp_iszero(diff)) {
+ // we have a perfect match
+ DEBUG_PRINTF(MP_FFUINT_FMT ": perfect match\n", mantissa);
break;
}
- *rs = '0';
- if (rs == buf) {
- break;
+ // keep track of our best estimate
+ mp_float_t delta = MICROPY_FLOAT_C_FUN(fabs)(diff) - MICROPY_FLOAT_C_FUN(fabs)(best_diff);
+ if (signbit(delta) || (fp_iszero(delta) && !(mantissa % 10u))) {
+ best_diff = diff;
+ best_mantissa = mantissa;
}
- rs--;
- }
- if (*rs == '0') {
- // We need to insert a 1
- if (rs[1] == '.' && fmt != 'f') {
- // We're going to round 9.99 to 10.00
- // Move the decimal point
- rs[0] = '.';
- rs[1] = '0';
- if (e_sign == '-') {
- e--;
- if (e == 0) {
- e_sign = '+';
- }
- } else {
- e++;
- }
+ // string repr is not perfect: continue a dichotomic improvement
+ DEBUG_PRINTF(MP_FFUINT_FMT ": %.19g, err_range=%d\n", mantissa, (double)check, err_range);
+ if (err_range > 1) {
+ err_range >>= 1;
} else {
- // Need at extra digit at the end to make room for the leading '1'
- // but if we're at the buffer size limit, just drop the final digit.
- if ((size_t)(s + 1 - buf) < buf_size) {
- s++;
+ // We have tried all possible mantissa, without finding a reversible repr.
+ // Check if we have an alternate precision to try.
+ if (num_digits < max_digits) {
+ num_digits++;
+ DEBUG_PRINTF("Failed to find a perfect match, try with more digits (%d)\n", num_digits);
+ goto try_again;
}
+ // Otherwise, keep the closest one, which is either the first one or the last one.
+ if (mantissa == best_mantissa) {
+ // Last guess is the best one
+ DEBUG_PRINTF(MP_FFUINT_FMT ": last guess was the best one\n", mantissa);
+ } else {
+ // We had a better guess earlier
+ DEBUG_PRINTF(MP_FFUINT_FMT ": use best guess\n", mantissa);
+ reprlen = mp_format_mantissa(best_mantissa, mantissa_cap, buf, s, num_digits, max_exp_zeros, extra_zeros, dec, e, fmt_flags);
+ }
+ break;
}
- char *ss = s;
- while (ss > rs) {
- *ss = ss[-1];
- ss--;
- }
- *rs = '1';
}
}
+ #endif
- // verify that we did not overrun the input buffer so far
- assert((size_t)(s + 1 - buf) <= buf_size);
-
- if (org_fmt == 'g' && prec > 0) {
- // Remove trailing zeros and a trailing decimal point
- while (s[-1] == '0') {
- s--;
- }
- if (s[-1] == '.') {
- s--;
- }
- }
- // Append the exponent
- if (e_sign) {
- *s++ = e_char;
- *s++ = e_sign;
- if (FPMIN_BUF_SIZE == 7 && e >= 100) {
- *s++ = '0' + (e / 100);
- }
- *s++ = '0' + ((e / 10) % 10);
- *s++ = '0' + (e % 10);
- }
- *s = '\0';
-
- // verify that we did not overrun the input buffer
- assert((size_t)(s + 1 - buf) <= buf_size);
-
- return s - buf;
+ return buf + reprlen - buf_entry;
}
#endif // MICROPY_FLOAT_IMPL != MICROPY_FLOAT_IMPL_NONE
diff --git a/py/formatfloat.h b/py/formatfloat.h
index 9a1643b4d..7b1414672 100644
--- a/py/formatfloat.h
+++ b/py/formatfloat.h
@@ -29,6 +29,7 @@
#include "py/mpconfig.h"
#if MICROPY_PY_BUILTINS_FLOAT
+#define MP_FLOAT_REPR_PREC (99) // magic `prec` value for optimal `repr` behaviour
int mp_format_float(mp_float_t f, char *buf, size_t bufSize, char fmt, int prec, char sign);
#endif
diff --git a/py/misc.h b/py/misc.h
index e03448583..86ac2ec9a 100644
--- a/py/misc.h
+++ b/py/misc.h
@@ -277,6 +277,25 @@ typedef union _mp_float_union_t {
mp_float_uint_t i;
} mp_float_union_t;
+#if MICROPY_FLOAT_FORMAT_IMPL == MICROPY_FLOAT_FORMAT_IMPL_EXACT
+
+#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+// Exact float conversion internally requires a larger floating-point type
+typedef double mp_large_float_t;
+#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
+typedef long double mp_large_float_t;
+#endif
+// Always use a 64 bit mantissa for formatting and parsing
+typedef uint64_t mp_large_float_uint_t;
+
+#else // MICROPY_FLOAT_FORMAT_IMPL != MICROPY_FLOAT_FORMAT_IMPL_EXACT
+
+// No bigger floating points
+typedef mp_float_t mp_large_float_t;
+typedef mp_float_uint_t mp_large_float_uint_t;
+
+#endif
+
#endif // MICROPY_PY_BUILTINS_FLOAT
/** ROM string compression *************/
diff --git a/py/mpconfig.h b/py/mpconfig.h
index c316aa4b2..caa63fef3 100644
--- a/py/mpconfig.h
+++ b/py/mpconfig.h
@@ -861,6 +861,27 @@ typedef double mp_float_t;
#define MICROPY_PY_BUILTINS_COMPLEX (MICROPY_PY_BUILTINS_FLOAT)
#endif
+// Float to string conversion implementations
+//
+// Note that the EXACT method is only available if the compiler supports
+// floating points larger than mp_float_t:
+// - with MICROPY_FLOAT_IMPL_FLOAT, the compiler needs to support `double`
+// - with MICROPY_FLOAT_IMPL_DOUBLE, the compiler needs to support `long double`
+//
+#define MICROPY_FLOAT_FORMAT_IMPL_BASIC (0) // smallest code, but inexact
+#define MICROPY_FLOAT_FORMAT_IMPL_APPROX (1) // slightly bigger, almost perfect
+#define MICROPY_FLOAT_FORMAT_IMPL_EXACT (2) // bigger code, and 100% exact repr
+
+#ifndef MICROPY_FLOAT_FORMAT_IMPL
+#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+#define MICROPY_FLOAT_FORMAT_IMPL (MICROPY_FLOAT_FORMAT_IMPL_APPROX)
+#elif defined(__SIZEOF_LONG_DOUBLE__) && __SIZEOF_LONG_DOUBLE__ > __SIZEOF_DOUBLE__
+#define MICROPY_FLOAT_FORMAT_IMPL (MICROPY_FLOAT_FORMAT_IMPL_EXACT)
+#else
+#define MICROPY_FLOAT_FORMAT_IMPL (MICROPY_FLOAT_FORMAT_IMPL_APPROX)
+#endif
+#endif
+
// Whether to use the native _Float16 for 16-bit float support
#ifndef MICROPY_FLOAT_USE_NATIVE_FLT16
#ifdef __FLT16_MAX__
diff --git a/py/mpprint.c b/py/mpprint.c
index f1d8bd0c5..bd7a25087 100644
--- a/py/mpprint.c
+++ b/py/mpprint.c
@@ -338,7 +338,7 @@ int mp_print_mp_int(const mp_print_t *print, mp_obj_t x, unsigned int base, int
#if MICROPY_PY_BUILTINS_FLOAT
int mp_print_float(const mp_print_t *print, mp_float_t f, char fmt, unsigned int flags, char fill, int width, int prec) {
- char buf[32];
+ char buf[36];
char sign = '\0';
int chrs = 0;
@@ -349,11 +349,17 @@ int mp_print_float(const mp_print_t *print, mp_float_t f, char fmt, unsigned int
sign = ' ';
}
- int len = mp_format_float(f, buf, sizeof(buf), fmt, prec, sign);
+ int len = mp_format_float(f, buf, sizeof(buf) - 3, fmt, prec, sign);
char *s = buf;
- if ((flags & PF_FLAG_ADD_PERCENT) && (size_t)(len + 1) < sizeof(buf)) {
+ if ((flags & PF_FLAG_ALWAYS_DECIMAL) && strchr(buf, '.') == NULL && strchr(buf, 'e') == NULL && strchr(buf, 'n') == NULL) {
+ buf[len++] = '.';
+ buf[len++] = '0';
+ buf[len] = '\0';
+ }
+
+ if (flags & PF_FLAG_ADD_PERCENT) {
buf[len++] = '%';
buf[len] = '\0';
}
diff --git a/py/mpprint.h b/py/mpprint.h
index 583f00bda..250ea24b8 100644
--- a/py/mpprint.h
+++ b/py/mpprint.h
@@ -36,6 +36,7 @@
#define PF_FLAG_CENTER_ADJUST (0x020)
#define PF_FLAG_ADD_PERCENT (0x040)
#define PF_FLAG_SHOW_OCTAL_LETTER (0x080)
+#define PF_FLAG_ALWAYS_DECIMAL (0x100)
#define PF_FLAG_SEP_POS (9) // must be above all the above PF_FLAGs
#if MICROPY_PY_IO && MICROPY_PY_SYS_STDFILES
diff --git a/py/objcomplex.c b/py/objcomplex.c
index 85b585284..805899edf 100644
--- a/py/objcomplex.c
+++ b/py/objcomplex.c
@@ -45,29 +45,18 @@ typedef struct _mp_obj_complex_t {
static void complex_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_t kind) {
(void)kind;
mp_obj_complex_t *o = MP_OBJ_TO_PTR(o_in);
- #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
- char buf[16];
- #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C
- const int precision = 6;
- #else
- const int precision = 7;
- #endif
- #else
- char buf[32];
- const int precision = 16;
- #endif
- if (o->real == 0) {
- mp_format_float(o->imag, buf, sizeof(buf), 'g', precision, '\0');
- mp_printf(print, "%sj", buf);
+ const char *suffix;
+ int flags = 0;
+ if (o->real != 0) {
+ mp_print_str(print, "(");
+ mp_print_float(print, o->real, 'g', 0, '\0', -1, MP_FLOAT_REPR_PREC);
+ flags = PF_FLAG_SHOW_SIGN;
+ suffix = "j)";
} else {
- mp_format_float(o->real, buf, sizeof(buf), 'g', precision, '\0');
- mp_printf(print, "(%s", buf);
- if (o->imag >= 0 || isnan(o->imag)) {
- mp_print_str(print, "+");
- }
- mp_format_float(o->imag, buf, sizeof(buf), 'g', precision, '\0');
- mp_printf(print, "%sj)", buf);
+ suffix = "j";
}
+ mp_print_float(print, o->imag, 'g', flags, '\0', -1, MP_FLOAT_REPR_PREC);
+ mp_print_str(print, suffix);
}
static mp_obj_t complex_make_new(const mp_obj_type_t *type_in, size_t n_args, size_t n_kw, const mp_obj_t *args) {
diff --git a/py/objfloat.c b/py/objfloat.c
index 81b0daa62..125b576fb 100644
--- a/py/objfloat.c
+++ b/py/objfloat.c
@@ -110,23 +110,7 @@ mp_int_t mp_float_hash(mp_float_t src) {
static void float_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_t kind) {
(void)kind;
mp_float_t o_val = mp_obj_float_get(o_in);
- #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
- char buf[16];
- #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C
- const int precision = 6;
- #else
- const int precision = 7;
- #endif
- #else
- char buf[32];
- const int precision = 16;
- #endif
- mp_format_float(o_val, buf, sizeof(buf), 'g', precision, '\0');
- mp_print_str(print, buf);
- if (strchr(buf, '.') == NULL && strchr(buf, 'e') == NULL && strchr(buf, 'n') == NULL) {
- // Python floats always have decimal point (unless inf or nan)
- mp_print_str(print, ".0");
- }
+ mp_print_float(print, o_val, 'g', PF_FLAG_ALWAYS_DECIMAL, '\0', -1, MP_FLOAT_REPR_PREC);
}
static mp_obj_t float_make_new(const mp_obj_type_t *type_in, size_t n_args, size_t n_kw, const mp_obj_t *args) {
diff --git a/py/parsenum.c b/py/parsenum.c
index 019491b51..e18002306 100644
--- a/py/parsenum.c
+++ b/py/parsenum.c
@@ -210,7 +210,7 @@ typedef enum {
} parse_dec_in_t;
// MANTISSA_MAX is used to retain precision while not overflowing mantissa
-#define MANTISSA_MAX (sizeof(mp_float_uint_t) == 8 ? 0x1999999999999998ULL : 0x19999998U)
+#define MANTISSA_MAX (sizeof(mp_large_float_uint_t) == 8 ? 0x1999999999999998ULL : 0x19999998U)
// MAX_EXACT_POWER_OF_5 is the largest value of x so that 5^x can be stored exactly in a float
#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
@@ -220,11 +220,45 @@ typedef enum {
#endif
// Helper to compute `num * (10.0 ** dec_exp)`
-mp_float_t mp_decimal_exp(mp_float_t num, int dec_exp) {
-
- if (dec_exp == 0 || num == MICROPY_FLOAT_CONST(0.0)) {
+mp_large_float_t mp_decimal_exp(mp_large_float_t num, int dec_exp) {
+ if (dec_exp == 0 || num == (mp_large_float_t)(0.0)) {
return num;
}
+
+ #if MICROPY_FLOAT_FORMAT_IMPL == MICROPY_FLOAT_FORMAT_IMPL_EXACT
+
+ // If the assert below fails, it means you have manually selected
+ // MICROPY_FLOAT_FORMAT_IMPL_EXACT on a platform where mp_large_float_t is not
+ // larger than mp_float_t, which would result in inexact conversions. To fix this,
+ // change your `mpconfigport.h` to select MICROPY_FLOAT_FORMAT_IMPL_APPROX instead.
+ assert(sizeof(mp_large_float_t) > sizeof(mp_float_t));
+
+ // Compute the power using simple multiplications, to avoid a
+ // dependency on a higher-precision pow() function
+ int neg_exp = (dec_exp < 0);
+ if (neg_exp) {
+ dec_exp = -dec_exp;
+ }
+ mp_large_float_t res = num;
+ mp_large_float_t expo = (mp_large_float_t)10.0;
+ while (dec_exp) {
+ if (dec_exp & 1) {
+ if (neg_exp) {
+ res /= expo;
+ } else {
+ res *= expo;
+ }
+ }
+ dec_exp >>= 1;
+ if (dec_exp) {
+ expo *= expo;
+ }
+ }
+ return res;
+
+ #else
+ // MICROPY_FLOAT_FORMAT_IMPL != MICROPY_FLOAT_FORMAT_IMPL_EXACT
+
mp_float_union_t res = {num};
// Multiply first by (2.0 ** dec_exp) via the exponent
// - this will ensure that the result of `pow()` is always in mp_float_t range
@@ -238,12 +272,14 @@ mp_float_t mp_decimal_exp(mp_float_t num, int dec_exp) {
} else {
res.f *= MICROPY_FLOAT_C_FUN(pow)(5, dec_exp);
}
- return (mp_float_t)res.f;
+ return (mp_large_float_t)res.f;
+
+ #endif
}
// Break out inner digit accumulation routine to ease trailing zero deferral.
-static mp_float_uint_t accept_digit(mp_float_uint_t p_mantissa, unsigned int dig, int *p_exp_extra, int in) {
+static mp_large_float_uint_t accept_digit(mp_large_float_uint_t p_mantissa, unsigned int dig, int *p_exp_extra, int in) {
// Core routine to ingest an additional digit.
if (p_mantissa < MANTISSA_MAX) {
// dec_val won't overflow so keep accumulating
@@ -267,7 +303,7 @@ const char *mp_parse_float_internal(const char *str, size_t len, mp_float_t *res
parse_dec_in_t in = PARSE_DEC_IN_INTG;
bool exp_neg = false;
- mp_float_uint_t mantissa = 0;
+ mp_large_float_uint_t mantissa = 0;
int exp_val = 0;
int exp_extra = 0;
int trailing_zeros_intg = 0, trailing_zeros_frac = 0;
diff --git a/py/parsenum.h b/py/parsenum.h
index a807cb09d..d532cb194 100644
--- a/py/parsenum.h
+++ b/py/parsenum.h
@@ -35,7 +35,7 @@
mp_obj_t mp_parse_num_integer(const char *restrict str, size_t len, int base, mp_lexer_t *lex);
#if MICROPY_PY_BUILTINS_FLOAT
-mp_float_t mp_decimal_exp(mp_float_t num, int dec_exp);
+mp_large_float_t mp_decimal_exp(mp_large_float_t num, int dec_exp);
const char *mp_parse_float_internal(const char *str, size_t len, mp_float_t *res);
#endif