From dbbaa959c85c04dbbcde5908b5d0775b574e44e7 Mon Sep 17 00:00:00 2001 From: Yoctopuce dev Date: Fri, 6 Jun 2025 14:55:21 +0200 Subject: py/formatfloat: Improve accuracy of float formatting code. Following discussions in PR #16666, this commit updates the float formatting code to improve the `repr` reversibility, i.e. the percentage of valid floating point numbers that do parse back to the same number when formatted by `repr` (in CPython it's 100%). This new code offers a choice of 3 float conversion methods, depending on the desired tradeoff between code size and conversion precision: - BASIC method is the smallest code footprint - APPROX method uses an iterative method to approximate the exact representation, which is a bit slower but but does not have a big impact on code size. It provides `repr` reversibility on >99.8% of the cases in double precision, and on >98.5% in single precision (except with REPR_C, where reversibility is 100% as the last two bits are not taken into account). - EXACT method uses higher-precision floats during conversion, which provides perfect results but has a higher impact on code size. It is faster than APPROX method, and faster than the CPython equivalent implementation. It is however not available on all compilers when using FLOAT_IMPL_DOUBLE. Here is the table comparing the impact of the three conversion methods on code footprint on PYBV10 (using single-precision floats) and reversibility rate for both single-precision and double-precision floats. The table includes current situation as a baseline for the comparison: PYBV10 REPR_C FLOAT DOUBLE current = 364688 12.9% 27.6% 37.9% basic = 364812 85.6% 60.5% 85.7% approx = 365080 100.0% 98.5% 99.8% exact = 366408 100.0% 100.0% 100.0% Signed-off-by: Yoctopuce dev --- py/formatfloat.c | 757 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 451 insertions(+), 306 deletions(-) (limited to 'py/formatfloat.c') diff --git a/py/formatfloat.c b/py/formatfloat.c index 7cd471018..1ea34f84b 100644 --- a/py/formatfloat.c +++ b/py/formatfloat.c @@ -33,392 +33,537 @@ #include #include #include "py/formatfloat.h" +#include "py/parsenum.h" /*********************************************************************** Routine for converting a arbitrary floating point number into a string. - The code in this function was inspired from Fred Bayer's pdouble.c. - Since pdouble.c was released as Public Domain, I'm releasing this - code as public domain as well. + The code in this function was inspired from Dave Hylands's previous + version, which was itself inspired from Fred Bayer's pdouble.c. The original code can be found in https://github.com/dhylands/format-float - Dave Hylands - ***********************************************************************/ -#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT -// 1 sign bit, 8 exponent bits, and 23 mantissa bits. -// exponent values 0 and 255 are reserved, exponent can be 1 to 254. -// exponent is stored with a bias of 127. -// The min and max floats are on the order of 1x10^37 and 1x10^-37 - -#define FPTYPE float -#define FPCONST(x) x##F -#define FPROUND_TO_ONE 0.9999995F -#define FPDECEXP 32 -#define FPMIN_BUF_SIZE 6 // +9e+99 +// Float formatting debug code is intended for use in ports/unix only, +// as it uses the libc float printing function as a reference. +#define DEBUG_FLOAT_FORMATTING 0 + +#if DEBUG_FLOAT_FORMATTING +#define DEBUG_PRINTF(...) fprintf(stderr, __VA_ARGS__) +#else +#define DEBUG_PRINTF(...) +#endif + +#if MICROPY_FLOAT_FORMAT_IMPL == MICROPY_FLOAT_FORMAT_IMPL_EXACT || MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE +#define MP_FFUINT_FMT "%lu" +#else +#define MP_FFUINT_FMT "%u" +#endif + +static inline int fp_expval(mp_float_t x) { + mp_float_union_t fb = { x }; + return (int)fb.p.exp - MP_FLOAT_EXP_OFFSET; +} -#define FLT_SIGN_MASK 0x80000000 +#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE -static inline int fp_signbit(float x) { - mp_float_union_t fb = {x}; - return fb.i & FLT_SIGN_MASK; +static inline int fp_isless1(mp_float_t x) { + return x < 1.0; } -#define fp_isnan(x) isnan(x) -#define fp_isinf(x) isinf(x) -static inline int fp_iszero(float x) { - mp_float_union_t fb = {x}; - return fb.i == 0; + +static inline int fp_iszero(mp_float_t x) { + return x == 0.0; } -static inline int fp_isless1(float x) { - mp_float_union_t fb = {x}; + +#if MICROPY_FLOAT_FORMAT_IMPL != MICROPY_FLOAT_FORMAT_IMPL_APPROX +static inline int fp_equal(mp_float_t x, mp_float_t y) { + return x == y; +} +#else +static inline mp_float_t fp_diff(mp_float_t x, mp_float_t y) { + return x - y; +} +#endif + +#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT + +// The functions below are roughly equivalent to the ones above, +// but they are optimized to reduce code footprint by skipping +// handling for special values such as nan, inf, +/-0.0 +// for ports where FP support is done in software. +// +// They also take into account lost bits of REPR_C as needed. + +static inline int fp_isless1(mp_float_t x) { + mp_float_union_t fb = { x }; return fb.i < 0x3f800000; } -#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE +static inline int fp_iszero(mp_float_t x) { + mp_float_union_t x_check = { x }; + return !x_check.i; // this is valid for REPR_C as well +} -#define FPTYPE double -#define FPCONST(x) x -#define FPROUND_TO_ONE 0.999999999995 -#define FPDECEXP 256 -#define FPMIN_BUF_SIZE 7 // +9e+199 -#define fp_signbit(x) signbit(x) -#define fp_isnan(x) isnan(x) -#define fp_isinf(x) isinf(x) -#define fp_iszero(x) (x == 0) -#define fp_isless1(x) (x < 1.0) +#if MICROPY_FLOAT_FORMAT_IMPL != MICROPY_FLOAT_FORMAT_IMPL_APPROX +static inline int fp_equal(mp_float_t x, mp_float_t y) { + mp_float_union_t x_check = { x }; + mp_float_union_t y_check = { y }; + #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C + return (x_check.i & ~3) == (y_check.i & ~3); + #else + return x_check.i == y_check.i; + #endif +} +#else +static inline mp_float_t fp_diff(mp_float_t x, mp_float_t y) { + #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C + mp_float_union_t x_check = { x }; + mp_float_union_t y_check = { y }; + x_check.i &= ~3; + y_check.i &= ~3; + return x_check.f - y_check.f; + #else + return x - y; + #endif +} +#endif -#endif // MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT/DOUBLE +#endif -static inline int fp_expval(FPTYPE x) { - mp_float_union_t fb = {x}; - return (int)((fb.i >> MP_FLOAT_FRAC_BITS) & (~(0xFFFFFFFF << MP_FLOAT_EXP_BITS))) - MP_FLOAT_EXP_OFFSET; +#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT +#define FPMIN_BUF_SIZE 6 // +9e+99 +#define MAX_MANTISSA_DIGITS (9) +#define SAFE_MANTISSA_DIGITS (6) +#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE +#define FPMIN_BUF_SIZE 7 // +9e+199 +#define MAX_MANTISSA_DIGITS (19) +#define SAFE_MANTISSA_DIGITS (16) +#endif + +// Internal formatting flags +#define FMT_MODE_E 0x01 // render using scientific notation (%e) +#define FMT_MODE_G 0x02 // render using general format (%g) +#define FMT_MODE_F 0x04 // render using using expanded fixed-point format (%f) +#define FMT_E_CASE 0x20 // don't change this value (used for case conversion!) + +static char *mp_prepend_zeros(char *s, int cnt) { + *s++ = '0'; + *s++ = '.'; + while (cnt > 0) { + *s++ = '0'; + cnt--; + } + return s; } -int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, char sign) { +// Helper to convert a decimal mantissa (provided as an mp_large_float_uint_t) to string +static int mp_format_mantissa(mp_large_float_uint_t mantissa, mp_large_float_uint_t mantissa_cap, char *buf, char *s, + int num_digits, int max_exp_zeros, int trailing_zeros, int dec, int e, int fmt_flags) { - char *s = buf; + DEBUG_PRINTF("mantissa=" MP_FFUINT_FMT " exp=%d (cap=" MP_FFUINT_FMT "):\n", mantissa, e, mantissa_cap); - if (buf_size <= FPMIN_BUF_SIZE) { - // FPMIN_BUF_SIZE is the minimum size needed to store any FP number. - // If the buffer does not have enough room for this (plus null terminator) - // then don't try to format the float. + if (mantissa) { + // If rounding/searching created an extra digit or removed too many, fix mantissa first + if (mantissa >= mantissa_cap) { + if (fmt_flags & FMT_MODE_F) { + assert(e >= 0); + num_digits++; + dec++; + } else { + mantissa /= 10; + e++; + } + } + } - if (buf_size >= 2) { - *s++ = '?'; + // When 'g' format is used, replace small exponents by explicit zeros + if ((fmt_flags & FMT_MODE_G) && e != 0) { + if (e >= 0) { + // If 0 < e < max_exp_zeros, expand positive exponent into trailing zeros + if (e < max_exp_zeros) { + dec += e; + if (dec >= num_digits) { + trailing_zeros = dec - (num_digits - 1); + } + e = 0; + } + } else { + // If -4 <= e < 0, expand negative exponent without losing significant digits + if (e >= -4) { + int cnt = 0; + while (e < 0 && !(mantissa % 10)) { + mantissa /= 10; + cnt++; + e++; + } + num_digits -= cnt; + s = mp_prepend_zeros(s, cnt - e - 1); + dec = 255; + e = 0; + } } - if (buf_size >= 1) { - *s = '\0'; + } + + // Convert the integer mantissa to string + for (int digit = num_digits - 1; digit >= 0; digit--) { + int digit_ofs = (digit > dec ? digit + 1 : digit); + s[digit_ofs] = '0' + (int)(mantissa % 10); + mantissa /= 10; + } + int dot = (dec >= 255); + if (dec + 1 < num_digits) { + dot = 1; + s++; + s[dec] = '.'; + } + s += num_digits; + #if DEBUG_FLOAT_FORMATTING + *s = 0; + DEBUG_PRINTF(" = %s exp=%d num_digits=%d zeros=%d dec=%d\n", buf, e, num_digits, trailing_zeros, dec); + #endif + + // Append or remove trailing zeros, as required by format + if (trailing_zeros) { + dec -= num_digits - 1; + while (trailing_zeros--) { + if (!dec--) { + *s++ = '.'; + dot = 1; + } + *s++ = '0'; } - return buf_size >= 2; } - if (fp_signbit(f) && !fp_isnan(f)) { - *s++ = '-'; - f = -f; - } else { - if (sign) { - *s++ = sign; + if (fmt_flags & FMT_MODE_G) { + // 'g' format requires to remove trailing zeros after decimal point + if (dot) { + while (s[-1] == '0') { + s--; + } + if (s[-1] == '.') { + s--; + } + } + } + + // Append the exponent if needed + if (((e != 0) || (fmt_flags & FMT_MODE_E)) && !(fmt_flags & FMT_MODE_F)) { + *s++ = 'E' | (fmt_flags & FMT_E_CASE); + if (e >= 0) { + *s++ = '+'; + } else { + *s++ = '-'; + e = -e; } + if (e >= 100) { + *s++ = '0' + (e / 100); + } + *s++ = '0' + ((e / 10) % 10); + *s++ = '0' + (e % 10); } + *s = '\0'; + DEBUG_PRINTF(" ===> %s\n", buf); - // buf_remaining contains bytes available for digits and exponent. - // It is buf_size minus room for the sign and null byte. - int buf_remaining = buf_size - 1 - (s - buf); + return s - buf; +} +// minimal value expected for buf_size, to avoid checking everywhere for overflow +#define MIN_BUF_SIZE (MAX_MANTISSA_DIGITS + 10) + +int mp_format_float(mp_float_t f_entry, char *buf_entry, size_t buf_size, char fmt, int prec, char sign) { + assert(buf_size >= MIN_BUF_SIZE); + + // Handle sign + mp_float_t f = f_entry; + char *buf = buf_entry; + if (signbit(f_entry) && !isnan(f_entry)) { + f = -f; + sign = '-'; + } + if (sign) { + *buf++ = sign; + buf_size--; + } + + // Handle inf/nan + char uc = fmt & 0x20; { - char uc = fmt & 0x20; - if (fp_isinf(f)) { + char *s = buf; + if (isinf(f)) { *s++ = 'I' ^ uc; *s++ = 'N' ^ uc; *s++ = 'F' ^ uc; goto ret; - } else if (fp_isnan(f)) { + } else if (isnan(f)) { *s++ = 'N' ^ uc; *s++ = 'A' ^ uc; *s++ = 'N' ^ uc; ret: *s = '\0'; - return s - buf; + return s - buf_entry; } } + // Decode format character + int fmt_flags = (unsigned char)uc; // setup FMT_E_CASE, clear all other bits + char lofmt = (char)(fmt | 0x20); // fmt in lowercase + if (lofmt == 'f') { + fmt_flags |= FMT_MODE_F; + } else if (lofmt == 'g') { + fmt_flags |= FMT_MODE_G; + } else { + fmt_flags |= FMT_MODE_E; + } + + // When precision is unspecified, default to 6 if (prec < 0) { prec = 6; } - char e_char = 'E' | (fmt & 0x20); // e_char will match case of fmt - fmt |= 0x20; // Force fmt to be lowercase - char org_fmt = fmt; - if (fmt == 'g' && prec == 0) { - prec = 1; + // Use high precision for `repr`, but switch to exponent mode + // after 16 digits in any case to match CPython behaviour + int max_exp_zeros = (prec < (int)buf_size - 3 ? prec : (int)buf_size - 3); + if (prec == MP_FLOAT_REPR_PREC) { + prec = MAX_MANTISSA_DIGITS; + max_exp_zeros = 16; } - int e; - int dec = 0; - char e_sign = '\0'; - int num_digits = 0; - int signed_e = 0; - // Approximate power of 10 exponent from binary exponent. - // abs(e_guess) is lower bound on abs(power of 10 exponent). - int e_guess = (int)(fp_expval(f) * FPCONST(0.3010299956639812)); // 1/log2(10). - if (fp_iszero(f)) { - e = 0; - if (fmt == 'f') { - // Truncate precision to prevent buffer overflow - if (prec + 2 > buf_remaining) { - prec = buf_remaining - 2; - } - num_digits = prec + 1; - } else { - // Truncate precision to prevent buffer overflow - if (prec + 6 > buf_remaining) { - prec = buf_remaining - 6; - } - if (fmt == 'e') { - e_sign = '+'; - } + // Precompute the exact decimal exponent of f, such that + // abs(e) is lower bound on abs(power of 10 exponent). + int e = 0; + if (!fp_iszero(f)) { + // Approximate power of 10 exponent from binary exponent. + e = (int)(fp_expval(f) * MICROPY_FLOAT_CONST(0.3010299956639812)); // 1/log2(10). + int positive_exp = !fp_isless1(f); + mp_float_t u_base = (mp_float_t)mp_decimal_exp((mp_large_float_t)1.0, e + positive_exp); + while ((f >= u_base) == positive_exp) { + e += (positive_exp ? 1 : -1); + u_base = (mp_float_t)mp_decimal_exp((mp_large_float_t)1.0, e + positive_exp); } - } else if (fp_isless1(f)) { - FPTYPE f_entry = f; // Save f in case we go to 'f' format. - // Build negative exponent - e = -e_guess; - FPTYPE u_base = MICROPY_FLOAT_C_FUN(pow)(10, -e); - while (u_base > f) { - ++e; - u_base = MICROPY_FLOAT_C_FUN(pow)(10, -e); - } - // Normalize out the inferred unit. Use divide because - // pow(10, e) * pow(10, -e) is slightly < 1 for some e in float32 - // (e.g. print("%.12f" % ((1e13) * (1e-13)))) - f /= u_base; - - // If the user specified 'g' format, and e is <= 4, then we'll switch - // to the fixed format ('f') - - if (fmt == 'f' || (fmt == 'g' && e <= 4)) { - fmt = 'f'; - dec = 0; + } - if (org_fmt == 'g') { - prec += (e - 1); - } + // For 'e' format, prec is # digits after the decimal + // For 'f' format, prec is # digits after the decimal + // For 'g' format, prec is the max number of significant digits + // + // For 'e' & 'g' format, there will be a single digit before the decimal + // For 'f' format, zeros must be expanded instead of using an exponent. + // Make sure there is enough room in the buffer for them, or switch to format 'g'. + if ((fmt_flags & FMT_MODE_F) && e > 0) { + int req_size = e + prec + 2; + if (req_size > (int)buf_size) { + fmt_flags ^= FMT_MODE_F; + fmt_flags |= FMT_MODE_G; + prec++; + } + } - // truncate precision to prevent buffer overflow - if (prec + 2 > buf_remaining) { - prec = buf_remaining - 2; + // To work independently of the format, we precompute: + // - the max number of significant digits to produce + // - the number of leading zeros to prepend (mode f only) + // - the number of trailing zeros to append + int max_digits = prec; + int lead_zeros = 0; + int trail_zeros = 0; + if (fmt_flags & FMT_MODE_F) { + if (max_digits > (int)buf_size - 3) { + // cannot satisfy requested number of decimals given buf_size, sorry + max_digits = (int)buf_size - 3; + } + if (e < 0) { + if (max_digits > 2 && e < -2) { + // Insert explicit leading zeros + lead_zeros = (-e < max_digits ? -e : max_digits) - 2; + max_digits -= lead_zeros; + } else { + max_digits++; } - - num_digits = prec; - signed_e = 0; - f = f_entry; - ++num_digits; } else { - // For e & g formats, we'll be printing the exponent, so set the - // sign. - e_sign = '-'; - dec = 0; - - if (prec > (buf_remaining - FPMIN_BUF_SIZE)) { - prec = buf_remaining - FPMIN_BUF_SIZE; - if (fmt == 'g') { - prec++; - } - } - signed_e = -e; + max_digits += e + 1; } } else { - // Build positive exponent. - // We don't modify f at this point to avoid inaccuracies from - // scaling it. Instead, we find the product of powers of 10 - // that is not greater than it, and use that to start the - // mantissa. - e = e_guess; - FPTYPE next_u = MICROPY_FLOAT_C_FUN(pow)(10, e + 1); - while (f >= next_u) { - ++e; - next_u = MICROPY_FLOAT_C_FUN(pow)(10, e + 1); + if (!(fmt_flags & FMT_MODE_G) || max_digits == 0) { + max_digits++; } + } + if (max_digits > MAX_MANTISSA_DIGITS) { + // use trailing zeros to avoid overflowing the mantissa + trail_zeros = max_digits - MAX_MANTISSA_DIGITS; + max_digits = MAX_MANTISSA_DIGITS; + } + int overhead = (fmt_flags & FMT_MODE_F ? 3 : FPMIN_BUF_SIZE + 1); + if (trail_zeros > (int)buf_size - max_digits - overhead) { + // cannot satisfy requested number of decimals given buf_size, sorry + trail_zeros = (int)buf_size - max_digits - overhead; + } - // If the user specified fixed format (fmt == 'f') and e makes the - // number too big to fit into the available buffer, then we'll - // switch to the 'e' format. - - if (fmt == 'f') { - if (e >= buf_remaining) { - fmt = 'e'; - } else if ((e + prec + 2) > buf_remaining) { - prec = buf_remaining - e - 2; - if (prec < 0) { - // This means no decimal point, so we can add one back - // for the decimal. - prec++; - } - } - } - if (fmt == 'e' && prec > (buf_remaining - FPMIN_BUF_SIZE)) { - prec = buf_remaining - FPMIN_BUF_SIZE; - } - if (fmt == 'g') { - // Truncate precision to prevent buffer overflow - if (prec + (FPMIN_BUF_SIZE - 1) > buf_remaining) { - prec = buf_remaining - (FPMIN_BUF_SIZE - 1); - } - } - // If the user specified 'g' format, and e is < prec, then we'll switch - // to the fixed format. + // When the caller asks for more precision than available for sure, + // Look for a shorter (rounded) representation first, and only dig + // into more digits if there is no short representation. + int num_digits = (SAFE_MANTISSA_DIGITS < max_digits ? SAFE_MANTISSA_DIGITS : max_digits); +try_again: + ; - if (fmt == 'g' && e < prec) { - fmt = 'f'; - prec -= (e + 1); - } - if (fmt == 'f') { - dec = e; - num_digits = prec + e + 1; + char *s = buf; + int extra_zeros = trail_zeros + (max_digits - num_digits); + int decexp; + int dec = 0; + + if (fp_iszero(f)) { + // no need for scaling 0.0 + decexp = 0; + } else if (fmt_flags & FMT_MODE_F) { + decexp = num_digits - 1; + if (e < 0) { + // Negative exponent: we keep a single leading zero in the mantissa, + // as using more would waste precious digits needed for accuracy. + if (lead_zeros > 0) { + // We are using leading zeros + s = mp_prepend_zeros(s, lead_zeros); + decexp += lead_zeros + 1; + dec = 255; // no decimal dot + } else { + // Small negative exponent, work directly on the mantissa + dec = 0; + } } else { - e_sign = '+'; + // Positive exponent: we will add trailing zeros separately + decexp -= e; + dec = e; } - signed_e = e; + } else { + decexp = num_digits - e - 1; } - if (prec < 0) { - // This can happen when the prec is trimmed to prevent buffer overflow - prec = 0; + DEBUG_PRINTF("input=%.19g e=%d fmt=%c max_d=%d num_d=%d decexp=%d dec=%d l0=%d r0=%d\n", + (double)f, e, lofmt, max_digits, num_digits, decexp, dec, lead_zeros, extra_zeros); + + // At this point, + // - buf points to beginning of output buffer for the unsigned representation + // - num_digits == the number of mantissa digits to add + // - (dec + 1) == the number of digits to print before adding a decimal point + // - decexp == the power of 10 exponent to apply to f to get the decimal mantissa + // - e == the power of 10 exponent to append ('e' or 'g' format) + mp_large_float_uint_t mantissa_cap = 10; + for (int n = 1; n < num_digits; n++) { + mantissa_cap *= 10; } - // At this point e contains the absolute value of the power of 10 exponent. - // (dec + 1) == the number of dgits before the decimal. - - // For e, prec is # digits after the decimal - // For f, prec is # digits after the decimal - // For g, prec is the max number of significant digits - // - // For e & g there will be a single digit before the decimal - // for f there will be e digits before the decimal - - if (fmt == 'e') { - num_digits = prec + 1; - } else if (fmt == 'g') { - if (prec == 0) { - prec = 1; + // Build the decimal mantissa into a large uint + mp_large_float_uint_t mantissa = 1; + if (sizeof(mp_large_float_t) == sizeof(mp_float_t) && num_digits > SAFE_MANTISSA_DIGITS && decexp > 1) { + // if we don't have large floats, use integer multiply to produce the last digits + if (num_digits > SAFE_MANTISSA_DIGITS + 1 && decexp > 2) { + mantissa = 100; + decexp -= 2; + } else { + mantissa = 10; + decexp -= 1; } - num_digits = prec; } - - int d = 0; - for (int digit_index = signed_e; num_digits >= 0; --digit_index) { - FPTYPE u_base = FPCONST(1.0); - if (digit_index > 0) { - // Generate 10^digit_index for positive digit_index. - u_base = MICROPY_FLOAT_C_FUN(pow)(10, digit_index); - } - for (d = 0; d < 9; ++d) { - if (f < u_base) { - break; - } - f -= u_base; - } - // We calculate one more digit than we display, to use in rounding - // below. So only emit the digit if it's one that we display. - if (num_digits > 0) { - // Emit this number (the leading digit). - *s++ = '0' + d; - if (dec == 0 && prec > 0) { - *s++ = '.'; - } - } - --dec; - --num_digits; - if (digit_index <= 0) { - // Once we get below 1.0, we scale up f instead of calculating - // negative powers of 10 in u_base. This provides better - // renditions of exact decimals like 1/16 etc. - f *= FPCONST(10.0); + mp_large_float_t mantissa_f = mp_decimal_exp((mp_large_float_t)f, decexp); + mantissa *= (mp_large_float_uint_t)(mantissa_f + (mp_large_float_t)0.5); + DEBUG_PRINTF("input=%.19g fmt=%c num_digits=%d dec=%d mantissa=" MP_FFUINT_FMT " r0=%d\n", (double)f, lofmt, num_digits, dec, mantissa, extra_zeros); + + // Finally convert the decimal mantissa to a floating-point string, according to formatting rules + int reprlen = mp_format_mantissa(mantissa, mantissa_cap, buf, s, num_digits, max_exp_zeros, extra_zeros, dec, e, fmt_flags); + assert(reprlen + 1 <= (int)buf_size); + + #if MICROPY_FLOAT_FORMAT_IMPL != MICROPY_FLOAT_FORMAT_IMPL_APPROX + + if (num_digits < max_digits) { + // The initial precision might not be sufficient for an exact representation + // for all numbers. If the result is not exact, restart using next precision. + // parse the resulting number and compare against the original + mp_float_t check; + DEBUG_PRINTF("input=%.19g, compare to float('%s')\n", (double)f, buf); + mp_parse_float_internal(buf, reprlen, &check); + if (!fp_equal(check, f)) { + num_digits++; + DEBUG_PRINTF("Not perfect, retry using more digits (%d)\n", num_digits); + goto try_again; } } - // Rounding. If the next digit to print is >= 5, round up. - if (d >= 5) { - char *rs = s; - rs--; - while (1) { - if (*rs == '.') { - rs--; - continue; - } - if (*rs < '0' || *rs > '9') { - // + or - - rs++; // So we sit on the digit to the right of the sign - break; + + #else + + // The initial decimal mantissa might not have been be completely accurate due + // to the previous loating point operations. The best way to verify this is to + // parse the resulting number and compare against the original + mp_float_t check; + DEBUG_PRINTF("input=%.19g, compare to float('%s')\n", (double)f, buf); + mp_parse_float_internal(buf, reprlen, &check); + mp_float_t diff = fp_diff(check, f); + mp_float_t best_diff = diff; + mp_large_float_uint_t best_mantissa = mantissa; + + if (fp_iszero(diff)) { + // we have a perfect match + DEBUG_PRINTF(MP_FFUINT_FMT ": perfect match (direct)\n", mantissa); + } else { + // In order to get the best possible representation, we will perform a + // dichotomic search for a reversible representation. + // This will also provide optimal rounding on the fly. + unsigned err_range = 1; + if (num_digits > SAFE_MANTISSA_DIGITS) { + err_range <<= 3 * (num_digits - SAFE_MANTISSA_DIGITS); + } + int maxruns = 3 + 3 * (MAX_MANTISSA_DIGITS - SAFE_MANTISSA_DIGITS); + while (maxruns-- > 0) { + // update mantissa according to dichotomic search + if (signbit(diff)) { + mantissa += err_range; + } else { + // mantissa is expected to always have more significant digits than err_range + assert(mantissa >= err_range); + mantissa -= err_range; } - if (*rs < '9') { - (*rs)++; + // retry conversion + reprlen = mp_format_mantissa(mantissa, mantissa_cap, buf, s, num_digits, max_exp_zeros, extra_zeros, dec, e, fmt_flags); + assert(reprlen + 1 <= (int)buf_size); + DEBUG_PRINTF("input=%.19g, compare to float('%s')\n", (double)f, buf); + mp_parse_float_internal(buf, reprlen, &check); + DEBUG_PRINTF("check=%.19g num_digits=%d e=%d mantissa=" MP_FFUINT_FMT "\n", (double)check, num_digits, e, mantissa); + diff = fp_diff(check, f); + if (fp_iszero(diff)) { + // we have a perfect match + DEBUG_PRINTF(MP_FFUINT_FMT ": perfect match\n", mantissa); break; } - *rs = '0'; - if (rs == buf) { - break; + // keep track of our best estimate + mp_float_t delta = MICROPY_FLOAT_C_FUN(fabs)(diff) - MICROPY_FLOAT_C_FUN(fabs)(best_diff); + if (signbit(delta) || (fp_iszero(delta) && !(mantissa % 10u))) { + best_diff = diff; + best_mantissa = mantissa; } - rs--; - } - if (*rs == '0') { - // We need to insert a 1 - if (rs[1] == '.' && fmt != 'f') { - // We're going to round 9.99 to 10.00 - // Move the decimal point - rs[0] = '.'; - rs[1] = '0'; - if (e_sign == '-') { - e--; - if (e == 0) { - e_sign = '+'; - } - } else { - e++; - } + // string repr is not perfect: continue a dichotomic improvement + DEBUG_PRINTF(MP_FFUINT_FMT ": %.19g, err_range=%d\n", mantissa, (double)check, err_range); + if (err_range > 1) { + err_range >>= 1; } else { - // Need at extra digit at the end to make room for the leading '1' - // but if we're at the buffer size limit, just drop the final digit. - if ((size_t)(s + 1 - buf) < buf_size) { - s++; + // We have tried all possible mantissa, without finding a reversible repr. + // Check if we have an alternate precision to try. + if (num_digits < max_digits) { + num_digits++; + DEBUG_PRINTF("Failed to find a perfect match, try with more digits (%d)\n", num_digits); + goto try_again; } + // Otherwise, keep the closest one, which is either the first one or the last one. + if (mantissa == best_mantissa) { + // Last guess is the best one + DEBUG_PRINTF(MP_FFUINT_FMT ": last guess was the best one\n", mantissa); + } else { + // We had a better guess earlier + DEBUG_PRINTF(MP_FFUINT_FMT ": use best guess\n", mantissa); + reprlen = mp_format_mantissa(best_mantissa, mantissa_cap, buf, s, num_digits, max_exp_zeros, extra_zeros, dec, e, fmt_flags); + } + break; } - char *ss = s; - while (ss > rs) { - *ss = ss[-1]; - ss--; - } - *rs = '1'; } } + #endif - // verify that we did not overrun the input buffer so far - assert((size_t)(s + 1 - buf) <= buf_size); - - if (org_fmt == 'g' && prec > 0) { - // Remove trailing zeros and a trailing decimal point - while (s[-1] == '0') { - s--; - } - if (s[-1] == '.') { - s--; - } - } - // Append the exponent - if (e_sign) { - *s++ = e_char; - *s++ = e_sign; - if (FPMIN_BUF_SIZE == 7 && e >= 100) { - *s++ = '0' + (e / 100); - } - *s++ = '0' + ((e / 10) % 10); - *s++ = '0' + (e % 10); - } - *s = '\0'; - - // verify that we did not overrun the input buffer - assert((size_t)(s + 1 - buf) <= buf_size); - - return s - buf; + return buf + reprlen - buf_entry; } #endif // MICROPY_FLOAT_IMPL != MICROPY_FLOAT_IMPL_NONE -- cgit v1.2.3