diff options
Diffstat (limited to 'py')
-rw-r--r-- | py/formatfloat.c | 757 | ||||
-rw-r--r-- | py/formatfloat.h | 1 | ||||
-rw-r--r-- | py/misc.h | 19 | ||||
-rw-r--r-- | py/mpconfig.h | 21 | ||||
-rw-r--r-- | py/mpprint.c | 12 | ||||
-rw-r--r-- | py/mpprint.h | 1 | ||||
-rw-r--r-- | py/objcomplex.c | 31 | ||||
-rw-r--r-- | py/objfloat.c | 18 | ||||
-rw-r--r-- | py/parsenum.c | 50 | ||||
-rw-r--r-- | py/parsenum.h | 2 |
10 files changed, 557 insertions, 355 deletions
diff --git a/py/formatfloat.c b/py/formatfloat.c index 7cd471018..1ea34f84b 100644 --- a/py/formatfloat.c +++ b/py/formatfloat.c @@ -33,392 +33,537 @@ #include <stdint.h> #include <math.h> #include "py/formatfloat.h" +#include "py/parsenum.h" /*********************************************************************** Routine for converting a arbitrary floating point number into a string. - The code in this function was inspired from Fred Bayer's pdouble.c. - Since pdouble.c was released as Public Domain, I'm releasing this - code as public domain as well. + The code in this function was inspired from Dave Hylands's previous + version, which was itself inspired from Fred Bayer's pdouble.c. The original code can be found in https://github.com/dhylands/format-float - Dave Hylands - ***********************************************************************/ -#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT -// 1 sign bit, 8 exponent bits, and 23 mantissa bits. -// exponent values 0 and 255 are reserved, exponent can be 1 to 254. -// exponent is stored with a bias of 127. -// The min and max floats are on the order of 1x10^37 and 1x10^-37 - -#define FPTYPE float -#define FPCONST(x) x##F -#define FPROUND_TO_ONE 0.9999995F -#define FPDECEXP 32 -#define FPMIN_BUF_SIZE 6 // +9e+99 +// Float formatting debug code is intended for use in ports/unix only, +// as it uses the libc float printing function as a reference. +#define DEBUG_FLOAT_FORMATTING 0 + +#if DEBUG_FLOAT_FORMATTING +#define DEBUG_PRINTF(...) fprintf(stderr, __VA_ARGS__) +#else +#define DEBUG_PRINTF(...) +#endif + +#if MICROPY_FLOAT_FORMAT_IMPL == MICROPY_FLOAT_FORMAT_IMPL_EXACT || MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE +#define MP_FFUINT_FMT "%lu" +#else +#define MP_FFUINT_FMT "%u" +#endif + +static inline int fp_expval(mp_float_t x) { + mp_float_union_t fb = { x }; + return (int)fb.p.exp - MP_FLOAT_EXP_OFFSET; +} -#define FLT_SIGN_MASK 0x80000000 +#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE -static inline int fp_signbit(float x) { - mp_float_union_t fb = {x}; - return fb.i & FLT_SIGN_MASK; +static inline int fp_isless1(mp_float_t x) { + return x < 1.0; } -#define fp_isnan(x) isnan(x) -#define fp_isinf(x) isinf(x) -static inline int fp_iszero(float x) { - mp_float_union_t fb = {x}; - return fb.i == 0; + +static inline int fp_iszero(mp_float_t x) { + return x == 0.0; } -static inline int fp_isless1(float x) { - mp_float_union_t fb = {x}; + +#if MICROPY_FLOAT_FORMAT_IMPL != MICROPY_FLOAT_FORMAT_IMPL_APPROX +static inline int fp_equal(mp_float_t x, mp_float_t y) { + return x == y; +} +#else +static inline mp_float_t fp_diff(mp_float_t x, mp_float_t y) { + return x - y; +} +#endif + +#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT + +// The functions below are roughly equivalent to the ones above, +// but they are optimized to reduce code footprint by skipping +// handling for special values such as nan, inf, +/-0.0 +// for ports where FP support is done in software. +// +// They also take into account lost bits of REPR_C as needed. + +static inline int fp_isless1(mp_float_t x) { + mp_float_union_t fb = { x }; return fb.i < 0x3f800000; } -#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE +static inline int fp_iszero(mp_float_t x) { + mp_float_union_t x_check = { x }; + return !x_check.i; // this is valid for REPR_C as well +} -#define FPTYPE double -#define FPCONST(x) x -#define FPROUND_TO_ONE 0.999999999995 -#define FPDECEXP 256 -#define FPMIN_BUF_SIZE 7 // +9e+199 -#define fp_signbit(x) signbit(x) -#define fp_isnan(x) isnan(x) -#define fp_isinf(x) isinf(x) -#define fp_iszero(x) (x == 0) -#define fp_isless1(x) (x < 1.0) +#if MICROPY_FLOAT_FORMAT_IMPL != MICROPY_FLOAT_FORMAT_IMPL_APPROX +static inline int fp_equal(mp_float_t x, mp_float_t y) { + mp_float_union_t x_check = { x }; + mp_float_union_t y_check = { y }; + #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C + return (x_check.i & ~3) == (y_check.i & ~3); + #else + return x_check.i == y_check.i; + #endif +} +#else +static inline mp_float_t fp_diff(mp_float_t x, mp_float_t y) { + #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C + mp_float_union_t x_check = { x }; + mp_float_union_t y_check = { y }; + x_check.i &= ~3; + y_check.i &= ~3; + return x_check.f - y_check.f; + #else + return x - y; + #endif +} +#endif -#endif // MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT/DOUBLE +#endif -static inline int fp_expval(FPTYPE x) { - mp_float_union_t fb = {x}; - return (int)((fb.i >> MP_FLOAT_FRAC_BITS) & (~(0xFFFFFFFF << MP_FLOAT_EXP_BITS))) - MP_FLOAT_EXP_OFFSET; +#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT +#define FPMIN_BUF_SIZE 6 // +9e+99 +#define MAX_MANTISSA_DIGITS (9) +#define SAFE_MANTISSA_DIGITS (6) +#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE +#define FPMIN_BUF_SIZE 7 // +9e+199 +#define MAX_MANTISSA_DIGITS (19) +#define SAFE_MANTISSA_DIGITS (16) +#endif + +// Internal formatting flags +#define FMT_MODE_E 0x01 // render using scientific notation (%e) +#define FMT_MODE_G 0x02 // render using general format (%g) +#define FMT_MODE_F 0x04 // render using using expanded fixed-point format (%f) +#define FMT_E_CASE 0x20 // don't change this value (used for case conversion!) + +static char *mp_prepend_zeros(char *s, int cnt) { + *s++ = '0'; + *s++ = '.'; + while (cnt > 0) { + *s++ = '0'; + cnt--; + } + return s; } -int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, char sign) { +// Helper to convert a decimal mantissa (provided as an mp_large_float_uint_t) to string +static int mp_format_mantissa(mp_large_float_uint_t mantissa, mp_large_float_uint_t mantissa_cap, char *buf, char *s, + int num_digits, int max_exp_zeros, int trailing_zeros, int dec, int e, int fmt_flags) { - char *s = buf; + DEBUG_PRINTF("mantissa=" MP_FFUINT_FMT " exp=%d (cap=" MP_FFUINT_FMT "):\n", mantissa, e, mantissa_cap); - if (buf_size <= FPMIN_BUF_SIZE) { - // FPMIN_BUF_SIZE is the minimum size needed to store any FP number. - // If the buffer does not have enough room for this (plus null terminator) - // then don't try to format the float. + if (mantissa) { + // If rounding/searching created an extra digit or removed too many, fix mantissa first + if (mantissa >= mantissa_cap) { + if (fmt_flags & FMT_MODE_F) { + assert(e >= 0); + num_digits++; + dec++; + } else { + mantissa /= 10; + e++; + } + } + } - if (buf_size >= 2) { - *s++ = '?'; + // When 'g' format is used, replace small exponents by explicit zeros + if ((fmt_flags & FMT_MODE_G) && e != 0) { + if (e >= 0) { + // If 0 < e < max_exp_zeros, expand positive exponent into trailing zeros + if (e < max_exp_zeros) { + dec += e; + if (dec >= num_digits) { + trailing_zeros = dec - (num_digits - 1); + } + e = 0; + } + } else { + // If -4 <= e < 0, expand negative exponent without losing significant digits + if (e >= -4) { + int cnt = 0; + while (e < 0 && !(mantissa % 10)) { + mantissa /= 10; + cnt++; + e++; + } + num_digits -= cnt; + s = mp_prepend_zeros(s, cnt - e - 1); + dec = 255; + e = 0; + } } - if (buf_size >= 1) { - *s = '\0'; + } + + // Convert the integer mantissa to string + for (int digit = num_digits - 1; digit >= 0; digit--) { + int digit_ofs = (digit > dec ? digit + 1 : digit); + s[digit_ofs] = '0' + (int)(mantissa % 10); + mantissa /= 10; + } + int dot = (dec >= 255); + if (dec + 1 < num_digits) { + dot = 1; + s++; + s[dec] = '.'; + } + s += num_digits; + #if DEBUG_FLOAT_FORMATTING + *s = 0; + DEBUG_PRINTF(" = %s exp=%d num_digits=%d zeros=%d dec=%d\n", buf, e, num_digits, trailing_zeros, dec); + #endif + + // Append or remove trailing zeros, as required by format + if (trailing_zeros) { + dec -= num_digits - 1; + while (trailing_zeros--) { + if (!dec--) { + *s++ = '.'; + dot = 1; + } + *s++ = '0'; } - return buf_size >= 2; } - if (fp_signbit(f) && !fp_isnan(f)) { - *s++ = '-'; - f = -f; - } else { - if (sign) { - *s++ = sign; + if (fmt_flags & FMT_MODE_G) { + // 'g' format requires to remove trailing zeros after decimal point + if (dot) { + while (s[-1] == '0') { + s--; + } + if (s[-1] == '.') { + s--; + } + } + } + + // Append the exponent if needed + if (((e != 0) || (fmt_flags & FMT_MODE_E)) && !(fmt_flags & FMT_MODE_F)) { + *s++ = 'E' | (fmt_flags & FMT_E_CASE); + if (e >= 0) { + *s++ = '+'; + } else { + *s++ = '-'; + e = -e; } + if (e >= 100) { + *s++ = '0' + (e / 100); + } + *s++ = '0' + ((e / 10) % 10); + *s++ = '0' + (e % 10); } + *s = '\0'; + DEBUG_PRINTF(" ===> %s\n", buf); - // buf_remaining contains bytes available for digits and exponent. - // It is buf_size minus room for the sign and null byte. - int buf_remaining = buf_size - 1 - (s - buf); + return s - buf; +} +// minimal value expected for buf_size, to avoid checking everywhere for overflow +#define MIN_BUF_SIZE (MAX_MANTISSA_DIGITS + 10) + +int mp_format_float(mp_float_t f_entry, char *buf_entry, size_t buf_size, char fmt, int prec, char sign) { + assert(buf_size >= MIN_BUF_SIZE); + + // Handle sign + mp_float_t f = f_entry; + char *buf = buf_entry; + if (signbit(f_entry) && !isnan(f_entry)) { + f = -f; + sign = '-'; + } + if (sign) { + *buf++ = sign; + buf_size--; + } + + // Handle inf/nan + char uc = fmt & 0x20; { - char uc = fmt & 0x20; - if (fp_isinf(f)) { + char *s = buf; + if (isinf(f)) { *s++ = 'I' ^ uc; *s++ = 'N' ^ uc; *s++ = 'F' ^ uc; goto ret; - } else if (fp_isnan(f)) { + } else if (isnan(f)) { *s++ = 'N' ^ uc; *s++ = 'A' ^ uc; *s++ = 'N' ^ uc; ret: *s = '\0'; - return s - buf; + return s - buf_entry; } } + // Decode format character + int fmt_flags = (unsigned char)uc; // setup FMT_E_CASE, clear all other bits + char lofmt = (char)(fmt | 0x20); // fmt in lowercase + if (lofmt == 'f') { + fmt_flags |= FMT_MODE_F; + } else if (lofmt == 'g') { + fmt_flags |= FMT_MODE_G; + } else { + fmt_flags |= FMT_MODE_E; + } + + // When precision is unspecified, default to 6 if (prec < 0) { prec = 6; } - char e_char = 'E' | (fmt & 0x20); // e_char will match case of fmt - fmt |= 0x20; // Force fmt to be lowercase - char org_fmt = fmt; - if (fmt == 'g' && prec == 0) { - prec = 1; + // Use high precision for `repr`, but switch to exponent mode + // after 16 digits in any case to match CPython behaviour + int max_exp_zeros = (prec < (int)buf_size - 3 ? prec : (int)buf_size - 3); + if (prec == MP_FLOAT_REPR_PREC) { + prec = MAX_MANTISSA_DIGITS; + max_exp_zeros = 16; } - int e; - int dec = 0; - char e_sign = '\0'; - int num_digits = 0; - int signed_e = 0; - // Approximate power of 10 exponent from binary exponent. - // abs(e_guess) is lower bound on abs(power of 10 exponent). - int e_guess = (int)(fp_expval(f) * FPCONST(0.3010299956639812)); // 1/log2(10). - if (fp_iszero(f)) { - e = 0; - if (fmt == 'f') { - // Truncate precision to prevent buffer overflow - if (prec + 2 > buf_remaining) { - prec = buf_remaining - 2; - } - num_digits = prec + 1; - } else { - // Truncate precision to prevent buffer overflow - if (prec + 6 > buf_remaining) { - prec = buf_remaining - 6; - } - if (fmt == 'e') { - e_sign = '+'; - } + // Precompute the exact decimal exponent of f, such that + // abs(e) is lower bound on abs(power of 10 exponent). + int e = 0; + if (!fp_iszero(f)) { + // Approximate power of 10 exponent from binary exponent. + e = (int)(fp_expval(f) * MICROPY_FLOAT_CONST(0.3010299956639812)); // 1/log2(10). + int positive_exp = !fp_isless1(f); + mp_float_t u_base = (mp_float_t)mp_decimal_exp((mp_large_float_t)1.0, e + positive_exp); + while ((f >= u_base) == positive_exp) { + e += (positive_exp ? 1 : -1); + u_base = (mp_float_t)mp_decimal_exp((mp_large_float_t)1.0, e + positive_exp); } - } else if (fp_isless1(f)) { - FPTYPE f_entry = f; // Save f in case we go to 'f' format. - // Build negative exponent - e = -e_guess; - FPTYPE u_base = MICROPY_FLOAT_C_FUN(pow)(10, -e); - while (u_base > f) { - ++e; - u_base = MICROPY_FLOAT_C_FUN(pow)(10, -e); - } - // Normalize out the inferred unit. Use divide because - // pow(10, e) * pow(10, -e) is slightly < 1 for some e in float32 - // (e.g. print("%.12f" % ((1e13) * (1e-13)))) - f /= u_base; - - // If the user specified 'g' format, and e is <= 4, then we'll switch - // to the fixed format ('f') - - if (fmt == 'f' || (fmt == 'g' && e <= 4)) { - fmt = 'f'; - dec = 0; + } - if (org_fmt == 'g') { - prec += (e - 1); - } + // For 'e' format, prec is # digits after the decimal + // For 'f' format, prec is # digits after the decimal + // For 'g' format, prec is the max number of significant digits + // + // For 'e' & 'g' format, there will be a single digit before the decimal + // For 'f' format, zeros must be expanded instead of using an exponent. + // Make sure there is enough room in the buffer for them, or switch to format 'g'. + if ((fmt_flags & FMT_MODE_F) && e > 0) { + int req_size = e + prec + 2; + if (req_size > (int)buf_size) { + fmt_flags ^= FMT_MODE_F; + fmt_flags |= FMT_MODE_G; + prec++; + } + } - // truncate precision to prevent buffer overflow - if (prec + 2 > buf_remaining) { - prec = buf_remaining - 2; + // To work independently of the format, we precompute: + // - the max number of significant digits to produce + // - the number of leading zeros to prepend (mode f only) + // - the number of trailing zeros to append + int max_digits = prec; + int lead_zeros = 0; + int trail_zeros = 0; + if (fmt_flags & FMT_MODE_F) { + if (max_digits > (int)buf_size - 3) { + // cannot satisfy requested number of decimals given buf_size, sorry + max_digits = (int)buf_size - 3; + } + if (e < 0) { + if (max_digits > 2 && e < -2) { + // Insert explicit leading zeros + lead_zeros = (-e < max_digits ? -e : max_digits) - 2; + max_digits -= lead_zeros; + } else { + max_digits++; } - - num_digits = prec; - signed_e = 0; - f = f_entry; - ++num_digits; } else { - // For e & g formats, we'll be printing the exponent, so set the - // sign. - e_sign = '-'; - dec = 0; - - if (prec > (buf_remaining - FPMIN_BUF_SIZE)) { - prec = buf_remaining - FPMIN_BUF_SIZE; - if (fmt == 'g') { - prec++; - } - } - signed_e = -e; + max_digits += e + 1; } } else { - // Build positive exponent. - // We don't modify f at this point to avoid inaccuracies from - // scaling it. Instead, we find the product of powers of 10 - // that is not greater than it, and use that to start the - // mantissa. - e = e_guess; - FPTYPE next_u = MICROPY_FLOAT_C_FUN(pow)(10, e + 1); - while (f >= next_u) { - ++e; - next_u = MICROPY_FLOAT_C_FUN(pow)(10, e + 1); + if (!(fmt_flags & FMT_MODE_G) || max_digits == 0) { + max_digits++; } + } + if (max_digits > MAX_MANTISSA_DIGITS) { + // use trailing zeros to avoid overflowing the mantissa + trail_zeros = max_digits - MAX_MANTISSA_DIGITS; + max_digits = MAX_MANTISSA_DIGITS; + } + int overhead = (fmt_flags & FMT_MODE_F ? 3 : FPMIN_BUF_SIZE + 1); + if (trail_zeros > (int)buf_size - max_digits - overhead) { + // cannot satisfy requested number of decimals given buf_size, sorry + trail_zeros = (int)buf_size - max_digits - overhead; + } - // If the user specified fixed format (fmt == 'f') and e makes the - // number too big to fit into the available buffer, then we'll - // switch to the 'e' format. - - if (fmt == 'f') { - if (e >= buf_remaining) { - fmt = 'e'; - } else if ((e + prec + 2) > buf_remaining) { - prec = buf_remaining - e - 2; - if (prec < 0) { - // This means no decimal point, so we can add one back - // for the decimal. - prec++; - } - } - } - if (fmt == 'e' && prec > (buf_remaining - FPMIN_BUF_SIZE)) { - prec = buf_remaining - FPMIN_BUF_SIZE; - } - if (fmt == 'g') { - // Truncate precision to prevent buffer overflow - if (prec + (FPMIN_BUF_SIZE - 1) > buf_remaining) { - prec = buf_remaining - (FPMIN_BUF_SIZE - 1); - } - } - // If the user specified 'g' format, and e is < prec, then we'll switch - // to the fixed format. + // When the caller asks for more precision than available for sure, + // Look for a shorter (rounded) representation first, and only dig + // into more digits if there is no short representation. + int num_digits = (SAFE_MANTISSA_DIGITS < max_digits ? SAFE_MANTISSA_DIGITS : max_digits); +try_again: + ; - if (fmt == 'g' && e < prec) { - fmt = 'f'; - prec -= (e + 1); - } - if (fmt == 'f') { - dec = e; - num_digits = prec + e + 1; + char *s = buf; + int extra_zeros = trail_zeros + (max_digits - num_digits); + int decexp; + int dec = 0; + + if (fp_iszero(f)) { + // no need for scaling 0.0 + decexp = 0; + } else if (fmt_flags & FMT_MODE_F) { + decexp = num_digits - 1; + if (e < 0) { + // Negative exponent: we keep a single leading zero in the mantissa, + // as using more would waste precious digits needed for accuracy. + if (lead_zeros > 0) { + // We are using leading zeros + s = mp_prepend_zeros(s, lead_zeros); + decexp += lead_zeros + 1; + dec = 255; // no decimal dot + } else { + // Small negative exponent, work directly on the mantissa + dec = 0; + } } else { - e_sign = '+'; + // Positive exponent: we will add trailing zeros separately + decexp -= e; + dec = e; } - signed_e = e; + } else { + decexp = num_digits - e - 1; } - if (prec < 0) { - // This can happen when the prec is trimmed to prevent buffer overflow - prec = 0; + DEBUG_PRINTF("input=%.19g e=%d fmt=%c max_d=%d num_d=%d decexp=%d dec=%d l0=%d r0=%d\n", + (double)f, e, lofmt, max_digits, num_digits, decexp, dec, lead_zeros, extra_zeros); + + // At this point, + // - buf points to beginning of output buffer for the unsigned representation + // - num_digits == the number of mantissa digits to add + // - (dec + 1) == the number of digits to print before adding a decimal point + // - decexp == the power of 10 exponent to apply to f to get the decimal mantissa + // - e == the power of 10 exponent to append ('e' or 'g' format) + mp_large_float_uint_t mantissa_cap = 10; + for (int n = 1; n < num_digits; n++) { + mantissa_cap *= 10; } - // At this point e contains the absolute value of the power of 10 exponent. - // (dec + 1) == the number of dgits before the decimal. - - // For e, prec is # digits after the decimal - // For f, prec is # digits after the decimal - // For g, prec is the max number of significant digits - // - // For e & g there will be a single digit before the decimal - // for f there will be e digits before the decimal - - if (fmt == 'e') { - num_digits = prec + 1; - } else if (fmt == 'g') { - if (prec == 0) { - prec = 1; + // Build the decimal mantissa into a large uint + mp_large_float_uint_t mantissa = 1; + if (sizeof(mp_large_float_t) == sizeof(mp_float_t) && num_digits > SAFE_MANTISSA_DIGITS && decexp > 1) { + // if we don't have large floats, use integer multiply to produce the last digits + if (num_digits > SAFE_MANTISSA_DIGITS + 1 && decexp > 2) { + mantissa = 100; + decexp -= 2; + } else { + mantissa = 10; + decexp -= 1; } - num_digits = prec; } - - int d = 0; - for (int digit_index = signed_e; num_digits >= 0; --digit_index) { - FPTYPE u_base = FPCONST(1.0); - if (digit_index > 0) { - // Generate 10^digit_index for positive digit_index. - u_base = MICROPY_FLOAT_C_FUN(pow)(10, digit_index); - } - for (d = 0; d < 9; ++d) { - if (f < u_base) { - break; - } - f -= u_base; - } - // We calculate one more digit than we display, to use in rounding - // below. So only emit the digit if it's one that we display. - if (num_digits > 0) { - // Emit this number (the leading digit). - *s++ = '0' + d; - if (dec == 0 && prec > 0) { - *s++ = '.'; - } - } - --dec; - --num_digits; - if (digit_index <= 0) { - // Once we get below 1.0, we scale up f instead of calculating - // negative powers of 10 in u_base. This provides better - // renditions of exact decimals like 1/16 etc. - f *= FPCONST(10.0); + mp_large_float_t mantissa_f = mp_decimal_exp((mp_large_float_t)f, decexp); + mantissa *= (mp_large_float_uint_t)(mantissa_f + (mp_large_float_t)0.5); + DEBUG_PRINTF("input=%.19g fmt=%c num_digits=%d dec=%d mantissa=" MP_FFUINT_FMT " r0=%d\n", (double)f, lofmt, num_digits, dec, mantissa, extra_zeros); + + // Finally convert the decimal mantissa to a floating-point string, according to formatting rules + int reprlen = mp_format_mantissa(mantissa, mantissa_cap, buf, s, num_digits, max_exp_zeros, extra_zeros, dec, e, fmt_flags); + assert(reprlen + 1 <= (int)buf_size); + + #if MICROPY_FLOAT_FORMAT_IMPL != MICROPY_FLOAT_FORMAT_IMPL_APPROX + + if (num_digits < max_digits) { + // The initial precision might not be sufficient for an exact representation + // for all numbers. If the result is not exact, restart using next precision. + // parse the resulting number and compare against the original + mp_float_t check; + DEBUG_PRINTF("input=%.19g, compare to float('%s')\n", (double)f, buf); + mp_parse_float_internal(buf, reprlen, &check); + if (!fp_equal(check, f)) { + num_digits++; + DEBUG_PRINTF("Not perfect, retry using more digits (%d)\n", num_digits); + goto try_again; } } - // Rounding. If the next digit to print is >= 5, round up. - if (d >= 5) { - char *rs = s; - rs--; - while (1) { - if (*rs == '.') { - rs--; - continue; - } - if (*rs < '0' || *rs > '9') { - // + or - - rs++; // So we sit on the digit to the right of the sign - break; + + #else + + // The initial decimal mantissa might not have been be completely accurate due + // to the previous loating point operations. The best way to verify this is to + // parse the resulting number and compare against the original + mp_float_t check; + DEBUG_PRINTF("input=%.19g, compare to float('%s')\n", (double)f, buf); + mp_parse_float_internal(buf, reprlen, &check); + mp_float_t diff = fp_diff(check, f); + mp_float_t best_diff = diff; + mp_large_float_uint_t best_mantissa = mantissa; + + if (fp_iszero(diff)) { + // we have a perfect match + DEBUG_PRINTF(MP_FFUINT_FMT ": perfect match (direct)\n", mantissa); + } else { + // In order to get the best possible representation, we will perform a + // dichotomic search for a reversible representation. + // This will also provide optimal rounding on the fly. + unsigned err_range = 1; + if (num_digits > SAFE_MANTISSA_DIGITS) { + err_range <<= 3 * (num_digits - SAFE_MANTISSA_DIGITS); + } + int maxruns = 3 + 3 * (MAX_MANTISSA_DIGITS - SAFE_MANTISSA_DIGITS); + while (maxruns-- > 0) { + // update mantissa according to dichotomic search + if (signbit(diff)) { + mantissa += err_range; + } else { + // mantissa is expected to always have more significant digits than err_range + assert(mantissa >= err_range); + mantissa -= err_range; } - if (*rs < '9') { - (*rs)++; + // retry conversion + reprlen = mp_format_mantissa(mantissa, mantissa_cap, buf, s, num_digits, max_exp_zeros, extra_zeros, dec, e, fmt_flags); + assert(reprlen + 1 <= (int)buf_size); + DEBUG_PRINTF("input=%.19g, compare to float('%s')\n", (double)f, buf); + mp_parse_float_internal(buf, reprlen, &check); + DEBUG_PRINTF("check=%.19g num_digits=%d e=%d mantissa=" MP_FFUINT_FMT "\n", (double)check, num_digits, e, mantissa); + diff = fp_diff(check, f); + if (fp_iszero(diff)) { + // we have a perfect match + DEBUG_PRINTF(MP_FFUINT_FMT ": perfect match\n", mantissa); break; } - *rs = '0'; - if (rs == buf) { - break; + // keep track of our best estimate + mp_float_t delta = MICROPY_FLOAT_C_FUN(fabs)(diff) - MICROPY_FLOAT_C_FUN(fabs)(best_diff); + if (signbit(delta) || (fp_iszero(delta) && !(mantissa % 10u))) { + best_diff = diff; + best_mantissa = mantissa; } - rs--; - } - if (*rs == '0') { - // We need to insert a 1 - if (rs[1] == '.' && fmt != 'f') { - // We're going to round 9.99 to 10.00 - // Move the decimal point - rs[0] = '.'; - rs[1] = '0'; - if (e_sign == '-') { - e--; - if (e == 0) { - e_sign = '+'; - } - } else { - e++; - } + // string repr is not perfect: continue a dichotomic improvement + DEBUG_PRINTF(MP_FFUINT_FMT ": %.19g, err_range=%d\n", mantissa, (double)check, err_range); + if (err_range > 1) { + err_range >>= 1; } else { - // Need at extra digit at the end to make room for the leading '1' - // but if we're at the buffer size limit, just drop the final digit. - if ((size_t)(s + 1 - buf) < buf_size) { - s++; + // We have tried all possible mantissa, without finding a reversible repr. + // Check if we have an alternate precision to try. + if (num_digits < max_digits) { + num_digits++; + DEBUG_PRINTF("Failed to find a perfect match, try with more digits (%d)\n", num_digits); + goto try_again; } + // Otherwise, keep the closest one, which is either the first one or the last one. + if (mantissa == best_mantissa) { + // Last guess is the best one + DEBUG_PRINTF(MP_FFUINT_FMT ": last guess was the best one\n", mantissa); + } else { + // We had a better guess earlier + DEBUG_PRINTF(MP_FFUINT_FMT ": use best guess\n", mantissa); + reprlen = mp_format_mantissa(best_mantissa, mantissa_cap, buf, s, num_digits, max_exp_zeros, extra_zeros, dec, e, fmt_flags); + } + break; } - char *ss = s; - while (ss > rs) { - *ss = ss[-1]; - ss--; - } - *rs = '1'; } } + #endif - // verify that we did not overrun the input buffer so far - assert((size_t)(s + 1 - buf) <= buf_size); - - if (org_fmt == 'g' && prec > 0) { - // Remove trailing zeros and a trailing decimal point - while (s[-1] == '0') { - s--; - } - if (s[-1] == '.') { - s--; - } - } - // Append the exponent - if (e_sign) { - *s++ = e_char; - *s++ = e_sign; - if (FPMIN_BUF_SIZE == 7 && e >= 100) { - *s++ = '0' + (e / 100); - } - *s++ = '0' + ((e / 10) % 10); - *s++ = '0' + (e % 10); - } - *s = '\0'; - - // verify that we did not overrun the input buffer - assert((size_t)(s + 1 - buf) <= buf_size); - - return s - buf; + return buf + reprlen - buf_entry; } #endif // MICROPY_FLOAT_IMPL != MICROPY_FLOAT_IMPL_NONE diff --git a/py/formatfloat.h b/py/formatfloat.h index 9a1643b4d..7b1414672 100644 --- a/py/formatfloat.h +++ b/py/formatfloat.h @@ -29,6 +29,7 @@ #include "py/mpconfig.h" #if MICROPY_PY_BUILTINS_FLOAT +#define MP_FLOAT_REPR_PREC (99) // magic `prec` value for optimal `repr` behaviour int mp_format_float(mp_float_t f, char *buf, size_t bufSize, char fmt, int prec, char sign); #endif @@ -277,6 +277,25 @@ typedef union _mp_float_union_t { mp_float_uint_t i; } mp_float_union_t; +#if MICROPY_FLOAT_FORMAT_IMPL == MICROPY_FLOAT_FORMAT_IMPL_EXACT + +#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT +// Exact float conversion requires using internally a bigger sort of floating point +typedef double mp_large_float_t; +#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE +typedef long double mp_large_float_t; +#endif +// Always use a 64 bit mantissa for formatting and parsing +typedef uint64_t mp_large_float_uint_t; + +#else // MICROPY_FLOAT_FORMAT_IMPL != MICROPY_FLOAT_FORMAT_IMPL_EXACT + +// No bigger floating points +typedef mp_float_t mp_large_float_t; +typedef mp_float_uint_t mp_large_float_uint_t; + +#endif + #endif // MICROPY_PY_BUILTINS_FLOAT /** ROM string compression *************/ diff --git a/py/mpconfig.h b/py/mpconfig.h index c316aa4b2..caa63fef3 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -861,6 +861,27 @@ typedef double mp_float_t; #define MICROPY_PY_BUILTINS_COMPLEX (MICROPY_PY_BUILTINS_FLOAT) #endif +// Float to string conversion implementations +// +// Note that the EXACT method is only available if the compiler supports +// floating points larger than mp_float_t: +// - with MICROPY_FLOAT_IMPL_FLOAT, the compiler needs to support `double` +// - with MICROPY_FLOAT_IMPL_DOUBLE, the compiler needs to support `long double` +// +#define MICROPY_FLOAT_FORMAT_IMPL_BASIC (0) // smallest code, but inexact +#define MICROPY_FLOAT_FORMAT_IMPL_APPROX (1) // slightly bigger, almost perfect +#define MICROPY_FLOAT_FORMAT_IMPL_EXACT (2) // bigger code, and 100% exact repr + +#ifndef MICROPY_FLOAT_FORMAT_IMPL +#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT +#define MICROPY_FLOAT_FORMAT_IMPL (MICROPY_FLOAT_FORMAT_IMPL_APPROX) +#elif defined(__SIZEOF_LONG_DOUBLE__) && __SIZEOF_LONG_DOUBLE__ > __SIZEOF_DOUBLE__ +#define MICROPY_FLOAT_FORMAT_IMPL (MICROPY_FLOAT_FORMAT_IMPL_EXACT) +#else +#define MICROPY_FLOAT_FORMAT_IMPL (MICROPY_FLOAT_FORMAT_IMPL_APPROX) +#endif +#endif + // Whether to use the native _Float16 for 16-bit float support #ifndef MICROPY_FLOAT_USE_NATIVE_FLT16 #ifdef __FLT16_MAX__ diff --git a/py/mpprint.c b/py/mpprint.c index f1d8bd0c5..bd7a25087 100644 --- a/py/mpprint.c +++ b/py/mpprint.c @@ -338,7 +338,7 @@ int mp_print_mp_int(const mp_print_t *print, mp_obj_t x, unsigned int base, int #if MICROPY_PY_BUILTINS_FLOAT int mp_print_float(const mp_print_t *print, mp_float_t f, char fmt, unsigned int flags, char fill, int width, int prec) { - char buf[32]; + char buf[36]; char sign = '\0'; int chrs = 0; @@ -349,11 +349,17 @@ int mp_print_float(const mp_print_t *print, mp_float_t f, char fmt, unsigned int sign = ' '; } - int len = mp_format_float(f, buf, sizeof(buf), fmt, prec, sign); + int len = mp_format_float(f, buf, sizeof(buf) - 3, fmt, prec, sign); char *s = buf; - if ((flags & PF_FLAG_ADD_PERCENT) && (size_t)(len + 1) < sizeof(buf)) { + if ((flags & PF_FLAG_ALWAYS_DECIMAL) && strchr(buf, '.') == NULL && strchr(buf, 'e') == NULL && strchr(buf, 'n') == NULL) { + buf[len++] = '.'; + buf[len++] = '0'; + buf[len] = '\0'; + } + + if (flags & PF_FLAG_ADD_PERCENT) { buf[len++] = '%'; buf[len] = '\0'; } diff --git a/py/mpprint.h b/py/mpprint.h index 583f00bda..250ea24b8 100644 --- a/py/mpprint.h +++ b/py/mpprint.h @@ -36,6 +36,7 @@ #define PF_FLAG_CENTER_ADJUST (0x020) #define PF_FLAG_ADD_PERCENT (0x040) #define PF_FLAG_SHOW_OCTAL_LETTER (0x080) +#define PF_FLAG_ALWAYS_DECIMAL (0x100) #define PF_FLAG_SEP_POS (9) // must be above all the above PF_FLAGs #if MICROPY_PY_IO && MICROPY_PY_SYS_STDFILES diff --git a/py/objcomplex.c b/py/objcomplex.c index 85b585284..805899edf 100644 --- a/py/objcomplex.c +++ b/py/objcomplex.c @@ -45,29 +45,18 @@ typedef struct _mp_obj_complex_t { static void complex_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_t kind) { (void)kind; mp_obj_complex_t *o = MP_OBJ_TO_PTR(o_in); - #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT - char buf[16]; - #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C - const int precision = 6; - #else - const int precision = 7; - #endif - #else - char buf[32]; - const int precision = 16; - #endif - if (o->real == 0) { - mp_format_float(o->imag, buf, sizeof(buf), 'g', precision, '\0'); - mp_printf(print, "%sj", buf); + const char *suffix; + int flags = 0; + if (o->real != 0) { + mp_print_str(print, "("); + mp_print_float(print, o->real, 'g', 0, '\0', -1, MP_FLOAT_REPR_PREC); + flags = PF_FLAG_SHOW_SIGN; + suffix = "j)"; } else { - mp_format_float(o->real, buf, sizeof(buf), 'g', precision, '\0'); - mp_printf(print, "(%s", buf); - if (o->imag >= 0 || isnan(o->imag)) { - mp_print_str(print, "+"); - } - mp_format_float(o->imag, buf, sizeof(buf), 'g', precision, '\0'); - mp_printf(print, "%sj)", buf); + suffix = "j"; } + mp_print_float(print, o->imag, 'g', flags, '\0', -1, MP_FLOAT_REPR_PREC); + mp_print_str(print, suffix); } static mp_obj_t complex_make_new(const mp_obj_type_t *type_in, size_t n_args, size_t n_kw, const mp_obj_t *args) { diff --git a/py/objfloat.c b/py/objfloat.c index 81b0daa62..125b576fb 100644 --- a/py/objfloat.c +++ b/py/objfloat.c @@ -110,23 +110,7 @@ mp_int_t mp_float_hash(mp_float_t src) { static void float_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_t kind) { (void)kind; mp_float_t o_val = mp_obj_float_get(o_in); - #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT - char buf[16]; - #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C - const int precision = 6; - #else - const int precision = 7; - #endif - #else - char buf[32]; - const int precision = 16; - #endif - mp_format_float(o_val, buf, sizeof(buf), 'g', precision, '\0'); - mp_print_str(print, buf); - if (strchr(buf, '.') == NULL && strchr(buf, 'e') == NULL && strchr(buf, 'n') == NULL) { - // Python floats always have decimal point (unless inf or nan) - mp_print_str(print, ".0"); - } + mp_print_float(print, o_val, 'g', PF_FLAG_ALWAYS_DECIMAL, '\0', -1, MP_FLOAT_REPR_PREC); } static mp_obj_t float_make_new(const mp_obj_type_t *type_in, size_t n_args, size_t n_kw, const mp_obj_t *args) { diff --git a/py/parsenum.c b/py/parsenum.c index 019491b51..e18002306 100644 --- a/py/parsenum.c +++ b/py/parsenum.c @@ -210,7 +210,7 @@ typedef enum { } parse_dec_in_t; // MANTISSA_MAX is used to retain precision while not overflowing mantissa -#define MANTISSA_MAX (sizeof(mp_float_uint_t) == 8 ? 0x1999999999999998ULL : 0x19999998U) +#define MANTISSA_MAX (sizeof(mp_large_float_uint_t) == 8 ? 0x1999999999999998ULL : 0x19999998U) // MAX_EXACT_POWER_OF_5 is the largest value of x so that 5^x can be stored exactly in a float #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT @@ -220,11 +220,45 @@ typedef enum { #endif // Helper to compute `num * (10.0 ** dec_exp)` -mp_float_t mp_decimal_exp(mp_float_t num, int dec_exp) { - - if (dec_exp == 0 || num == MICROPY_FLOAT_CONST(0.0)) { +mp_large_float_t mp_decimal_exp(mp_large_float_t num, int dec_exp) { + if (dec_exp == 0 || num == (mp_large_float_t)(0.0)) { return num; } + + #if MICROPY_FLOAT_FORMAT_IMPL == MICROPY_FLOAT_FORMAT_IMPL_EXACT + + // If the assert below fails, it means you have chosen MICROPY_FLOAT_FORMAT_IMPL_EXACT + // manually on a platform where `larger floats` are not supported, which would + // result in inexact conversions. To fix this issue, change your `mpconfigport.h` + // and select MICROPY_FLOAT_FORMAT_IMPL_APPROX instead + assert(sizeof(mp_large_float_t) > sizeof(mp_float_t)); + + // Perform power using simple multiplications, to avoid + // dependency to higher-precision pow() function + int neg_exp = (dec_exp < 0); + if (neg_exp) { + dec_exp = -dec_exp; + } + mp_large_float_t res = num; + mp_large_float_t expo = (mp_large_float_t)10.0; + while (dec_exp) { + if (dec_exp & 1) { + if (neg_exp) { + res /= expo; + } else { + res *= expo; + } + } + dec_exp >>= 1; + if (dec_exp) { + expo *= expo; + } + } + return res; + + #else + // MICROPY_FLOAT_FORMAT_IMPL != MICROPY_FLOAT_FORMAT_IMPL_EXACT + mp_float_union_t res = {num}; // Multiply first by (2.0 ** dec_exp) via the exponent // - this will ensure that the result of `pow()` is always in mp_float_t range @@ -238,12 +272,14 @@ mp_float_t mp_decimal_exp(mp_float_t num, int dec_exp) { } else { res.f *= MICROPY_FLOAT_C_FUN(pow)(5, dec_exp); } - return (mp_float_t)res.f; + return (mp_large_float_t)res.f; + + #endif } // Break out inner digit accumulation routine to ease trailing zero deferral. -static mp_float_uint_t accept_digit(mp_float_uint_t p_mantissa, unsigned int dig, int *p_exp_extra, int in) { +static mp_large_float_uint_t accept_digit(mp_large_float_uint_t p_mantissa, unsigned int dig, int *p_exp_extra, int in) { // Core routine to ingest an additional digit. if (p_mantissa < MANTISSA_MAX) { // dec_val won't overflow so keep accumulating @@ -267,7 +303,7 @@ const char *mp_parse_float_internal(const char *str, size_t len, mp_float_t *res parse_dec_in_t in = PARSE_DEC_IN_INTG; bool exp_neg = false; - mp_float_uint_t mantissa = 0; + mp_large_float_uint_t mantissa = 0; int exp_val = 0; int exp_extra = 0; int trailing_zeros_intg = 0, trailing_zeros_frac = 0; diff --git a/py/parsenum.h b/py/parsenum.h index a807cb09d..d532cb194 100644 --- a/py/parsenum.h +++ b/py/parsenum.h @@ -35,7 +35,7 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, size_t len, int base, mp_lexer_t *lex); #if MICROPY_PY_BUILTINS_FLOAT -mp_float_t mp_decimal_exp(mp_float_t num, int dec_exp); +mp_large_float_t mp_decimal_exp(mp_large_float_t num, int dec_exp); const char *mp_parse_float_internal(const char *str, size_t len, mp_float_t *res); #endif |