From faff8f8e47f18c7d589453e2e0d841d2bd96c1ac Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Sat, 4 Feb 2023 09:48:51 +0000 Subject: Allow underscores in integer and numeric constants. This allows underscores to be used in integer and numeric literals, and their corresponding type input functions, for visual grouping. For example: 1_500_000_000 3.14159_26535_89793 0xffff_ffff 0b_1001_0001 A single underscore is allowed between any 2 digits, or immediately after the base prefix indicator of non-decimal integers, per SQL:202x draft. Peter Eisentraut and Dean Rasheed Discussion: https://postgr.es/m/84aae844-dc55-a4be-86d9-4f0fa405cc97%40enterprisedb.com --- src/backend/utils/adt/numutils.c | 273 ++++++++++++++++++++++++++++++--------- 1 file changed, 213 insertions(+), 60 deletions(-) (limited to 'src/backend/utils/adt/numutils.c') diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c index b0e412e7c67..471fbb7ee63 100644 --- a/src/backend/utils/adt/numutils.c +++ b/src/backend/utils/adt/numutils.c @@ -141,48 +141,99 @@ pg_strtoint16_safe(const char *s, Node *escontext) { firstdigit = ptr += 2; - while (*ptr && isxdigit((unsigned char) *ptr)) + while (*ptr) { - if (unlikely(tmp > -(PG_INT16_MIN / 16))) - goto out_of_range; - - tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++]; + if (isxdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT16_MIN / 16))) + goto out_of_range; + + tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++]; + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || !isxdigit((unsigned char) *ptr)) + goto invalid_syntax; + } + else + break; } } else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O')) { firstdigit = ptr += 2; - while (*ptr && (*ptr >= '0' && *ptr <= '7')) + while (*ptr) { - if (unlikely(tmp > -(PG_INT16_MIN / 8))) - goto out_of_range; - - tmp = tmp * 8 + (*ptr++ - '0'); + if (*ptr >= '0' && *ptr <= '7') + { + if (unlikely(tmp > -(PG_INT16_MIN / 8))) + goto out_of_range; + + tmp = tmp * 8 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || *ptr < '0' || *ptr > '7') + goto invalid_syntax; + } + else + break; } } else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) { firstdigit = ptr += 2; - while (*ptr && (*ptr >= '0' && *ptr <= '1')) + while (*ptr) { - if (unlikely(tmp > -(PG_INT16_MIN / 2))) - goto out_of_range; - - tmp = tmp * 2 + (*ptr++ - '0'); + if (*ptr >= '0' && *ptr <= '1') + { + if (unlikely(tmp > -(PG_INT16_MIN / 2))) + goto out_of_range; + + tmp = tmp * 2 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || *ptr < '0' || *ptr > '1') + goto invalid_syntax; + } + else + break; } } else { firstdigit = ptr; - while (*ptr && isdigit((unsigned char) *ptr)) + while (*ptr) { - if (unlikely(tmp > -(PG_INT16_MIN / 10))) - goto out_of_range; - - tmp = tmp * 10 + (*ptr++ - '0'); + if (isdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT16_MIN / 10))) + goto out_of_range; + + tmp = tmp * 10 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore may not be first */ + if (unlikely(ptr == firstdigit)) + goto invalid_syntax; + /* and it must be followed by more digits */ + ptr++; + if (*ptr == '\0' || !isdigit((unsigned char) *ptr)) + goto invalid_syntax; + } + else + break; } } @@ -268,48 +319,99 @@ pg_strtoint32_safe(const char *s, Node *escontext) { firstdigit = ptr += 2; - while (*ptr && isxdigit((unsigned char) *ptr)) + while (*ptr) { - if (unlikely(tmp > -(PG_INT32_MIN / 16))) - goto out_of_range; - - tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++]; + if (isxdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT32_MIN / 16))) + goto out_of_range; + + tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++]; + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || !isxdigit((unsigned char) *ptr)) + goto invalid_syntax; + } + else + break; } } else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O')) { firstdigit = ptr += 2; - while (*ptr && (*ptr >= '0' && *ptr <= '7')) + while (*ptr) { - if (unlikely(tmp > -(PG_INT32_MIN / 8))) - goto out_of_range; - - tmp = tmp * 8 + (*ptr++ - '0'); + if (*ptr >= '0' && *ptr <= '7') + { + if (unlikely(tmp > -(PG_INT32_MIN / 8))) + goto out_of_range; + + tmp = tmp * 8 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || *ptr < '0' || *ptr > '7') + goto invalid_syntax; + } + else + break; } } else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) { firstdigit = ptr += 2; - while (*ptr && (*ptr >= '0' && *ptr <= '1')) + while (*ptr) { - if (unlikely(tmp > -(PG_INT32_MIN / 2))) - goto out_of_range; - - tmp = tmp * 2 + (*ptr++ - '0'); + if (*ptr >= '0' && *ptr <= '1') + { + if (unlikely(tmp > -(PG_INT32_MIN / 2))) + goto out_of_range; + + tmp = tmp * 2 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || *ptr < '0' || *ptr > '1') + goto invalid_syntax; + } + else + break; } } else { firstdigit = ptr; - while (*ptr && isdigit((unsigned char) *ptr)) + while (*ptr) { - if (unlikely(tmp > -(PG_INT32_MIN / 10))) - goto out_of_range; - - tmp = tmp * 10 + (*ptr++ - '0'); + if (isdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT32_MIN / 10))) + goto out_of_range; + + tmp = tmp * 10 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore may not be first */ + if (unlikely(ptr == firstdigit)) + goto invalid_syntax; + /* and it must be followed by more digits */ + ptr++; + if (*ptr == '\0' || !isdigit((unsigned char) *ptr)) + goto invalid_syntax; + } + else + break; } } @@ -395,48 +497,99 @@ pg_strtoint64_safe(const char *s, Node *escontext) { firstdigit = ptr += 2; - while (*ptr && isxdigit((unsigned char) *ptr)) + while (*ptr) { - if (unlikely(tmp > -(PG_INT64_MIN / 16))) - goto out_of_range; - - tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++]; + if (isxdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT64_MIN / 16))) + goto out_of_range; + + tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++]; + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || !isxdigit((unsigned char) *ptr)) + goto invalid_syntax; + } + else + break; } } else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O')) { firstdigit = ptr += 2; - while (*ptr && (*ptr >= '0' && *ptr <= '7')) + while (*ptr) { - if (unlikely(tmp > -(PG_INT64_MIN / 8))) - goto out_of_range; - - tmp = tmp * 8 + (*ptr++ - '0'); + if (*ptr >= '0' && *ptr <= '7') + { + if (unlikely(tmp > -(PG_INT64_MIN / 8))) + goto out_of_range; + + tmp = tmp * 8 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || *ptr < '0' || *ptr > '7') + goto invalid_syntax; + } + else + break; } } else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) { firstdigit = ptr += 2; - while (*ptr && (*ptr >= '0' && *ptr <= '1')) + while (*ptr) { - if (unlikely(tmp > -(PG_INT64_MIN / 2))) - goto out_of_range; - - tmp = tmp * 2 + (*ptr++ - '0'); + if (*ptr >= '0' && *ptr <= '1') + { + if (unlikely(tmp > -(PG_INT64_MIN / 2))) + goto out_of_range; + + tmp = tmp * 2 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore must be followed by more digits */ + ptr++; + if (*ptr == '\0' || *ptr < '0' || *ptr > '1') + goto invalid_syntax; + } + else + break; } } else { firstdigit = ptr; - while (*ptr && isdigit((unsigned char) *ptr)) + while (*ptr) { - if (unlikely(tmp > -(PG_INT64_MIN / 10))) - goto out_of_range; - - tmp = tmp * 10 + (*ptr++ - '0'); + if (isdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT64_MIN / 10))) + goto out_of_range; + + tmp = tmp * 10 + (*ptr++ - '0'); + } + else if (*ptr == '_') + { + /* underscore may not be first */ + if (unlikely(ptr == firstdigit)) + goto invalid_syntax; + /* and it must be followed by more digits */ + ptr++; + if (*ptr == '\0' || !isdigit((unsigned char) *ptr)) + goto invalid_syntax; + } + else + break; } } -- cgit v1.2.3