diff options
| -rw-r--r-- | src/backend/utils/adt/numutils.c | 306 | 
1 files changed, 279 insertions, 27 deletions
| diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c index 471fbb7ee63..d07a5602076 100644 --- a/src/backend/utils/adt/numutils.c +++ b/src/backend/utils/adt/numutils.c @@ -97,9 +97,18 @@ static const int8 hexlookup[128] = {  };  /* - * Convert input string to a signed 16 bit integer. + * Convert input string to a signed 16 bit integer.  Input strings may be + * expressed in base-10, hexadecimal, octal, or binary format, all of which + * can be prefixed by an optional sign character, either '+' (the default) or + * '-' for negative numbers.  Hex strings are recognized by the digits being + * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O + * prefix.  The binary representation is recognized by the 0b or 0B prefix.   * - * Allows any number of leading or trailing whitespace characters. + * Allows any number of leading or trailing whitespace characters.  Digits may + * optionally be separated by a single underscore character.  These can only + * come between digits and not before or after the digits.  Underscores have + * no effect on the return value and are supported only to assist in improving + * the human readability of the input strings.   *   * pg_strtoint16() will throw ereport() upon bad input format or overflow;   * while pg_strtoint16_safe() instead returns such complaints in *escontext, @@ -122,9 +131,84 @@ pg_strtoint16_safe(const char *s, Node *escontext)  	const char *firstdigit;  	uint16		tmp = 0;  	bool		neg = false; +	unsigned char digit; + +	/* +	 * The majority of cases are likely to be base-10 digits without any +	 * underscore separator characters.  We'll first try to parse the string +	 * with the assumption that's the case and only fallback on a slower +	 * implementation which handles hex, octal and binary strings and +	 * underscores if the fastpath version cannot parse the string. +	 */ + +	/* leave it up to the slow path to look for leading spaces */ + +	if (*ptr == '-') +	{ +		ptr++; +		neg = true; +	} + +	/* a leading '+' is uncommon so leave that for the slow path */ + +	/* process the first digit */ +	digit = (*ptr - '0'); + +	/* +	 * Exploit unsigned arithmetic to save having to check both the upper and +	 * lower bounds of the digit. +	 */ +	if (likely(digit < 10)) +	{ +		ptr++; +		tmp = digit; +	} +	else +	{ +		/* we need at least one digit */ +		goto slow; +	} + +	/* process remaining digits */ +	for (;;) +	{ +		digit = (*ptr - '0'); + +		if (digit >= 10) +			break; + +		ptr++; + +		if (unlikely(tmp > -(PG_INT16_MIN / 10))) +			goto out_of_range; + +		tmp = tmp * 10 + digit; +	} + +	/* when the string does not end in a digit, let the slow path handle it */ +	if (unlikely(*ptr != '\0')) +		goto slow; + +	if (neg) +	{ +		/* check the negative equivalent will fit without overflowing */ +		if (unlikely(tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1)) +			goto out_of_range; +		return -((int16) tmp); +	} + +	if (unlikely(tmp > PG_INT16_MAX)) +		goto out_of_range; + +	return (int16) tmp; + +slow: +	tmp = 0; +	ptr = s; +	/* no need to reset neg */  	/* skip leading spaces */ -	while (likely(*ptr) && isspace((unsigned char) *ptr)) +	while (isspace((unsigned char) *ptr))  		ptr++;  	/* handle sign */ @@ -141,7 +225,7 @@ pg_strtoint16_safe(const char *s, Node *escontext)  	{  		firstdigit = ptr += 2; -		while (*ptr) +		for (;;)  		{  			if (isxdigit((unsigned char) *ptr))  			{ @@ -165,7 +249,7 @@ pg_strtoint16_safe(const char *s, Node *escontext)  	{  		firstdigit = ptr += 2; -		while (*ptr) +		for (;;)  		{  			if (*ptr >= '0' && *ptr <= '7')  			{ @@ -189,7 +273,7 @@ pg_strtoint16_safe(const char *s, Node *escontext)  	{  		firstdigit = ptr += 2; -		while (*ptr) +		for (;;)  		{  			if (*ptr >= '0' && *ptr <= '1')  			{ @@ -213,9 +297,9 @@ pg_strtoint16_safe(const char *s, Node *escontext)  	{  		firstdigit = ptr; -		while (*ptr) +		for (;;)  		{ -			if (isdigit((unsigned char) *ptr)) +			if (*ptr >= '0' && *ptr <= '9')  			{  				if (unlikely(tmp > -(PG_INT16_MIN / 10)))  					goto out_of_range; @@ -242,7 +326,7 @@ pg_strtoint16_safe(const char *s, Node *escontext)  		goto invalid_syntax;  	/* allow trailing whitespace, but not other trailing chars */ -	while (*ptr != '\0' && isspace((unsigned char) *ptr)) +	while (isspace((unsigned char) *ptr))  		ptr++;  	if (unlikely(*ptr != '\0')) @@ -275,9 +359,18 @@ invalid_syntax:  }  /* - * Convert input string to a signed 32 bit integer. + * Convert input string to a signed 32 bit integer.  Input strings may be + * expressed in base-10, hexadecimal, octal, or binary format, all of which + * can be prefixed by an optional sign character, either '+' (the default) or + * '-' for negative numbers.  Hex strings are recognized by the digits being + * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O + * prefix.  The binary representation is recognized by the 0b or 0B prefix.   * - * Allows any number of leading or trailing whitespace characters. + * Allows any number of leading or trailing whitespace characters.  Digits may + * optionally be separated by a single underscore character.  These can only + * come between digits and not before or after the digits.  Underscores have + * no effect on the return value and are supported only to assist in improving + * the human readability of the input strings.   *   * pg_strtoint32() will throw ereport() upon bad input format or overflow;   * while pg_strtoint32_safe() instead returns such complaints in *escontext, @@ -300,9 +393,84 @@ pg_strtoint32_safe(const char *s, Node *escontext)  	const char *firstdigit;  	uint32		tmp = 0;  	bool		neg = false; +	unsigned char digit; + +	/* +	 * The majority of cases are likely to be base-10 digits without any +	 * underscore separator characters.  We'll first try to parse the string +	 * with the assumption that's the case and only fallback on a slower +	 * implementation which handles hex, octal and binary strings and +	 * underscores if the fastpath version cannot parse the string. +	 */ + +	/* leave it up to the slow path to look for leading spaces */ + +	if (*ptr == '-') +	{ +		ptr++; +		neg = true; +	} + +	/* a leading '+' is uncommon so leave that for the slow path */ + +	/* process the first digit */ +	digit = (*ptr - '0'); + +	/* +	 * Exploit unsigned arithmetic to save having to check both the upper and +	 * lower bounds of the digit. +	 */ +	if (likely(digit < 10)) +	{ +		ptr++; +		tmp = digit; +	} +	else +	{ +		/* we need at least one digit */ +		goto slow; +	} + +	/* process remaining digits */ +	for (;;) +	{ +		digit = (*ptr - '0'); + +		if (digit >= 10) +			break; + +		ptr++; + +		if (unlikely(tmp > -(PG_INT32_MIN / 10))) +			goto out_of_range; + +		tmp = tmp * 10 + digit; +	} + +	/* when the string does not end in a digit, let the slow path handle it */ +	if (unlikely(*ptr != '\0')) +		goto slow; + +	if (neg) +	{ +		/* check the negative equivalent will fit without overflowing */ +		if (unlikely(tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1)) +			goto out_of_range; +		return -((int32) tmp); +	} + +	if (unlikely(tmp > PG_INT32_MAX)) +		goto out_of_range; + +	return (int32) tmp; + +slow: +	tmp = 0; +	ptr = s; +	/* no need to reset neg */  	/* skip leading spaces */ -	while (likely(*ptr) && isspace((unsigned char) *ptr)) +	while (isspace((unsigned char) *ptr))  		ptr++;  	/* handle sign */ @@ -319,7 +487,7 @@ pg_strtoint32_safe(const char *s, Node *escontext)  	{  		firstdigit = ptr += 2; -		while (*ptr) +		for (;;)  		{  			if (isxdigit((unsigned char) *ptr))  			{ @@ -343,7 +511,7 @@ pg_strtoint32_safe(const char *s, Node *escontext)  	{  		firstdigit = ptr += 2; -		while (*ptr) +		for (;;)  		{  			if (*ptr >= '0' && *ptr <= '7')  			{ @@ -367,7 +535,7 @@ pg_strtoint32_safe(const char *s, Node *escontext)  	{  		firstdigit = ptr += 2; -		while (*ptr) +		for (;;)  		{  			if (*ptr >= '0' && *ptr <= '1')  			{ @@ -391,9 +559,9 @@ pg_strtoint32_safe(const char *s, Node *escontext)  	{  		firstdigit = ptr; -		while (*ptr) +		for (;;)  		{ -			if (isdigit((unsigned char) *ptr)) +			if (*ptr >= '0' && *ptr <= '9')  			{  				if (unlikely(tmp > -(PG_INT32_MIN / 10)))  					goto out_of_range; @@ -420,7 +588,7 @@ pg_strtoint32_safe(const char *s, Node *escontext)  		goto invalid_syntax;  	/* allow trailing whitespace, but not other trailing chars */ -	while (*ptr != '\0' && isspace((unsigned char) *ptr)) +	while (isspace((unsigned char) *ptr))  		ptr++;  	if (unlikely(*ptr != '\0')) @@ -453,9 +621,18 @@ invalid_syntax:  }  /* - * Convert input string to a signed 64 bit integer. + * Convert input string to a signed 64 bit integer.  Input strings may be + * expressed in base-10, hexadecimal, octal, or binary format, all of which + * can be prefixed by an optional sign character, either '+' (the default) or + * '-' for negative numbers.  Hex strings are recognized by the digits being + * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O + * prefix.  The binary representation is recognized by the 0b or 0B prefix.   * - * Allows any number of leading or trailing whitespace characters. + * Allows any number of leading or trailing whitespace characters.  Digits may + * optionally be separated by a single underscore character.  These can only + * come between digits and not before or after the digits.  Underscores have + * no effect on the return value and are supported only to assist in improving + * the human readability of the input strings.   *   * pg_strtoint64() will throw ereport() upon bad input format or overflow;   * while pg_strtoint64_safe() instead returns such complaints in *escontext, @@ -478,9 +655,84 @@ pg_strtoint64_safe(const char *s, Node *escontext)  	const char *firstdigit;  	uint64		tmp = 0;  	bool		neg = false; +	unsigned char digit; + +	/* +	 * The majority of cases are likely to be base-10 digits without any +	 * underscore separator characters.  We'll first try to parse the string +	 * with the assumption that's the case and only fallback on a slower +	 * implementation which handles hex, octal and binary strings and +	 * underscores if the fastpath version cannot parse the string. +	 */ + +	/* leave it up to the slow path to look for leading spaces */ + +	if (*ptr == '-') +	{ +		ptr++; +		neg = true; +	} + +	/* a leading '+' is uncommon so leave that for the slow path */ + +	/* process the first digit */ +	digit = (*ptr - '0'); + +	/* +	 * Exploit unsigned arithmetic to save having to check both the upper and +	 * lower bounds of the digit. +	 */ +	if (likely(digit < 10)) +	{ +		ptr++; +		tmp = digit; +	} +	else +	{ +		/* we need at least one digit */ +		goto slow; +	} + +	/* process remaining digits */ +	for (;;) +	{ +		digit = (*ptr - '0'); + +		if (digit >= 10) +			break; + +		ptr++; + +		if (unlikely(tmp > -(PG_INT64_MIN / 10))) +			goto out_of_range; + +		tmp = tmp * 10 + digit; +	} + +	/* when the string does not end in a digit, let the slow path handle it */ +	if (unlikely(*ptr != '\0')) +		goto slow; + +	if (neg) +	{ +		/* check the negative equivalent will fit without overflowing */ +		if (unlikely(tmp > (uint64) (-(PG_INT64_MIN + 1)) + 1)) +			goto out_of_range; +		return -((int64) tmp); +	} + +	if (unlikely(tmp > PG_INT64_MAX)) +		goto out_of_range; + +	return (int64) tmp; + +slow: +	tmp = 0; +	ptr = s; +	/* no need to reset neg */  	/* skip leading spaces */ -	while (*ptr && isspace((unsigned char) *ptr)) +	while (isspace((unsigned char) *ptr))  		ptr++;  	/* handle sign */ @@ -497,7 +749,7 @@ pg_strtoint64_safe(const char *s, Node *escontext)  	{  		firstdigit = ptr += 2; -		while (*ptr) +		for (;;)  		{  			if (isxdigit((unsigned char) *ptr))  			{ @@ -521,7 +773,7 @@ pg_strtoint64_safe(const char *s, Node *escontext)  	{  		firstdigit = ptr += 2; -		while (*ptr) +		for (;;)  		{  			if (*ptr >= '0' && *ptr <= '7')  			{ @@ -545,7 +797,7 @@ pg_strtoint64_safe(const char *s, Node *escontext)  	{  		firstdigit = ptr += 2; -		while (*ptr) +		for (;;)  		{  			if (*ptr >= '0' && *ptr <= '1')  			{ @@ -569,9 +821,9 @@ pg_strtoint64_safe(const char *s, Node *escontext)  	{  		firstdigit = ptr; -		while (*ptr) +		for (;;)  		{ -			if (isdigit((unsigned char) *ptr)) +			if (*ptr >= '0' && *ptr <= '9')  			{  				if (unlikely(tmp > -(PG_INT64_MIN / 10)))  					goto out_of_range; @@ -598,7 +850,7 @@ pg_strtoint64_safe(const char *s, Node *escontext)  		goto invalid_syntax;  	/* allow trailing whitespace, but not other trailing chars */ -	while (*ptr != '\0' && isspace((unsigned char) *ptr)) +	while (isspace((unsigned char) *ptr))  		ptr++;  	if (unlikely(*ptr != '\0')) | 
