1 files changed, 155 insertions, 179 deletions
diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c
index 39f6efc2412..10c46654e0a 100644
--- a/src/backend/utils/mb/wchar.c
+++ b/src/backend/utils/mb/wchar.c
@@ -1337,85 +1337,78 @@ pg_utf8_islegal(const unsigned char *source, int length)
 #ifndef FRONTEND
 
 /*
- * Generic character increment function.
+ * Generic character incrementer function.
  *
  * Not knowing anything about the properties of the encoding in use, we just
- * keep incrementing the last byte until pg_verifymbstr() likes the result,
- * or we run out of values to try.
- *
- * Like all character-increment functions, we must restore the original input
- * string on failure.
+ * keep incrementing the last byte until we get a validly-encoded result,
+ * or we run out of values to try.  We don't bother to try incrementing
+ * higher-order bytes, so there's no growth in runtime for wider characters.
+ * (If we did try to do that, we'd need to consider the likelihood that 255
+ * is not a valid final byte in the encoding.)
  */
 static bool
 pg_generic_charinc(unsigned char *charptr, int len)
 {
- 	unsigned char *lastchar = (unsigned char *) (charptr + len - 1);
- 	unsigned char savelastchar = *lastchar;
- 	const char *const_charptr = (const char *)charptr;
- 
- 	while (*lastchar < (unsigned char) 255)
- 	{
- 		(*lastchar)++;
- 		if (!pg_verifymbstr(const_charptr, len, true))
- 			continue;
- 		return true;
- 	}
- 
- 	*lastchar = savelastchar;
- 	return false;
+	unsigned char *lastbyte = charptr + len - 1;
+	mbverifier	mbverify;
+
+	/* We can just invoke the character verifier directly. */
+	mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverify;
+
+	while (*lastbyte < (unsigned char) 255)
+	{
+		(*lastbyte)++;
+		if ((*mbverify) (charptr, len) == len)
+			return true;
+	}
+
+	return false;
 }
 
 /*
- * UTF-8 character increment function.
+ * UTF-8 character incrementer function.
  *
  * For a one-byte character less than 0x7F, we just increment the byte.
  *
  * For a multibyte character, every byte but the first must fall between 0x80
  * and 0xBF; and the first byte must be between 0xC0 and 0xF4.  We increment
- * the last byte that's not already at its maximum value, and set any following
- * bytes back to 0x80.  If we can't find a byte that's less than the maximum
- * allowable vale, we simply fail.  We also have some special-case logic to
- * skip regions used for surrogate pair handling, as those should not occur in
- * valid UTF-8.
+ * the last byte that's not already at its maximum value.  If we can't find a
+ * byte that's less than the maximum allowable value, we simply fail.  We also
+ * need some special-case logic to skip regions used for surrogate pair
+ * handling, as those should not occur in valid UTF-8.
  *
- * Like all character-increment functions, we must restore the original input
- * string on failure.
+ * Note that we don't reset lower-order bytes back to their minimums, since
+ * we can't afford to make an exhaustive search (see make_greater_string).
  */
 static bool
 pg_utf8_increment(unsigned char *charptr, int length)
 {
- 	unsigned char a;
- 	unsigned char bak[4];
+	unsigned char a;
 	unsigned char limit;
 
- 	switch (length)
- 	{
- 		default:
- 			/* reject lengths 5 and 6 for now */
- 			return false;
- 		case 4:
-			bak[3] = charptr[3];
- 			a = charptr[3];
- 			if (a < 0xBF)
- 			{
- 				charptr[3]++;
- 				break;
- 			}
- 			charptr[3] = 0x80;
- 			/* FALL THRU */
- 		case 3:
-			bak[2] = charptr[2];
- 			a = charptr[2];
- 			if (a < 0xBF)
- 			{
- 				charptr[2]++;
- 				break;
- 			}
- 			charptr[2] = 0x80;
- 			/* FALL THRU */
- 		case 2:
-			bak[1] = charptr[1];
- 			a = charptr[1];
+	switch (length)
+	{
+		default:
+			/* reject lengths 5 and 6 for now */
+			return false;
+		case 4:
+			a = charptr[3];
+			if (a < 0xBF)
+			{
+				charptr[3]++;
+				break;
+			}
+			/* FALL THRU */
+		case 3:
+			a = charptr[2];
+			if (a < 0xBF)
+			{
+				charptr[2]++;
+				break;
+			}
+			/* FALL THRU */
+		case 2:
+			a = charptr[1];
 			switch (*charptr)
 			{
 				case 0xED:
@@ -1430,147 +1423,126 @@ pg_utf8_increment(unsigned char *charptr, int length)
 			}
 			if (a < limit)
 			{
- 				charptr[1]++;
- 				break;
- 			}
- 			charptr[1] = 0x80;
- 			/* FALL THRU */
- 		case 1:
-			bak[0] = *charptr;
- 			a = *charptr;
- 			if (a == 0x7F || a == 0xDF || a == 0xEF || a == 0xF4)
-			{
-				/* Restore original string. */
-				memcpy(charptr, bak, length);
- 				return false;
- 			}
- 			charptr[0]++;
- 			break;
- 	}
+				charptr[1]++;
+				break;
+			}
+			/* FALL THRU */
+		case 1:
+			a = *charptr;
+			if (a == 0x7F || a == 0xDF || a == 0xEF || a == 0xF4)
+				return false;
+			charptr[0]++;
+			break;
+	}
 
- 	return true;
+	return true;
 }
 
 /*
- * EUC-JP character increment function.
+ * EUC-JP character incrementer function.
  *
- * If the sequence starts with SS2(0x8e), it must be a two-byte sequence
- * representing JIS X 0201 characters with the second byte ranges between
- * 0xa1 and 0xde.  We just increment the last byte if it's less than 0xde,
- * and otherwise rewrite whole the sequence to 0xa1 0xa1.
+ * If the sequence starts with SS2 (0x8e), it must be a two-byte sequence
+ * representing JIS X 0201 characters with the second byte ranging between
+ * 0xa1 and 0xdf.  We just increment the last byte if it's less than 0xdf,
+ * and otherwise rewrite the whole sequence to 0xa1 0xa1.
  *
- * If the sequence starts with SS3(0x8f), it must be a three-byte sequence
- * which the last two bytes ranges between 0xa1 and 0xfe.  The last byte
- * is incremented, carrying overflow to the second-to-last byte.
+ * If the sequence starts with SS3 (0x8f), it must be a three-byte sequence
+ * in which the last two bytes range between 0xa1 and 0xfe.  The last byte
+ * is incremented if possible, otherwise the second-to-last byte.
  *
- * If the sequence starts with the values other than the aboves and its MSB
+ * If the sequence starts with a value other than the above and its MSB
  * is set, it must be a two-byte sequence representing JIS X 0208 characters
- * with both bytes ranges between 0xa1 and 0xfe.  The last byte is incremented,
- * carrying overflow to the second-to-last byte.
+ * with both bytes ranging between 0xa1 and 0xfe.  The last byte is
+ * incremented if possible, otherwise the second-to-last byte.
  *
- * Otherwise the sequence is consists of single byte representing ASCII
- * characters. It is incremented up to 0x7f.
- *    
- * Only three EUC-JP byte sequences shown below - which have no character
- * allocated - make this function to fail in spite of its validity: 0x7f,
- * 0xfe 0xfe, 0x8f 0xfe 0xfe.
+ * Otherwise, the sequence is a single-byte ASCII character. It is
+ * incremented up to 0x7f.
  */
 static bool
 pg_eucjp_increment(unsigned char *charptr, int length)
 {
- 	unsigned char bak[3];
- 	unsigned char c1, c2;
- 	signed int i;
+	unsigned char c1,
+				c2;
+	int			i;
 
- 	c1 = *charptr;
+	c1 = *charptr;
 
- 	switch (c1)
- 	{
- 		case SS2:	/* JIS X 0201 */
- 			if (length != 2)
+	switch (c1)
+	{
+		case SS2:				/* JIS X 0201 */
+			if (length != 2)
 				return false;
 
- 			c2 = charptr[1];
-
- 			if (c2 > 0xde)
- 				charptr[0] = charptr[1] = 0xa1;
- 			else if (c2 < 0xa1)
- 				charptr[1] = 0xa1;
- 			else
- 				charptr[1]++;
+			c2 = charptr[1];
 
- 			break;
+			if (c2 >= 0xdf)
+				charptr[0] = charptr[1] = 0xa1;
+			else if (c2 < 0xa1)
+				charptr[1] = 0xa1;
+			else
+				charptr[1]++;
+			break;
 
- 		case SS3:	/* JIS X 0212 */
- 			if (length != 3)
+		case SS3:				/* JIS X 0212 */
+			if (length != 3)
 				return false;
 
- 			for (i = 2; i > 0; i--)
- 			{
-				bak[i] = charptr[i];
- 				c2 = charptr[i];
- 				if (c2 < 0xa1)
- 				{
- 					charptr[i] = 0xa1;
- 					return true;
- 				}
- 				else if (c2 < 0xfe)
- 				{
- 					charptr[i]++;
- 					break;
- 				}
- 				charptr[i] = 0xa1;
- 			}
-
- 			if (i == 0)	  /* Out of 3-byte code region */
- 			{
-				charptr[1] = bak[1];
-				charptr[2] = bak[2];
- 				return false;
- 			}
- 			break;
-
- 		default:
- 			if (IS_HIGHBIT_SET(c1))	 /* JIS X 0208? */
- 			{
- 				if (length != 2)
+			for (i = 2; i > 0; i--)
+			{
+				c2 = charptr[i];
+				if (c2 < 0xa1)
+				{
+					charptr[i] = 0xa1;
+					return true;
+				}
+				else if (c2 < 0xfe)
+				{
+					charptr[i]++;
+					return true;
+				}
+			}
+
+			/* Out of 3-byte code region */
+			return false;
+
+		default:
+			if (IS_HIGHBIT_SET(c1))		/* JIS X 0208? */
+			{
+				if (length != 2)
+					return false;
+
+				for (i = 1; i >= 0; i--)
+				{
+					c2 = charptr[i];
+					if (c2 < 0xa1)
+					{
+						charptr[i] = 0xa1;
+						return true;
+					}
+					else if (c2 < 0xfe)
+					{
+						charptr[i]++;
+						return true;
+					}
+				}
+
+				/* Out of 2 byte code region */
+				return false;
+			}
+			else
+			{	/* ASCII, single byte */
+				if (c1 > 0x7e)
 					return false;
+				(*charptr)++;
+			}
+			break;
+	}
 
- 				for (i = 1 ; i >= 0 ; i--)	/* i must be signed */
- 				{
-					bak[i] = charptr[i];
- 					c2 = charptr[i];
- 					if (c2 < 0xa1)
- 					{
- 						charptr[i] = 0xa1;
- 						return true;
- 					}
- 					else if (c2 < 0xfe)
- 					{
- 						charptr[i]++;
- 						break;
- 					}
- 					charptr[i] = 0xa1;
- 				}
-
- 				if (i < 0)	/* Out of 2 byte code region */
- 				{
- 					charptr[0] = bak[0];
- 					charptr[1] = bak[1];
- 					return false;
- 				}
- 			}
- 			else
- 			{	/* ASCII, single byte */
- 				if (c1 > 0x7e)
- 					return false;
- 				(*charptr)++;
- 			}
- 	}
-
- 	return true;
+	return true;
 }
-#endif
+
+#endif /* !FRONTEND */
+
 
 /*
  *-------------------------------------------------------------------
@@ -1697,19 +1669,23 @@ pg_database_encoding_max_length(void)
 }
 
 /*
- * give the character incrementer for the encoding for the current database
+ * get the character incrementer for the encoding for the current database
  */
 mbcharacter_incrementer
 pg_database_encoding_character_incrementer(void)
 {
+	/*
+	 * Eventually it might be best to add a field to pg_wchar_table[],
+	 * but for now we just use a switch.
+	 */
 	switch (GetDatabaseEncoding())
 	{
 		case PG_UTF8:
 			return pg_utf8_increment;
-			
+
 		case PG_EUC_JP:
 			return pg_eucjp_increment;
-			
+
 		default:
 			return pg_generic_charinc;
 	}
@@ -1908,4 +1884,4 @@ report_untranslatable_char(int src_encoding, int dest_encoding,
 			 pg_enc2name_tbl[dest_encoding].name)));
 }
 
-#endif
+#endif /* !FRONTEND */