Optimize vector8_has_le() on AArch64.

Presently, the SIMD implementation of this function uses unsigned saturating subtraction to find bytes less than or equal to the given value, which is a workaround for the lack of unsigned comparison instructions on some architectures. However, Neon offers vminvq_u8(), which returns the minimum (unsigned) value in the vector. This commit adds a Neon-specific implementation that uses vminvq_u8() to optimize vector8_has_le() on AArch64.

In passing, adjust the SSE2 implementation to use vector8_min() and vector8_eq() to find values less than or equal to the given value. This was the only use of vector8_ssub(), so it has been removed.

Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Discussion: https://postgr.es/m/aNHDNDSHleq0ogC_%40nathan
author: Nathan Bossart <nathan@postgresql.org> 2025-10-03 14:02:47 -0500
committer: Nathan Bossart <nathan@postgresql.org> 2025-10-03 14:02:47 -0500
commit: f8f4afe751fc75e1d3093fa634934018f440c29c (patch)
tree: f3d706f466366e567d5bb448392b763fefa8aee3 /src
parent: 74b41f5a77b8586356d02227c92e7e47380ac228 (diff)
1 files changed, 10 insertions, 27 deletions
diff --git a/src/include/port/simd.h b/src/include/port/simd.h
index 97c5f353022..5f5737707a8 100644
--- a/src/include/port/simd.h
+++ b/src/include/port/simd.h
@@ -86,7 +86,6 @@ static inline uint32 vector8_highbit_mask(const Vector8 v);
 static inline Vector8 vector8_or(const Vector8 v1, const Vector8 v2);
 #ifndef USE_NO_SIMD
 static inline Vector32 vector32_or(const Vector32 v1, const Vector32 v2);
-static inline Vector8 vector8_ssub(const Vector8 v1, const Vector8 v2);
 #endif
 
 /*
@@ -213,6 +212,10 @@ static inline bool
 vector8_has_le(const Vector8 v, const uint8 c)
 {
 	bool		result = false;
+#ifdef USE_SSE2
+	Vector8		umin;
+	Vector8		cmpe;
+#endif
 
 	/* pre-compute the result for assert checking */
 #ifdef USE_ASSERT_CHECKING
@@ -250,14 +253,12 @@ vector8_has_le(const Vector8 v, const uint8 c)
 			}
 		}
 	}
-#else
-
-	/*
-	 * Use saturating subtraction to find bytes <= c, which will present as
-	 * NUL bytes.  This approach is a workaround for the lack of unsigned
-	 * comparison instructions on some architectures.
-	 */
-	result = vector8_has_zero(vector8_ssub(v, vector8_broadcast(c)));
+#elif defined(USE_SSE2)
+	umin = vector8_min(v, vector8_broadcast(c));
+	cmpe = vector8_eq(umin, v);
+	result = vector8_is_highbit_set(cmpe);
+#elif defined(USE_NEON)
+	result = vminvq_u8(v) <= c;
 #endif
 
 	Assert(assert_result == result);
@@ -359,24 +360,6 @@ vector32_or(const Vector32 v1, const Vector32 v2)
 #endif							/* ! USE_NO_SIMD */
 
 /*
- * Return the result of subtracting the respective elements of the input
- * vectors using saturation (i.e., if the operation would yield a value less
- * than zero, zero is returned instead).  For more information on saturation
- * arithmetic, see https://en.wikipedia.org/wiki/Saturation_arithmetic
- */
-#ifndef USE_NO_SIMD
-static inline Vector8
-vector8_ssub(const Vector8 v1, const Vector8 v2)
-{
-#ifdef USE_SSE2
-	return _mm_subs_epu8(v1, v2);
-#elif defined(USE_NEON)
-	return vqsubq_u8(v1, v2);
-#endif
-}
-#endif							/* ! USE_NO_SIMD */
-
-/*
  * Return a vector with all bits set in each lane where the corresponding
  * lanes in the inputs are equal.
  */
author	Nathan Bossart <nathan@postgresql.org>	2025-10-03 14:02:47 -0500
committer	Nathan Bossart <nathan@postgresql.org>	2025-10-03 14:02:47 -0500
commit	f8f4afe751fc75e1d3093fa634934018f440c29c (patch)
tree	f3d706f466366e567d5bb448392b763fefa8aee3 /src
parent	74b41f5a77b8586356d02227c92e7e47380ac228 (diff)