summaryrefslogtreecommitdiff
path: root/src/include/port/simd.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/port/simd.h')
-rw-r--r--src/include/port/simd.h37
1 files changed, 10 insertions, 27 deletions
diff --git a/src/include/port/simd.h b/src/include/port/simd.h
index 97c5f353022..5f5737707a8 100644
--- a/src/include/port/simd.h
+++ b/src/include/port/simd.h
@@ -86,7 +86,6 @@ static inline uint32 vector8_highbit_mask(const Vector8 v);
static inline Vector8 vector8_or(const Vector8 v1, const Vector8 v2);
#ifndef USE_NO_SIMD
static inline Vector32 vector32_or(const Vector32 v1, const Vector32 v2);
-static inline Vector8 vector8_ssub(const Vector8 v1, const Vector8 v2);
#endif
/*
@@ -213,6 +212,10 @@ static inline bool
vector8_has_le(const Vector8 v, const uint8 c)
{
bool result = false;
+#ifdef USE_SSE2
+ Vector8 umin;
+ Vector8 cmpe;
+#endif
/* pre-compute the result for assert checking */
#ifdef USE_ASSERT_CHECKING
@@ -250,14 +253,12 @@ vector8_has_le(const Vector8 v, const uint8 c)
}
}
}
-#else
-
- /*
- * Use saturating subtraction to find bytes <= c, which will present as
- * NUL bytes. This approach is a workaround for the lack of unsigned
- * comparison instructions on some architectures.
- */
- result = vector8_has_zero(vector8_ssub(v, vector8_broadcast(c)));
+#elif defined(USE_SSE2)
+ umin = vector8_min(v, vector8_broadcast(c));
+ cmpe = vector8_eq(umin, v);
+ result = vector8_is_highbit_set(cmpe);
+#elif defined(USE_NEON)
+ result = vminvq_u8(v) <= c;
#endif
Assert(assert_result == result);
@@ -359,24 +360,6 @@ vector32_or(const Vector32 v1, const Vector32 v2)
#endif /* ! USE_NO_SIMD */
/*
- * Return the result of subtracting the respective elements of the input
- * vectors using saturation (i.e., if the operation would yield a value less
- * than zero, zero is returned instead). For more information on saturation
- * arithmetic, see https://en.wikipedia.org/wiki/Saturation_arithmetic
- */
-#ifndef USE_NO_SIMD
-static inline Vector8
-vector8_ssub(const Vector8 v1, const Vector8 v2)
-{
-#ifdef USE_SSE2
- return _mm_subs_epu8(v1, v2);
-#elif defined(USE_NEON)
- return vqsubq_u8(v1, v2);
-#endif
-}
-#endif /* ! USE_NO_SIMD */
-
-/*
* Return a vector with all bits set in each lane where the corresponding
* lanes in the inputs are equal.
*/