summaryrefslogtreecommitdiff
path: root/src/include/port/pg_bitutils.h
diff options
context:
space:
mode:
authorNathan Bossart <nathan@postgresql.org>2025-03-28 16:20:20 -0500
committerNathan Bossart <nathan@postgresql.org>2025-03-28 16:20:20 -0500
commit519338ace410d9b1ffb13176b8802b0307ff0531 (patch)
treecef689c0b92e9678b1b5cf0110b0ba3a37c8ebe0 /src/include/port/pg_bitutils.h
parent3c8e463b0d885e0d976f6a13a1fb78187b25c86f (diff)
Optimize popcount functions with ARM SVE intrinsics.
This commit introduces SVE implementations of pg_popcount{32,64}. Unlike the Neon versions, we need an additional configure-time check to determine if the compiler supports SVE intrinsics, and we need a runtime check to determine if the current CPU supports SVE instructions. Our testing showed that the SVE implementations are much faster for larger inputs and are comparable to the status quo for smaller inputs. Author: "Devanga.Susmitha@fujitsu.com" <Devanga.Susmitha@fujitsu.com> Co-authored-by: "Chiranmoy.Bhattacharya@fujitsu.com" <Chiranmoy.Bhattacharya@fujitsu.com> Co-authored-by: "Malladi, Rama" <ramamalladi@hotmail.com> Reviewed-by: John Naylor <johncnaylorls@gmail.com> Reviewed-by: Kirill Reshke <reshkekirill@gmail.com> Discussion: https://postgr.es/m/010101936e4aaa70-b474ab9e-b9ce-474d-a3ba-a3dc223d295c-000000%40us-west-2.amazonses.com Discussion: https://postgr.es/m/OSZPR01MB84990A9A02A3515C6E85A65B8B2A2%40OSZPR01MB8499.jpnprd01.prod.outlook.com
Diffstat (limited to 'src/include/port/pg_bitutils.h')
-rw-r--r--src/include/port/pg_bitutils.h17
1 files changed, 17 insertions, 0 deletions
diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index a387f77c2c0..c7901bf8ddc 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -324,6 +324,23 @@ extern uint64 pg_popcount_avx512(const char *buf, int bytes);
extern uint64 pg_popcount_masked_avx512(const char *buf, int bytes, bits8 mask);
#endif
+#elif POPCNT_AARCH64
+/* Use the Neon version of pg_popcount{32,64} without function pointer. */
+extern int pg_popcount32(uint32 word);
+extern int pg_popcount64(uint64 word);
+
+/*
+ * We can try to use an SVE-optimized pg_popcount() on some systems For that,
+ * we do use a function pointer.
+ */
+#ifdef USE_SVE_POPCNT_WITH_RUNTIME_CHECK
+extern PGDLLIMPORT uint64 (*pg_popcount_optimized) (const char *buf, int bytes);
+extern PGDLLIMPORT uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask);
+#else
+extern uint64 pg_popcount_optimized(const char *buf, int bytes);
+extern uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask);
+#endif
+
#else
/* Use a portable implementation -- no need for a function pointer. */
extern int pg_popcount32(uint32 word);